Moved to writing output solely to CSV. Added some columns and drastically improved the logic behind the price-after-promotion column.

This commit is contained in:
KorenLazar
2021-02-25 20:54:44 +02:00
parent 8aa33cbcda
commit c86fc7c1ab
3 changed files with 151 additions and 67 deletions

14
main.py
View File

@@ -1,5 +1,4 @@
from argparse import ArgumentParser from argparse import ArgumentParser
import logging
from pathlib import Path from pathlib import Path
from promotion import main_latest_promos, get_promos_by_name from promotion import main_latest_promos, get_promos_by_name
@@ -38,7 +37,7 @@ chain_dict = {repr(chain): chain() if callable(chain) else None for chain in Sup
if __name__ == '__main__': if __name__ == '__main__':
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument('--promos', parser.add_argument('--promos',
help="generates a promos_{store_id}.log file with all the promotions in the requested store", help="generates a CSV file with all the promotions in the requested store",
metavar='store_id', metavar='store_id',
nargs=1, nargs=1,
type=SupermarketChain.store_id_type, type=SupermarketChain.store_id_type,
@@ -55,7 +54,7 @@ if __name__ == '__main__':
nargs=2, nargs=2,
) )
parser.add_argument('--find_store_id', parser.add_argument('--find_store_id',
help='prints all Shufersal stores within a city. Input should be a name of a city in Hebrew', help='prints all Shufersal stores in a given city. Input should be a city name in Hebrew',
metavar='city', metavar='city',
nargs=1, nargs=1,
) )
@@ -84,14 +83,7 @@ if __name__ == '__main__':
chain: SupermarketChain = chain_dict[args.chain] chain: SupermarketChain = chain_dict[args.chain]
if args.promos: if args.promos:
arg_store_id = int(args.promos[0]) arg_store_id = int(args.promos[0])
main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.FileHandler(filename=f'{RESULTS_DIRNAME}/{args.chain}_promos_{arg_store_id}.log', mode='w',
encoding='utf-8')
logger.addHandler(handler)
main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain,
load_promos=args.load_promos)
elif args.price: elif args.price:
get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1]) get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1])

View File

@@ -12,6 +12,8 @@ from utils import (
) )
from supermarket_chain import SupermarketChain from supermarket_chain import SupermarketChain
INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1
PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי'] PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי']
@@ -28,38 +30,55 @@ class ClubID(Enum):
אחר = 3 אחר = 3
class RewardType(Enum):
    """
    The kinds of reward a promotion can grant.

    The integer values are the codes read from a promotion's ``RewardType`` tag; the
    codes 4 and 5 have no member here.
    """
    NO_PROMOTION = 0

    # Plain discounts
    DISCOUNT_IN_AMOUNT = 1
    DISCOUNT_IN_PERCENTAGE = 2
    DISCOUNT_BY_THRESHOLD = 3

    # Discounts that depend on buying other items or several instances
    DISCOUNT_IN_ITEM_IF_PURCHASING_OTHER_ITEMS = 6
    SECOND_OR_THIRD_INSTANCE_FOR_FREE = 7
    SECOND_INSTANCE_SAME_DISCOUNT = 8
    SECOND_INSTANCE_DIFFERENT_DISCOUNT = 9
    DISCOUNT_IN_MULTIPLE_INSTANCES = 10
class Promotion:
    """
    A class of a promotion in Shufersal.
    It contains only part of the available information in Shufersal's data.

    Two promotions compare equal when their ``promotion_id`` values are equal.
    Because ``__eq__`` is overridden without ``__hash__``, instances are unhashable.
    """

    def __init__(self, content: str, start_date: datetime, end_date: datetime, update_date: datetime, item: List[Item],
                 promo_func: callable, club_id: ClubID, promotion_id: float, max_qty: int,
                 allow_multiple_discounts: bool, reward_type: RewardType):
        """
        :param content: The textual description of the promotion
        :param start_date: When the promotion starts
        :param end_date: When the promotion ends
        :param update_date: When the promotion was last updated
        :param item: The list of items the promotion applies to (stored as ``self.items``)
        :param promo_func: A callable taking an item and returning its price after the promotion
        :param club_id: The club the promotion belongs to
        :param promotion_id: The identifier used for equality between promotions.
            NOTE(review): annotated as float, but the caller visible in this file passes
            the raw result of ``promo.find(...)`` - confirm the intended type.
        :param max_qty: The maximal quantity value read for the promotion
        :param allow_multiple_discounts: Whether multiple discounts are allowed
        :param reward_type: The RewardType of the promotion
        """
        self.content: str = content
        self.start_date: datetime = start_date
        self.end_date: datetime = end_date
        self.update_date: datetime = update_date
        self.promo_func: callable = promo_func
        self.items: List[Item] = item
        self.club_id: ClubID = club_id
        self.max_qty: int = max_qty
        self.allow_multiple_discounts = allow_multiple_discounts
        self.reward_type = reward_type
        self.promotion_id = promotion_id

    def repr_ltr(self):
        """
        Return a multi-line description: content, date range, update time and the items list.
        """
        title = self.content
        dates_range = f"Between {self.start_date} and {self.end_date}"
        update_line = f"Updated at {self.update_date}"
        return '\n'.join([title, dates_range, update_line, str(self.items)]) + '\n'

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types instead of
        # failing with AttributeError on a missing ``promotion_id``.
        if not isinstance(other, Promotion):
            return NotImplemented
        return self.promotion_id == other.promotion_id
def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -> None: def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -> None:
@@ -78,11 +97,14 @@ def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -
'מחיר אחרי מבצע', 'מחיר אחרי מבצע',
'אחוז הנחה', 'אחוז הנחה',
'סוג מבצע', 'סוג מבצע',
'כמות מקסימלית',
'כפל הנחות',
'זמן תחילת מבצע', 'זמן תחילת מבצע',
'זמן סיום מבצע', 'זמן סיום מבצע',
'זמן עדכון אחרון', 'זמן עדכון אחרון',
'יצרן', 'יצרן',
'ברקוד פריט' 'ברקוד פריט',
'סוג מבצע',
]) ])
for promo in promotions: for promo in promotions:
@@ -90,15 +112,17 @@ def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -
[[promo.content, [[promo.content,
item.name, item.name,
item.price, item.price,
f'{promo.price_after_promo:.3f}', f'{promo.promo_func(item):.3f}',
f'{(float(item.price) - promo.price_after_promo) / float(item.price):.3%}', f'{(item.price - promo.promo_func(item)) / item.price:.3%}',
promo.club_id.name.replace('_', ' '), promo.club_id.name.replace('_', ' '),
promo.max_qty,
promo.allow_multiple_discounts,
promo.start_date, promo.start_date,
promo.end_date, promo.end_date,
promo.update_date, promo.update_date,
item.manufacturer, item.manufacturer,
item.code] item.code,
for item in promo.items] promo.reward_type.value] for item in promo.items]
) )
@@ -118,63 +142,131 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
promo_objs = list() promo_objs = list()
for promo in bs_promos.find_all(chain.promotion_tag_name): for promo in bs_promos.find_all(chain.promotion_tag_name):
discounted_price = promo.find('DiscountedPrice') promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE))
min_qty = promo.find('MinQty') if promo_objs and promo_objs[-1].promotion_id == promotion_id:
# if int(promo.find('IsGiftItem').text): promo_objs[-1].items.extend(chain.get_items(promo, items_dict))
club_id = ClubID(int(promo.find(re.compile(r'ClubI[d|D]')).text)) continue
if discounted_price and min_qty:
price_after_promo = float(discounted_price.text) / float(min_qty.text) promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
else: if promo_inst:
price_after_promo = -1 promo_objs.append(promo_inst)
promo = Promotion(
content=promo.find('PromotionDescription').text,
start_date=datetime.strptime(
promo.find('PromotionStartDate').text + ' ' + promo.find('PromotionStartHour').text,
chain.date_hour_format),
end_date=datetime.strptime(promo.find(
'PromotionEndDate').text + ' ' + promo.find('PromotionEndHour').text, chain.date_hour_format),
update_date=datetime.strptime(promo.find(chain.promotion_update_tag_name).text,
chain.update_date_format),
items=chain.get_items(promo, items_dict),
price_after_promo=price_after_promo,
club_id=club_id,
)
if is_valid_promo(promo):
if promo_objs and promo_objs[-1] == promo: # Merge equal promos
promo_objs[-1].items.extend(promo.items)
else:
promo_objs.append(promo)
return promo_objs return promo_objs
def create_new_promo_instance(chain, items_dict, promo, promotion_id):
    """
    This function builds a Promotion object out of a single promotion tag.

    :param chain: The supermarket chain; supplies the date formats, the update tag name and get_items
    :param items_dict: Passed through to chain.get_items
    :param promo: The promotion tag to read the fields from
    :param promotion_id: The identifier stored on the created Promotion
    :return: A Promotion object, or None if is_valid_promo rejects the promotion
    """
    def parse_date_hour(date_tag_name, hour_tag_name):
        joined = promo.find(date_tag_name).text + ' ' + promo.find(hour_tag_name).text
        return datetime.strptime(joined, chain.date_hour_format)

    reward_type = RewardType(int(promo.find("RewardType").text))
    discounted_price = get_discounted_price(promo)
    description = promo.find('PromotionDescription').text

    # Without a discounted price the rate is read as a percentage as well.
    percentage_based = reward_type == RewardType.DISCOUNT_IN_PERCENTAGE or not discounted_price
    discount_rate = get_discount_rate(promo, percentage_based)
    min_qty = get_int_from_tag(promo, 'MinQty')
    max_qty = get_int_from_tag(promo, 'MaxQty')

    price_function = determine_promo_function(
        reward_type=reward_type,
        remark=promo.find("Remark"),
        promo_description=description,
        discounted_price=discounted_price,
        discount_rate=discount_rate,
        min_qty=min_qty,
    )

    starts_at = parse_date_hour('PromotionStartDate', 'PromotionStartHour')
    ends_at = parse_date_hour('PromotionEndDate', 'PromotionEndHour')
    updated_at = datetime.strptime(promo.find(chain.promotion_update_tag_name).text, chain.update_date_format)
    club_id = ClubID(int(promo.find(re.compile('ClubId', re.IGNORECASE)).text))
    allows_multiple = bool(int(promo.find('AllowMultipleDiscounts').text))
    promo_items = chain.get_items(promo, items_dict)

    if not is_valid_promo(start_time=starts_at, end_time=ends_at, description=description):
        return None

    return Promotion(
        content=description,
        start_date=starts_at,
        end_date=ends_at,
        update_date=updated_at,
        item=promo_items,
        promo_func=price_function,
        club_id=club_id,
        promotion_id=promotion_id,
        max_qty=max_qty,
        allow_multiple_discounts=allows_multiple,
        reward_type=reward_type,
    )
def get_int_from_tag(tag, int_tag):
    """
    This function reads the text of a sub-tag as an integer.

    :param tag: An object whose find method looks up the sub-tag
    :param int_tag: The name of the sub-tag to look up
    :return: The sub-tag's text converted through float to int, or 0 if the lookup result is falsy
    """
    found = tag.find(int_tag)
    if not found:
        return 0
    return int(float(found.text))
def get_discounted_price(promo):
    """
    This function reads the 'DiscountedPrice' sub-tag of a promotion.

    :param promo: An object whose find method looks up the sub-tag
    :return: The sub-tag's text as a float, or None if the lookup result is falsy
    """
    price_tag = promo.find('DiscountedPrice')
    return float(price_tag.text) if price_tag else None
def get_discount_rate(promo, discount_in_percentage):
    """
    This function reads the "DiscountRate" sub-tag of a promotion.

    :param promo: An object whose find method looks up the sub-tag
    :param discount_in_percentage: Whether to interpret the rate as a fraction
    :return: None if the lookup result is falsy. Otherwise, for a percentage discount, the integer
             text scaled by 10 to the power of minus its length (e.g. "2000" becomes 2000 * 10 ** -4);
             for any other discount, the text as a float.
    """
    rate_tag = promo.find("DiscountRate")
    if not rate_tag:
        return None

    raw_rate = rate_tag.text
    if not discount_in_percentage:
        return float(raw_rate)
    return int(raw_rate) * (10 ** -(len(str(raw_rate))))
def determine_promo_function(reward_type, remark, promo_description, discounted_price, discount_rate, min_qty):
    """
    This function chooses the function that computes an item's price after a promotion.

    Whenever the data a formula needs is missing (a None discount rate or discounted price, or a
    zero minimal quantity that would be used as a divisor), the fallback function is returned
    instead of a function that would raise when called.

    :param reward_type: The RewardType of the promotion
    :param remark: The promotion's "Remark" lookup result (may be None); its text is searched
    :param promo_description: The textual description of the promotion
    :param discounted_price: The discounted price as a float, or None
    :param discount_rate: The discount rate as a number, or None
    :param min_qty: The minimal quantity as an int (0 when unavailable)
    :return: A callable taking an item (with a numeric price attribute) and returning the price after
             the promotion, or INVALID_OR_UNKNOWN_PROMOTION_FUNCTION when it cannot be determined
    """
    def unknown(item):
        return INVALID_OR_UNKNOWN_PROMOTION_FUNCTION

    has_rate = discount_rate is not None
    has_price = discounted_price is not None

    if reward_type == RewardType.SECOND_INSTANCE_DIFFERENT_DISCOUNT:
        if not min_qty:
            return unknown  # min_qty is the divisor in both formulas below
        if not discounted_price:
            if not has_rate:
                return unknown
            return lambda item: item.price * (1 - (discount_rate / min_qty))
        return lambda item: (item.price * (min_qty - 1) + discounted_price) / min_qty

    if reward_type == RewardType.DISCOUNT_IN_ITEM_IF_PURCHASING_OTHER_ITEMS:
        return lambda item: item.price

    if reward_type == RewardType.SECOND_OR_THIRD_INSTANCE_FOR_FREE:
        if not min_qty:
            return unknown
        return lambda item: item.price * (1 - (1 / min_qty))

    if reward_type == RewardType.DISCOUNT_IN_PERCENTAGE:
        if not has_rate:
            return unknown
        # The rate is halved when the description contains this phrase.
        divisor = 2 if "השני ב" in promo_description else 1
        return lambda item: item.price * (1 - discount_rate / divisor)

    if reward_type == RewardType.SECOND_INSTANCE_SAME_DISCOUNT:
        if not has_price:
            return unknown
        if "השני ב" in promo_description:
            return lambda item: (item.price + discounted_price) / 2
        if not min_qty:
            return unknown
        return lambda item: discounted_price / min_qty

    if reward_type == RewardType.DISCOUNT_BY_THRESHOLD:
        if not has_rate:
            return unknown
        return lambda item: item.price - discount_rate

    if remark and 'מחיר המבצע הינו המחיר לק"ג' in remark.text:
        # The discounted price is used as-is; without one the price is unknown.
        return (lambda item: discounted_price) if has_price else unknown

    if discounted_price and min_qty:
        return lambda item: discounted_price / min_qty

    return unknown
def is_valid_promo(start_time: datetime, end_time: datetime, description):
    """
    This function returns whether a promotion is currently valid.

    :param start_time: The time the promotion starts
    :param end_time: The time the promotion ends
    :param description: The textual description of the promotion
    :return: True if the current time is between start_time and end_time (inclusive) and the
             description contains none of the entries in PRODUCTS_TO_IGNORE
    """
    now: datetime = datetime.now()
    still_running: bool = end_time >= now
    already_started: bool = start_time <= now
    mentions_ignored_product: bool = any(ignored in description for ignored in PRODUCTS_TO_IGNORE)
    is_active: bool = still_running and already_started
    return is_active and not mentions_ignored_product
def main_latest_promos(store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool):
    """
    This function writes the available promotions in a store with a given id to a CSV file.
    The promotions are ordered from the most recent to the least recent, where recency is the later
    of the update date and the start date; ties are ordered by start time minus end time.

    :param chain: The requested supermarket chain
    :param store_id: A given store id
    :param load_xml: A boolean representing whether to load an existing prices xml file
    :param load_promos: A boolean representing whether to load an existing promos xml file
    """
    def recency_key(promo):
        latest_day = max(promo.update_date.date(), promo.start_date.date())
        return latest_day, promo.start_date - promo.end_date

    promotions = get_available_promos(chain, store_id, load_xml, load_promos)
    promotions.sort(key=recency_key, reverse=True)
    output_path = f'results/{repr(type(chain))}_promos_{store_id}.csv'
    write_promotions_to_csv(promotions, output_path)

View File

@@ -122,7 +122,7 @@ class SupermarketChain(object, metaclass=Meta):
""" """
return Item( return Item(
name=item.find(re.compile(r'ItemN[a]?m[e]?')).text, name=item.find(re.compile(r'ItemN[a]?m[e]?')).text,
price=item.find('ItemPrice').text, price=float(item.find('ItemPrice').text),
manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text, manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text,
code=item.find('ItemCode').text code=item.find('ItemCode').text
) )