diff --git a/main.py b/main.py
index 6b974ea..5f2e93c 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,4 @@
 from argparse import ArgumentParser
-import logging
 from pathlib import Path
 
 from promotion import main_latest_promos, get_promos_by_name
@@ -38,7 +37,7 @@ chain_dict = {repr(chain): chain() if callable(chain) else None for chain in Sup
 if __name__ == '__main__':
     parser = ArgumentParser()
     parser.add_argument('--promos',
-                        help="generates a promos_{store_id}.log file with all the promotions in the requested store",
+                        help="generates a CSV file with all the promotions in the requested store",
                         metavar='store_id',
                         nargs=1,
                         type=SupermarketChain.store_id_type,
@@ -55,7 +54,7 @@ if __name__ == '__main__':
                         nargs=2,
                         )
     parser.add_argument('--find_store_id',
-                        help='prints all Shufersal stores within a city. Input should be a name of a city in Hebrew',
+                        help='prints all Shufersal stores in a given city. Input should be a city name in Hebrew',
                         metavar='city',
                         nargs=1,
                         )
@@ -84,14 +83,7 @@ if __name__ == '__main__':
     chain: SupermarketChain = chain_dict[args.chain]
     if args.promos:
         arg_store_id = int(args.promos[0])
-
-        logger = logging.getLogger()
-        logger.setLevel(logging.INFO)
-        handler = logging.FileHandler(filename=f'{RESULTS_DIRNAME}/{args.chain}_promos_{arg_store_id}.log', mode='w',
-                                      encoding='utf-8')
-        logger.addHandler(handler)
-        main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain,
-                           load_promos=args.load_promos)
+        main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos)
 
     elif args.price:
         get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1])
diff --git a/promotion.py b/promotion.py
index ee4b177..dbb04e8 100644
--- a/promotion.py
+++ b/promotion.py
@@ -12,6 +12,8 @@ from utils import (
 )
 from supermarket_chain import SupermarketChain
 
+INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1
+
 PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי']
 
 
@@ -28,38 +30,50 @@ class ClubID(Enum):
     אחר = 3
 
 
+class RewardType(Enum):
+    """The reward type codes of a promotion (parsed from its RewardType tag)."""
+    NO_PROMOTION = 0
+    DISCOUNT_IN_AMOUNT = 1
+    DISCOUNT_IN_PERCENTAGE = 2
+    DISCOUNT_BY_THRESHOLD = 3
+    DISCOUNT_IN_ITEM_IF_PURCHASING_OTHER_ITEMS = 6
+    SECOND_OR_THIRD_INSTANCE_FOR_FREE = 7
+    SECOND_INSTANCE_SAME_DISCOUNT = 8
+    SECOND_INSTANCE_DIFFERENT_DISCOUNT = 9
+    DISCOUNT_IN_MULTIPLE_INSTANCES = 10
+
+
 class Promotion:
     """
     A class of a promotion in Shufersal.
     It contains only part of the available information in Shufersal's data.
     """
 
-    def __init__(self, content: str, start_date: datetime, end_date: datetime, update_date: datetime, items: List[Item],
-                 price_after_promo, club_id):
+    def __init__(self, content: str, start_date: datetime, end_date: datetime, update_date: datetime, item: List[Item],
+                 promo_func: callable, club_id: ClubID, promotion_id: str, max_qty: int,
+                 allow_multiple_discounts: bool, reward_type: RewardType):
         self.content: str = content
         self.start_date: datetime = start_date
         self.end_date: datetime = end_date
         self.update_date: datetime = update_date
-        self.price_after_promo = price_after_promo
-        self.items: List[Item] = items
-        self.club_id = club_id
+        self.promo_func: callable = promo_func
+        self.items: List[Item] = item
+        self.club_id: ClubID = club_id
+        self.max_qty: int = max_qty
+        self.allow_multiple_discounts = allow_multiple_discounts
+        self.reward_type = reward_type
+        self.promotion_id = promotion_id
 
-    def __repr__(self):
-        title = self.content
-        dates_range = f"Between {self.start_date} and {self.end_date}"
-        update_line = f"Updated at {self.update_date}"
-        items = '\n'.join(str(item) for item in self.items)
-        return '\n'.join([title, dates_range, update_line, items]) + '\n'
-
     def repr_ltr(self):
         title = self.content
         dates_range = f"Between {self.start_date} and {self.end_date}"
         update_line = f"Updated at {self.update_date}"
-        items = '\n'.join(str(item) for item in self.items)
-        return '\n'.join([title, dates_range, update_line, items]) + '\n'
+        return '\n'.join([title, dates_range, update_line, str(self.items)]) + '\n'
 
     def __eq__(self, other):
-        return self.content == other.content and self.start_date == other.start_date and self.end_date == other.end_date
+        if not isinstance(other, Promotion):
+            return NotImplemented
+        return self.promotion_id == other.promotion_id
 
 
 def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -> None:
@@ -78,11 +92,14 @@ def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -
             'מחיר אחרי מבצע',
             'אחוז הנחה',
             'סוג מבצע',
+            'כמות מקסימלית',
+            'כפל הנחות',
             'זמן תחילת מבצע',
             'זמן סיום מבצע',
             'זמן עדכון אחרון',
             'יצרן',
-            'ברקוד פריט'
+            'ברקוד פריט',
+            'סוג תגמול',
         ])
 
         for promo in promotions:
@@ -90,15 +107,17 @@ def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -
                 [[promo.content,
                   item.name,
                   item.price,
-                  f'{promo.price_after_promo:.3f}',
-                  f'{(float(item.price) - promo.price_after_promo) / float(item.price):.3%}',
+                  f'{promo.promo_func(item):.3f}',
+                  f'{(item.price - promo.promo_func(item)) / item.price:.3%}',
                   promo.club_id.name.replace('_', ' '),
+                  promo.max_qty,
+                  promo.allow_multiple_discounts,
                   promo.start_date,
                   promo.end_date,
                   promo.update_date,
                   item.manufacturer,
-                  item.code]
-                 for item in promo.items]
+                  item.code,
+                  promo.reward_type.value] for item in promo.items]
             )
 
 
@@ -118,63 +137,156 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
 
     promo_objs = list()
     for promo in bs_promos.find_all(chain.promotion_tag_name):
-        discounted_price = promo.find('DiscountedPrice')
-        min_qty = promo.find('MinQty')
-        # if int(promo.find('IsGiftItem').text):
-        club_id = ClubID(int(promo.find(re.compile(r'ClubI[d|D]')).text))
-        if discounted_price and min_qty:
-            price_after_promo = float(discounted_price.text) / float(min_qty.text)
-        else:
-            price_after_promo = -1
-        promo = Promotion(
-            content=promo.find('PromotionDescription').text,
-            start_date=datetime.strptime(
-                promo.find('PromotionStartDate').text + ' ' + promo.find('PromotionStartHour').text,
-                chain.date_hour_format),
-            end_date=datetime.strptime(promo.find(
-                'PromotionEndDate').text + ' ' + promo.find('PromotionEndHour').text, chain.date_hour_format),
-            update_date=datetime.strptime(promo.find(chain.promotion_update_tag_name).text,
-                                          chain.update_date_format),
-            items=chain.get_items(promo, items_dict),
-            price_after_promo=price_after_promo,
-            club_id=club_id,
-        )
+        promotion_id_tag = promo.find(re.compile('PromotionId', re.IGNORECASE))
+        promotion_id = promotion_id_tag.text if promotion_id_tag is not None else None
+        # Consecutive tags with the same promotion id belong to one promotion, so their items are merged.
+        if promotion_id is not None and promo_objs and promo_objs[-1].promotion_id == promotion_id:
+            promo_objs[-1].items.extend(chain.get_items(promo, items_dict))
+            continue
+
+        promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
+        if promo_inst:
+            promo_objs.append(promo_inst)
 
-        if is_valid_promo(promo):
-            if promo_objs and promo_objs[-1] == promo:  # Merge equal promos
-                promo_objs[-1].items.extend(promo.items)
-            else:
-                promo_objs.append(promo)
     return promo_objs
 
 
-def is_valid_promo(promo: Promotion):
+def create_new_promo_instance(chain, items_dict, promo, promotion_id):
+    """
+    This function builds a Promotion object out of a promotion tag.
+
+    :param chain: The supermarket chain the promotion belongs to
+    :param items_dict: The items dictionary handed to chain.get_items
+    :param promo: A promotion tag from the promotions XML
+    :param promotion_id: The id of the promotion
+    :return: A Promotion object, or None if the promotion is not currently valid
+    """
+    reward_type = RewardType(int(promo.find("RewardType").text))
+    discounted_price = get_discounted_price(promo)
+    promo_description = promo.find('PromotionDescription').text
+    is_discount_in_percentage = reward_type == RewardType.DISCOUNT_IN_PERCENTAGE or not discounted_price
+    discount_rate = get_discount_rate(promo, is_discount_in_percentage)
+    min_qty = get_int_from_tag(promo, 'MinQty')
+    max_qty = get_int_from_tag(promo, 'MaxQty')
+    remark = promo.find("Remark")
+    promo_func = determine_promo_function(
+        reward_type=reward_type,
+        remark=remark,
+        promo_description=promo_description,
+        discounted_price=discounted_price,
+        discount_rate=discount_rate,
+        min_qty=min_qty,
+    )
+    promo_start_time = datetime.strptime(promo.find('PromotionStartDate').text + ' ' +
+                                         promo.find('PromotionStartHour').text,
+                                         chain.date_hour_format)
+    promo_end_time = datetime.strptime(promo.find('PromotionEndDate').text + ' ' +
+                                       promo.find('PromotionEndHour').text,
+                                       chain.date_hour_format)
+    promo_update_time = datetime.strptime(promo.find(chain.promotion_update_tag_name).text,
+                                          chain.update_date_format)
+    club_id = ClubID(int(promo.find(re.compile('ClubId', re.IGNORECASE)).text))
+    multiple_discounts_allowed = bool(int(promo.find('AllowMultipleDiscounts').text))
+    items = chain.get_items(promo, items_dict)
+
+    if is_valid_promo(start_time=promo_start_time, end_time=promo_end_time, description=promo_description):
+        return Promotion(content=promo_description, start_date=promo_start_time, end_date=promo_end_time,
+                         update_date=promo_update_time, item=items, promo_func=promo_func,
+                         club_id=club_id, promotion_id=promotion_id, max_qty=max_qty,
+                         allow_multiple_discounts=multiple_discounts_allowed, reward_type=reward_type)
+
+
+def get_int_from_tag(tag, int_tag):
+    """
+    This function returns the integer value of a sub-tag of a given tag, or 0 if no such sub-tag is found.
+    """
+    content = tag.find(int_tag)
+    return int(float(content.text)) if content else 0
+
+
+def get_discounted_price(promo):
+    """
+    This function returns the DiscountedPrice of a promotion tag as a float, or None if it has none.
+    """
+    discounted_price = promo.find('DiscountedPrice')
+    if discounted_price:
+        return float(discounted_price.text)
+
+
+def get_discount_rate(promo, discount_in_percentage):
+    """
+    This function returns the DiscountRate of a promotion tag, or None if it has none.
+    A percentage rate is scaled into a fraction by the number of characters of its text (e.g. '25' -> 0.25).
+    """
+    discount_rate = promo.find("DiscountRate")
+    if discount_rate:
+        if discount_in_percentage:
+            return int(discount_rate.text) * (10 ** -(len(str(discount_rate.text))))
+        return float(discount_rate.text)
+
+
+def determine_promo_function(reward_type, remark, promo_description, discounted_price, discount_rate, min_qty):
+    """
+    This function returns a function mapping an item to its price after the promotion.
+    Unrecognized promotions get a function returning INVALID_OR_UNKNOWN_PROMOTION_FUNCTION.
+    """
+    if reward_type == RewardType.SECOND_INSTANCE_DIFFERENT_DISCOUNT:
+        if not discounted_price:
+            return lambda item: item.price * (1 - (discount_rate / min_qty))
+        else:
+            return lambda item: (item.price * (min_qty - 1) + discounted_price) / min_qty
+
+    elif reward_type == RewardType.DISCOUNT_IN_ITEM_IF_PURCHASING_OTHER_ITEMS:
+        return lambda item: item.price
+
+    elif reward_type == RewardType.SECOND_OR_THIRD_INSTANCE_FOR_FREE:
+        return lambda item: item.price * (1 - (1 / min_qty))
+
+    elif reward_type == RewardType.DISCOUNT_IN_PERCENTAGE:
+        return lambda item: item.price * (1 - discount_rate / (2 if "השני ב" in promo_description else 1))
+
+    elif reward_type == RewardType.SECOND_INSTANCE_SAME_DISCOUNT:
+        if "השני ב" in promo_description:
+            return lambda item: (item.price + discounted_price) / 2
+        else:
+            return lambda item: discounted_price / min_qty
+
+    elif reward_type == RewardType.DISCOUNT_BY_THRESHOLD:
+        return lambda item: item.price - discount_rate
+
+    elif remark and 'מחיר המבצע הינו המחיר לק"ג' in remark.text:
+        return lambda item: discounted_price
+
+    elif discounted_price and min_qty:
+        return lambda item: discounted_price / min_qty
+
+    return lambda item: INVALID_OR_UNKNOWN_PROMOTION_FUNCTION
+
+
+def is_valid_promo(start_time: datetime, end_time: datetime, description):
     """
-    This function returns whether a given Promotion object is currently valid.
+    This function returns whether a promotion with the given time range and description is currently valid.
     """
     today_date: datetime = datetime.now()
-    not_expired: bool = promo.end_date >= today_date
-    has_started: bool = promo.start_date <= today_date
-    has_products: bool = len(promo.items) > 0
-    in_promo_ignore_list: bool = any(product in promo.content for product in PRODUCTS_TO_IGNORE)
-    return not_expired and has_started and has_products and not in_promo_ignore_list
+    not_expired: bool = end_time >= today_date
+    has_started: bool = start_time <= today_date
+    in_promo_ignore_list: bool = any(product in description for product in PRODUCTS_TO_IGNORE)
+    return not_expired and has_started and not in_promo_ignore_list
 
 
-def main_latest_promos(store_id: int, load_xml: bool, logger, chain: SupermarketChain, load_promos: bool):
+def main_latest_promos(store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool):
     """
-    This function logs the available promotions in a store with a given id sorted by their update date.
+    This function writes to a CSV file the available promotions in a store with a given id sorted by their update date.
 
     :param chain: The name of the requested supermarket chain
     :param store_id: A given store id
     :param load_xml: A boolean representing whether to load an existing prices xml file
     :param load_promos: A boolean representing whether to load an existing promos xml file
-    :param logger: A given logger
     """
 
     promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos)
     promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
                                        promo.end_date), reverse=True)
-    logger.info('\n'.join(str(promotion) for promotion in promotions))
     write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv')
 
 
diff --git a/supermarket_chain.py b/supermarket_chain.py
index 3390935..60929b8 100644
--- a/supermarket_chain.py
+++ b/supermarket_chain.py
@@ -122,7 +122,7 @@ class SupermarketChain(object, metaclass=Meta):
         """
         return Item(
             name=item.find(re.compile(r'ItemN[a]?m[e]?')).text,
-            price=item.find('ItemPrice').text,
+            price=float(item.find('ItemPrice').text),
             manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text,
             code=item.find('ItemCode').text
         )