From 98dcc1c33d6709cc4897bca773b5457dfe3beb08 Mon Sep 17 00:00:00 2001 From: KorenLazar Date: Mon, 16 Aug 2021 12:39:28 +0300 Subject: [PATCH 1/5] Add price_by_measure member to Item object --- item.py | 5 +++-- supermarket_chain.py | 9 +++------ tests/test_promotions_parsing.py | 24 ++++++++++++------------ 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/item.py b/item.py index 4899c59..d0c9251 100644 --- a/item.py +++ b/item.py @@ -3,11 +3,12 @@ class Item: A class representing a product in some supermarket. """ - def __init__(self, name: str, price: float, manufacturer: str, code: str): + def __init__(self, name: str, price: float, price_by_measure: float, code: str, manufacturer: str): self.name: str = name self.price: float = price + self.price_by_measure = price_by_measure self.manufacturer: str = manufacturer self.code: str = code def __repr__(self): - return str((self.name, self.price, self.manufacturer, self.code)) + return f"\nשם: {self.name}\nמחיר: {self.price}\nיצרן: {self.manufacturer}\nקוד: {self.code}\n" diff --git a/supermarket_chain.py b/supermarket_chain.py index 60929b8..15ce0ef 100644 --- a/supermarket_chain.py +++ b/supermarket_chain.py @@ -120,9 +120,6 @@ class SupermarketChain(object, metaclass=Meta): """ This function returns a string containing important information about a given supermarket's product. 
""" - return Item( - name=item.find(re.compile(r'ItemN[a]?m[e]?')).text, - price=float(item.find('ItemPrice').text), - manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text, - code=item.find('ItemCode').text - ) + return Item(name=item.find(re.compile(r'ItemN[a]?m[e]?')).text, price=float(item.find('ItemPrice').text), + price_by_measure=float(item.find('UnitOfMeasurePrice').text), code=item.find('ItemCode').text, + manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text) diff --git a/tests/test_promotions_parsing.py b/tests/test_promotions_parsing.py index 72b1941..ea0d129 100644 --- a/tests/test_promotions_parsing.py +++ b/tests/test_promotions_parsing.py @@ -19,7 +19,7 @@ def test_shufersal_promo_type_1(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('פטה פיראוס 20%', 113, '', '') + item = Item('פטה פיראוס 20%', 113, 1, '', '') assert promo_func(item) == 100 @@ -38,7 +38,7 @@ def test_shufersal_promo_type_2(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('חגיגת גרנולה פ.יבשים500ג', 26.9, '', '') + item = Item('חגיגת גרנולה פ.יבשים500ג', 26.9, 1, '', '') assert promo_func(item) == 21.52 @@ -57,7 +57,7 @@ def test_shufersal_promo_type_6_1(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('פסטרמה מקסיקנית במשקל', 89, '', '') + item = Item('פסטרמה מקסיקנית במשקל', 89, 1, '', '') assert promo_func(item) == 89 @@ -76,7 +76,7 @@ def test_shufersal_promo_type_6_2(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('מכונת לוואצה ג\'ולי אדומה', 449, '', '') + item = Item('מכונת לוואצה ג\'ולי אדומה', 449, 1, '', '') assert promo_func(item) == 449 @@ -95,7 +95,7 @@ def test_shufersal_promo_type_7_1(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('פינצטה 2011 שחורה/כסופה', 14.9, '', '') + item = Item('פינצטה 2011 שחורה/כסופה', 14.9, 1, '', '') assert promo_func(item) == 7.45 @@ -114,7 +114,7 
@@ def test_shufersal_promo_type_7_2(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('יוגורט עיזים 500 גרם', 12.9, '', '') + item = Item('יוגורט עיזים 500 גרם', 12.9, 1, '', '') assert promo_func(item) == 12.9 * 0.75 @@ -133,7 +133,7 @@ def test_shufersal_promo_type_9_1(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('זיתים מבוקעים פיקנטי540ג', 9.3, '', '') + item = Item('זיתים מבוקעים פיקנטי540ג', 9.3, 1, '', '') assert promo_func(item) == 9.3 * 0.75 @@ -152,7 +152,7 @@ def test_shufersal_promo_type_9_2(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('שעועית לבנה שופרסל 800גר', 18.9, '', '') + item = Item('שעועית לבנה שופרסל 800גר', 18.9, 1, '', '') assert promo_func(item) == (18.9 + 10) / 2 @@ -171,7 +171,7 @@ def test_shufersal_promo_type_9_3(): discount_rate=discount_rate, discounted_price=discounted_price, ) - item = Item('גומיות שחורות 12 יח', 9.9, '', '') + item = Item('גומיות שחורות 12 יח', 9.9, 1, '', '') assert promo_func(item) == 9.9 * 0.75 @@ -190,7 +190,7 @@ def test_shufersal_promo_type_10_1(): discount_rate=discount_rate, discounted_price=discounted_price ) - item = Item('טופו טעם טבעי 300 גרם', 10.9, '', '7296073345763') + item = Item('טופו טעם טבעי 300 גרם', 10.9, 1, '7296073345763', '') assert promo_func(item) == 5 @@ -209,7 +209,7 @@ def test_shufersal_promo_type_10_2(): discount_rate=discount_rate, discounted_price=discounted_price ) - item = Item('טופו טעם טבעי 300 גרם', 10.9, 'כפרי בריא משק ויילר', '7296073345763') + item = Item('טופו טעם טבעי 300 גרם', 10.9, 1, '7296073345763', 'כפרי בריא משק ויילר') assert promo_func(item) == 7 @@ -225,7 +225,7 @@ def assert_discount(discounted_price, item_barcode, item_manufacturer, item_name discount_rate=discount_rate, discounted_price=discounted_price ) - item = Item(item_name, orig_price, item_manufacturer, item_barcode) + item = Item(item_name, orig_price, 1, item_barcode, item_manufacturer) assert 
abs(promo_func(item) - price_after_discount) <= 1e-5, promo_description From ebb1e912b94c140535ebc466d113dea4627fe4da Mon Sep 17 00:00:00 2001 From: KorenLazar Date: Mon, 16 Aug 2021 12:40:06 +0300 Subject: [PATCH 2/5] Change INFO logging format --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index 3d788a7..84736ea 100644 --- a/main.py +++ b/main.py @@ -102,7 +102,7 @@ if __name__ == '__main__': if args.debug: logging.basicConfig(level=logging.DEBUG) else: - logging.basicConfig(level=logging.INFO) + logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s') chain: SupermarketChain = chain_dict[args.chain] From 58bb04f1ddc87ae53ae99171b0761cfa0e056d03 Mon Sep 17 00:00:00 2001 From: KorenLazar Date: Mon, 16 Aug 2021 12:42:38 +0300 Subject: [PATCH 3/5] Added get_all_promos_tags function and included the non-full promotions file in the promotions collection. --- promotion.py | 50 +++++++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/promotion.py b/promotion.py index 3140dc4..1a9a9aa 100644 --- a/promotion.py +++ b/promotion.py @@ -1,22 +1,25 @@ import logging -import os import re from datetime import datetime from enum import Enum -from pathlib import Path +from os import path from typing import Dict, List, Union +from bs4.element import Tag import csv import sys import pandas as pd import xlsxwriter from item import Item from utils import ( - create_items_dict, - get_float_from_tag, log_message_and_time_if_debug, xml_file_gen, - create_bs_object, + create_bs_object, create_items_dict, + get_float_from_tag, + log_message_and_time_if_debug, xml_file_gen, ) from supermarket_chain import SupermarketChain +XML_FILES_PROMOTIONS_CATEGORIES = [SupermarketChain.XMLFilesCategory.PromosFull, + SupermarketChain.XMLFilesCategory.Promos] + INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1 PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי'] @@ 
-169,16 +172,13 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo :return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available """ log_message_and_time_if_debug('Importing prices XML file') - items_dict: Dict[str, Item] = create_items_dict(chain, load_prices, store_id) - - xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name) - + items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_prices) log_message_and_time_if_debug('Importing promotions XML file') - bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull) + promo_tags = get_all_promos_tags(chain, store_id, load_promos) log_message_and_time_if_debug('Creating promotions objects') promo_objs = list() - for promo in bs_promos.find_all(chain.promotion_tag_name): + for promo in promo_tags: promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE)) if promo_objs and promo_objs[-1].promotion_id == promotion_id: promo_objs[-1].items.extend(chain.get_items(promo, items_dict)) @@ -187,6 +187,7 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id) if promo_inst: promo_objs.append(promo_inst) + return promo_objs @@ -288,7 +289,6 @@ def main_latest_promos(store_id: int, output_filename, chain: SupermarketChain, :param load_promos: A boolean representing whether to load an existing promos xml file :param output_filename: A path to write the promotions table """ - promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos) promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date - promo.end_date), reverse=True) @@ -316,12 +316,24 @@ def get_all_null_items_in_promos(chain, store_id) -> List[str]: """ This function finds all items appearing in the chain's promotions file 
but not in the chain's prices file. """ - items_dict: Dict[str, Item] = create_items_dict(chain, True, store_id) - xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name) - bs_promos = create_bs_object(xml_path, chain, store_id, True, chain.XMLFilesCategory.PromosFull) + items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_xml=True) + promo_tags = get_all_promos_tags(chain, store_id, load_xml=True) + return [item for promo_tag in promo_tags for item in chain.get_null_items(promo_tag, items_dict)] - null_items = list() - for promo in bs_promos.find_all(chain.promotion_tag_name): - null_items.extend(chain.get_null_items(promo, items_dict)) - return null_items +def get_all_promos_tags(chain: SupermarketChain, store_id: int, load_xml: bool) -> List[Tag]: + """ + This function gets all the promotions tags for a given store in a given chain. + It includes both the full and not full promotions files. + + :param chain: A given supermarket chain + :param store_id: A given store ID + :param load_xml: A boolean representing whether to try loading the promotions from an existing XML file + :return: A list of promotions tags + """ + bs_objects = list() + for category in XML_FILES_PROMOTIONS_CATEGORIES: + xml_path = xml_file_gen(chain, store_id, category.name) + bs_objects.append(create_bs_object(chain, store_id, category, load_xml, xml_path)) + + return [promo for bs_obj in bs_objects for promo in bs_obj.find_all(chain.promotion_tag_name)] From e09b2da4a106e013cfaa90464b81a66dde0f48dd Mon Sep 17 00:00:00 2001 From: KorenLazar Date: Mon, 16 Aug 2021 12:43:01 +0300 Subject: [PATCH 4/5] removed get_all_deals function --- store_utils.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/store_utils.py b/store_utils.py index 4b49159..f8afeff 100644 --- a/store_utils.py +++ b/store_utils.py @@ -15,16 +15,8 @@ def log_stores_ids(city: str, load_xml: bool, chain: SupermarketChain): :param city: A string 
representing the city of the requested store. """ xml_path: str = xml_file_gen(chain, -1, chain.XMLFilesCategory.Stores.name) - bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, load_xml, chain.XMLFilesCategory.Stores) + bs_stores: BeautifulSoup = create_bs_object(chain, -1, chain.XMLFilesCategory.Stores, load_xml, xml_path) for store in bs_stores.find_all("STORE"): if store.find("CITY").text == city: - logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) - - -def get_all_deals(chain): - xml_path: str = xml_file_gen(chain, -1, chain.XMLFilesCategory.Stores.name) - bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, True, chain.XMLFilesCategory.Stores) - - return [int(store.find("STOREID").text) for store in bs_stores.find_all("STORE") if store.find("SUBCHAINID").text - == "2"] + logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) \ No newline at end of file From 03ff6d5281efaed86983d984c5d2efb69bb71d6b Mon Sep 17 00:00:00 2001 From: KorenLazar Date: Mon, 16 Aug 2021 12:44:32 +0300 Subject: [PATCH 5/5] Changed create_items_dict function to include the non-full prices file in the items dictionary. Changed log_products_prices to work with an items dictionary and a __repr__ function of the Item class. 
--- utils.py | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/utils.py b/utils.py index 7d2d997..49e369b 100644 --- a/utils.py +++ b/utils.py @@ -4,9 +4,10 @@ import logging import zipfile from argparse import ArgumentTypeError from datetime import datetime -from typing import AnyStr, Dict +from typing import AnyStr, Dict, List import requests from bs4 import BeautifulSoup +from bs4.element import Tag from os import path from item import Item @@ -31,8 +32,8 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml") -def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool, - category: SupermarketChain.XMLFilesCategory) -> BeautifulSoup: +def create_bs_object(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory, + load_xml: bool, xml_path: str) -> BeautifulSoup: """ This function creates a BeautifulSoup (BS) object according to the given parameters. In case the given load_xml is True and the XML file exists, the function creates the BS object from the given @@ -47,12 +48,12 @@ def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load :return: A BeautifulSoup object with xml content. 
""" if load_xml and path.isfile(xml_path): - return create_bs_object_from_xml(xml_path) - return create_bs_object_from_link(xml_path, chain, category, store_id) + return get_bs_object_from_xml(xml_path) + return get_bs_object_from_link(chain, store_id, category, xml_path) -def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: SupermarketChain.XMLFilesCategory, - store_id: int) -> BeautifulSoup: +def get_bs_object_from_link(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory, + xml_path: str) -> BeautifulSoup: """ This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API. @@ -63,7 +64,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: :return: A BeautifulSoup object with xml content. """ session = requests.Session() - download_url: str = chain.get_download_url(store_id, category, session) + download_url = chain.get_download_url(store_id, category, session) response_content = session.get(download_url).content try: xml_content: AnyStr = gzip.decompress(response_content) @@ -77,7 +78,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: return BeautifulSoup(xml_content, features='xml') -def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup: +def get_bs_object_from_xml(xml_path: str) -> BeautifulSoup: """ This function creates a BeautifulSoup (BS) object from a given XML file. @@ -88,17 +89,23 @@ def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup: return BeautifulSoup(f_in, features='xml') -def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[str, Item]: +def create_items_dict(chain: SupermarketChain, store_id: int, load_xml) -> Dict[str, Item]: """ This function creates a dictionary where every key is an item code and its value is its corresponding Item instance. 
+ We take both the full and the non-full prices files, and assume that the non-full file is more up to date (its entries overwrite those of the full file). :param chain: A given supermarket chain :param load_xml: A boolean representing whether to load an existing prices xml file :param store_id: A given store id """ - xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name) - bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull) - return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)} + items_dict = dict() + for category in [chain.XMLFilesCategory.PricesFull, chain.XMLFilesCategory.Prices]: + xml_path: str = xml_file_gen(chain, store_id, category.name) + bs_prices: BeautifulSoup = create_bs_object(chain, store_id, category, load_xml, xml_path) + items_tags = bs_prices.find_all(chain.item_tag_name) + items_dict.update({item.find('ItemCode').text: chain.get_item_info(item) for item in items_tags}) + + return items_dict def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: @@ -110,18 +117,12 @@ def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, :param product_name: A given product name :param load_xml: A boolean representing whether to load an existing xml or load an already saved one """ - xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name) - bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull) - prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text] - prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text)) - for prod in prods: - logging.info( - ( - prod.find('ItemName').text[::-1], - prod.find('ManufacturerName').text[::-1], - prod.find('ItemPrice').text - ) - ) + items_dict: Dict[str, Item] = create_items_dict(chain,
store_id, load_xml) + products_by_name = [item for item in items_dict.values() if product_name in item.name] + products_by_name_sorted_by_price = sorted(products_by_name, key=lambda item: item.price_by_measure) + + for prod in products_by_name_sorted_by_price: + logging.info(prod) def get_float_from_tag(tag, int_tag) -> int: