diff --git a/.gitignore b/.gitignore index bb07b4a..3c44073 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ grading_check.py stores_* venv/ __pycache__/ -xmls/ \ No newline at end of file +xmls/ +logs/ diff --git a/README.md b/README.md index f87082f..24054fc 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -# Shufersal basic scraping +# Supermarket basic scraping ## Installation clone: ```cmd script -git clone https://github.com/korenLazar/shufersal-scraping.git -cd shufersal-scraping +git clone https://github.com/korenLazar/supermarket-scraping.git +cd supermarket-scraping virtualenv venv venv\bin\activate pip install -r requirements.txt @@ -16,17 +16,17 @@ pip install -r requirements.txt 2. virtualenv ## Usage -First, to find your store's id, you can run the following command (assuming you live in Jerusalem): +First, to find your Shufersal store's ID, you can run the following command (assuming you live in Jerusalem): ```cmd script -python main.py --find_store ירושלים +python main.py --find_store ירושלים --chain Shufersal ``` -After running the command, you'll be able to see the different stores in Jerusalem with their ids in "stores_ירושלים.log". +After running the command, you'll be able to see the different stores in Jerusalem with their IDs on the screen. -Now, that we have the store's id, we can get its promotions sorted by their update date by running +Now, that we have the store's ID, we can get its promotions sorted by their update date by running ```cmd script -python main.py --promos 5 +python main.py --promos 5 --chain Shufersal ``` -* We assumed that the store's id is 5. +* We assumed that the store's ID is 5. Now, you can find the promos in "promos_5.log". For other documentation and commands, you can run diff --git a/co_op.py b/co_op.py new file mode 100644 index 0000000..c20ae82 --- /dev/null +++ b/co_op.py @@ -0,0 +1,36 @@ +from typing import Dict, List +import requests +from bs4 import BeautifulSoup +from bs4.element import Tag +from supermarket_chain import SupermarketChain + + +class CoOp(SupermarketChain): + promotion_tag_name = 'Sale' + promotion_update_tag_name = 'PriceUpdateDate' + date_format = '%Y/%m/%d' + date_hour_format = '%Y/%m/%d %H:%M:%S' + item_tag_name = 'Product' + + @staticmethod + def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str: + prefix = "http://matrixcatalog.co.il/" + url = prefix + "NBCompetitionRegulations.aspx" + req_res: requests.Response = requests.get(url) + soup = BeautifulSoup(req_res.text, features='lxml') + suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value + and f'-{store_id}-20' in value).attrs['href'] + down_url = prefix + suffix + print(down_url) + return down_url + + class XMLFilesCategory(SupermarketChain.XMLFilesCategory): + All, Promos, PromosFull, Prices, PricesFull, Stores = range(6) + + def __repr__(self): + return 'Co-Op' + + @staticmethod + def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]: + promo_item = items_dict.get(promo.find('ItemCode').text) + return [promo_item] if promo_item else [] diff --git a/main.py b/main.py index 6f792a7..da4f68f 100644 --- a/main.py +++ b/main.py @@ -1,11 +1,22 @@ from argparse import ArgumentParser import logging from promotion import main_latest_promos, get_promos_by_name -from store import get_store_id, store_id_type -from utils import get_products_prices +from store_utils import get_store_id +from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices +from supermarket_chain import SupermarketChain +from shufersal import ShuferSal +from co_op import CoOp +from pathlib import Path -# TODO: improve extendability: support addition of different supermarket chains -# TODO: fix problem of left-to-right printing in Windows' cmd +# TODO: fix problem of left-to-right printing + +Path(LOGS_DIRNAME).mkdir(exist_ok=True) +Path(XMLS_DIRNAME).mkdir(exist_ok=True) + +chain_dict = { + 'Shufersal': ShuferSal(), + 'Co-Op': CoOp(), +} if __name__ == '__main__': parser = ArgumentParser() @@ -13,7 +24,7 @@ if __name__ == '__main__': help="generates a promos_{store_id}.log file with all the promotions in the requested store", metavar='store_id', nargs=1, - type=store_id_type, + type=SupermarketChain.store_id_type, ) parser.add_argument('--find_promos_by_name', help="prints all promos containing the given promo_name in the given store", @@ -31,30 +42,44 @@ if __name__ == '__main__': metavar='city', nargs=1, ) - parser.add_argument('--load_xml', - help='boolean flag representing whether to load an existing xml', + parser.add_argument('--load_prices', + help='boolean flag representing whether to load an existing price XML file', action='store_true', ) + parser.add_argument('--load_promos', + help='boolean flag representing whether to load an existing promo XML file', + action='store_true', + ) + parser.add_argument('--load_stores', + help='boolean flag representing whether to load an existing stores XML file', + action='store_true', + ) + parser.add_argument('--chain', + required=True, + help='The name of the requested chain', + choices=['Shufersal', 'Co-Op'], + ) args = parser.parse_args() + chain: SupermarketChain = chain_dict[args.chain] if args.promos: arg_store_id = int(args.promos[0]) logger = logging.getLogger() logger.setLevel(logging.INFO) - handler = logging.FileHandler(filename=f'promos_{arg_store_id}.log', mode='w', encoding='utf-8') + handler = logging.FileHandler(filename=f'logs/{args.chain}_promos_{arg_store_id}.log', mode='w', + encoding='utf-8') logger.addHandler(handler) - main_latest_promos(store_id=arg_store_id, - load_xml=args.load_xml, - logger=logger) + main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain) elif args.price: - get_products_prices(store_id=args.price[0], product_name=args.price[1], load_xml=args.load_xml) + get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1]) elif args.find_store_id: arg_city = args.find_store_id[0] - get_store_id(city=arg_city, load_xml=args.load_xml) + get_store_id(city=arg_city, load_xml=args.load_stores, chain=chain) elif args.find_promos_by_name: arg_store_id = int(args.find_promos_by_name[0]) - get_promos_by_name(store_id=arg_store_id, load_xml=args.load_xml, promo_name=args.find_promos_by_name[1]) + get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1], + load_prices=args.load_prices, load_promos=args.load_promos) diff --git a/promotion.py b/promotion.py index 1777fa8..c5f7b3b 100644 --- a/promotion.py +++ b/promotion.py @@ -1,11 +1,13 @@ from datetime import datetime -from typing import List +from typing import Dict, List +from bs4 import BeautifulSoup + from utils import ( - ShufersalCategories, create_items_dict, xml_file_gen, create_bs_object, ) +from supermarket_chain import SupermarketChain PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'צלחות', 'כוסות', 'מאגים', 'מגבת', 'מפות', 'פסטיגל'] @@ -23,66 +25,93 @@ class Promotion: self.update_date: datetime = update_date self.items: List[str] = items - def __str__(self): + def __repr__(self): title = self.content - dates_range = f"Between {self.start_date.date()} and {self.end_date.date()}" - update_line = f"Updated at {self.update_date.date()}" + dates_range = f"Between {self.start_date} and {self.end_date}" + update_line = f"Updated at {self.update_date}" items = '\n'.join(str(item) for item in self.items) return '\n'.join([title, dates_range, update_line, items]) + '\n' + def repr_ltr(self): + title = self.content[::-1] + dates_range = f"Between {self.start_date} and {self.end_date}" + update_line = f"Updated at {self.update_date}" + items = '\n'.join(str(item)[::-1] for item in self.items) + return '\n'.join([title, dates_range, update_line, items]) + '\n' -def get_available_promos(store_id: int, load_xml: bool) -> List[Promotion]: + +def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bool, load_promos) -> List[Promotion]: """ This function return the available promotions given a BeautifulSoup object. + :param load_promos: + :param chain: The name of the requested supermarket chain :param store_id: A given store id - :param load_xml: A boolean representing whether to load an existing xml or load an already saved one + :param load_prices: A boolean representing whether to load an existing xml or load an already saved one :return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available """ - items_dict = create_items_dict(store_id, load_xml) - xml_path = xml_file_gen(ShufersalCategories.PromosFull.name, store_id) - bs_promos = create_bs_object(xml_path, ShufersalCategories.PromosFull.value, store_id, False) + items_dict: Dict[str, str] = create_items_dict(chain, load_prices, store_id) + xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name) + bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull) promo_objs = list() - for cur_promo in bs_promos.find_all("Promotion"): - cur_promo = Promotion( - content=cur_promo.find('PromotionDescription').text, - start_date=datetime.strptime(cur_promo.find('PromotionStartDate').text, '%Y-%m-%d'), - end_date=datetime.strptime(cur_promo.find('PromotionEndDate').text, '%Y-%m-%d'), - update_date=datetime.strptime(cur_promo.find('PromotionUpdateDate').text, '%Y-%m-%d %H:%M'), - items=[items_dict.get(item.find('ItemCode').text) for item in cur_promo.find_all('Item') - if items_dict.get(item.find('ItemCode').text)], + for promo in bs_promos.find_all(chain.promotion_tag_name): + promo = Promotion( + content=promo.find('PromotionDescription').text, + start_date=datetime.strptime( + promo.find('PromotionStartDate').text + ' ' + promo.find('PromotionStartHour').text, + chain.date_hour_format), + end_date=datetime.strptime(promo.find( + 'PromotionEndDate').text + ' ' + promo.find('PromotionEndHour').text, chain.date_hour_format), + update_date=datetime.strptime(promo.find(chain.promotion_update_tag_name).text, chain.date_hour_format), + items=chain.get_items(promo, items_dict), ) - if is_valid_promo(cur_promo): - promo_objs.append(cur_promo) + if is_valid_promo(promo): + promo_objs.append(promo) return promo_objs def is_valid_promo(promo: Promotion): - today_date = datetime.now() - not_expired = promo.end_date.date() >= today_date.date() - has_started = promo.start_date <= today_date - has_products = len(promo.items) > 0 - in_promo_ignore_list = any(product in promo.content for product in PRODUCTS_TO_IGNORE) + """ + This function checks if a given promo object is valid. + + :param promo: A given promotion + :return: True iff the given Promotion is valid. + """ + today_date: datetime = datetime.now() + not_expired: bool = promo.end_date >= today_date + has_started: bool = promo.start_date <= today_date + has_products: bool = len(promo.items) > 0 + in_promo_ignore_list: bool = any(product in promo.content for product in PRODUCTS_TO_IGNORE) return not_expired and has_started and has_products and not in_promo_ignore_list -def main_latest_promos(store_id: int, load_xml: bool, logger): +def main_latest_promos(store_id: int, load_xml: bool, logger, chain: SupermarketChain): """ This function logs the available promos in a store with a given id sorted by their update date. + :param chain: The name of the requested supermarket chain :param store_id: A given store id :param load_xml: A boolean representing whether to load an existing prices xml file :param logger: A given logger """ - promotions = get_available_promos(store_id, load_xml) + promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, False) promotions.sort(key=lambda promo: max(promo.update_date, promo.start_date), reverse=True) logger.info('\n'.join(str(promotion) for promotion in promotions)) -def get_promos_by_name(store_id: int, load_xml: bool, promo_name: str): - promotions = get_available_promos(store_id, load_xml) +def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool): + """ + This function prints all promotions in a given chain and store_id containing a given promo_name. + + :param store_id: A given store ID + :param chain: A given supermarket chain + :param promo_name: A given name of a promo (or part of it) + :param load_prices: A boolean representing whether to load an saved prices XML file or scrape a new one + :param load_promos: A boolean representing whether to load an saved XML file or scrape a new one + """ + promotions: List[Promotion] = get_available_promos(chain, store_id, load_prices, load_promos) for promo in promotions: if promo_name in promo.content: - print(str(promo)) + print(promo.repr_ltr()) diff --git a/shufersal.py b/shufersal.py new file mode 100644 index 0000000..bb885a0 --- /dev/null +++ b/shufersal.py @@ -0,0 +1,33 @@ +from typing import Dict, List +import requests +from bs4 import BeautifulSoup +from bs4.element import Tag +from supermarket_chain import SupermarketChain + + +class ShuferSal(SupermarketChain): + promotion_tag_name = 'Promotion' + promotion_update_tag_name = 'PromotionUpdateDate' + date_format = '%Y-%m-%d' + date_hour_format = '%Y-%m-%d %H:%M' + item_tag_name = 'Item' + + @staticmethod + def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str: + url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={category.value}" + if SupermarketChain.is_valid_store_id(store_id): + url += f"&storeId={store_id}" + req_res: requests.Response = requests.get(url) + soup = BeautifulSoup(req_res.text, features='lxml') + return soup.find('a', text="לחץ להורדה")['href'] + + class XMLFilesCategory(SupermarketChain.XMLFilesCategory): + All, Prices, PricesFull, Promos, PromosFull, Stores = range(6) + + def __repr__(self): + return 'Shufersal' + + @staticmethod + def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]: + return [items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item') + if items_dict.get(item.find('ItemCode').text)] diff --git a/store.py b/store.py deleted file mode 100644 index 0c0bc84..0000000 --- a/store.py +++ /dev/null @@ -1,30 +0,0 @@ -from argparse import ArgumentTypeError - -from utils import ( - ShufersalCategories, - is_valid_store_id, - xml_file_gen, - create_bs_object, -) - - -def store_id_type(store_id: str): - if not is_valid_store_id(int(store_id)): - raise ArgumentTypeError(f"Given store_id: {store_id} is not a valid store_id.") - return store_id - - -def get_store_id(city: str, load_xml: bool): - """ - This function prints the store_ids of Shufersal stores in a given city. - The city must match exactly to its spelling in Shufersal's website (hence it should be in Hebrew alphabet). - - :param load_xml: A boolean representing whether to load an existing xml or load an already saved one - :param city: A string representing the city of the requested store. - """ - xml_path = xml_file_gen(ShufersalCategories.Stores.name, -1) - bs_stores = create_bs_object(xml_path, ShufersalCategories.Stores.value, -1, load_xml) - - for store in bs_stores.find_all("STORE"): - if store.find("CITY").text == city: - print((store.find("ADDRESS").text[::-1], store.find("STOREID").text, store.find("SUBCHAINNAME").text[::-1])) diff --git a/store_utils.py b/store_utils.py new file mode 100644 index 0000000..69d98d4 --- /dev/null +++ b/store_utils.py @@ -0,0 +1,20 @@ +from utils import xml_file_gen, create_bs_object +from supermarket_chain import SupermarketChain +from bs4 import BeautifulSoup + + +def get_store_id(city: str, load_xml: bool, chain: SupermarketChain): + """ + This function prints the store_ids of stores in a given city. + The city must match exactly to its spelling in Shufersal's website (hence it should be in Hebrew alphabet). + + :param chain: A given supermarket chain + :param load_xml: A boolean representing whether to load an existing xml or load an already saved one + :param city: A string representing the city of the requested store. + """ + xml_path: str = xml_file_gen(chain, -1, chain.XMLFilesCategory.Stores.name) + bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, load_xml, chain.XMLFilesCategory.Stores) + + for store in bs_stores.find_all("STORE"): + if store.find("CITY").text == city: + print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) diff --git a/supermarket_chain.py b/supermarket_chain.py new file mode 100644 index 0000000..3adf809 --- /dev/null +++ b/supermarket_chain.py @@ -0,0 +1,80 @@ +from abc import abstractmethod +from enum import Enum +from argparse import ArgumentTypeError +from typing import Dict, List +from bs4.element import Tag + + +class SupermarketChain: + """ + A class representing a supermarket chain. + """ + class XMLFilesCategory(Enum): + """ + An enum class of different XML files produced by a supermarket chain + """ + pass + + @property + @abstractmethod + def promotion_tag_name(self): pass + + @property + @abstractmethod + def promotion_update_tag_name(self): pass + + @property + @abstractmethod + def date_format(self): pass + + @property + @abstractmethod + def date_hour_format(self): pass + + @property + @abstractmethod + def item_tag_name(self): pass + + @staticmethod + def is_valid_store_id(store_id: int) -> bool: + """ + This method returns whether a given store ID is valid (French Natural number). + + :param store_id: A given store ID + """ + return isinstance(store_id, int) and store_id >= 0 + + @staticmethod + def store_id_type(store_id: str) -> str: + """ + This method used as a type verification for store_id. + + :param store_id: A given store ID + :return: The given store_id if valid, else raise an ArgumentTypeError. + """ + if not SupermarketChain.is_valid_store_id(int(store_id)): + raise ArgumentTypeError(f"Given store_id: {store_id} is not a valid store_id.") + return store_id + + @staticmethod + @abstractmethod + def get_download_url(store_id: int, category: XMLFilesCategory) -> str: + """ + This method scrapes supermarket's website and returns a url containing the data for a given store and category. + + :param store_id: A given id of a store + :param category: A given category + :return: A downloadable link of the data for a given store and category + """ + pass + + @staticmethod + @abstractmethod + def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]: + """ + This method returns a list of the items that participate in a given promo + + :param promo: A given promo + :param items_dict: A given dictionary of products + """ + pass diff --git a/utils.py b/utils.py index 5e445b9..52c2ed9 100644 --- a/utils.py +++ b/utils.py @@ -1,78 +1,63 @@ import gzip -from enum import Enum -from typing import Dict +from typing import AnyStr, Dict import requests from bs4 import BeautifulSoup -from pathlib import Path +from bs4.element import Tag from os import path +from supermarket_chain import SupermarketChain +import re +LOGS_DIRNAME = "logs" XMLS_DIRNAME = "xmls" -Path(XMLS_DIRNAME).mkdir(exist_ok=True) -class ShufersalCategories(Enum): - All, Prices, PricesFull, Promos, PromosFull, Stores = range(6) - - -def xml_file_gen(category_name: str, store_id: int) -> str: +def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str: """ - This function generate an xml filename given a store id and a category_name + This function generate an XML filename given a store id and a category_name If the given store_id is invalid, it is ignored in the returned string. + :param chain: A given supermarket chain :param store_id: A given store_id :param category_name: A given category name :return: An xml filename """ - store_id_str = f"-{str(store_id)}" if is_valid_store_id(store_id) else "" - return path.join(XMLS_DIRNAME, f"{category_name}{store_id_str}.xml") + store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else "" + return path.join(XMLS_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml") -def get_download_url(store_id: int, cat_id: int) -> str: - """ - This function scrapes Shufersal's website and returns a url that contains the data for a given store and category. - For info about the categories, see ShufersalCategories. - - :param store_id: A given id of a store - :param cat_id: A given id of a category - :return: A downloadable link of the data for a given store and category - """ - url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={cat_id}" - if is_valid_store_id(store_id): - url += f"&storeId={store_id}" - req_res = requests.get(url) - soup = BeautifulSoup(req_res.text, features='lxml') - return soup.find('a', text="לחץ להורדה")['href'] - - -def create_bs_object(xml_path: str, cat_id: int, store_id: int, load_xml: bool) -> BeautifulSoup: +def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool, + category: SupermarketChain.XMLFilesCategory) -> BeautifulSoup: """ This function creates a BeautifulSoup (BS) object according to the given parameters. In case the given load_xml is True and the XML file exists, the function creates the BS object from the given xml_path, otherwise it uses Shufersal's APIs to download the xml with the relevant content and saves it for future use. + :param chain: A given supermarket chain :param xml_path: A given path to an xml file to load/save the BS object from/to. - :param cat_id: A given id of a category from ShufersalCategories + :param category: A given category :param store_id: A given id of a store :param load_xml: A flag representing whether to try loading an existing XML file :return: A BeautifulSoup object with xml content. """ if load_xml and path.isfile(xml_path): return create_bs_object_from_xml(xml_path) - return create_bs_object_from_link(xml_path, store_id, cat_id) + return create_bs_object_from_link(xml_path, chain, category, store_id) -def create_bs_object_from_link(xml_path: str, store_id: int, cat_id: int) -> BeautifulSoup: +def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: SupermarketChain.XMLFilesCategory, + store_id: int) -> BeautifulSoup: """ This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API. - :param xml_path: A given path to an xml file to load/save the BS object from/to. + :param chain: A given supermarket chain + :param xml_path: A given path to an XML file to load/save the BS object from/to. :param store_id: A given id of a store - :param cat_id: A given id of a category from ShufersalCategories + :param category: A given category :return: A BeautifulSoup object with xml content. """ - download_url = get_download_url(store_id, cat_id) - xml_content = gzip.decompress(requests.get(download_url).content) + download_url: str = chain.get_download_url(store_id, category) + xml_content: AnyStr = gzip.decompress(requests.get(download_url).content) with open(xml_path, 'wb') as f_out: f_out.write(xml_content) return BeautifulSoup(xml_content, features='xml') @@ -80,7 +65,7 @@ def create_bs_object_from_link(xml_path: str, store_id: int, cat_id: int) -> Bea def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup: """ - This function creates a BeautifulSoup (BS) object from a given xml file. + This function creates a BeautifulSoup (BS) object from a given XML file. :param xml_path: A given path to an xml file to load/save the BS object from/to. :return: A BeautifulSoup object with xml content. @@ -89,39 +74,41 @@ def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup: return BeautifulSoup(f_in, features='xml') -def create_items_dict(store_id: int, load_xml) -> Dict: +def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[str, str]: """ This function creates a dictionary where every key is an item code and its value is the item's name and price. + :param chain: A given supermarket chain :param load_xml: A boolean representing whether to load an existing prices xml file :param store_id: A given store id :return: A dictionary where the firs """ - xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id) - bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml) - return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all('Item')} + xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name) + bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull) + return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)} -def get_item_info(item): - return str((item.find('ItemName').text, item.find('ManufacturerName').text, item.find('ItemPrice').text)) - - -def get_products_prices(store_id: int, product_name: str, load_xml: bool): +def get_item_info(item: Tag) -> str: """ - This function prints the products in a given Shufersal store which contains a given product_name. + This function returns a string containing important information about a given supermarket's product. + """ + return [item.find('ItemName').text, item.find(re.compile(r'Manufacture[r]?Name')).text, + item.find('ItemPrice').text, item.find('ItemCode').text] - :param store_id: A given Shufersal store id + +def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: + """ + This function prints the products in a given store which contains a given product_name. + + :param chain: A given supermarket chain + :param store_id: A given store id :param product_name: A given product name :param load_xml: A boolean representing whether to load an existing xml or load an already saved one """ - xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id) - bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml) + xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name) + bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull) prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text] prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text)) for prod in prods: print((prod.find('ItemName').text[::-1], prod.find('ManufacturerName').text[::-1], prod.find('ItemPrice').text)) - - -def is_valid_store_id(store_id: int): - return isinstance(store_id, int) and store_id >= 0