diff --git a/.gitignore b/.gitignore index 4bde7f9..bb07b4a 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ grading_check.py stores_* venv/ __pycache__/ +xmls/ \ No newline at end of file diff --git a/main.py b/main.py index b5f98ea..316fec7 100644 --- a/main.py +++ b/main.py @@ -3,86 +3,46 @@ import logging from promotion import main_latest_promos from store import get_store_id, store_id_type from utils import get_products_prices -# import json -# from bs4 import BeautifulSoup -# import requests - -# def get_coupons(): -# coupons_json = requests.get('https://www.shufersal.co.il/online/he/my-account/coupons/my-coupons') -# # with open('C:\\Users\\user\\Downloads\\my-coupons.json', "rb") as f: -# # coupons_json = json.load(f) -# bs_coupons = [BeautifulSoup(coup['display'], 'xml') for coup in coupons_json['myCoupons']] -# return [bs_coupon.find("img", src=lambda value: value and value.startswith( -# "https://res.cloudinary.com/shufersal/image/upload/f_auto," -# "q_auto/v1551800918/prod/product_images/products_medium")).contents[1] for bs_coupon in bs_coupons] if __name__ == '__main__': parser = ArgumentParser() parser.add_argument('--promos', - help="Param for getting the store's promotions", + help="generates a promos_{store_id}.log file with all the promotions in the requested store", metavar='store_id', - nargs='?', + nargs=1, type=store_id_type, - const=5, ) parser.add_argument('--price', - help='Params for calling get_products_prices', + help='prints all products that contain the given name in the requested store', metavar=('store_id', 'product_name'), nargs=2, ) - parser.add_argument('--find_store', - help='Params for calling get_store_id', + parser.add_argument('--find_store_id', + help='prints all Shufersal stores within a city. Input should be a name of a city in Hebrew', metavar='city', nargs=1, ) parser.add_argument('--load_xml', - help='Whether to load an existing xml', + help='boolean flag representing whether to load an existing xml', action='store_true', ) args = parser.parse_args() - logger = logging.getLogger() - logger.setLevel(logging.INFO) - if args.promos: - arg_store_id = int(args.promos) + arg_store_id = int(args.promos[0]) + + logger = logging.getLogger() + logger.setLevel(logging.INFO) handler = logging.FileHandler(filename=f'promos_{arg_store_id}.log', mode='w', encoding='utf-8') logger.addHandler(handler) - try: - main_latest_promos(store_id=arg_store_id, - load_xml=args.load_xml, - logger=logger) - except FileNotFoundError: - main_latest_promos(store_id=arg_store_id, - load_xml=False, - logger=logger) + main_latest_promos(store_id=arg_store_id, + load_xml=args.load_xml, + logger=logger) elif args.price: - handler = logging.FileHandler(filename='products_prices.log', mode='w', encoding='utf-8') - logger.addHandler(handler) - try: - get_products_prices(store_id=args.price[0], - product_name=args.price[1], - load_xml=args.load_xml, - logger=logger) - except FileNotFoundError: - get_products_prices(store_id=args.price[0], - product_name=args.price[1], - load_xml=False, - logger=logger) + get_products_prices(store_id=args.price[0], product_name=args.price[1], load_xml=args.load_xml) - elif args.find_store: - arg_city = args.find_store[0] - handler = logging.FileHandler(filename=f'stores_{arg_city}.log', - mode='w', - encoding='utf-8') - logger.addHandler(handler) - try: - get_store_id(city=arg_city, - load_xml=args.load_xml, - logger=logger) - except FileNotFoundError: - get_store_id(city=arg_city, - load_xml=False, - logger=logger) + elif args.find_store_id: + arg_city = args.find_store_id[0] + get_store_id(city=arg_city, load_xml=args.load_xml) diff --git a/promotion.py b/promotion.py index cb7cbd8..a9782fa 100644 --- a/promotion.py +++ b/promotion.py @@ -1,6 +1,11 @@ from datetime import datetime from typing import List -from utils import ShufersalCategories, create_bs_object, create_items_dict, get_download_url, xml_file_gen +from utils import ( + ShufersalCategories, + create_items_dict, + xml_file_gen, + create_bs_object, +) PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'צלחות', 'כוסות', 'מאגים', 'מגבת', 'מפות', 'פסטיגל'] @@ -36,9 +41,8 @@ def get_available_promos(store_id: int, load_xml: bool) -> List[Promotion]: :return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available """ items_dict = create_items_dict(store_id, load_xml) - - down_url = get_download_url(store_id, ShufersalCategories.PromosFull.value) - bs_promos = create_bs_object(xml_file_gen(ShufersalCategories.PromosFull.name, store_id), down_url) + xml_path = xml_file_gen(ShufersalCategories.PromosFull.name, store_id) + bs_promos = create_bs_object(xml_path, ShufersalCategories.PromosFull.value, store_id, False) promo_objs = list() for cur_promo in bs_promos.find_all("Promotion"): @@ -66,12 +70,13 @@ def is_valid_promo(promo: Promotion): def main_latest_promos(store_id: int, load_xml: bool, logger): """ - This function logs the available promos in a Shufersal store with a given id sorted by their update date. + This function logs the available promos in a store with a given id sorted by their update date. :param store_id: A given store id :param load_xml: A boolean representing whether to load an existing prices xml file + :param logger: A given logger """ promotions = get_available_promos(store_id, load_xml) promotions.sort(key=lambda promo: max(promo.update_date, promo.start_date), reverse=True) - logger.info('\n'.join(str(promotion) for promotion in promotions)) \ No newline at end of file + logger.info('\n'.join(str(promotion) for promotion in promotions)) diff --git a/store.py b/store.py index d5b7b5c..0c0bc84 100644 --- a/store.py +++ b/store.py @@ -1,6 +1,11 @@ from argparse import ArgumentTypeError -from utils import ShufersalCategories, create_bs_object, get_download_url, is_valid_store_id, xml_file_gen +from utils import ( + ShufersalCategories, + is_valid_store_id, + xml_file_gen, + create_bs_object, +) def store_id_type(store_id: str): @@ -9,17 +14,17 @@ def store_id_type(store_id: str): return store_id -def get_store_id(city: str, load_xml: bool, logger): +def get_store_id(city: str, load_xml: bool): """ - This function returns the id of a Shufersal store according to a given city. - The city must match exactly to its spelling in Shufersal's website. + This function prints the store_ids of Shufersal stores in a given city. + The city must match exactly to its spelling in Shufersal's website (hence it should be in Hebrew alphabet). :param load_xml: A boolean representing whether to load an existing xml or load an already saved one :param city: A string representing the city of the requested store. """ - down_url = "" if load_xml else get_download_url(-1, ShufersalCategories.Stores.value) - bs = create_bs_object(xml_file_gen(ShufersalCategories.Stores.name, -1), down_url) + xml_path = xml_file_gen(ShufersalCategories.Stores.name, -1) + bs_stores = create_bs_object(xml_path, ShufersalCategories.Stores.value, -1, load_xml) - for store in bs.find_all("STORE"): + for store in bs_stores.find_all("STORE"): if store.find("CITY").text == city: - logger.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) \ No newline at end of file + print((store.find("ADDRESS").text[::-1], store.find("STOREID").text, store.find("SUBCHAINNAME").text[::-1])) diff --git a/utils.py b/utils.py index bb34d8e..5e445b9 100644 --- a/utils.py +++ b/utils.py @@ -1,9 +1,13 @@ import gzip from enum import Enum from typing import Dict - import requests from bs4 import BeautifulSoup +from pathlib import Path +from os import path + +XMLS_DIRNAME = "xmls" +Path(XMLS_DIRNAME).mkdir(exist_ok=True) class ShufersalCategories(Enum): @@ -20,7 +24,7 @@ def xml_file_gen(category_name: str, store_id: int) -> str: :return: An xml filename """ store_id_str = f"-{str(store_id)}" if is_valid_store_id(store_id) else "" - return f"{category_name}{store_id_str}.xml" + return path.join(XMLS_DIRNAME, f"{category_name}{store_id_str}.xml") def get_download_url(store_id: int, cat_id: int) -> str: @@ -40,24 +44,49 @@ def get_download_url(store_id: int, cat_id: int) -> str: return soup.find('a', text="לחץ להורדה")['href'] -def create_bs_object(xml_path, download_url: str) -> BeautifulSoup: +def create_bs_object(xml_path: str, cat_id: int, store_id: int, load_xml: bool) -> BeautifulSoup: """ - This function creates a BeautifulSoup object according to the given xml_path and download_url. - In case the given download_url is an empty string, the function tries to read from the given xml_path, - otherwise it downloads the gzip from the download link and extract it. + This function creates a BeautifulSoup (BS) object according to the given parameters. + In case the given load_xml is True and the XML file exists, the function creates the BS object from the given + xml_path, otherwise it uses Shufersal's APIs to download the xml with the relevant content and saves it for + future use. - :param xml_path: A given path to an xml file - :param download_url: A string that may represent a link (described above) - :return: A BeautifulSoup object with xml content (either from a file or a link). + :param xml_path: A given path to an xml file to load/save the BS object from/to. + :param cat_id: A given id of a category from ShufersalCategories + :param store_id: A given id of a store + :param load_xml: A flag representing whether to try loading an existing XML file + :return: A BeautifulSoup object with xml content. """ - if download_url: - xml_content = gzip.decompress(requests.get(download_url).content) - with open(xml_path, 'wb') as f_out: - f_out.write(xml_content) - return BeautifulSoup(xml_content, features='xml') - else: - with open(xml_path, 'rb') as f_in: - return BeautifulSoup(f_in, features='xml') + if load_xml and path.isfile(xml_path): + return create_bs_object_from_xml(xml_path) + return create_bs_object_from_link(xml_path, store_id, cat_id) + + +def create_bs_object_from_link(xml_path: str, store_id: int, cat_id: int) -> BeautifulSoup: + """ + This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API. + + :param xml_path: A given path to an xml file to load/save the BS object from/to. + :param store_id: A given id of a store + :param cat_id: A given id of a category from ShufersalCategories + :return: A BeautifulSoup object with xml content. + """ + download_url = get_download_url(store_id, cat_id) + xml_content = gzip.decompress(requests.get(download_url).content) + with open(xml_path, 'wb') as f_out: + f_out.write(xml_content) + return BeautifulSoup(xml_content, features='xml') + + +def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup: + """ + This function creates a BeautifulSoup (BS) object from a given xml file. + + :param xml_path: A given path to an xml file to load/save the BS object from/to. + :return: A BeautifulSoup object with xml content. + """ + with open(xml_path, 'rb') as f_in: + return BeautifulSoup(f_in, features='xml') def create_items_dict(store_id: int, load_xml) -> Dict: @@ -68,9 +97,8 @@ def create_items_dict(store_id: int, load_xml) -> Dict: :param store_id: A given store id :return: A dictionary where the firs """ - down_url = "" if load_xml else get_download_url(store_id, ShufersalCategories.PricesFull.value) xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id) - bs_prices = create_bs_object(xml_path, down_url) + bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml) return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all('Item')} @@ -78,21 +106,22 @@ def get_item_info(item): return str((item.find('ItemName').text, item.find('ManufacturerName').text, item.find('ItemPrice').text)) -def get_products_prices(store_id: int, product_name: str, load_xml: bool, logger): +def get_products_prices(store_id: int, product_name: str, load_xml: bool): """ - This function logs the products in a given Shufersal store which contains a given product_name. + This function prints the products in a given Shufersal store which contains a given product_name. :param store_id: A given Shufersal store id :param product_name: A given product name :param load_xml: A boolean representing whether to load an existing xml or load an already saved one """ - down_url = "" if load_xml else get_download_url(store_id, ShufersalCategories.PricesFull.value) - bs = create_bs_object(xml_file_gen(ShufersalCategories.PricesFull.name, store_id), down_url) - prods = [item for item in bs.find_all("Item") if product_name in item.find("ItemName").text] + xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id) + bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml) + prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text] prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text)) for prod in prods: - logger.info(get_item_info(prod)) + print((prod.find('ItemName').text[::-1], prod.find('ManufacturerName').text[::-1], + prod.find('ItemPrice').text)) def is_valid_store_id(store_id: int): - return isinstance(store_id, int) and store_id >= 0 \ No newline at end of file + return isinstance(store_id, int) and store_id >= 0