Changed the create_bs_object mechanism and switched --find_store_id and --price to print their results directly to stdout

This commit is contained in:
KorenLazar
2020-12-15 11:28:17 +02:00
parent 975447dcd7
commit db6bfb3632
5 changed files with 97 additions and 97 deletions

1
.gitignore vendored
View File

@@ -7,3 +7,4 @@ grading_check.py
stores_* stores_*
venv/ venv/
__pycache__/ __pycache__/
xmls/

74
main.py
View File

@@ -3,86 +3,46 @@ import logging
from promotion import main_latest_promos from promotion import main_latest_promos
from store import get_store_id, store_id_type from store import get_store_id, store_id_type
from utils import get_products_prices from utils import get_products_prices
# import json
# from bs4 import BeautifulSoup
# import requests
# def get_coupons():
# coupons_json = requests.get('https://www.shufersal.co.il/online/he/my-account/coupons/my-coupons')
# # with open('C:\\Users\\user\\Downloads\\my-coupons.json', "rb") as f:
# # coupons_json = json.load(f)
# bs_coupons = [BeautifulSoup(coup['display'], 'xml') for coup in coupons_json['myCoupons']]
# return [bs_coupon.find("img", src=lambda value: value and value.startswith(
# "https://res.cloudinary.com/shufersal/image/upload/f_auto,"
# "q_auto/v1551800918/prod/product_images/products_medium")).contents[1] for bs_coupon in bs_coupons]
if __name__ == '__main__': if __name__ == '__main__':
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument('--promos', parser.add_argument('--promos',
help="Param for getting the store's promotions", help="generates a promos_{store_id}.log file with all the promotions in the requested store",
metavar='store_id', metavar='store_id',
nargs='?', nargs=1,
type=store_id_type, type=store_id_type,
const=5,
) )
parser.add_argument('--price', parser.add_argument('--price',
help='Params for calling get_products_prices', help='prints all products that contain the given name in the requested store',
metavar=('store_id', 'product_name'), metavar=('store_id', 'product_name'),
nargs=2, nargs=2,
) )
parser.add_argument('--find_store', parser.add_argument('--find_store_id',
help='Params for calling get_store_id', help='prints all Shufersal stores within a city. Input should be a name of a city in Hebrew',
metavar='city', metavar='city',
nargs=1, nargs=1,
) )
parser.add_argument('--load_xml', parser.add_argument('--load_xml',
help='Whether to load an existing xml', help='boolean flag representing whether to load an existing xml',
action='store_true', action='store_true',
) )
args = parser.parse_args() args = parser.parse_args()
logger = logging.getLogger()
logger.setLevel(logging.INFO)
if args.promos: if args.promos:
arg_store_id = int(args.promos) arg_store_id = int(args.promos[0])
logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.FileHandler(filename=f'promos_{arg_store_id}.log', mode='w', encoding='utf-8') handler = logging.FileHandler(filename=f'promos_{arg_store_id}.log', mode='w', encoding='utf-8')
logger.addHandler(handler) logger.addHandler(handler)
try: main_latest_promos(store_id=arg_store_id,
main_latest_promos(store_id=arg_store_id, load_xml=args.load_xml,
load_xml=args.load_xml, logger=logger)
logger=logger)
except FileNotFoundError:
main_latest_promos(store_id=arg_store_id,
load_xml=False,
logger=logger)
elif args.price: elif args.price:
handler = logging.FileHandler(filename='products_prices.log', mode='w', encoding='utf-8') get_products_prices(store_id=args.price[0], product_name=args.price[1], load_xml=args.load_xml)
logger.addHandler(handler)
try:
get_products_prices(store_id=args.price[0],
product_name=args.price[1],
load_xml=args.load_xml,
logger=logger)
except FileNotFoundError:
get_products_prices(store_id=args.price[0],
product_name=args.price[1],
load_xml=False,
logger=logger)
elif args.find_store: elif args.find_store_id:
arg_city = args.find_store[0] arg_city = args.find_store_id[0]
handler = logging.FileHandler(filename=f'stores_{arg_city}.log', get_store_id(city=arg_city, load_xml=args.load_xml)
mode='w',
encoding='utf-8')
logger.addHandler(handler)
try:
get_store_id(city=arg_city,
load_xml=args.load_xml,
logger=logger)
except FileNotFoundError:
get_store_id(city=arg_city,
load_xml=False,
logger=logger)

View File

@@ -1,6 +1,11 @@
from datetime import datetime from datetime import datetime
from typing import List from typing import List
from utils import ShufersalCategories, create_bs_object, create_items_dict, get_download_url, xml_file_gen from utils import (
ShufersalCategories,
create_items_dict,
xml_file_gen,
create_bs_object,
)
PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'צלחות', 'כוסות', 'מאגים', 'מגבת', 'מפות', 'פסטיגל'] PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'צלחות', 'כוסות', 'מאגים', 'מגבת', 'מפות', 'פסטיגל']
@@ -36,9 +41,8 @@ def get_available_promos(store_id: int, load_xml: bool) -> List[Promotion]:
:return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available :return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available
""" """
items_dict = create_items_dict(store_id, load_xml) items_dict = create_items_dict(store_id, load_xml)
xml_path = xml_file_gen(ShufersalCategories.PromosFull.name, store_id)
down_url = get_download_url(store_id, ShufersalCategories.PromosFull.value) bs_promos = create_bs_object(xml_path, ShufersalCategories.PromosFull.value, store_id, False)
bs_promos = create_bs_object(xml_file_gen(ShufersalCategories.PromosFull.name, store_id), down_url)
promo_objs = list() promo_objs = list()
for cur_promo in bs_promos.find_all("Promotion"): for cur_promo in bs_promos.find_all("Promotion"):
@@ -66,10 +70,11 @@ def is_valid_promo(promo: Promotion):
def main_latest_promos(store_id: int, load_xml: bool, logger): def main_latest_promos(store_id: int, load_xml: bool, logger):
""" """
This function logs the available promos in a Shufersal store with a given id sorted by their update date. This function logs the available promos in a store with a given id sorted by their update date.
:param store_id: A given store id :param store_id: A given store id
:param load_xml: A boolean representing whether to load an existing prices xml file :param load_xml: A boolean representing whether to load an existing prices xml file
:param logger: A given logger
""" """
promotions = get_available_promos(store_id, load_xml) promotions = get_available_promos(store_id, load_xml)

View File

@@ -1,6 +1,11 @@
from argparse import ArgumentTypeError from argparse import ArgumentTypeError
from utils import ShufersalCategories, create_bs_object, get_download_url, is_valid_store_id, xml_file_gen from utils import (
ShufersalCategories,
is_valid_store_id,
xml_file_gen,
create_bs_object,
)
def store_id_type(store_id: str): def store_id_type(store_id: str):
@@ -9,17 +14,17 @@ def store_id_type(store_id: str):
return store_id return store_id
def get_store_id(city: str, load_xml: bool, logger): def get_store_id(city: str, load_xml: bool):
""" """
This function returns the id of a Shufersal store according to a given city. This function prints the store_ids of Shufersal stores in a given city.
The city must match exactly to its spelling in Shufersal's website. The city must match exactly to its spelling in Shufersal's website (hence it should be in Hebrew alphabet).
:param load_xml: A boolean representing whether to load an existing xml or load an already saved one :param load_xml: A boolean representing whether to load an existing xml or load an already saved one
:param city: A string representing the city of the requested store. :param city: A string representing the city of the requested store.
""" """
down_url = "" if load_xml else get_download_url(-1, ShufersalCategories.Stores.value) xml_path = xml_file_gen(ShufersalCategories.Stores.name, -1)
bs = create_bs_object(xml_file_gen(ShufersalCategories.Stores.name, -1), down_url) bs_stores = create_bs_object(xml_path, ShufersalCategories.Stores.value, -1, load_xml)
for store in bs.find_all("STORE"): for store in bs_stores.find_all("STORE"):
if store.find("CITY").text == city: if store.find("CITY").text == city:
logger.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) print((store.find("ADDRESS").text[::-1], store.find("STOREID").text, store.find("SUBCHAINNAME").text[::-1]))

View File

@@ -1,9 +1,13 @@
import gzip import gzip
from enum import Enum from enum import Enum
from typing import Dict from typing import Dict
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from pathlib import Path
from os import path
XMLS_DIRNAME = "xmls"
Path(XMLS_DIRNAME).mkdir(exist_ok=True)
class ShufersalCategories(Enum): class ShufersalCategories(Enum):
@@ -20,7 +24,7 @@ def xml_file_gen(category_name: str, store_id: int) -> str:
:return: An xml filename :return: An xml filename
""" """
store_id_str = f"-{str(store_id)}" if is_valid_store_id(store_id) else "" store_id_str = f"-{str(store_id)}" if is_valid_store_id(store_id) else ""
return f"{category_name}{store_id_str}.xml" return path.join(XMLS_DIRNAME, f"{category_name}{store_id_str}.xml")
def get_download_url(store_id: int, cat_id: int) -> str: def get_download_url(store_id: int, cat_id: int) -> str:
@@ -40,24 +44,49 @@ def get_download_url(store_id: int, cat_id: int) -> str:
return soup.find('a', text="לחץ להורדה")['href'] return soup.find('a', text="לחץ להורדה")['href']
def create_bs_object(xml_path, download_url: str) -> BeautifulSoup: def create_bs_object(xml_path: str, cat_id: int, store_id: int, load_xml: bool) -> BeautifulSoup:
""" """
This function creates a BeautifulSoup object according to the given xml_path and download_url. This function creates a BeautifulSoup (BS) object according to the given parameters.
In case the given download_url is an empty string, the function tries to read from the given xml_path, In case the given load_xml is True and the XML file exists, the function creates the BS object from the given
otherwise it downloads the gzip from the download link and extract it. xml_path, otherwise it uses Shufersal's APIs to download the xml with the relevant content and saves it for
future use.
:param xml_path: A given path to an xml file :param xml_path: A given path to an xml file to load/save the BS object from/to.
:param download_url: A string that may represent a link (described above) :param cat_id: A given id of a category from ShufersalCategories
:return: A BeautifulSoup object with xml content (either from a file or a link). :param store_id: A given id of a store
:param load_xml: A flag representing whether to try loading an existing XML file
:return: A BeautifulSoup object with xml content.
""" """
if download_url: if load_xml and path.isfile(xml_path):
xml_content = gzip.decompress(requests.get(download_url).content) return create_bs_object_from_xml(xml_path)
with open(xml_path, 'wb') as f_out: return create_bs_object_from_link(xml_path, store_id, cat_id)
f_out.write(xml_content)
return BeautifulSoup(xml_content, features='xml')
else: def create_bs_object_from_link(xml_path: str, store_id: int, cat_id: int) -> BeautifulSoup:
with open(xml_path, 'rb') as f_in: """
return BeautifulSoup(f_in, features='xml') This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API.
:param xml_path: A given path to an xml file to load/save the BS object from/to.
:param store_id: A given id of a store
:param cat_id: A given id of a category from ShufersalCategories
:return: A BeautifulSoup object with xml content.
"""
download_url = get_download_url(store_id, cat_id)
xml_content = gzip.decompress(requests.get(download_url).content)
with open(xml_path, 'wb') as f_out:
f_out.write(xml_content)
return BeautifulSoup(xml_content, features='xml')
def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
    """
    Build a BeautifulSoup (BS) object by parsing an xml file stored on disk.

    :param xml_path: A given path to the xml file to read the content from.
    :return: A BeautifulSoup object with the parsed xml content.
    """
    # The file is opened in binary mode and handed to the parser as a file object.
    with open(xml_path, 'rb') as xml_file:
        parsed_xml = BeautifulSoup(xml_file, features='xml')
    return parsed_xml
def create_items_dict(store_id: int, load_xml) -> Dict: def create_items_dict(store_id: int, load_xml) -> Dict:
@@ -68,9 +97,8 @@ def create_items_dict(store_id: int, load_xml) -> Dict:
:param store_id: A given store id :param store_id: A given store id
:return: A dictionary where the firs :return: A dictionary where the firs
""" """
down_url = "" if load_xml else get_download_url(store_id, ShufersalCategories.PricesFull.value)
xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id) xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id)
bs_prices = create_bs_object(xml_path, down_url) bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml)
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all('Item')} return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all('Item')}
@@ -78,20 +106,21 @@ def get_item_info(item):
return str((item.find('ItemName').text, item.find('ManufacturerName').text, item.find('ItemPrice').text)) return str((item.find('ItemName').text, item.find('ManufacturerName').text, item.find('ItemPrice').text))
def get_products_prices(store_id: int, product_name: str, load_xml: bool, logger): def get_products_prices(store_id: int, product_name: str, load_xml: bool):
""" """
This function logs the products in a given Shufersal store which contains a given product_name. This function prints the products in a given Shufersal store which contains a given product_name.
:param store_id: A given Shufersal store id :param store_id: A given Shufersal store id
:param product_name: A given product name :param product_name: A given product name
:param load_xml: A boolean representing whether to load an existing xml or load an already saved one :param load_xml: A boolean representing whether to load an existing xml or load an already saved one
""" """
down_url = "" if load_xml else get_download_url(store_id, ShufersalCategories.PricesFull.value) xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id)
bs = create_bs_object(xml_file_gen(ShufersalCategories.PricesFull.name, store_id), down_url) bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml)
prods = [item for item in bs.find_all("Item") if product_name in item.find("ItemName").text] prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text]
prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text)) prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text))
for prod in prods: for prod in prods:
logger.info(get_item_info(prod)) print((prod.find('ItemName').text[::-1], prod.find('ManufacturerName').text[::-1],
prod.find('ItemPrice').text))
def is_valid_store_id(store_id: int): def is_valid_store_id(store_id: int):