Changed the create_bs_object mechanism; --find_store_id and --price now simply print their results to stdout

This commit is contained in:
KorenLazar
2020-12-15 11:28:17 +02:00
parent 975447dcd7
commit db6bfb3632
5 changed files with 97 additions and 97 deletions

1
.gitignore vendored
View File

@@ -7,3 +7,4 @@ grading_check.py
stores_*
venv/
__pycache__/
xmls/

74
main.py
View File

@@ -3,86 +3,46 @@ import logging
from promotion import main_latest_promos
from store import get_store_id, store_id_type
from utils import get_products_prices
# import json
# from bs4 import BeautifulSoup
# import requests
# def get_coupons():
# coupons_json = requests.get('https://www.shufersal.co.il/online/he/my-account/coupons/my-coupons')
# # with open('C:\\Users\\user\\Downloads\\my-coupons.json', "rb") as f:
# # coupons_json = json.load(f)
# bs_coupons = [BeautifulSoup(coup['display'], 'xml') for coup in coupons_json['myCoupons']]
# return [bs_coupon.find("img", src=lambda value: value and value.startswith(
# "https://res.cloudinary.com/shufersal/image/upload/f_auto,"
# "q_auto/v1551800918/prod/product_images/products_medium")).contents[1] for bs_coupon in bs_coupons]
if __name__ == '__main__':
    # Command-line entry point. Only the first of --promos / --price / --find_store_id
    # that was supplied is acted upon (checked in that order).
    parser = ArgumentParser()
    # NOTE: no `const` here - argparse only accepts `const` together with nargs='?'
    # and raises ValueError for nargs=1.
    parser.add_argument('--promos',
                        help="generates a promos_{store_id}.log file with all the promotions in the requested store",
                        metavar='store_id',
                        nargs=1,
                        type=store_id_type,
                        )
    parser.add_argument('--price',
                        help='prints all products that contain the given name in the requested store',
                        metavar=('store_id', 'product_name'),
                        nargs=2,
                        )
    parser.add_argument('--find_store_id',
                        help='prints all Shufersal stores within a city. Input should be a name of a city in Hebrew',
                        metavar='city',
                        nargs=1,
                        )
    parser.add_argument('--load_xml',
                        help='boolean flag representing whether to load an existing xml',
                        action='store_true',
                        )
    args = parser.parse_args()

    if args.promos:
        arg_store_id = int(args.promos[0])

        # The promotions are written to a per-store log file through the root logger.
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)
        handler = logging.FileHandler(filename=f'promos_{arg_store_id}.log', mode='w', encoding='utf-8')
        logger.addHandler(handler)
        main_latest_promos(store_id=arg_store_id,
                           load_xml=args.load_xml,
                           logger=logger)

    elif args.price:
        # --price has no `type=`, so argparse hands over plain strings. The store id must be an int:
        # xml_file_gen only embeds ids accepted by is_valid_store_id (ints), otherwise every store
        # would share the same cached XML filename.
        try:
            price_store_id = int(args.price[0])
        except ValueError:
            parser.error(f"store_id must be an integer, got {args.price[0]!r}")
        get_products_prices(store_id=price_store_id, product_name=args.price[1], load_xml=args.load_xml)

    elif args.find_store_id:
        arg_city = args.find_store_id[0]
        get_store_id(city=arg_city, load_xml=args.load_xml)

View File

@@ -1,6 +1,11 @@
from datetime import datetime
from typing import List
from utils import ShufersalCategories, create_bs_object, create_items_dict, get_download_url, xml_file_gen
from utils import (
ShufersalCategories,
create_items_dict,
xml_file_gen,
create_bs_object,
)
PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'צלחות', 'כוסות', 'מאגים', 'מגבת', 'מפות', 'פסטיגל']
@@ -36,9 +41,8 @@ def get_available_promos(store_id: int, load_xml: bool) -> List[Promotion]:
:return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available
"""
items_dict = create_items_dict(store_id, load_xml)
down_url = get_download_url(store_id, ShufersalCategories.PromosFull.value)
bs_promos = create_bs_object(xml_file_gen(ShufersalCategories.PromosFull.name, store_id), down_url)
xml_path = xml_file_gen(ShufersalCategories.PromosFull.name, store_id)
bs_promos = create_bs_object(xml_path, ShufersalCategories.PromosFull.value, store_id, False)
promo_objs = list()
for cur_promo in bs_promos.find_all("Promotion"):
@@ -66,12 +70,13 @@ def is_valid_promo(promo: Promotion):
def main_latest_promos(store_id: int, load_xml: bool, logger):
    """
    Log the promotions available in a store, most recently changed first.

    The promotions are ordered (descending) by the later of their update date and start date, and are
    emitted as a single newline-separated INFO message.

    :param store_id: A given store id
    :param load_xml: A boolean forwarded to get_available_promos (whether to load an existing prices xml file)
    :param logger: The logger that receives the promotions listing
    """
    def latest_change(promo):
        # A promotion counts as "recent" by whichever happened last: its update or its start
        return max(promo.update_date, promo.start_date)

    newest_first = sorted(get_available_promos(store_id, load_xml), key=latest_change, reverse=True)
    logger.info('\n'.join(map(str, newest_first)))

View File

@@ -1,6 +1,11 @@
from argparse import ArgumentTypeError
from utils import ShufersalCategories, create_bs_object, get_download_url, is_valid_store_id, xml_file_gen
from utils import (
ShufersalCategories,
is_valid_store_id,
xml_file_gen,
create_bs_object,
)
def store_id_type(store_id: str):
@@ -9,17 +14,17 @@ def store_id_type(store_id: str):
return store_id
def get_store_id(city: str, load_xml: bool):
    """
    Print the Shufersal stores located in a given city.

    For every STORE element whose CITY text equals the given city exactly, a tuple of
    (address, store id, sub-chain name) is printed to stdout. The address and the sub-chain name are printed
    character-reversed (presumably so that Hebrew text reads correctly in terminals without right-to-left
    support).

    :param city: The city of the requested stores, spelled exactly as in the stores XML (i.e. in Hebrew)
    :param load_xml: A flag forwarded to create_bs_object, telling it whether to try an existing XML file first
    """
    stores_xml_path = xml_file_gen(ShufersalCategories.Stores.name, -1)
    bs_stores = create_bs_object(stores_xml_path, ShufersalCategories.Stores.value, -1, load_xml)

    stores_in_city = (candidate for candidate in bs_stores.find_all("STORE")
                      if candidate.find("CITY").text == city)
    for store in stores_in_city:
        address = store.find("ADDRESS").text[::-1]
        store_id = store.find("STOREID").text
        sub_chain = store.find("SUBCHAINNAME").text[::-1]
        print((address, store_id, sub_chain))

View File

@@ -1,9 +1,13 @@
import gzip
from enum import Enum
from typing import Dict
import requests
from bs4 import BeautifulSoup
from pathlib import Path
from os import path
XMLS_DIRNAME = "xmls"
Path(XMLS_DIRNAME).mkdir(exist_ok=True)
class ShufersalCategories(Enum):
@@ -20,7 +24,7 @@ def xml_file_gen(category_name: str, store_id: int) -> str:
:return: An xml filename
"""
store_id_str = f"-{str(store_id)}" if is_valid_store_id(store_id) else ""
return f"{category_name}{store_id_str}.xml"
return path.join(XMLS_DIRNAME, f"{category_name}{store_id_str}.xml")
def get_download_url(store_id: int, cat_id: int) -> str:
@@ -40,24 +44,49 @@ def get_download_url(store_id: int, cat_id: int) -> str:
return soup.find('a', text="לחץ להורדה")['href']
def create_bs_object(xml_path: str, cat_id: int, store_id: int, load_xml: bool) -> BeautifulSoup:
    """
    Build a BeautifulSoup (BS) object, either from a local XML file or from a fresh download.

    The local file is used only when load_xml is True and xml_path points to an existing file; in every
    other case the work is delegated to create_bs_object_from_link.

    :param xml_path: A given path to an xml file to load/save the BS object from/to.
    :param cat_id: A given id of a category from ShufersalCategories
    :param store_id: A given id of a store
    :param load_xml: A flag representing whether to try loading an existing XML file
    :return: A BeautifulSoup object with xml content.
    """
    use_existing_file = load_xml and path.isfile(xml_path)
    if not use_existing_file:
        return create_bs_object_from_link(xml_path, store_id, cat_id)
    return create_bs_object_from_xml(xml_path)
def create_bs_object_from_link(xml_path: str, store_id: int, cat_id: int, timeout: float = 60.0) -> BeautifulSoup:
    """
    This function creates a BeautifulSoup (BS) object by generating a download link (via get_download_url),
    downloading the gzip behind it, and saving the extracted XML to xml_path.

    :param xml_path: A given path to an xml file to save the downloaded content to.
    :param store_id: A given id of a store
    :param cat_id: A given id of a category from ShufersalCategories
    :param timeout: Seconds to wait for the server before giving up on the download
    :return: A BeautifulSoup object with xml content.
    :raises requests.exceptions.RequestException: If the download times out or returns an HTTP error status
    """
    download_url = get_download_url(store_id, cat_id)

    # Without a timeout requests may block forever on an unresponsive server
    response = requests.get(download_url, timeout=timeout)
    # Fail here on an HTTP error instead of feeding an error page to gzip.decompress
    response.raise_for_status()

    xml_content = gzip.decompress(response.content)
    # Keep a local copy of the XML so later runs can load it instead of downloading again
    with open(xml_path, 'wb') as f_out:
        f_out.write(xml_content)
    return BeautifulSoup(xml_content, features='xml')
def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
    """
    Parse an XML file that already exists on disk into a BeautifulSoup (BS) object.

    :param xml_path: A given path to the xml file to read (opened in binary mode).
    :return: A BeautifulSoup object with the file's xml content.
    """
    with open(xml_path, 'rb') as xml_file:
        soup = BeautifulSoup(xml_file, features='xml')
    return soup
def create_items_dict(store_id: int, load_xml) -> Dict:
@@ -68,9 +97,8 @@ def create_items_dict(store_id: int, load_xml) -> Dict:
:param store_id: A given store id
:return: A dictionary that maps each item's code (ItemCode) to a string describing the item (see get_item_info)
"""
down_url = "" if load_xml else get_download_url(store_id, ShufersalCategories.PricesFull.value)
xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id)
bs_prices = create_bs_object(xml_path, down_url)
bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml)
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all('Item')}
@@ -78,21 +106,22 @@ def get_item_info(item):
return str((item.find('ItemName').text, item.find('ManufacturerName').text, item.find('ItemPrice').text))
def get_products_prices(store_id: int, product_name: str, load_xml: bool):
    """
    Print the products of a given Shufersal store whose name contains a given product_name.

    The matching products are printed to stdout in ascending order of their UnitOfMeasurePrice, each as a
    tuple of (item name, manufacturer name, item price). The item and manufacturer names are printed
    character-reversed (presumably so that Hebrew text reads correctly in terminals without right-to-left
    support).

    :param store_id: A given Shufersal store id
    :param product_name: A given product name (or a part of it)
    :param load_xml: A flag forwarded to create_bs_object, telling it whether to try an existing XML file first
    """
    prices_xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id)
    bs_prices = create_bs_object(prices_xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml)

    def unit_price(item) -> float:
        return float(item.find("UnitOfMeasurePrice").text)

    matching_items = filter(lambda item: product_name in item.find("ItemName").text,
                            bs_prices.find_all("Item"))
    for prod in sorted(matching_items, key=unit_price):
        item_name = prod.find('ItemName').text[::-1]
        manufacturer = prod.find('ManufacturerName').text[::-1]
        item_price = prod.find('ItemPrice').text
        print((item_name, manufacturer, item_price))
def is_valid_store_id(store_id: int):
    """
    Tell whether the given value can serve as a store id.

    :param store_id: The value to check
    :return: True if store_id is an int that is not negative, False otherwise
    """
    if not isinstance(store_id, int):
        return False
    return store_id >= 0