Efficiency improvements were added

This commit is contained in:
KorenLazar
2020-11-29 10:09:51 +02:00
parent 18ea20f6fd
commit 26c1103571

61
main.py
View File

@@ -6,8 +6,9 @@ import gzip
from enum import Enum from enum import Enum
from argparse import ArgumentParser, ArgumentTypeError from argparse import ArgumentParser, ArgumentTypeError
import logging import logging
import time
PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'צלחות', 'חיסול', 'כוסות', 'מאגים', 'מגבת', 'מפות'] PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'צלחות', 'כוסות', 'מאגים', 'מגבת', 'מפות']
STORE_ID_NOT_FOUND = -1 STORE_ID_NOT_FOUND = -1
@@ -22,7 +23,7 @@ def xml_file_gen(category_name: str, store_id: int) -> str:
If the given store_id is invalid, it is ignored in the returned string. If the given store_id is invalid, it is ignored in the returned string.
:param store_id: A given store_id :param store_id: A given store_id
:param category_name: :param category_name: A given category name
:return: An xml filename :return: An xml filename
""" """
store_id_str = f"-{str(store_id)}" if is_valid_store_id(store_id) else "" store_id_str = f"-{str(store_id)}" if is_valid_store_id(store_id) else ""
@@ -35,15 +36,15 @@ class Promotion:
It contains only part of the available information in Shufersal's data. It contains only part of the available information in Shufersal's data.
""" """
def __init__(self, promo_content, promo_end_date, promo_update_date, items): def __init__(self, content: str, end_date: datetime, update_date: datetime, code_items: List[str]):
self.promo_content: str = promo_content self.content: str = content
self.promo_end_date: datetime = promo_end_date self.end_date: datetime = end_date
self.promo_update_date: datetime = promo_update_date self.update_date: datetime = update_date
self.code_items_in_promo: List[str] = items self.code_items: List[str] = code_items
def __str__(self): def __str__(self):
items = '\n'.join(str(item) for item in self.code_items_in_promo) items = '\n'.join(str(item) for item in self.code_items)
return f"*** {self.promo_content} until {self.promo_end_date.date()} ***\n{items}\n" return f"*** {self.content} until {self.end_date.date()} ***\n{items}\n"
def get_download_url(store_id: int, cat_id: int) -> str: def get_download_url(store_id: int, cat_id: int) -> str:
@@ -70,17 +71,17 @@ def create_bs_object(xml_path, download_url: str) -> BeautifulSoup:
otherwise it downloads the gzip from the download link and extract it. otherwise it downloads the gzip from the download link and extract it.
:param xml_path: A given path to an xml file :param xml_path: A given path to an xml file
:param download_url: A string that may represent a link (described above). :param download_url: A string that may represent a link (described above)
:return: :return: A BeautifulSoup object with xml content (either from a file or a link).
""" """
if download_url: if download_url:
xml_content = gzip.decompress(requests.get(download_url).content) xml_content = gzip.decompress(requests.get(download_url).content)
with open(xml_path, 'wb') as f_out: with open(xml_path, 'wb') as f_out:
f_out.write(xml_content) f_out.write(xml_content)
return BeautifulSoup(xml_content, features='xml')
else: else:
with open(xml_path, 'rb') as f_in: with open(xml_path, 'rb') as f_in:
xml_content = f_in.read() return BeautifulSoup(f_in, features='xml')
return BeautifulSoup(xml_content, features='xml')
def get_available_promos(store_id: int, load_xml: bool) -> List[Promotion]: def get_available_promos(store_id: int, load_xml: bool) -> List[Promotion]:
@@ -91,25 +92,31 @@ def get_available_promos(store_id: int, load_xml: bool) -> List[Promotion]:
:param load_xml: A boolean representing whether to load an existing xml or load an already saved one :param load_xml: A boolean representing whether to load an existing xml or load an already saved one
:return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available :return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available
""" """
start = time.time()
items_dict = create_items_dict(store_id, load_xml) items_dict = create_items_dict(store_id, load_xml)
down_url = get_download_url(store_id, ShufersalCategories.PromosFull.value) down_url = get_download_url(store_id, ShufersalCategories.PromosFull.value)
bs_promos = create_bs_object(xml_file_gen(ShufersalCategories.PromosFull.name, store_id), down_url) bs_promos = create_bs_object(xml_file_gen(ShufersalCategories.PromosFull.name, store_id), down_url)
promos_objs = list() promo_objs = list()
time_now = datetime.now() for cur_promo in bs_promos.find_all("Promotion"):
for promo in bs_promos.find_all("Promotion"): cur_promo = Promotion(
promo_end_date = datetime.strptime(promo.find('PromotionEndDate').text, '%Y-%m-%d') content=cur_promo.find('PromotionDescription').text,
if promo_end_date > time_now: # If promo not expired end_date=datetime.strptime(cur_promo.find('PromotionEndDate').text, '%Y-%m-%d'),
promo_content = promo.find('PromotionDescription').text update_date=datetime.strptime(cur_promo.find('PromotionUpdateDate').text, '%Y-%m-%d %H:%M'),
if not any(product in promo_content for product in PRODUCTS_TO_IGNORE): code_items=[items_dict.get(item.find('ItemCode').text) for item in cur_promo.find_all('Item')
promo_update_date = datetime.strptime(promo.find('PromotionUpdateDate').text, '%Y-%m-%d %H:%M') if items_dict.get(item.find('ItemCode').text)],
items_in_promo = [items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item') )
if items_dict.get(item.find('ItemCode').text)] if is_valid_promo(cur_promo):
if items_in_promo: promo_objs.append(cur_promo)
promos_objs.append(Promotion(promo_content, promo_end_date, promo_update_date, items_in_promo)) print(f"Finished getting available promos in {time.time() - start}")
return promo_objs
return promos_objs
def is_valid_promo(promo: Promotion):
    """
    Decide whether a promotion should be kept in the list of available promotions.

    A promotion is kept only when all of the following hold:
    its end date is later than the current local time, it has at least one item in code_items,
    and its content contains none of the strings listed in PRODUCTS_TO_IGNORE.

    :param promo: The promotion to examine
    :return: True if the promotion passes all three checks, False otherwise
    """
    # Both basic checks are evaluated up front, before the ignore-list scan.
    still_running = datetime.now() < promo.end_date
    item_count = len(promo.code_items)
    if not (still_running and item_count > 0):
        return False

    # Reject the promotion as soon as one ignored product name appears in its description.
    for ignored_name in PRODUCTS_TO_IGNORE:
        if ignored_name in promo.content:
            return False
    return True
def create_items_dict(store_id: int, load_xml) -> Dict: def create_items_dict(store_id: int, load_xml) -> Dict:
@@ -139,7 +146,7 @@ def main_latest_promos(store_id: int, load_xml: bool):
""" """
promotions = get_available_promos(store_id, load_xml) promotions = get_available_promos(store_id, load_xml)
promotions.sort(key=lambda promo: promo.promo_update_date, reverse=True) promotions.sort(key=lambda promo: promo.update_date, reverse=True)
logger.info('\n'.join(str(promotion) for promotion in promotions)) logger.info('\n'.join(str(promotion) for promotion in promotions))