From 27b45a49997d61e978c57fb5cf33df2813ee1086 Mon Sep 17 00:00:00 2001 From: KorenLazar Date: Tue, 1 Jun 2021 21:00:40 +0300 Subject: [PATCH] finished implementing exporting promotion to xlsx table and automatically opening the xlsx file --- chains/binaproject_web_client.py | 1 - chains/cerberus_web_client.py | 1 - chains/mahsaneiHashook.py | 1 - chains/shufersal.py | 1 - main.py | 58 +++++++++++++--- promotion.py | 111 ++++++++++++++++++++++--------- store_utils.py | 6 +- utils.py | 23 ++++++- 8 files changed, 152 insertions(+), 50 deletions(-) diff --git a/chains/binaproject_web_client.py b/chains/binaproject_web_client.py index 7cc72de..8f5ecb7 100644 --- a/chains/binaproject_web_client.py +++ b/chains/binaproject_web_client.py @@ -19,7 +19,6 @@ class BinaProjectWebClient: suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"] and category.name.replace('s', '') in cur_json["FileNm"]) down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix]) - print(down_url) return down_url @property diff --git a/chains/cerberus_web_client.py b/chains/cerberus_web_client.py index 38dc275..ae8ef98 100644 --- a/chains/cerberus_web_client.py +++ b/chains/cerberus_web_client.py @@ -20,7 +20,6 @@ class CerberusWebClient: suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name']) download_url: str = hostname + "/file/d/" + suffix - print(download_url) return download_url @property diff --git a/chains/mahsaneiHashook.py b/chains/mahsaneiHashook.py index b14f9ce..9cb7c5b 100644 --- a/chains/mahsaneiHashook.py +++ b/chains/mahsaneiHashook.py @@ -24,7 +24,6 @@ class MahsaneiHashook(SupermarketChain): suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value and f'-{store_id:03d}-20' in value).attrs['href'] down_url: str = prefix + suffix - print(down_url) return down_url @staticmethod diff --git a/chains/shufersal.py b/chains/shufersal.py index 
0f6e8a8..1024ec8 100644 --- a/chains/shufersal.py +++ b/chains/shufersal.py @@ -14,5 +14,4 @@ class Shufersal(SupermarketChain): req_res: requests.Response = requests.get(url) soup: BeautifulSoup = BeautifulSoup(req_res.text, features='lxml') down_url: str = soup.find('a', text="לחץ להורדה")['href'] - print(down_url) return down_url diff --git a/main.py b/main.py index 5f2e93c..3d788a7 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,15 @@ +import os +import sys +import time from argparse import ArgumentParser +from datetime import datetime from pathlib import Path +import logging -from promotion import main_latest_promos, get_promos_by_name -from store_utils import get_store_id -from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices +from promotion import main_latest_promos, log_promos_by_name +from store_utils import log_stores_ids +from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, VALID_PROMOTION_FILE_EXTENSIONS, log_products_prices, \ + valid_promotion_output_file from supermarket_chain import SupermarketChain from chains import ( bareket, @@ -58,9 +64,6 @@ if __name__ == '__main__': metavar='city', nargs=1, ) - # parser.add_argument('--all_deals', - # action='store_true', - # ) parser.add_argument('--load_prices', help='boolean flag representing whether to load an existing price XML file', action='store_true', @@ -78,21 +81,56 @@ if __name__ == '__main__': help='The name of the requested chain', choices=chain_dict.keys(), ) + parser.add_argument('--file_extension', + help='The extension of the promotions output file', + choices=VALID_PROMOTION_FILE_EXTENSIONS, + default='.xlsx', + ) + parser.add_argument('--output_filename', + help='The path to write the promotions table to', + type=valid_promotion_output_file, + ) + parser.add_argument('--only_export_to_file', + help='Boolean flag representing whether only export or also open the promotion output file', + action='store_true', + ) + parser.add_argument('--debug', + help='Boolean flag 
representing whether to run in debug mode', + action='store_true', + ) args = parser.parse_args() + if args.debug: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) chain: SupermarketChain = chain_dict[args.chain] + if args.promos: arg_store_id = int(args.promos[0]) - main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos) + + if args.output_filename: + output_filename = args.output_filename + directory = os.path.dirname(output_filename) + Path(directory).mkdir(parents=True, exist_ok=True) + else: + Path(RESULTS_DIRNAME).mkdir(exist_ok=True) + output_filename = f'{RESULTS_DIRNAME}/{repr(type(chain))}_promos_{arg_store_id}{args.file_extension}' + + main_latest_promos(store_id=arg_store_id, output_filename=output_filename, chain=chain, + load_promos=args.load_promos, load_xml=args.load_prices) + if not args.only_export_to_file: + os.startfile(Path(output_filename)) + logging.debug(f'Process finished at: {datetime.now()}') elif args.price: - get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1]) + log_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1]) elif args.find_store_id: arg_city = args.find_store_id[0] - get_store_id(city=arg_city, load_xml=args.load_stores, chain=chain) + log_stores_ids(city=arg_city, load_xml=args.load_stores, chain=chain) elif args.find_promos_by_name: arg_store_id = int(args.find_promos_by_name[0]) - get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1], + log_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1], load_prices=args.load_prices, load_promos=args.load_promos) diff --git a/promotion.py b/promotion.py index 1546507..3140dc4 100644 --- a/promotion.py +++ b/promotion.py @@ -1,13 +1,18 @@ +import logging +import os import re from datetime import datetime from 
enum import Enum +from pathlib import Path from typing import Dict, List, Union import csv import sys +import pandas as pd +import xlsxwriter from item import Item from utils import ( create_items_dict, - get_float_from_tag, xml_file_gen, + get_float_from_tag, log_message_and_time_if_debug, xml_file_gen, create_bs_object, ) from supermarket_chain import SupermarketChain @@ -15,6 +20,23 @@ from supermarket_chain import SupermarketChain INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1 PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי'] +PROMOTIONS_TABLE_HEADERS = [ + 'תיאור מבצע', + 'הפריט המשתתף במבצע', + 'מחיר לפני מבצע', + 'מחיר אחרי מבצע', + 'אחוז הנחה', + 'סוג מבצע', + 'כמות מקס', + 'כפל הנחות', + 'המבצע החל', + 'זמן תחילת מבצע', + 'זמן סיום מבצע', + 'זמן עדכון אחרון', + 'יצרן', + 'ברקוד פריט', + 'סוג מבצע לפי תקנות שקיפות מחירים', +] class ClubID(Enum): @@ -69,42 +91,61 @@ class Promotion: def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -> None: """ - This function writes a given list of promotions to a given output file in a CSV format. + This function writes a promotions table to a given CSV or XLSX output file. 
:param promotions: A given list of promotions :param output_filename: A given file to write to """ - encoding_file = "utf_8_sig" if sys.platform == "win32" else "utf_8" + log_message_and_time_if_debug('Writing promotions to output file') + rows = [get_promotion_row_in_csv(promo, item) for promo in promotions for item in promo.items] + if output_filename.endswith('.csv'): + encoding_file = "utf_8_sig" if sys.platform == "win32" else "utf_8" + with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out: + promos_writer = csv.writer(f_out) + promos_writer.writerow(PROMOTIONS_TABLE_HEADERS) + promos_writer.writerows(rows) - with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out: - promos_writer = csv.writer(f_out) - promos_writer.writerow([ - 'תיאור מבצע', - 'הפריט המשתתף במבצע', - 'מחיר לפני מבצע', - 'מחיר אחרי מבצע', - 'אחוז הנחה', - 'סוג מבצע', - 'כמות מקס', - 'כפל הנחות', - 'המבצע החל', - 'זמן תחילת מבצע', - 'זמן סיום מבצע', - 'זמן עדכון אחרון', - 'יצרן', - 'ברקוד פריט', - 'סוג מבצע לפי תקנות שקיפות מחירים', - ]) - for promo in promotions: - promos_writer.writerows([get_promotion_row_in_csv(promo, item) for item in promo.items]) + elif output_filename.endswith('.xlsx'): + df = pd.DataFrame(rows, columns=PROMOTIONS_TABLE_HEADERS) + workbook = xlsxwriter.Workbook(output_filename) + worksheet1 = workbook.add_worksheet() + worksheet1.right_to_left() + date_time_format = workbook.add_format({'num_format': 'm/d/yy h:mm;@'}) + number_format = workbook.add_format({'num_format': '0.00'}) + percentage_format = workbook.add_format({'num_format': '0.00%'}) + worksheet1.set_column('A:A', width=35) + worksheet1.set_column('B:B', width=25) + worksheet1.set_column('C:D', cell_format=number_format) + worksheet1.set_column('E:E', cell_format=percentage_format) + worksheet1.set_column('J:L', width=15, cell_format=date_time_format) + worksheet1.add_table( + first_row=0, + first_col=0, + last_row=len(df), + last_col=len(df.columns) - 1, 
+ options={ + "columns": [{"header": i} for i in PROMOTIONS_TABLE_HEADERS], + "data": df.values.tolist(), + 'style': 'Table Style Medium 11', + }, ) + workbook.close() + + else: + raise ValueError(f"The given output file has an invalid extension:\n{output_filename}") def get_promotion_row_in_csv(promo: Promotion, item: Item): + """ + This function returns a row in the promotions table. + :param promo: + :param item: + :return: + """ return [promo.content, item.name, item.price, - f'{promo.promo_func(item):.3f}', - f'{(item.price - promo.promo_func(item)) / item.price:.3%}', + promo.promo_func(item), + (item.price - promo.promo_func(item)) / item.price, promo.club_id.name.replace('_', ' '), promo.max_qty, promo.allow_multiple_discounts, @@ -127,10 +168,15 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo :param load_prices: A boolean representing whether to load an existing xml or load an already saved one :return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available """ + log_message_and_time_if_debug('Importing prices XML file') items_dict: Dict[str, Item] = create_items_dict(chain, load_prices, store_id) + xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name) + + log_message_and_time_if_debug('Importing promotions XML file') bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull) + log_message_and_time_if_debug('Creating promotions objects') promo_objs = list() for promo in bs_promos.find_all(chain.promotion_tag_name): promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE)) @@ -141,7 +187,6 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id) if promo_inst: promo_objs.append(promo_inst) - return promo_objs @@ -232,23 +277,25 @@ def is_valid_promo(end_time: datetime, description) -> bool: return 
not_expired and not in_promo_ignore_list -def main_latest_promos(store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool) -> None: +def main_latest_promos(store_id: int, output_filename, chain: SupermarketChain, load_promos: bool, + load_xml: bool) -> None: """ - This function writes to a CSV file the available promotions in a store with a given id sorted by their update date. + This function writes to a file the available promotions in a store with a given id sorted by their update date. :param chain: The name of the requested supermarket chain :param store_id: A given store id :param load_xml: A boolean representing whether to load an existing prices xml file :param load_promos: A boolean representing whether to load an existing promos xml file + :param output_filename: A path to write the promotions table """ promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos) promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date - promo.end_date), reverse=True) - write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv') + write_promotions_to_csv(promotions, output_filename) -def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool): +def log_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool): """ This function prints all promotions in a given chain and store_id containing a given promo_name. 
@@ -261,7 +308,7 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, promotions: List[Promotion] = get_available_promos(chain, store_id, load_prices, load_promos) for promo in promotions: if promo_name in promo.content: - print(promo.repr_ltr()) + logging.info(promo.repr_ltr()) # TODO: change to returning list of Items diff --git a/store_utils.py b/store_utils.py index b972ffa..4b49159 100644 --- a/store_utils.py +++ b/store_utils.py @@ -1,9 +1,11 @@ +import logging + from utils import xml_file_gen, create_bs_object from supermarket_chain import SupermarketChain from bs4 import BeautifulSoup -def print_stores_ids(city: str, load_xml: bool, chain: SupermarketChain): +def log_stores_ids(city: str, load_xml: bool, chain: SupermarketChain): """ This function prints the stores IDs of stores in a given city. The city must match its spelling in Shufersal's website (hence it should be in Hebrew). @@ -17,7 +19,7 @@ def print_stores_ids(city: str, load_xml: bool, chain: SupermarketChain): for store in bs_stores.find_all("STORE"): if store.find("CITY").text == city: - print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) + logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) def get_all_deals(chain): diff --git a/utils.py b/utils.py index a2f788d..7d2d997 100644 --- a/utils.py +++ b/utils.py @@ -1,6 +1,9 @@ import gzip import io +import logging import zipfile +from argparse import ArgumentTypeError +from datetime import datetime from typing import AnyStr, Dict import requests from bs4 import BeautifulSoup @@ -11,6 +14,7 @@ from supermarket_chain import SupermarketChain RESULTS_DIRNAME = "results" RAW_FILES_DIRNAME = "raw_files" +VALID_PROMOTION_FILE_EXTENSIONS = [".csv", ".xlsx"] def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str: @@ -97,7 +101,7 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) 
-> Dict[ return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)} -def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: +def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: """ This function prints the products in a given store which contains a given product_name. @@ -111,7 +115,7 @@ def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text] prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text)) for prod in prods: - print( + logging.info( ( prod.find('ItemName').text[::-1], prod.find('ManufacturerName').text[::-1], @@ -123,3 +127,18 @@ def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, def get_float_from_tag(tag, int_tag) -> int: content = tag.find(int_tag) return float(content.text) if content else 0 + + +def is_valid_promotion_output_file(output_file: str): + return any(output_file.endswith(extension) for extension in VALID_PROMOTION_FILE_EXTENSIONS) + + +def valid_promotion_output_file(output_file: str): + if not is_valid_promotion_output_file(output_file): + raise ArgumentTypeError(f"The given output file has an invalid extension:\n{output_file}") + return output_file + + +def log_message_and_time_if_debug(msg: str): + logging.info(msg) + logging.debug(datetime.now())