Finished implementing the export of promotions to an XLSX table and the automatic opening of the exported XLSX file

This commit is contained in:
KorenLazar
2021-06-01 21:00:40 +03:00
parent ec505dba67
commit 27b45a4999
8 changed files with 152 additions and 50 deletions

View File

@@ -19,7 +19,6 @@ class BinaProjectWebClient:
suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"] suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"]
and category.name.replace('s', '') in cur_json["FileNm"]) and category.name.replace('s', '') in cur_json["FileNm"])
down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix]) down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix])
print(down_url)
return down_url return down_url
@property @property

View File

@@ -20,7 +20,6 @@ class CerberusWebClient:
suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name']) suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
download_url: str = hostname + "/file/d/" + suffix download_url: str = hostname + "/file/d/" + suffix
print(download_url)
return download_url return download_url
@property @property

View File

@@ -24,7 +24,6 @@ class MahsaneiHashook(SupermarketChain):
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
and f'-{store_id:03d}-20' in value).attrs['href'] and f'-{store_id:03d}-20' in value).attrs['href']
down_url: str = prefix + suffix down_url: str = prefix + suffix
print(down_url)
return down_url return down_url
@staticmethod @staticmethod

View File

@@ -14,5 +14,4 @@ class Shufersal(SupermarketChain):
req_res: requests.Response = requests.get(url) req_res: requests.Response = requests.get(url)
soup: BeautifulSoup = BeautifulSoup(req_res.text, features='lxml') soup: BeautifulSoup = BeautifulSoup(req_res.text, features='lxml')
down_url: str = soup.find('a', text="לחץ להורדה")['href'] down_url: str = soup.find('a', text="לחץ להורדה")['href']
print(down_url)
return down_url return down_url

58
main.py
View File

@@ -1,9 +1,15 @@
import os
import sys
import time
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime
from pathlib import Path from pathlib import Path
import logging
from promotion import main_latest_promos, get_promos_by_name from promotion import main_latest_promos, log_promos_by_name
from store_utils import get_store_id from store_utils import log_stores_ids
from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, VALID_PROMOTION_FILE_EXTENSIONS, log_products_prices, \
valid_promotion_output_file
from supermarket_chain import SupermarketChain from supermarket_chain import SupermarketChain
from chains import ( from chains import (
bareket, bareket,
@@ -58,9 +64,6 @@ if __name__ == '__main__':
metavar='city', metavar='city',
nargs=1, nargs=1,
) )
# parser.add_argument('--all_deals',
# action='store_true',
# )
parser.add_argument('--load_prices', parser.add_argument('--load_prices',
help='boolean flag representing whether to load an existing price XML file', help='boolean flag representing whether to load an existing price XML file',
action='store_true', action='store_true',
@@ -78,21 +81,56 @@ if __name__ == '__main__':
help='The name of the requested chain', help='The name of the requested chain',
choices=chain_dict.keys(), choices=chain_dict.keys(),
) )
parser.add_argument('--file_extension',
help='The extension of the promotions output file',
choices=VALID_PROMOTION_FILE_EXTENSIONS,
default='.xlsx',
)
parser.add_argument('--output_filename',
help='The path to write the promotions table to',
type=valid_promotion_output_file,
)
parser.add_argument('--only_export_to_file',
help='Boolean flag representing whether only export or also open the promotion output file',
action='store_true',
)
parser.add_argument('--debug',
help='Boolean flag representing whether to run in debug mode',
action='store_true',
)
args = parser.parse_args() args = parser.parse_args()
if args.debug:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
chain: SupermarketChain = chain_dict[args.chain] chain: SupermarketChain = chain_dict[args.chain]
if args.promos: if args.promos:
arg_store_id = int(args.promos[0]) arg_store_id = int(args.promos[0])
main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos)
if args.output_filename:
output_filename = args.output_filename
directory = os.path.dirname(output_filename)
Path(directory).mkdir(parents=True, exist_ok=True)
else:
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
output_filename = f'{RESULTS_DIRNAME}/{repr(type(chain))}_promos_{arg_store_id}{args.file_extension}'
main_latest_promos(store_id=arg_store_id, output_filename=output_filename, chain=chain,
load_promos=args.load_promos, load_xml=args.load_prices)
if not args.only_export_to_file:
os.startfile(Path(output_filename))
logging.debug(f'Process finished at: {datetime.now()}')
elif args.price: elif args.price:
get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1]) log_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1])
elif args.find_store_id: elif args.find_store_id:
arg_city = args.find_store_id[0] arg_city = args.find_store_id[0]
get_store_id(city=arg_city, load_xml=args.load_stores, chain=chain) log_stores_ids(city=arg_city, load_xml=args.load_stores, chain=chain)
elif args.find_promos_by_name: elif args.find_promos_by_name:
arg_store_id = int(args.find_promos_by_name[0]) arg_store_id = int(args.find_promos_by_name[0])
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1], log_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
load_prices=args.load_prices, load_promos=args.load_promos) load_prices=args.load_prices, load_promos=args.load_promos)

View File

@@ -1,13 +1,18 @@
import logging
import os
import re import re
from datetime import datetime from datetime import datetime
from enum import Enum from enum import Enum
from pathlib import Path
from typing import Dict, List, Union from typing import Dict, List, Union
import csv import csv
import sys import sys
import pandas as pd
import xlsxwriter
from item import Item from item import Item
from utils import ( from utils import (
create_items_dict, create_items_dict,
get_float_from_tag, xml_file_gen, get_float_from_tag, log_message_and_time_if_debug, xml_file_gen,
create_bs_object, create_bs_object,
) )
from supermarket_chain import SupermarketChain from supermarket_chain import SupermarketChain
@@ -15,6 +20,23 @@ from supermarket_chain import SupermarketChain
INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1 INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1
PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי'] PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי']
PROMOTIONS_TABLE_HEADERS = [
'תיאור מבצע',
'הפריט המשתתף במבצע',
'מחיר לפני מבצע',
'מחיר אחרי מבצע',
'אחוז הנחה',
'סוג מבצע',
'כמות מקס',
'כפל הנחות',
'המבצע החל',
'זמן תחילת מבצע',
'זמן סיום מבצע',
'זמן עדכון אחרון',
'יצרן',
'ברקוד פריט',
'סוג מבצע לפי תקנות שקיפות מחירים',
]
class ClubID(Enum): class ClubID(Enum):
@@ -69,42 +91,61 @@ class Promotion:
def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -> None: def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -> None:
""" """
This function writes a given list of promotions to a given output file in a CSV format. This function writes a promotions table to a given CSV or XLSX output file.
:param promotions: A given list of promotions :param promotions: A given list of promotions
:param output_filename: A given file to write to :param output_filename: A given file to write to
""" """
encoding_file = "utf_8_sig" if sys.platform == "win32" else "utf_8" log_message_and_time_if_debug('Writing promotions to output file')
rows = [get_promotion_row_in_csv(promo, item) for promo in promotions for item in promo.items]
if output_filename.endswith('.csv'):
encoding_file = "utf_8_sig" if sys.platform == "win32" else "utf_8"
with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out:
promos_writer = csv.writer(f_out)
promos_writer.writerow(PROMOTIONS_TABLE_HEADERS)
promos_writer.writerows(rows)
with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out: elif output_filename.endswith('.xlsx'):
promos_writer = csv.writer(f_out) df = pd.DataFrame(rows, columns=PROMOTIONS_TABLE_HEADERS)
promos_writer.writerow([ workbook = xlsxwriter.Workbook(output_filename)
'תיאור מבצע', worksheet1 = workbook.add_worksheet()
'הפריט המשתתף במבצע', worksheet1.right_to_left()
'מחיר לפני מבצע', date_time_format = workbook.add_format({'num_format': 'm/d/yy h:mm;@'})
'מחיר אחרי מבצע', number_format = workbook.add_format({'num_format': '0.00'})
'אחוז הנחה', percentage_format = workbook.add_format({'num_format': '0.00%'})
'סוג מבצע', worksheet1.set_column('A:A', width=35)
'כמות מקס', worksheet1.set_column('B:B', width=25)
'כפל הנחות', worksheet1.set_column('C:D', cell_format=number_format)
'המבצע החל', worksheet1.set_column('E:E', cell_format=percentage_format)
'זמן תחילת מבצע', worksheet1.set_column('J:L', width=15, cell_format=date_time_format)
'זמן סיום מבצע', worksheet1.add_table(
'זמן עדכון אחרון', first_row=0,
'יצרן', first_col=0,
'ברקוד פריט', last_row=len(df),
'סוג מבצע לפי תקנות שקיפות מחירים', last_col=len(df.columns) - 1,
]) options={
for promo in promotions: "columns": [{"header": i} for i in PROMOTIONS_TABLE_HEADERS],
promos_writer.writerows([get_promotion_row_in_csv(promo, item) for item in promo.items]) "data": df.values.tolist(),
'style': 'Table Style Medium 11',
}, )
workbook.close()
else:
raise ValueError(f"The given output file has an invalid extension:\n{output_filename}")
def get_promotion_row_in_csv(promo: Promotion, item: Item): def get_promotion_row_in_csv(promo: Promotion, item: Item):
"""
This function returns a row in the promotions table.
:param promo:
:param item:
:return:
"""
return [promo.content, return [promo.content,
item.name, item.name,
item.price, item.price,
f'{promo.promo_func(item):.3f}', promo.promo_func(item),
f'{(item.price - promo.promo_func(item)) / item.price:.3%}', (item.price - promo.promo_func(item)) / item.price,
promo.club_id.name.replace('_', ' '), promo.club_id.name.replace('_', ' '),
promo.max_qty, promo.max_qty,
promo.allow_multiple_discounts, promo.allow_multiple_discounts,
@@ -127,10 +168,15 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
:param load_prices: A boolean representing whether to load an existing xml or load an already saved one :param load_prices: A boolean representing whether to load an existing xml or load an already saved one
:return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available :return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available
""" """
log_message_and_time_if_debug('Importing prices XML file')
items_dict: Dict[str, Item] = create_items_dict(chain, load_prices, store_id) items_dict: Dict[str, Item] = create_items_dict(chain, load_prices, store_id)
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name) xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name)
log_message_and_time_if_debug('Importing promotions XML file')
bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull) bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull)
log_message_and_time_if_debug('Creating promotions objects')
promo_objs = list() promo_objs = list()
for promo in bs_promos.find_all(chain.promotion_tag_name): for promo in bs_promos.find_all(chain.promotion_tag_name):
promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE)) promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE))
@@ -141,7 +187,6 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id) promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
if promo_inst: if promo_inst:
promo_objs.append(promo_inst) promo_objs.append(promo_inst)
return promo_objs return promo_objs
@@ -232,23 +277,25 @@ def is_valid_promo(end_time: datetime, description) -> bool:
return not_expired and not in_promo_ignore_list return not_expired and not in_promo_ignore_list
def main_latest_promos(store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool) -> None: def main_latest_promos(store_id: int, output_filename, chain: SupermarketChain, load_promos: bool,
load_xml: bool) -> None:
""" """
This function writes to a CSV file the available promotions in a store with a given id sorted by their update date. This function writes to a file the available promotions in a store with a given id sorted by their update date.
:param chain: The name of the requested supermarket chain :param chain: The name of the requested supermarket chain
:param store_id: A given store id :param store_id: A given store id
:param load_xml: A boolean representing whether to load an existing prices xml file :param load_xml: A boolean representing whether to load an existing prices xml file
:param load_promos: A boolean representing whether to load an existing promos xml file :param load_promos: A boolean representing whether to load an existing promos xml file
:param output_filename: A path to write the promotions table
""" """
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos) promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos)
promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date - promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
promo.end_date), reverse=True) promo.end_date), reverse=True)
write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv') write_promotions_to_csv(promotions, output_filename)
def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool): def log_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):
""" """
This function prints all promotions in a given chain and store_id containing a given promo_name. This function prints all promotions in a given chain and store_id containing a given promo_name.
@@ -261,7 +308,7 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str,
promotions: List[Promotion] = get_available_promos(chain, store_id, load_prices, load_promos) promotions: List[Promotion] = get_available_promos(chain, store_id, load_prices, load_promos)
for promo in promotions: for promo in promotions:
if promo_name in promo.content: if promo_name in promo.content:
print(promo.repr_ltr()) logging.info(promo.repr_ltr())
# TODO: change to returning list of Items # TODO: change to returning list of Items

View File

@@ -1,9 +1,11 @@
import logging
from utils import xml_file_gen, create_bs_object from utils import xml_file_gen, create_bs_object
from supermarket_chain import SupermarketChain from supermarket_chain import SupermarketChain
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
def print_stores_ids(city: str, load_xml: bool, chain: SupermarketChain): def log_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
""" """
This function prints the stores IDs of stores in a given city. This function prints the stores IDs of stores in a given city.
The city must match its spelling in Shufersal's website (hence it should be in Hebrew). The city must match its spelling in Shufersal's website (hence it should be in Hebrew).
@@ -17,7 +19,7 @@ def print_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
for store in bs_stores.find_all("STORE"): for store in bs_stores.find_all("STORE"):
if store.find("CITY").text == city: if store.find("CITY").text == city:
print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
def get_all_deals(chain): def get_all_deals(chain):

View File

@@ -1,6 +1,9 @@
import gzip import gzip
import io import io
import logging
import zipfile import zipfile
from argparse import ArgumentTypeError
from datetime import datetime
from typing import AnyStr, Dict from typing import AnyStr, Dict
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@@ -11,6 +14,7 @@ from supermarket_chain import SupermarketChain
RESULTS_DIRNAME = "results" RESULTS_DIRNAME = "results"
RAW_FILES_DIRNAME = "raw_files" RAW_FILES_DIRNAME = "raw_files"
VALID_PROMOTION_FILE_EXTENSIONS = [".csv", ".xlsx"]
def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str: def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
@@ -97,7 +101,7 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[
return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)} return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
""" """
This function prints the products in a given store which contains a given product_name. This function prints the products in a given store which contains a given product_name.
@@ -111,7 +115,7 @@ def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text] prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text]
prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text)) prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text))
for prod in prods: for prod in prods:
print( logging.info(
( (
prod.find('ItemName').text[::-1], prod.find('ItemName').text[::-1],
prod.find('ManufacturerName').text[::-1], prod.find('ManufacturerName').text[::-1],
@@ -123,3 +127,18 @@ def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
def get_float_from_tag(tag, int_tag) -> int: def get_float_from_tag(tag, int_tag) -> int:
content = tag.find(int_tag) content = tag.find(int_tag)
return float(content.text) if content else 0 return float(content.text) if content else 0
def is_valid_promotion_output_file(output_file: str):
    """
    This function checks whether a given output file path ends with one of the
    extensions listed in VALID_PROMOTION_FILE_EXTENSIONS.

    :param output_file: A path of a promotions output file
    :return: True if the path ends with a valid extension, False otherwise
    """
    # str.endswith accepts a tuple of suffixes, so a single call covers all extensions.
    allowed_suffixes = tuple(VALID_PROMOTION_FILE_EXTENSIONS)
    return output_file.endswith(allowed_suffixes)
def valid_promotion_output_file(output_file: str):
    """
    This function validates a promotions output file path and returns it unchanged.
    It raises ArgumentTypeError on failure, which suits use as an argparse `type=` callable.

    :param output_file: A path of a promotions output file
    :return: The given path, if it ends with a valid extension
    :raises ArgumentTypeError: If the path does not end with a valid extension
    """
    if not is_valid_promotion_output_file(output_file):
        # The message names the real problem (the extension) and lists the accepted values.
        raise ArgumentTypeError(
            f"Given output file must end with one of {VALID_PROMOTION_FILE_EXTENSIONS}:\n{output_file}")
    return output_file
def log_message_and_time_if_debug(msg: str):
    """
    This function logs a given message at INFO level and then logs the current time at DEBUG level.

    :param msg: The message to log
    """
    logging.log(logging.INFO, msg)
    # The timestamp is taken after the message is logged and is passed as the record's message object.
    current_time = datetime.now()
    logging.log(logging.DEBUG, current_time)