Merge pull request #4 from korenLazar/export-promotions-to-xlsx-table
Finished implementing export of promotions to an XLSX table and automatic…
This commit is contained in:
@@ -19,7 +19,6 @@ class BinaProjectWebClient:
|
||||
suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"]
|
||||
and category.name.replace('s', '') in cur_json["FileNm"])
|
||||
down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix])
|
||||
print(down_url)
|
||||
return down_url
|
||||
|
||||
@property
|
||||
|
@@ -20,7 +20,6 @@ class CerberusWebClient:
|
||||
suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
|
||||
|
||||
download_url: str = hostname + "/file/d/" + suffix
|
||||
print(download_url)
|
||||
return download_url
|
||||
|
||||
@property
|
||||
|
@@ -24,7 +24,6 @@ class MahsaneiHashook(SupermarketChain):
|
||||
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
|
||||
and f'-{store_id:03d}-20' in value).attrs['href']
|
||||
down_url: str = prefix + suffix
|
||||
print(down_url)
|
||||
return down_url
|
||||
|
||||
@staticmethod
|
||||
|
@@ -14,5 +14,4 @@ class Shufersal(SupermarketChain):
|
||||
req_res: requests.Response = requests.get(url)
|
||||
soup: BeautifulSoup = BeautifulSoup(req_res.text, features='lxml')
|
||||
down_url: str = soup.find('a', text="לחץ להורדה")['href']
|
||||
print(down_url)
|
||||
return down_url
|
||||
|
58
main.py
58
main.py
@@ -1,9 +1,15 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from argparse import ArgumentParser
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
from promotion import main_latest_promos, get_promos_by_name
|
||||
from store_utils import get_store_id
|
||||
from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
|
||||
from promotion import main_latest_promos, log_promos_by_name
|
||||
from store_utils import log_stores_ids
|
||||
from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, VALID_PROMOTION_FILE_EXTENSIONS, log_products_prices, \
|
||||
valid_promotion_output_file
|
||||
from supermarket_chain import SupermarketChain
|
||||
from chains import (
|
||||
bareket,
|
||||
@@ -58,9 +64,6 @@ if __name__ == '__main__':
|
||||
metavar='city',
|
||||
nargs=1,
|
||||
)
|
||||
# parser.add_argument('--all_deals',
|
||||
# action='store_true',
|
||||
# )
|
||||
parser.add_argument('--load_prices',
|
||||
help='boolean flag representing whether to load an existing price XML file',
|
||||
action='store_true',
|
||||
@@ -78,21 +81,56 @@ if __name__ == '__main__':
|
||||
help='The name of the requested chain',
|
||||
choices=chain_dict.keys(),
|
||||
)
|
||||
parser.add_argument('--file_extension',
|
||||
help='The extension of the promotions output file',
|
||||
choices=VALID_PROMOTION_FILE_EXTENSIONS,
|
||||
default='.xlsx',
|
||||
)
|
||||
parser.add_argument('--output_filename',
|
||||
help='The path to write the promotions table to',
|
||||
type=valid_promotion_output_file,
|
||||
)
|
||||
parser.add_argument('--only_export_to_file',
|
||||
help='Boolean flag representing whether only export or also open the promotion output file',
|
||||
action='store_true',
|
||||
)
|
||||
parser.add_argument('--debug',
|
||||
help='Boolean flag representing whether to run in debug mode',
|
||||
action='store_true',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
if args.debug:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
else:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
chain: SupermarketChain = chain_dict[args.chain]
|
||||
|
||||
if args.promos:
|
||||
arg_store_id = int(args.promos[0])
|
||||
main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos)
|
||||
|
||||
if args.output_filename:
|
||||
output_filename = args.output_filename
|
||||
directory = os.path.dirname(output_filename)
|
||||
Path(directory).mkdir(parents=True, exist_ok=True)
|
||||
else:
|
||||
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
|
||||
output_filename = f'{RESULTS_DIRNAME}/{repr(type(chain))}_promos_{arg_store_id}{args.file_extension}'
|
||||
|
||||
main_latest_promos(store_id=arg_store_id, output_filename=output_filename, chain=chain,
|
||||
load_promos=args.load_promos, load_xml=args.load_prices)
|
||||
if not args.only_export_to_file:
|
||||
os.startfile(Path(output_filename))
|
||||
logging.debug(f'Process finished at: {datetime.now()}')
|
||||
|
||||
elif args.price:
|
||||
get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1])
|
||||
log_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1])
|
||||
|
||||
elif args.find_store_id:
|
||||
arg_city = args.find_store_id[0]
|
||||
get_store_id(city=arg_city, load_xml=args.load_stores, chain=chain)
|
||||
log_stores_ids(city=arg_city, load_xml=args.load_stores, chain=chain)
|
||||
|
||||
elif args.find_promos_by_name:
|
||||
arg_store_id = int(args.find_promos_by_name[0])
|
||||
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
|
||||
log_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
|
||||
load_prices=args.load_prices, load_promos=args.load_promos)
|
||||
|
107
promotion.py
107
promotion.py
@@ -1,13 +1,18 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Union
|
||||
import csv
|
||||
import sys
|
||||
import pandas as pd
|
||||
import xlsxwriter
|
||||
from item import Item
|
||||
from utils import (
|
||||
create_items_dict,
|
||||
get_float_from_tag, xml_file_gen,
|
||||
get_float_from_tag, log_message_and_time_if_debug, xml_file_gen,
|
||||
create_bs_object,
|
||||
)
|
||||
from supermarket_chain import SupermarketChain
|
||||
@@ -15,6 +20,23 @@ from supermarket_chain import SupermarketChain
|
||||
INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1
|
||||
|
||||
PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי']
|
||||
PROMOTIONS_TABLE_HEADERS = [
|
||||
'תיאור מבצע',
|
||||
'הפריט המשתתף במבצע',
|
||||
'מחיר לפני מבצע',
|
||||
'מחיר אחרי מבצע',
|
||||
'אחוז הנחה',
|
||||
'סוג מבצע',
|
||||
'כמות מקס',
|
||||
'כפל הנחות',
|
||||
'המבצע החל',
|
||||
'זמן תחילת מבצע',
|
||||
'זמן סיום מבצע',
|
||||
'זמן עדכון אחרון',
|
||||
'יצרן',
|
||||
'ברקוד פריט',
|
||||
'סוג מבצע לפי תקנות שקיפות מחירים',
|
||||
]
|
||||
|
||||
|
||||
class ClubID(Enum):
|
||||
@@ -69,42 +91,61 @@ class Promotion:
|
||||
|
||||
def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -> None:
|
||||
"""
|
||||
This function writes a given list of promotions to a given output file in a CSV format.
|
||||
This function writes a promotions table to a given CSV or XLSX output file.
|
||||
|
||||
:param promotions: A given list of promotions
|
||||
:param output_filename: A given file to write to
|
||||
"""
|
||||
log_message_and_time_if_debug('Writing promotions to output file')
|
||||
rows = [get_promotion_row_in_csv(promo, item) for promo in promotions for item in promo.items]
|
||||
if output_filename.endswith('.csv'):
|
||||
encoding_file = "utf_8_sig" if sys.platform == "win32" else "utf_8"
|
||||
|
||||
with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out:
|
||||
promos_writer = csv.writer(f_out)
|
||||
promos_writer.writerow([
|
||||
'תיאור מבצע',
|
||||
'הפריט המשתתף במבצע',
|
||||
'מחיר לפני מבצע',
|
||||
'מחיר אחרי מבצע',
|
||||
'אחוז הנחה',
|
||||
'סוג מבצע',
|
||||
'כמות מקס',
|
||||
'כפל הנחות',
|
||||
'המבצע החל',
|
||||
'זמן תחילת מבצע',
|
||||
'זמן סיום מבצע',
|
||||
'זמן עדכון אחרון',
|
||||
'יצרן',
|
||||
'ברקוד פריט',
|
||||
'סוג מבצע לפי תקנות שקיפות מחירים',
|
||||
])
|
||||
for promo in promotions:
|
||||
promos_writer.writerows([get_promotion_row_in_csv(promo, item) for item in promo.items])
|
||||
promos_writer.writerow(PROMOTIONS_TABLE_HEADERS)
|
||||
promos_writer.writerows(rows)
|
||||
|
||||
elif output_filename.endswith('.xlsx'):
|
||||
df = pd.DataFrame(rows, columns=PROMOTIONS_TABLE_HEADERS)
|
||||
workbook = xlsxwriter.Workbook(output_filename)
|
||||
worksheet1 = workbook.add_worksheet()
|
||||
worksheet1.right_to_left()
|
||||
date_time_format = workbook.add_format({'num_format': 'm/d/yy h:mm;@'})
|
||||
number_format = workbook.add_format({'num_format': '0.00'})
|
||||
percentage_format = workbook.add_format({'num_format': '0.00%'})
|
||||
worksheet1.set_column('A:A', width=35)
|
||||
worksheet1.set_column('B:B', width=25)
|
||||
worksheet1.set_column('C:D', cell_format=number_format)
|
||||
worksheet1.set_column('E:E', cell_format=percentage_format)
|
||||
worksheet1.set_column('J:L', width=15, cell_format=date_time_format)
|
||||
worksheet1.add_table(
|
||||
first_row=0,
|
||||
first_col=0,
|
||||
last_row=len(df),
|
||||
last_col=len(df.columns) - 1,
|
||||
options={
|
||||
"columns": [{"header": i} for i in PROMOTIONS_TABLE_HEADERS],
|
||||
"data": df.values.tolist(),
|
||||
'style': 'Table Style Medium 11',
|
||||
}, )
|
||||
workbook.close()
|
||||
|
||||
else:
|
||||
raise ValueError(f"The given output file has an invalid extension:\n{output_filename}")
|
||||
|
||||
|
||||
def get_promotion_row_in_csv(promo: Promotion, item: Item):
|
||||
"""
|
||||
This function returns a row in the promotions table.
|
||||
:param promo:
|
||||
:param item:
|
||||
:return:
|
||||
"""
|
||||
return [promo.content,
|
||||
item.name,
|
||||
item.price,
|
||||
f'{promo.promo_func(item):.3f}',
|
||||
f'{(item.price - promo.promo_func(item)) / item.price:.3%}',
|
||||
promo.promo_func(item),
|
||||
(item.price - promo.promo_func(item)) / item.price,
|
||||
promo.club_id.name.replace('_', ' '),
|
||||
promo.max_qty,
|
||||
promo.allow_multiple_discounts,
|
||||
@@ -127,10 +168,15 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
|
||||
:param load_prices: A boolean representing whether to load an existing xml or load an already saved one
|
||||
:return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available
|
||||
"""
|
||||
log_message_and_time_if_debug('Importing prices XML file')
|
||||
items_dict: Dict[str, Item] = create_items_dict(chain, load_prices, store_id)
|
||||
|
||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name)
|
||||
|
||||
log_message_and_time_if_debug('Importing promotions XML file')
|
||||
bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull)
|
||||
|
||||
log_message_and_time_if_debug('Creating promotions objects')
|
||||
promo_objs = list()
|
||||
for promo in bs_promos.find_all(chain.promotion_tag_name):
|
||||
promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE))
|
||||
@@ -141,7 +187,6 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
|
||||
promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
|
||||
if promo_inst:
|
||||
promo_objs.append(promo_inst)
|
||||
|
||||
return promo_objs
|
||||
|
||||
|
||||
@@ -232,23 +277,25 @@ def is_valid_promo(end_time: datetime, description) -> bool:
|
||||
return not_expired and not in_promo_ignore_list
|
||||
|
||||
|
||||
def main_latest_promos(store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool) -> None:
|
||||
def main_latest_promos(store_id: int, output_filename, chain: SupermarketChain, load_promos: bool,
|
||||
load_xml: bool) -> None:
|
||||
"""
|
||||
This function writes to a CSV file the available promotions in a store with a given id sorted by their update date.
|
||||
This function writes to a file the available promotions in a store with a given id sorted by their update date.
|
||||
|
||||
:param chain: The name of the requested supermarket chain
|
||||
:param store_id: A given store id
|
||||
:param load_xml: A boolean representing whether to load an existing prices xml file
|
||||
:param load_promos: A boolean representing whether to load an existing promos xml file
|
||||
:param output_filename: A path to write the promotions table
|
||||
"""
|
||||
|
||||
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos)
|
||||
promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
|
||||
promo.end_date), reverse=True)
|
||||
write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv')
|
||||
write_promotions_to_csv(promotions, output_filename)
|
||||
|
||||
|
||||
def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):
|
||||
def log_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):
|
||||
"""
|
||||
This function prints all promotions in a given chain and store_id containing a given promo_name.
|
||||
|
||||
@@ -261,7 +308,7 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str,
|
||||
promotions: List[Promotion] = get_available_promos(chain, store_id, load_prices, load_promos)
|
||||
for promo in promotions:
|
||||
if promo_name in promo.content:
|
||||
print(promo.repr_ltr())
|
||||
logging.info(promo.repr_ltr())
|
||||
|
||||
|
||||
# TODO: change to returning list of Items
|
||||
|
@@ -1,9 +1,11 @@
|
||||
import logging
|
||||
|
||||
from utils import xml_file_gen, create_bs_object
|
||||
from supermarket_chain import SupermarketChain
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def print_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
|
||||
def log_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
|
||||
"""
|
||||
This function prints the stores IDs of stores in a given city.
|
||||
The city must match its spelling in Shufersal's website (hence it should be in Hebrew).
|
||||
@@ -17,7 +19,7 @@ def print_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
|
||||
|
||||
for store in bs_stores.find_all("STORE"):
|
||||
if store.find("CITY").text == city:
|
||||
print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
|
||||
logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
|
||||
|
||||
|
||||
def get_all_deals(chain):
|
||||
|
23
utils.py
23
utils.py
@@ -1,6 +1,9 @@
|
||||
import gzip
|
||||
import io
|
||||
import logging
|
||||
import zipfile
|
||||
from argparse import ArgumentTypeError
|
||||
from datetime import datetime
|
||||
from typing import AnyStr, Dict
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -11,6 +14,7 @@ from supermarket_chain import SupermarketChain
|
||||
|
||||
RESULTS_DIRNAME = "results"
|
||||
RAW_FILES_DIRNAME = "raw_files"
|
||||
VALID_PROMOTION_FILE_EXTENSIONS = [".csv", ".xlsx"]
|
||||
|
||||
|
||||
def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
|
||||
@@ -97,7 +101,7 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[
|
||||
return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
|
||||
|
||||
|
||||
def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
|
||||
def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
|
||||
"""
|
||||
This function prints the products in a given store which contains a given product_name.
|
||||
|
||||
@@ -111,7 +115,7 @@ def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
|
||||
prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text]
|
||||
prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text))
|
||||
for prod in prods:
|
||||
print(
|
||||
logging.info(
|
||||
(
|
||||
prod.find('ItemName').text[::-1],
|
||||
prod.find('ManufacturerName').text[::-1],
|
||||
@@ -123,3 +127,18 @@ def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
|
||||
def get_float_from_tag(tag, int_tag) -> float:
    """
    This function returns the numeric content of a sub-tag of a given tag.

    :param tag: An object exposing find(name), e.g. a parsed XML tag
    :param int_tag: The name of the sub-tag holding the number
    :return: The sub-tag's text converted to float, or 0.0 if no such sub-tag exists
    """
    content = tag.find(int_tag)
    # Compare against None explicitly: a found element may still be falsy
    # (e.g. an ElementTree element without children), which must not be treated as missing.
    return float(content.text) if content is not None else 0.0
|
||||
|
||||
|
||||
def is_valid_promotion_output_file(output_file: str):
    """
    This function tells whether a given output path ends with one of the extensions
    listed in VALID_PROMOTION_FILE_EXTENSIONS.

    :param output_file: The output path to check
    :return: True if the path ends with a listed extension, False otherwise
    """
    for extension in VALID_PROMOTION_FILE_EXTENSIONS:
        if output_file.endswith(extension):
            return True
    return False
|
||||
|
||||
|
||||
def valid_promotion_output_file(output_file: str) -> str:
    """
    This function validates the extension of a given promotions output path.

    :param output_file: The output path to validate
    :return: The given path, unchanged, if is_valid_promotion_output_file accepts it
    :raises ArgumentTypeError: If is_valid_promotion_output_file rejects the path
    """
    if not is_valid_promotion_output_file(output_file):
        # The message names the actual problem (the extension) and lists the accepted values.
        raise ArgumentTypeError(
            f"Given output file must have one of the extensions {VALID_PROMOTION_FILE_EXTENSIONS}:\n{output_file}")
    return output_file
|
||||
|
||||
|
||||
def log_message_and_time_if_debug(msg: str):
    """
    This function logs a given message at INFO level and then the current time at DEBUG level.

    :param msg: The message to log
    """
    logging.log(logging.INFO, msg)
    logging.log(logging.DEBUG, datetime.now())
|
||||
|
Reference in New Issue
Block a user