From e1f43772b965b7550aef75ed75309787f2346bbb Mon Sep 17 00:00:00 2001 From: 1kamma Date: Sat, 17 Apr 2021 20:49:08 +0300 Subject: [PATCH 1/3] now excel is working --- main.py | 11 ++++++-- promotion.py | 67 +++++++++++++++++++++++++++++++----------------- requirements.txt | 2 ++ 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/main.py b/main.py index 5f2e93c..d18177d 100644 --- a/main.py +++ b/main.py @@ -78,12 +78,19 @@ if __name__ == '__main__': help='The name of the requested chain', choices=chain_dict.keys(), ) + parser.add_argument('--type', + choices=("excel", "csv"), + default='excel', + + help="a switch flag to set set the output file to a CSV file", + required=False, + ) args = parser.parse_args() - + file_type = '.xlsx' if not args.type or args.type == 'excel' else '.csv' chain: SupermarketChain = chain_dict[args.chain] if args.promos: arg_store_id = int(args.promos[0]) - main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos) + main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos, file_type=file_type) elif args.price: get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1]) diff --git a/promotion.py b/promotion.py index 1546507..e4c4eb6 100644 --- a/promotion.py +++ b/promotion.py @@ -11,6 +11,7 @@ from utils import ( create_bs_object, ) from supermarket_chain import SupermarketChain +import pandas as pd INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1 @@ -44,7 +45,7 @@ class Promotion: def __init__(self, content: str, start_date: datetime, end_date: datetime, update_date: datetime, items: List[Item], promo_func: callable, club_id: ClubID, promotion_id: float, max_qty: int, - allow_multiple_discounts: bool, reward_type: RewardType): + allow_multiple_discounts: bool, reward_type: RewardType, type_file: str = "excel"): self.content: str = content self.start_date: datetime = start_date self.end_date: datetime = end_date @@ -56,6 +57,7 @@ class Promotion: self.allow_multiple_discounts = allow_multiple_discounts self.reward_type = reward_type self.promotion_id = promotion_id + self.type_file = type_file def repr_ltr(self): title = self.content @@ -75,28 +77,43 @@ def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) - :param output_filename: A given file to write to """ encoding_file = "utf_8_sig" if sys.platform == "win32" else "utf_8" + columns = [ + 'תיאור מבצע', + 'הפריט המשתתף במבצע', + 'מחיר לפני מבצע', + 'מחיר אחרי מבצע', + 'אחוז הנחה', + 'סוג מבצע', + 'כמות מקס', + 'כפל הנחות', + 'המבצע החל', + 'זמן תחילת מבצע', + 'זמן סיום מבצע', + 'זמן עדכון אחרון', + 'יצרן', + 'ברקוד פריט', + 'סוג מבצע לפי תקנות שקיפות מחירים', + ] + if output_filename.endswith(".csv"): + with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out: + promos_writer = csv.writer(f_out) + promos_writer.writerow(columns) + for promo in promotions: + promos_writer.writerows([get_promotion_row_in_csv(promo, item) for item in promo.items]) + else: + with pd.ExcelWriter(output_filename, 'openpyxl', datetime_format='DD/MM/YYYY') as xl: + dt = pd.DataFrame(columns=columns) + for promo in promotions: + prms = dict_promos([get_promotion_row_in_csv(promo, item) for item in promo.items], columns) + if prms: + dt = dt.append(prms, True) + else: + continue + dt.to_excel(xl, index=False, sheet_name="name") - with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out: - promos_writer = csv.writer(f_out) - promos_writer.writerow([ - 'תיאור מבצע', - 'הפריט המשתתף במבצע', - 'מחיר לפני מבצע', - 'מחיר אחרי מבצע', - 'אחוז הנחה', - 'סוג מבצע', - 'כמות מקס', - 'כפל הנחות', - 'המבצע החל', - 'זמן תחילת מבצע', - 'זמן סיום מבצע', - 'זמן עדכון אחרון', - 'יצרן', - 'ברקוד פריט', - 'סוג מבצע לפי תקנות שקיפות מחירים', - ]) - for promo in promotions: - promos_writer.writerows([get_promotion_row_in_csv(promo, item) for item in promo.items]) + +def dict_promos(promos: list, columns: list): + return {col: p for prom in promos for col, p in zip(columns, prom)} def get_promotion_row_in_csv(promo: Promotion, item: Item): @@ -232,7 +249,8 @@ def is_valid_promo(end_time: datetime, description) -> bool: return not_expired and not in_promo_ignore_list -def main_latest_promos(store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool) -> None: +def main_latest_promos( + store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool, file_type: str) -> None: """ This function writes to a CSV file the available promotions in a store with a given id sorted by their update date. @@ -245,7 +263,8 @@ def main_latest_promos(store_id: int, load_xml: bool, chain: SupermarketChain, l promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos) promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date - promo.end_date), reverse=True) - write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv') + ex_file = f'results/{repr(type(chain))}_promos_{store_id}{file_type}' + write_promotions_to_csv(promotions, ex_file) def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool): diff --git a/requirements.txt b/requirements.txt index 98bb28c..f4f1e04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,5 @@ lxml==4.6.1 requests==2.25.0 soupsieve==2.0.1 urllib3==1.26.2 +pandas<=1.1 +openpyxl<=3.0.1 \ No newline at end of file From 5caf3e495c9fbc21740d976b16038fe7885500f6 Mon Sep 17 00:00:00 2001 From: 1kamma Date: Sat, 17 Apr 2021 22:36:18 +0300 Subject: [PATCH 2/3] mistake in the requierments fixed --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index f4f1e04..e966d79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,5 @@ lxml==4.6.1 requests==2.25.0 soupsieve==2.0.1 urllib3==1.26.2 -pandas<=1.1 -openpyxl<=3.0.1 \ No newline at end of file +pandas>=1.1 +openpyxl>=3.0.1 \ No newline at end of file From 3ae8d0283604a406b8907bfa3016e5dffb76de2d Mon Sep 17 00:00:00 2001 From: 1kamma Date: Thu, 29 Apr 2021 17:55:21 +0300 Subject: [PATCH 3/3] correction, by comments and suggestions of Koren --- .gitignore | 1 + main.py | 8 ++++---- promotion.py | 36 ++++++++++++++++++++++-------------- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index bc3f4dc..b76abf6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ raw_files/ results/ all_deals.py unknown_items.csv +desktop.ini \ No newline at end of file diff --git a/main.py b/main.py index d18177d..a0cf307 100644 --- a/main.py +++ b/main.py @@ -78,19 +78,19 @@ if __name__ == '__main__': help='The name of the requested chain', choices=chain_dict.keys(), ) - parser.add_argument('--type', + parser.add_argument('--filetype', choices=("excel", "csv"), default='excel', - help="a switch flag to set set the output file to a CSV file", + help="The extension/type of the promotions output file", required=False, ) args = parser.parse_args() - file_type = '.xlsx' if not args.type or args.type == 'excel' else '.csv' + filetype = 'xlsx' if not args.filetype or args.filetype == 'excel' else 'csv' chain: SupermarketChain = chain_dict[args.chain] if args.promos: arg_store_id = int(args.promos[0]) - main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos, file_type=file_type) + main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos, filetype=filetype) elif args.price: get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1]) diff --git a/promotion.py b/promotion.py index e4c4eb6..caeed43 100644 --- a/promotion.py +++ b/promotion.py @@ -1,18 +1,17 @@ +import csv import re +import sys from datetime import datetime from enum import Enum from typing import Dict, List, Union -import csv -import sys -from item import Item -from utils import ( - create_items_dict, - get_float_from_tag, xml_file_gen, - create_bs_object, -) -from supermarket_chain import SupermarketChain + import pandas as pd +from item import Item +from supermarket_chain import SupermarketChain +from utils import (create_bs_object, create_items_dict, get_float_from_tag, + xml_file_gen) + INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1 PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי'] @@ -45,7 +44,7 @@ class Promotion: def __init__(self, content: str, start_date: datetime, end_date: datetime, update_date: datetime, items: List[Item], promo_func: callable, club_id: ClubID, promotion_id: float, max_qty: int, - allow_multiple_discounts: bool, reward_type: RewardType, type_file: str = "excel"): + allow_multiple_discounts: bool, reward_type: RewardType, filetype: str = "excel"): self.content: str = content self.start_date: datetime = start_date self.end_date: datetime = end_date @@ -57,7 +56,7 @@ class Promotion: self.allow_multiple_discounts = allow_multiple_discounts self.reward_type = reward_type self.promotion_id = promotion_id - self.type_file = type_file + self.filetype = filetype def repr_ltr(self): title = self.content @@ -112,7 +111,16 @@ def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) - dt.to_excel(xl, index=False, sheet_name="name") -def dict_promos(promos: list, columns: list): +def dict_promos(promos: list, columns: list) -> dict: + '''dict_promos creates a dictionary of every promo to its place in the xml + + :param promos: list of promos, separated by comma + :type promos: list + :param columns: columns of the exact place in the xml + :type columns: list + :return: dictionary of every promo and all the parameters its need + :rtype: dict + ''' return {col: p for prom in promos for col, p in zip(columns, prom)} @@ -250,7 +258,7 @@ def is_valid_promo(end_time: datetime, description) -> bool: def main_latest_promos( - store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool, file_type: str) -> None: + store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool, filetype: str) -> None: """ This function writes to a CSV file the available promotions in a store with a given id sorted by their update date. @@ -263,7 +271,7 @@ def main_latest_promos( promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos) promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date - promo.end_date), reverse=True) - ex_file = f'results/{repr(type(chain))}_promos_{store_id}{file_type}' + ex_file = f'results/{repr(type(chain))}_promos_{store_id}.{filetype}' write_promotions_to_csv(promotions, ex_file)