From 27b45a49997d61e978c57fb5cf33df2813ee1086 Mon Sep 17 00:00:00 2001
From: KorenLazar <koren507@cs.huji.ac.il>
Date: Tue, 1 Jun 2021 21:00:40 +0300
Subject: [PATCH] finished implementing exporting promotion to xlsx table and
 automatically opening the xlsx file

---
 chains/binaproject_web_client.py |   1 -
 chains/cerberus_web_client.py    |   1 -
 chains/mahsaneiHashook.py        |   1 -
 chains/shufersal.py              |   1 -
 main.py                          |  58 +++++++++++++---
 promotion.py                     | 111 ++++++++++++++++++++++---------
 store_utils.py                   |   6 +-
 utils.py                         |  23 ++++++-
 8 files changed, 152 insertions(+), 50 deletions(-)

diff --git a/chains/binaproject_web_client.py b/chains/binaproject_web_client.py
index 7cc72de..8f5ecb7 100644
--- a/chains/binaproject_web_client.py
+++ b/chains/binaproject_web_client.py
@@ -19,7 +19,6 @@ class BinaProjectWebClient:
         suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"]
                       and category.name.replace('s', '') in cur_json["FileNm"])
         down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix])
-        print(down_url)
         return down_url
 
     @property
diff --git a/chains/cerberus_web_client.py b/chains/cerberus_web_client.py
index 38dc275..ae8ef98 100644
--- a/chains/cerberus_web_client.py
+++ b/chains/cerberus_web_client.py
@@ -20,7 +20,6 @@ class CerberusWebClient:
         suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
 
         download_url: str = hostname + "/file/d/" + suffix
-        print(download_url)
         return download_url
 
     @property
diff --git a/chains/mahsaneiHashook.py b/chains/mahsaneiHashook.py
index b14f9ce..9cb7c5b 100644
--- a/chains/mahsaneiHashook.py
+++ b/chains/mahsaneiHashook.py
@@ -24,7 +24,6 @@ class MahsaneiHashook(SupermarketChain):
         suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
                                 and f'-{store_id:03d}-20' in value).attrs['href']
         down_url: str = prefix + suffix
-        print(down_url)
         return down_url
 
     @staticmethod
diff --git a/chains/shufersal.py b/chains/shufersal.py
index 0f6e8a8..1024ec8 100644
--- a/chains/shufersal.py
+++ b/chains/shufersal.py
@@ -14,5 +14,4 @@ class Shufersal(SupermarketChain):
         req_res: requests.Response = requests.get(url)
         soup: BeautifulSoup = BeautifulSoup(req_res.text, features='lxml')
         down_url: str = soup.find('a', text="לחץ להורדה")['href']
-        print(down_url)
         return down_url
diff --git a/main.py b/main.py
index 5f2e93c..3d788a7 100644
--- a/main.py
+++ b/main.py
@@ -1,9 +1,15 @@
+import os
+import sys
+import time
 from argparse import ArgumentParser
+from datetime import datetime
 from pathlib import Path
+import logging
 
-from promotion import main_latest_promos, get_promos_by_name
-from store_utils import get_store_id
-from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
+from promotion import main_latest_promos, log_promos_by_name
+from store_utils import log_stores_ids
+from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, VALID_PROMOTION_FILE_EXTENSIONS, log_products_prices, \
+    valid_promotion_output_file
 from supermarket_chain import SupermarketChain
 from chains import (
     bareket,
@@ -58,9 +64,6 @@ if __name__ == '__main__':
                         metavar='city',
                         nargs=1,
                         )
-    # parser.add_argument('--all_deals',
-    #                     action='store_true',
-    #                     )
     parser.add_argument('--load_prices',
                         help='boolean flag representing whether to load an existing price XML file',
                         action='store_true',
@@ -78,21 +81,56 @@ if __name__ == '__main__':
                         help='The name of the requested chain',
                         choices=chain_dict.keys(),
                         )
+    parser.add_argument('--file_extension',
+                        help='The extension of the promotions output file',
+                        choices=VALID_PROMOTION_FILE_EXTENSIONS,
+                        default='.xlsx',
+                        )
+    parser.add_argument('--output_filename',
+                        help='The path to write the promotions table to',
+                        type=valid_promotion_output_file,
+                        )
+    parser.add_argument('--only_export_to_file',
+                        help='Boolean flag representing whether only export or also open the promotion output file',
+                        action='store_true',
+                        )
+    parser.add_argument('--debug',
+                        help='Boolean flag representing whether to run in debug mode',
+                        action='store_true',
+                        )
     args = parser.parse_args()
+    if args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
 
     chain: SupermarketChain = chain_dict[args.chain]
+
     if args.promos:
         arg_store_id = int(args.promos[0])
-        main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, chain=chain, load_promos=args.load_promos)
+
+        if args.output_filename:
+            output_filename = args.output_filename
+            directory = os.path.dirname(output_filename)
+            Path(directory).mkdir(parents=True, exist_ok=True)
+        else:
+            Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
+            output_filename = f'{RESULTS_DIRNAME}/{repr(type(chain))}_promos_{arg_store_id}{args.file_extension}'
+
+        main_latest_promos(store_id=arg_store_id, output_filename=output_filename, chain=chain,
+                           load_promos=args.load_promos, load_xml=args.load_prices)
+        if not args.only_export_to_file:
+            os.startfile(Path(output_filename))
+        logging.debug(f'Process finished at: {datetime.now()}')
 
     elif args.price:
-        get_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1])
+        log_products_prices(chain, store_id=args.price[0], load_xml=args.load_prices, product_name=args.price[1])
 
     elif args.find_store_id:
         arg_city = args.find_store_id[0]
-        get_store_id(city=arg_city, load_xml=args.load_stores, chain=chain)
+        log_stores_ids(city=arg_city, load_xml=args.load_stores, chain=chain)
 
     elif args.find_promos_by_name:
         arg_store_id = int(args.find_promos_by_name[0])
-        get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
+        log_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
                            load_prices=args.load_prices, load_promos=args.load_promos)
diff --git a/promotion.py b/promotion.py
index 1546507..3140dc4 100644
--- a/promotion.py
+++ b/promotion.py
@@ -1,13 +1,18 @@
+import logging
+import os
 import re
 from datetime import datetime
 from enum import Enum
+from pathlib import Path
 from typing import Dict, List, Union
 import csv
 import sys
+import pandas as pd
+import xlsxwriter
 from item import Item
 from utils import (
     create_items_dict,
-    get_float_from_tag, xml_file_gen,
+    get_float_from_tag, log_message_and_time_if_debug, xml_file_gen,
     create_bs_object,
 )
 from supermarket_chain import SupermarketChain
@@ -15,6 +20,23 @@ from supermarket_chain import SupermarketChain
 INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1
 
 PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי']
+PROMOTIONS_TABLE_HEADERS = [
+    'תיאור מבצע',
+    'הפריט המשתתף במבצע',
+    'מחיר לפני מבצע',
+    'מחיר אחרי מבצע',
+    'אחוז הנחה',
+    'סוג מבצע',
+    'כמות מקס',
+    'כפל הנחות',
+    'המבצע החל',
+    'זמן תחילת מבצע',
+    'זמן סיום מבצע',
+    'זמן עדכון אחרון',
+    'יצרן',
+    'ברקוד פריט',
+    'סוג מבצע לפי תקנות שקיפות מחירים',
+]
 
 
 class ClubID(Enum):
@@ -69,42 +91,61 @@ class Promotion:
 
 def write_promotions_to_csv(promotions: List[Promotion], output_filename: str) -> None:
     """
-    This function writes a given list of promotions to a given output file in a CSV format.
+    This function writes a promotions table to a given output file, as CSV or XLSX according to its extension.
 
     :param promotions: A given list of promotions
     :param output_filename: A given file to write to
     """
-    encoding_file = "utf_8_sig" if sys.platform == "win32" else "utf_8"
+    log_message_and_time_if_debug('Writing promotions to output file')
+    rows = [get_promotion_row_in_csv(promo, item) for promo in promotions for item in promo.items]
+    if output_filename.endswith('.csv'):
+        encoding_file = "utf_8_sig" if sys.platform == "win32" else "utf_8"  # adds a BOM on Windows only
+        with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out:
+            promos_writer = csv.writer(f_out)
+            promos_writer.writerow(PROMOTIONS_TABLE_HEADERS)
+            promos_writer.writerows(rows)
 
-    with open(output_filename, mode='w', newline='', encoding=encoding_file) as f_out:
-        promos_writer = csv.writer(f_out)
-        promos_writer.writerow([
-            'תיאור מבצע',
-            'הפריט המשתתף במבצע',
-            'מחיר לפני מבצע',
-            'מחיר אחרי מבצע',
-            'אחוז הנחה',
-            'סוג מבצע',
-            'כמות מקס',
-            'כפל הנחות',
-            'המבצע החל',
-            'זמן תחילת מבצע',
-            'זמן סיום מבצע',
-            'זמן עדכון אחרון',
-            'יצרן',
-            'ברקוד פריט',
-            'סוג מבצע לפי תקנות שקיפות מחירים',
-        ])
-        for promo in promotions:
-            promos_writer.writerows([get_promotion_row_in_csv(promo, item) for item in promo.items])
+    elif output_filename.endswith('.xlsx'):
+        df = pd.DataFrame(rows, columns=PROMOTIONS_TABLE_HEADERS)
+        workbook = xlsxwriter.Workbook(output_filename)
+        worksheet1 = workbook.add_worksheet()
+        worksheet1.right_to_left()  # the table headers are Hebrew
+        date_time_format = workbook.add_format({'num_format': 'm/d/yy h:mm;@'})
+        number_format = workbook.add_format({'num_format': '0.00'})
+        percentage_format = workbook.add_format({'num_format': '0.00%'})
+        worksheet1.set_column('A:A', width=35)
+        worksheet1.set_column('B:B', width=25)
+        worksheet1.set_column('C:D', cell_format=number_format)  # prices before / after the promotion
+        worksheet1.set_column('E:E', cell_format=percentage_format)  # discount rate
+        worksheet1.set_column('J:L', width=15, cell_format=date_time_format)  # start / end / last-update times
+        worksheet1.add_table(
+            first_row=0,
+            first_col=0,
+            last_row=len(df),  # row 0 is the header row, so data fills rows 1..len(df)
+            last_col=len(df.columns) - 1,
+            options={
+                "columns": [{"header": i} for i in PROMOTIONS_TABLE_HEADERS],
+                "data": df.values.tolist(),
+                'style': 'Table Style Medium 11',
+            }, )
+        workbook.close()  # xlsxwriter only writes the file to disk on close()
+
+    else:
+        raise ValueError(f"The given output file has an invalid extension:\n{output_filename}")
 
 
 def get_promotion_row_in_csv(promo: Promotion, item: Item):
+    """
+    This function returns a row in the promotions table.
+    :param promo:
+    :param item:
+    :return:
+    """
     return [promo.content,
             item.name,
             item.price,
-            f'{promo.promo_func(item):.3f}',
-            f'{(item.price - promo.promo_func(item)) / item.price:.3%}',
+            promo.promo_func(item),
+            (item.price - promo.promo_func(item)) / item.price,
             promo.club_id.name.replace('_', ' '),
             promo.max_qty,
             promo.allow_multiple_discounts,
@@ -127,10 +168,15 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
     :param load_prices: A boolean representing whether to load an existing xml or load an already saved one
     :return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available
     """
+    log_message_and_time_if_debug('Importing prices XML file')
     items_dict: Dict[str, Item] = create_items_dict(chain, load_prices, store_id)
+
     xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name)
+
+    log_message_and_time_if_debug('Importing promotions XML file')
     bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull)
 
+    log_message_and_time_if_debug('Creating promotions objects')
     promo_objs = list()
     for promo in bs_promos.find_all(chain.promotion_tag_name):
         promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE))
@@ -141,7 +187,6 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
         promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
         if promo_inst:
             promo_objs.append(promo_inst)
-
     return promo_objs
 
 
@@ -232,23 +277,25 @@ def is_valid_promo(end_time: datetime, description) -> bool:
     return not_expired and not in_promo_ignore_list
 
 
-def main_latest_promos(store_id: int, load_xml: bool, chain: SupermarketChain, load_promos: bool) -> None:
+def main_latest_promos(store_id: int, output_filename: str, chain: SupermarketChain, load_promos: bool,
+                       load_xml: bool) -> None:
     """
-    This function writes to a CSV file the available promotions in a store with a given id sorted by their update date.
+    This function writes to a file the available promotions in a store with a given id, most recently updated first.
 
     :param chain: The name of the requested supermarket chain
     :param store_id: A given store id
     :param load_xml: A boolean representing whether to load an existing prices xml file
     :param load_promos: A boolean representing whether to load an existing promos xml file
+    :param output_filename: Path of the output file to write the promotions table to (.csv or .xlsx)
     """
 
     promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos)
     promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
                                        promo.end_date), reverse=True)
-    write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv')
+    write_promotions_to_csv(promotions, output_filename)  # output format is chosen by the file extension
 
 
-def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):
+def log_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):
     """
     This function prints all promotions in a given chain and store_id containing a given promo_name.
 
@@ -261,7 +308,7 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str,
     promotions: List[Promotion] = get_available_promos(chain, store_id, load_prices, load_promos)
     for promo in promotions:
         if promo_name in promo.content:
-            print(promo.repr_ltr())
+            logging.info(promo.repr_ltr())
 
 
 # TODO: change to returning list of Items
diff --git a/store_utils.py b/store_utils.py
index b972ffa..4b49159 100644
--- a/store_utils.py
+++ b/store_utils.py
@@ -1,9 +1,11 @@
+import logging
+
 from utils import xml_file_gen, create_bs_object
 from supermarket_chain import SupermarketChain
 from bs4 import BeautifulSoup
 
 
-def print_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
+def log_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
     """
     This function prints the stores IDs of stores in a given city.
     The city must match its spelling in Shufersal's website (hence it should be in Hebrew).
@@ -17,7 +19,7 @@ def print_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
 
     for store in bs_stores.find_all("STORE"):
         if store.find("CITY").text == city:
-            print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
+            logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
 
 
 def get_all_deals(chain):
diff --git a/utils.py b/utils.py
index a2f788d..7d2d997 100644
--- a/utils.py
+++ b/utils.py
@@ -1,6 +1,9 @@
 import gzip
 import io
+import logging
 import zipfile
+from argparse import ArgumentTypeError
+from datetime import datetime
 from typing import AnyStr, Dict
 import requests
 from bs4 import BeautifulSoup
@@ -11,6 +14,7 @@ from supermarket_chain import SupermarketChain
 
 RESULTS_DIRNAME = "results"
 RAW_FILES_DIRNAME = "raw_files"
+VALID_PROMOTION_FILE_EXTENSIONS = [".csv", ".xlsx"]
 
 
 def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
@@ -97,7 +101,7 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[
     return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
 
 
-def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
+def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
     """
     This function prints the products in a given store which contains a given product_name.
 
@@ -111,7 +115,7 @@ def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
     prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text]
     prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text))
     for prod in prods:
-        print(
+        logging.info(
             (
                 prod.find('ItemName').text[::-1],
                 prod.find('ManufacturerName').text[::-1],
@@ -123,3 +127,18 @@ def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
 def get_float_from_tag(tag, int_tag) -> int:
     content = tag.find(int_tag)
     return float(content.text) if content else 0
+
+
+def is_valid_promotion_output_file(output_file: str):  # True iff the name ends with a supported extension
+    return output_file.endswith(tuple(VALID_PROMOTION_FILE_EXTENSIONS))
+
+
+def valid_promotion_output_file(output_file: str) -> str:  # argparse `type=` validator for --output_filename
+    if not is_valid_promotion_output_file(output_file):
+        raise ArgumentTypeError(f"Given output file has an invalid extension:\n{output_file}")
+    return output_file
+
+
+def log_message_and_time_if_debug(msg: str) -> None:  # message goes out at INFO, the timestamp only at DEBUG
+    logging.info(msg)
+    logging.debug(datetime.now())