Merge pull request #5 from korenLazar/export-promotions-to-xlsx-table

Export promotions to xlsx table
This commit is contained in:
korenLazar
2021-08-16 12:51:48 +03:00
committed by GitHub
7 changed files with 78 additions and 75 deletions

View File

@@ -3,11 +3,12 @@ class Item:
A class representing a product in some supermarket.
"""
def __init__(self, name: str, price: float, manufacturer: str, code: str):
def __init__(self, name: str, price: float, price_by_measure: float, code: str, manufacturer: str):
self.name: str = name
self.price: float = price
self.price_by_measure = price_by_measure
self.manufacturer: str = manufacturer
self.code: str = code
def __repr__(self):
return str((self.name, self.price, self.manufacturer, self.code))
return f"\nשם: {self.name}\nמחיר: {self.price}\nיצרן: {self.manufacturer}\nקוד: {self.code}\n"

View File

@@ -102,7 +102,7 @@ if __name__ == '__main__':
if args.debug:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s')
chain: SupermarketChain = chain_dict[args.chain]

View File

@@ -1,22 +1,25 @@
import logging
import os
import re
from datetime import datetime
from enum import Enum
from pathlib import Path
from os import path
from typing import Dict, List, Union
from bs4.element import Tag
import csv
import sys
import pandas as pd
import xlsxwriter
from item import Item
from utils import (
create_items_dict,
get_float_from_tag, log_message_and_time_if_debug, xml_file_gen,
create_bs_object,
create_bs_object, create_items_dict,
get_float_from_tag,
log_message_and_time_if_debug, xml_file_gen,
)
from supermarket_chain import SupermarketChain
XML_FILES_PROMOTIONS_CATEGORIES = [SupermarketChain.XMLFilesCategory.PromosFull,
SupermarketChain.XMLFilesCategory.Promos]
INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1
PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי']
@@ -169,16 +172,13 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
:return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available
"""
log_message_and_time_if_debug('Importing prices XML file')
items_dict: Dict[str, Item] = create_items_dict(chain, load_prices, store_id)
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name)
items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_prices)
log_message_and_time_if_debug('Importing promotions XML file')
bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull)
promo_tags = get_all_promos_tags(chain, store_id, load_promos)
log_message_and_time_if_debug('Creating promotions objects')
promo_objs = list()
for promo in bs_promos.find_all(chain.promotion_tag_name):
for promo in promo_tags:
promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE))
if promo_objs and promo_objs[-1].promotion_id == promotion_id:
promo_objs[-1].items.extend(chain.get_items(promo, items_dict))
@@ -187,6 +187,7 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
if promo_inst:
promo_objs.append(promo_inst)
return promo_objs
@@ -288,7 +289,6 @@ def main_latest_promos(store_id: int, output_filename, chain: SupermarketChain,
:param load_promos: A boolean representing whether to load an existing promos xml file
:param output_filename: A path to write the promotions table
"""
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos)
promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
promo.end_date), reverse=True)
@@ -316,12 +316,24 @@ def get_all_null_items_in_promos(chain, store_id) -> List[str]:
"""
This function finds all items appearing in the chain's promotions file but not in the chain's prices file.
"""
items_dict: Dict[str, Item] = create_items_dict(chain, True, store_id)
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name)
bs_promos = create_bs_object(xml_path, chain, store_id, True, chain.XMLFilesCategory.PromosFull)
items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_xml=True)
promo_tags = get_all_promos_tags(chain, store_id, load_xml=True)
return [item for promo_tag in promo_tags for item in chain.get_null_items(promo_tag, items_dict)]
null_items = list()
for promo in bs_promos.find_all(chain.promotion_tag_name):
null_items.extend(chain.get_null_items(promo, items_dict))
return null_items
def get_all_promos_tags(chain: SupermarketChain, store_id: int, load_xml: bool) -> List[Tag]:
    """
    Collect the promotion tags of a given store from every promotions XML category.
    The categories listed in XML_FILES_PROMOTIONS_CATEGORIES are all read, in that order.

    :param chain: A given supermarket chain
    :param store_id: A given store ID
    :param load_xml: A boolean, forwarded to create_bs_object, representing whether to try loading the promotions
                     from an existing XML file
    :return: A list holding the promotion tags found in all of the categories
    """
    # Build every BeautifulSoup object first; the tags are searched only after all of them exist.
    soups = [
        create_bs_object(chain, store_id, category, load_xml, xml_file_gen(chain, store_id, category.name))
        for category in XML_FILES_PROMOTIONS_CATEGORIES
    ]

    promo_tags = []
    for soup in soups:
        promo_tags.extend(soup.find_all(chain.promotion_tag_name))
    return promo_tags

View File

@@ -15,16 +15,8 @@ def log_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
:param city: A string representing the city of the requested store.
"""
xml_path: str = xml_file_gen(chain, -1, chain.XMLFilesCategory.Stores.name)
bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, load_xml, chain.XMLFilesCategory.Stores)
bs_stores: BeautifulSoup = create_bs_object(chain, -1, chain.XMLFilesCategory.Stores, load_xml, xml_path)
for store in bs_stores.find_all("STORE"):
if store.find("CITY").text == city:
logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
def get_all_deals(chain):
    """
    Return the IDs of the chain's stores whose SUBCHAINID equals "2".

    The stores XML is obtained through create_bs_object with store ID -1 and load_xml set to True.
    NOTE(review): presumably sub-chain "2" is the "deal" sub-chain this function is named after - confirm.

    :param chain: A given supermarket chain
    :return: A list of the matching store IDs, as integers
    """
    stores_category = chain.XMLFilesCategory.Stores
    stores_xml_path = xml_file_gen(chain, -1, stores_category.name)
    stores_soup = create_bs_object(stores_xml_path, chain, -1, True, stores_category)

    store_ids = []
    for store_tag in stores_soup.find_all("STORE"):
        if store_tag.find("SUBCHAINID").text != "2":
            continue
        store_ids.append(int(store_tag.find("STOREID").text))
    return store_ids
logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))

View File

@@ -120,9 +120,6 @@ class SupermarketChain(object, metaclass=Meta):
"""
This function returns an Item instance containing important information about a given supermarket's product.
"""
return Item(
name=item.find(re.compile(r'ItemN[a]?m[e]?')).text,
price=float(item.find('ItemPrice').text),
manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text,
code=item.find('ItemCode').text
)
return Item(name=item.find(re.compile(r'ItemN[a]?m[e]?')).text, price=float(item.find('ItemPrice').text),
price_by_measure=float(item.find('UnitOfMeasurePrice').text), code=item.find('ItemCode').text,
manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text)

View File

@@ -19,7 +19,7 @@ def test_shufersal_promo_type_1():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('פטה פיראוס 20%', 113, '', '')
item = Item('פטה פיראוס 20%', 113, 1, '', '')
assert promo_func(item) == 100
@@ -38,7 +38,7 @@ def test_shufersal_promo_type_2():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('חגיגת גרנולה פ.יבשים500ג', 26.9, '', '')
item = Item('חגיגת גרנולה פ.יבשים500ג', 26.9, 1, '', '')
assert promo_func(item) == 21.52
@@ -57,7 +57,7 @@ def test_shufersal_promo_type_6_1():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('פסטרמה מקסיקנית במשקל', 89, '', '')
item = Item('פסטרמה מקסיקנית במשקל', 89, 1, '', '')
assert promo_func(item) == 89
@@ -76,7 +76,7 @@ def test_shufersal_promo_type_6_2():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('מכונת לוואצה ג\'ולי אדומה', 449, '', '')
item = Item('מכונת לוואצה ג\'ולי אדומה', 449, 1, '', '')
assert promo_func(item) == 449
@@ -95,7 +95,7 @@ def test_shufersal_promo_type_7_1():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('פינצטה 2011 שחורה/כסופה', 14.9, '', '')
item = Item('פינצטה 2011 שחורה/כסופה', 14.9, 1, '', '')
assert promo_func(item) == 7.45
@@ -114,7 +114,7 @@ def test_shufersal_promo_type_7_2():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('יוגורט עיזים 500 גרם', 12.9, '', '')
item = Item('יוגורט עיזים 500 גרם', 12.9, 1, '', '')
assert promo_func(item) == 12.9 * 0.75
@@ -133,7 +133,7 @@ def test_shufersal_promo_type_9_1():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('זיתים מבוקעים פיקנטי540ג', 9.3, '', '')
item = Item('זיתים מבוקעים פיקנטי540ג', 9.3, 1, '', '')
assert promo_func(item) == 9.3 * 0.75
@@ -152,7 +152,7 @@ def test_shufersal_promo_type_9_2():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('שעועית לבנה שופרסל 800גר', 18.9, '', '')
item = Item('שעועית לבנה שופרסל 800גר', 18.9, 1, '', '')
assert promo_func(item) == (18.9 + 10) / 2
@@ -171,7 +171,7 @@ def test_shufersal_promo_type_9_3():
discount_rate=discount_rate,
discounted_price=discounted_price,
)
item = Item('גומיות שחורות 12 יח', 9.9, '', '')
item = Item('גומיות שחורות 12 יח', 9.9, 1, '', '')
assert promo_func(item) == 9.9 * 0.75
@@ -190,7 +190,7 @@ def test_shufersal_promo_type_10_1():
discount_rate=discount_rate,
discounted_price=discounted_price
)
item = Item('טופו טעם טבעי 300 גרם', 10.9, '', '7296073345763')
item = Item('טופו טעם טבעי 300 גרם', 10.9, 1, '7296073345763', '')
assert promo_func(item) == 5
@@ -209,7 +209,7 @@ def test_shufersal_promo_type_10_2():
discount_rate=discount_rate,
discounted_price=discounted_price
)
item = Item('טופו טעם טבעי 300 גרם', 10.9, 'כפרי בריא משק ויילר', '7296073345763')
item = Item('טופו טעם טבעי 300 גרם', 10.9, 1, '7296073345763', 'כפרי בריא משק ויילר')
assert promo_func(item) == 7
@@ -225,7 +225,7 @@ def assert_discount(discounted_price, item_barcode, item_manufacturer, item_name
discount_rate=discount_rate,
discounted_price=discounted_price
)
item = Item(item_name, orig_price, item_manufacturer, item_barcode)
item = Item(item_name, orig_price, 1, item_barcode, item_manufacturer)
assert abs(promo_func(item) - price_after_discount) <= 1e-5, promo_description

View File

@@ -4,9 +4,10 @@ import logging
import zipfile
from argparse import ArgumentTypeError
from datetime import datetime
from typing import AnyStr, Dict
from typing import AnyStr, Dict, List
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
from os import path
from item import Item
@@ -31,8 +32,8 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml")
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
category: SupermarketChain.XMLFilesCategory) -> BeautifulSoup:
def create_bs_object(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory,
load_xml: bool, xml_path: str) -> BeautifulSoup:
"""
This function creates a BeautifulSoup (BS) object according to the given parameters.
In case the given load_xml is True and the XML file exists, the function creates the BS object from the given
@@ -47,12 +48,12 @@ def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load
:return: A BeautifulSoup object with xml content.
"""
if load_xml and path.isfile(xml_path):
return create_bs_object_from_xml(xml_path)
return create_bs_object_from_link(xml_path, chain, category, store_id)
return get_bs_object_from_xml(xml_path)
return get_bs_object_from_link(chain, store_id, category, xml_path)
def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: SupermarketChain.XMLFilesCategory,
store_id: int) -> BeautifulSoup:
def get_bs_object_from_link(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory,
xml_path: str) -> BeautifulSoup:
"""
This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API.
@@ -63,7 +64,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
:return: A BeautifulSoup object with xml content.
"""
session = requests.Session()
download_url: str = chain.get_download_url(store_id, category, session)
download_url = chain.get_download_url(store_id, category, session)
response_content = session.get(download_url).content
try:
xml_content: AnyStr = gzip.decompress(response_content)
@@ -77,7 +78,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
return BeautifulSoup(xml_content, features='xml')
def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
def get_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
"""
This function creates a BeautifulSoup (BS) object from a given XML file.
@@ -88,17 +89,23 @@ def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
return BeautifulSoup(f_in, features='xml')
def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[str, Item]:
def create_items_dict(chain: SupermarketChain, store_id: int, load_xml) -> Dict[str, Item]:
"""
This function creates a dictionary where every key is an item code and its value is its corresponding Item instance.
We take both the full and the non-full prices files, and assume that the non-full one is more up to date (so its entries overwrite those of the full file).
:param chain: A given supermarket chain
:param load_xml: A boolean representing whether to load an existing prices xml file
:param store_id: A given store id
"""
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name)
bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull)
return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
items_dict = dict()
for category in [chain.XMLFilesCategory.PricesFull, chain.XMLFilesCategory.Prices]:
xml_path: str = xml_file_gen(chain, store_id, category.name)
bs_prices: BeautifulSoup = create_bs_object(chain, store_id, category, load_xml, xml_path)
items_tags = bs_prices.find_all(chain.item_tag_name)
items_dict.update({item.find('ItemCode').text: chain.get_item_info(item) for item in items_tags})
return items_dict
def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
@@ -110,18 +117,12 @@ def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
:param product_name: A given product name
:param load_xml: A boolean representing whether to load an existing (already saved) xml file instead of downloading it
"""
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name)
bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull)
prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text]
prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text))
for prod in prods:
logging.info(
(
prod.find('ItemName').text[::-1],
prod.find('ManufacturerName').text[::-1],
prod.find('ItemPrice').text
)
)
items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_xml)
products_by_name = [item for item in items_dict.values() if product_name in item.name]
products_by_name_sorted_by_price = sorted(products_by_name, key=lambda item: item.price_by_measure)
for prod in products_by_name_sorted_by_price:
logging.info(prod)
def get_float_from_tag(tag, int_tag) -> int: