Merge pull request #5 from korenLazar/export-promotions-to-xlsx-table
Export promotions to xlsx table
This commit is contained in:
5
item.py
5
item.py
@@ -3,11 +3,12 @@ class Item:
|
||||
A class representing a product in some supermarket.
|
||||
"""
|
||||
|
||||
def __init__(self, name: str, price: float, manufacturer: str, code: str):
|
||||
def __init__(self, name: str, price: float, price_by_measure: float, code: str, manufacturer: str):
|
||||
self.name: str = name
|
||||
self.price: float = price
|
||||
self.price_by_measure = price_by_measure
|
||||
self.manufacturer: str = manufacturer
|
||||
self.code: str = code
|
||||
|
||||
def __repr__(self):
|
||||
return str((self.name, self.price, self.manufacturer, self.code))
|
||||
return f"\nשם: {self.name}\nמחיר: {self.price}\nיצרן: {self.manufacturer}\nקוד: {self.code}\n"
|
||||
|
2
main.py
2
main.py
@@ -102,7 +102,7 @@ if __name__ == '__main__':
|
||||
if args.debug:
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
else:
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(message)s')
|
||||
|
||||
chain: SupermarketChain = chain_dict[args.chain]
|
||||
|
||||
|
50
promotion.py
50
promotion.py
@@ -1,22 +1,25 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from os import path
|
||||
from typing import Dict, List, Union
|
||||
from bs4.element import Tag
|
||||
import csv
|
||||
import sys
|
||||
import pandas as pd
|
||||
import xlsxwriter
|
||||
from item import Item
|
||||
from utils import (
|
||||
create_items_dict,
|
||||
get_float_from_tag, log_message_and_time_if_debug, xml_file_gen,
|
||||
create_bs_object,
|
||||
create_bs_object, create_items_dict,
|
||||
get_float_from_tag,
|
||||
log_message_and_time_if_debug, xml_file_gen,
|
||||
)
|
||||
from supermarket_chain import SupermarketChain
|
||||
|
||||
XML_FILES_PROMOTIONS_CATEGORIES = [SupermarketChain.XMLFilesCategory.PromosFull,
|
||||
SupermarketChain.XMLFilesCategory.Promos]
|
||||
|
||||
INVALID_OR_UNKNOWN_PROMOTION_FUNCTION = -1
|
||||
|
||||
PRODUCTS_TO_IGNORE = ['סירים', 'מגבות', 'מגבת', 'מפות', 'פסטיגל', 'ביגי']
|
||||
@@ -169,16 +172,13 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
|
||||
:return: Promotions that are not included in PRODUCTS_TO_IGNORE and are currently available
|
||||
"""
|
||||
log_message_and_time_if_debug('Importing prices XML file')
|
||||
items_dict: Dict[str, Item] = create_items_dict(chain, load_prices, store_id)
|
||||
|
||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name)
|
||||
|
||||
items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_prices)
|
||||
log_message_and_time_if_debug('Importing promotions XML file')
|
||||
bs_promos = create_bs_object(xml_path, chain, store_id, load_promos, chain.XMLFilesCategory.PromosFull)
|
||||
promo_tags = get_all_promos_tags(chain, store_id, load_promos)
|
||||
|
||||
log_message_and_time_if_debug('Creating promotions objects')
|
||||
promo_objs = list()
|
||||
for promo in bs_promos.find_all(chain.promotion_tag_name):
|
||||
for promo in promo_tags:
|
||||
promotion_id = promo.find(re.compile('PromotionId', re.IGNORECASE))
|
||||
if promo_objs and promo_objs[-1].promotion_id == promotion_id:
|
||||
promo_objs[-1].items.extend(chain.get_items(promo, items_dict))
|
||||
@@ -187,6 +187,7 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
|
||||
promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
|
||||
if promo_inst:
|
||||
promo_objs.append(promo_inst)
|
||||
|
||||
return promo_objs
|
||||
|
||||
|
||||
@@ -288,7 +289,6 @@ def main_latest_promos(store_id: int, output_filename, chain: SupermarketChain,
|
||||
:param load_promos: A boolean representing whether to load an existing promos xml file
|
||||
:param output_filename: A path to write the promotions table
|
||||
"""
|
||||
|
||||
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, load_promos)
|
||||
promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
|
||||
promo.end_date), reverse=True)
|
||||
@@ -316,12 +316,24 @@ def get_all_null_items_in_promos(chain, store_id) -> List[str]:
|
||||
"""
|
||||
This function finds all items appearing in the chain's promotions file but not in the chain's prices file.
|
||||
"""
|
||||
items_dict: Dict[str, Item] = create_items_dict(chain, True, store_id)
|
||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name)
|
||||
bs_promos = create_bs_object(xml_path, chain, store_id, True, chain.XMLFilesCategory.PromosFull)
|
||||
items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_xml=True)
|
||||
promo_tags = get_all_promos_tags(chain, store_id, load_xml=True)
|
||||
return [item for promo_tag in promo_tags for item in chain.get_null_items(promo_tag, items_dict)]
|
||||
|
||||
null_items = list()
|
||||
for promo in bs_promos.find_all(chain.promotion_tag_name):
|
||||
null_items.extend(chain.get_null_items(promo, items_dict))
|
||||
|
||||
return null_items
|
||||
def get_all_promos_tags(chain: SupermarketChain, store_id: int, load_xml: bool) -> List[Tag]:
|
||||
"""
|
||||
This function gets all the promotions tags for a given store in a given chain.
|
||||
It includes both the full and the non-full promotions files.
|
||||
|
||||
:param chain: A given supermarket chain
|
||||
:param store_id: A given store ID
|
||||
:param load_xml: A boolean representing whether to try loading the promotions from an existing XML file
|
||||
:return: A list of promotions tags
|
||||
"""
|
||||
bs_objects = list()
|
||||
for category in XML_FILES_PROMOTIONS_CATEGORIES:
|
||||
xml_path = xml_file_gen(chain, store_id, category.name)
|
||||
bs_objects.append(create_bs_object(chain, store_id, category, load_xml, xml_path))
|
||||
|
||||
return [promo for bs_obj in bs_objects for promo in bs_obj.find_all(chain.promotion_tag_name)]
|
||||
|
@@ -15,16 +15,8 @@ def log_stores_ids(city: str, load_xml: bool, chain: SupermarketChain):
|
||||
:param city: A string representing the city of the requested store.
|
||||
"""
|
||||
xml_path: str = xml_file_gen(chain, -1, chain.XMLFilesCategory.Stores.name)
|
||||
bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, load_xml, chain.XMLFilesCategory.Stores)
|
||||
bs_stores: BeautifulSoup = create_bs_object(chain, -1, chain.XMLFilesCategory.Stores, load_xml, xml_path)
|
||||
|
||||
for store in bs_stores.find_all("STORE"):
|
||||
if store.find("CITY").text == city:
|
||||
logging.info((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
|
||||
|
||||
|
||||
def get_all_deals(chain):
|
||||
xml_path: str = xml_file_gen(chain, -1, chain.XMLFilesCategory.Stores.name)
|
||||
bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, True, chain.XMLFilesCategory.Stores)
|
||||
|
||||
return [int(store.find("STOREID").text) for store in bs_stores.find_all("STORE") if store.find("SUBCHAINID").text
|
||||
== "2"]
|
||||
|
@@ -120,9 +120,6 @@ class SupermarketChain(object, metaclass=Meta):
|
||||
"""
|
||||
This function returns an Item instance containing important information about a given supermarket's product.
|
||||
"""
|
||||
return Item(
|
||||
name=item.find(re.compile(r'ItemN[a]?m[e]?')).text,
|
||||
price=float(item.find('ItemPrice').text),
|
||||
manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text,
|
||||
code=item.find('ItemCode').text
|
||||
)
|
||||
return Item(name=item.find(re.compile(r'ItemN[a]?m[e]?')).text, price=float(item.find('ItemPrice').text),
|
||||
price_by_measure=float(item.find('UnitOfMeasurePrice').text), code=item.find('ItemCode').text,
|
||||
manufacturer=item.find(re.compile(r'Manufacture[r]?Name')).text)
|
||||
|
@@ -19,7 +19,7 @@ def test_shufersal_promo_type_1():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('פטה פיראוס 20%', 113, '', '')
|
||||
item = Item('פטה פיראוס 20%', 113, 1, '', '')
|
||||
assert promo_func(item) == 100
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ def test_shufersal_promo_type_2():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('חגיגת גרנולה פ.יבשים500ג', 26.9, '', '')
|
||||
item = Item('חגיגת גרנולה פ.יבשים500ג', 26.9, 1, '', '')
|
||||
assert promo_func(item) == 21.52
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ def test_shufersal_promo_type_6_1():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('פסטרמה מקסיקנית במשקל', 89, '', '')
|
||||
item = Item('פסטרמה מקסיקנית במשקל', 89, 1, '', '')
|
||||
assert promo_func(item) == 89
|
||||
|
||||
|
||||
@@ -76,7 +76,7 @@ def test_shufersal_promo_type_6_2():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('מכונת לוואצה ג\'ולי אדומה', 449, '', '')
|
||||
item = Item('מכונת לוואצה ג\'ולי אדומה', 449, 1, '', '')
|
||||
assert promo_func(item) == 449
|
||||
|
||||
|
||||
@@ -95,7 +95,7 @@ def test_shufersal_promo_type_7_1():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('פינצטה 2011 שחורה/כסופה', 14.9, '', '')
|
||||
item = Item('פינצטה 2011 שחורה/כסופה', 14.9, 1, '', '')
|
||||
assert promo_func(item) == 7.45
|
||||
|
||||
|
||||
@@ -114,7 +114,7 @@ def test_shufersal_promo_type_7_2():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('יוגורט עיזים 500 גרם', 12.9, '', '')
|
||||
item = Item('יוגורט עיזים 500 גרם', 12.9, 1, '', '')
|
||||
assert promo_func(item) == 12.9 * 0.75
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@ def test_shufersal_promo_type_9_1():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('זיתים מבוקעים פיקנטי540ג', 9.3, '', '')
|
||||
item = Item('זיתים מבוקעים פיקנטי540ג', 9.3, 1, '', '')
|
||||
assert promo_func(item) == 9.3 * 0.75
|
||||
|
||||
|
||||
@@ -152,7 +152,7 @@ def test_shufersal_promo_type_9_2():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('שעועית לבנה שופרסל 800גר', 18.9, '', '')
|
||||
item = Item('שעועית לבנה שופרסל 800גר', 18.9, 1, '', '')
|
||||
assert promo_func(item) == (18.9 + 10) / 2
|
||||
|
||||
|
||||
@@ -171,7 +171,7 @@ def test_shufersal_promo_type_9_3():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price,
|
||||
)
|
||||
item = Item('גומיות שחורות 12 יח', 9.9, '', '')
|
||||
item = Item('גומיות שחורות 12 יח', 9.9, 1, '', '')
|
||||
assert promo_func(item) == 9.9 * 0.75
|
||||
|
||||
|
||||
@@ -190,7 +190,7 @@ def test_shufersal_promo_type_10_1():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price
|
||||
)
|
||||
item = Item('טופו טעם טבעי 300 גרם', 10.9, '', '7296073345763')
|
||||
item = Item('טופו טעם טבעי 300 גרם', 10.9, 1, '7296073345763', '')
|
||||
assert promo_func(item) == 5
|
||||
|
||||
|
||||
@@ -209,7 +209,7 @@ def test_shufersal_promo_type_10_2():
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price
|
||||
)
|
||||
item = Item('טופו טעם טבעי 300 גרם', 10.9, 'כפרי בריא משק ויילר', '7296073345763')
|
||||
item = Item('טופו טעם טבעי 300 גרם', 10.9, 1, '7296073345763', 'כפרי בריא משק ויילר')
|
||||
assert promo_func(item) == 7
|
||||
|
||||
|
||||
@@ -225,7 +225,7 @@ def assert_discount(discounted_price, item_barcode, item_manufacturer, item_name
|
||||
discount_rate=discount_rate,
|
||||
discounted_price=discounted_price
|
||||
)
|
||||
item = Item(item_name, orig_price, item_manufacturer, item_barcode)
|
||||
item = Item(item_name, orig_price, 1, item_barcode, item_manufacturer)
|
||||
assert abs(promo_func(item) - price_after_discount) <= 1e-5, promo_description
|
||||
|
||||
|
||||
|
51
utils.py
51
utils.py
@@ -4,9 +4,10 @@ import logging
|
||||
import zipfile
|
||||
from argparse import ArgumentTypeError
|
||||
from datetime import datetime
|
||||
from typing import AnyStr, Dict
|
||||
from typing import AnyStr, Dict, List
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import Tag
|
||||
from os import path
|
||||
|
||||
from item import Item
|
||||
@@ -31,8 +32,8 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
|
||||
return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml")
|
||||
|
||||
|
||||
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
|
||||
category: SupermarketChain.XMLFilesCategory) -> BeautifulSoup:
|
||||
def create_bs_object(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory,
|
||||
load_xml: bool, xml_path: str) -> BeautifulSoup:
|
||||
"""
|
||||
This function creates a BeautifulSoup (BS) object according to the given parameters.
|
||||
In case the given load_xml is True and the XML file exists, the function creates the BS object from the given
|
||||
@@ -47,12 +48,12 @@ def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load
|
||||
:return: A BeautifulSoup object with xml content.
|
||||
"""
|
||||
if load_xml and path.isfile(xml_path):
|
||||
return create_bs_object_from_xml(xml_path)
|
||||
return create_bs_object_from_link(xml_path, chain, category, store_id)
|
||||
return get_bs_object_from_xml(xml_path)
|
||||
return get_bs_object_from_link(chain, store_id, category, xml_path)
|
||||
|
||||
|
||||
def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: SupermarketChain.XMLFilesCategory,
|
||||
store_id: int) -> BeautifulSoup:
|
||||
def get_bs_object_from_link(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory,
|
||||
xml_path: str) -> BeautifulSoup:
|
||||
"""
|
||||
This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API.
|
||||
|
||||
@@ -63,7 +64,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
|
||||
:return: A BeautifulSoup object with xml content.
|
||||
"""
|
||||
session = requests.Session()
|
||||
download_url: str = chain.get_download_url(store_id, category, session)
|
||||
download_url = chain.get_download_url(store_id, category, session)
|
||||
response_content = session.get(download_url).content
|
||||
try:
|
||||
xml_content: AnyStr = gzip.decompress(response_content)
|
||||
@@ -77,7 +78,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
|
||||
return BeautifulSoup(xml_content, features='xml')
|
||||
|
||||
|
||||
def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
|
||||
def get_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
|
||||
"""
|
||||
This function creates a BeautifulSoup (BS) object from a given XML file.
|
||||
|
||||
@@ -88,17 +89,23 @@ def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
|
||||
return BeautifulSoup(f_in, features='xml')
|
||||
|
||||
|
||||
def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[str, Item]:
|
||||
def create_items_dict(chain: SupermarketChain, store_id: int, load_xml) -> Dict[str, Item]:
|
||||
"""
|
||||
This function creates a dictionary where every key is an item code and its value is its corresponding Item instance.
|
||||
We take both the full and the non-full prices files, and assume that the non-full file is more up to date (so its entries overwrite those of the full file).
|
||||
|
||||
:param chain: A given supermarket chain
|
||||
:param load_xml: A boolean representing whether to load an existing prices xml file
|
||||
:param store_id: A given store id
|
||||
"""
|
||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name)
|
||||
bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull)
|
||||
return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
|
||||
items_dict = dict()
|
||||
for category in [chain.XMLFilesCategory.PricesFull, chain.XMLFilesCategory.Prices]:
|
||||
xml_path: str = xml_file_gen(chain, store_id, category.name)
|
||||
bs_prices: BeautifulSoup = create_bs_object(chain, store_id, category, load_xml, xml_path)
|
||||
items_tags = bs_prices.find_all(chain.item_tag_name)
|
||||
items_dict.update({item.find('ItemCode').text: chain.get_item_info(item) for item in items_tags})
|
||||
|
||||
return items_dict
|
||||
|
||||
|
||||
def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
|
||||
@@ -110,18 +117,12 @@ def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
|
||||
:param product_name: A given product name
|
||||
:param load_xml: A boolean representing whether to load an already saved XML file (if one exists) instead of downloading a new one
|
||||
"""
|
||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name)
|
||||
bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull)
|
||||
prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text]
|
||||
prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text))
|
||||
for prod in prods:
|
||||
logging.info(
|
||||
(
|
||||
prod.find('ItemName').text[::-1],
|
||||
prod.find('ManufacturerName').text[::-1],
|
||||
prod.find('ItemPrice').text
|
||||
)
|
||||
)
|
||||
items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_xml)
|
||||
products_by_name = [item for item in items_dict.values() if product_name in item.name]
|
||||
products_by_name_sorted_by_price = sorted(products_by_name, key=lambda item: item.price_by_measure)
|
||||
|
||||
for prod in products_by_name_sorted_by_price:
|
||||
logging.info(prod)
|
||||
|
||||
|
||||
def get_float_from_tag(tag, int_tag) -> int:
|
||||
|
Reference in New Issue
Block a user