Changed the create_items_dict function to include the non-full prices file in the items dictionary.
Changed log_products_prices to work with an items dictionary and the __repr__ method of the Item class.
This commit is contained in:
51
utils.py
51
utils.py
@@ -4,9 +4,10 @@ import logging
|
|||||||
import zipfile
|
import zipfile
|
||||||
from argparse import ArgumentTypeError
|
from argparse import ArgumentTypeError
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import AnyStr, Dict
|
from typing import AnyStr, Dict, List
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from bs4.element import Tag
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
from item import Item
|
from item import Item
|
||||||
@@ -31,8 +32,8 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
|
|||||||
return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml")
|
return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml")
|
||||||
|
|
||||||
|
|
||||||
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
|
def create_bs_object(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory,
|
||||||
category: SupermarketChain.XMLFilesCategory) -> BeautifulSoup:
|
load_xml: bool, xml_path: str) -> BeautifulSoup:
|
||||||
"""
|
"""
|
||||||
This function creates a BeautifulSoup (BS) object according to the given parameters.
|
This function creates a BeautifulSoup (BS) object according to the given parameters.
|
||||||
In case the given load_xml is True and the XML file exists, the function creates the BS object from the given
|
In case the given load_xml is True and the XML file exists, the function creates the BS object from the given
|
||||||
@@ -47,12 +48,12 @@ def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load
|
|||||||
:return: A BeautifulSoup object with xml content.
|
:return: A BeautifulSoup object with xml content.
|
||||||
"""
|
"""
|
||||||
if load_xml and path.isfile(xml_path):
|
if load_xml and path.isfile(xml_path):
|
||||||
return create_bs_object_from_xml(xml_path)
|
return get_bs_object_from_xml(xml_path)
|
||||||
return create_bs_object_from_link(xml_path, chain, category, store_id)
|
return get_bs_object_from_link(chain, store_id, category, xml_path)
|
||||||
|
|
||||||
|
|
||||||
def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: SupermarketChain.XMLFilesCategory,
|
def get_bs_object_from_link(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory,
|
||||||
store_id: int) -> BeautifulSoup:
|
xml_path: str) -> BeautifulSoup:
|
||||||
"""
|
"""
|
||||||
This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API.
|
This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API.
|
||||||
|
|
||||||
@@ -63,7 +64,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
|
|||||||
:return: A BeautifulSoup object with xml content.
|
:return: A BeautifulSoup object with xml content.
|
||||||
"""
|
"""
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
download_url: str = chain.get_download_url(store_id, category, session)
|
download_url = chain.get_download_url(store_id, category, session)
|
||||||
response_content = session.get(download_url).content
|
response_content = session.get(download_url).content
|
||||||
try:
|
try:
|
||||||
xml_content: AnyStr = gzip.decompress(response_content)
|
xml_content: AnyStr = gzip.decompress(response_content)
|
||||||
@@ -77,7 +78,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
|
|||||||
return BeautifulSoup(xml_content, features='xml')
|
return BeautifulSoup(xml_content, features='xml')
|
||||||
|
|
||||||
|
|
||||||
def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
|
def get_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
|
||||||
"""
|
"""
|
||||||
This function creates a BeautifulSoup (BS) object from a given XML file.
|
This function creates a BeautifulSoup (BS) object from a given XML file.
|
||||||
|
|
||||||
@@ -88,17 +89,23 @@ def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
|
|||||||
return BeautifulSoup(f_in, features='xml')
|
return BeautifulSoup(f_in, features='xml')
|
||||||
|
|
||||||
|
|
||||||
def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[str, Item]:
|
def create_items_dict(chain: SupermarketChain, store_id: int, load_xml) -> Dict[str, Item]:
|
||||||
"""
|
"""
|
||||||
This function creates a dictionary where every key is an item code and its value is its corresponding Item instance.
|
This function creates a dictionary where every key is an item code and its value is its corresponding Item instance.
|
||||||
|
We take both the full and the non-full prices files, and assume that the non-full file is more up to date (so its entries overwrite those of the full file).
|
||||||
|
|
||||||
:param chain: A given supermarket chain
|
:param chain: A given supermarket chain
|
||||||
:param load_xml: A boolean representing whether to load an existing prices xml file
|
:param load_xml: A boolean representing whether to load an existing prices xml file
|
||||||
:param store_id: A given store id
|
:param store_id: A given store id
|
||||||
"""
|
"""
|
||||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name)
|
items_dict = dict()
|
||||||
bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull)
|
for category in [chain.XMLFilesCategory.PricesFull, chain.XMLFilesCategory.Prices]:
|
||||||
return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
|
xml_path: str = xml_file_gen(chain, store_id, category.name)
|
||||||
|
bs_prices: BeautifulSoup = create_bs_object(chain, store_id, category, load_xml, xml_path)
|
||||||
|
items_tags = bs_prices.find_all(chain.item_tag_name)
|
||||||
|
items_dict.update({item.find('ItemCode').text: chain.get_item_info(item) for item in items_tags})
|
||||||
|
|
||||||
|
return items_dict
|
||||||
|
|
||||||
|
|
||||||
def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
|
def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
|
||||||
@@ -110,18 +117,12 @@ def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool,
|
|||||||
:param product_name: A given product name
|
:param product_name: A given product name
|
||||||
:param load_xml: A boolean representing whether to load an existing xml or load an already saved one
|
:param load_xml: A boolean representing whether to load an existing xml or load an already saved one
|
||||||
"""
|
"""
|
||||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name)
|
items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_xml)
|
||||||
bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull)
|
products_by_name = [item for item in items_dict.values() if product_name in item.name]
|
||||||
prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text]
|
products_by_name_sorted_by_price = sorted(products_by_name, key=lambda item: item.price_by_measure)
|
||||||
prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text))
|
|
||||||
for prod in prods:
|
for prod in products_by_name_sorted_by_price:
|
||||||
logging.info(
|
logging.info(prod)
|
||||||
(
|
|
||||||
prod.find('ItemName').text[::-1],
|
|
||||||
prod.find('ManufacturerName').text[::-1],
|
|
||||||
prod.find('ItemPrice').text
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_float_from_tag(tag, int_tag) -> int:
|
def get_float_from_tag(tag, int_tag) -> int:
|
||||||
|
Reference in New Issue
Block a user