Changed create_items_dict function to included non-full prices file in the items dictionary.
Changed log_products_prices to work with an items dictionary and a __repr__ function of the Item class.
This commit is contained in:
		
							
								
								
									
										51
									
								
								utils.py
									
									
									
									
									
								
							
							
						
						
									
										51
									
								
								utils.py
									
									
									
									
									
								
							| @@ -4,9 +4,10 @@ import logging | |||||||
| import zipfile | import zipfile | ||||||
| from argparse import ArgumentTypeError | from argparse import ArgumentTypeError | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from typing import AnyStr, Dict | from typing import AnyStr, Dict, List | ||||||
| import requests | import requests | ||||||
| from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||||||
|  | from bs4.element import Tag | ||||||
| from os import path | from os import path | ||||||
|  |  | ||||||
| from item import Item | from item import Item | ||||||
| @@ -31,8 +32,8 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> | |||||||
|     return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml") |     return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml") | ||||||
|  |  | ||||||
|  |  | ||||||
| def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool, | def create_bs_object(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory, | ||||||
|                      category: SupermarketChain.XMLFilesCategory) -> BeautifulSoup: |                      load_xml: bool, xml_path: str) -> BeautifulSoup: | ||||||
|     """ |     """ | ||||||
|     This function creates a BeautifulSoup (BS) object according to the given parameters. |     This function creates a BeautifulSoup (BS) object according to the given parameters. | ||||||
|     In case the given load_xml is True and the XML file exists, the function creates the BS object from the given |     In case the given load_xml is True and the XML file exists, the function creates the BS object from the given | ||||||
| @@ -47,12 +48,12 @@ def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load | |||||||
|     :return: A BeautifulSoup object with xml content. |     :return: A BeautifulSoup object with xml content. | ||||||
|     """ |     """ | ||||||
|     if load_xml and path.isfile(xml_path): |     if load_xml and path.isfile(xml_path): | ||||||
|         return create_bs_object_from_xml(xml_path) |         return get_bs_object_from_xml(xml_path) | ||||||
|     return create_bs_object_from_link(xml_path, chain, category, store_id) |     return get_bs_object_from_link(chain, store_id, category, xml_path) | ||||||
|  |  | ||||||
|  |  | ||||||
| def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: SupermarketChain.XMLFilesCategory, | def get_bs_object_from_link(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory, | ||||||
|                                store_id: int) -> BeautifulSoup: |                             xml_path: str) -> BeautifulSoup: | ||||||
|     """ |     """ | ||||||
|     This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API. |     This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API. | ||||||
|  |  | ||||||
| @@ -63,7 +64,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: | |||||||
|     :return: A BeautifulSoup object with xml content. |     :return: A BeautifulSoup object with xml content. | ||||||
|     """ |     """ | ||||||
|     session = requests.Session() |     session = requests.Session() | ||||||
|     download_url: str = chain.get_download_url(store_id, category, session) |     download_url = chain.get_download_url(store_id, category, session) | ||||||
|     response_content = session.get(download_url).content |     response_content = session.get(download_url).content | ||||||
|     try: |     try: | ||||||
|         xml_content: AnyStr = gzip.decompress(response_content) |         xml_content: AnyStr = gzip.decompress(response_content) | ||||||
| @@ -77,7 +78,7 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: | |||||||
|     return BeautifulSoup(xml_content, features='xml') |     return BeautifulSoup(xml_content, features='xml') | ||||||
|  |  | ||||||
|  |  | ||||||
| def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup: | def get_bs_object_from_xml(xml_path: str) -> BeautifulSoup: | ||||||
|     """ |     """ | ||||||
|     This function creates a BeautifulSoup (BS) object from a given XML file. |     This function creates a BeautifulSoup (BS) object from a given XML file. | ||||||
|  |  | ||||||
| @@ -88,17 +89,23 @@ def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup: | |||||||
|         return BeautifulSoup(f_in, features='xml') |         return BeautifulSoup(f_in, features='xml') | ||||||
|  |  | ||||||
|  |  | ||||||
| def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[str, Item]: | def create_items_dict(chain: SupermarketChain, store_id: int, load_xml) -> Dict[str, Item]: | ||||||
|     """ |     """ | ||||||
|     This function creates a dictionary where every key is an item code and its value is its corresponding Item instance. |     This function creates a dictionary where every key is an item code and its value is its corresponding Item instance. | ||||||
|  |     We take both full and not full prices files, and assume that the no full is more updated (in case of overwriting). | ||||||
|  |  | ||||||
|     :param chain: A given supermarket chain |     :param chain: A given supermarket chain | ||||||
|     :param load_xml: A boolean representing whether to load an existing prices xml file |     :param load_xml: A boolean representing whether to load an existing prices xml file | ||||||
|     :param store_id: A given store id |     :param store_id: A given store id | ||||||
|     """ |     """ | ||||||
|     xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name) |     items_dict = dict() | ||||||
|     bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull) |     for category in [chain.XMLFilesCategory.PricesFull, chain.XMLFilesCategory.Prices]: | ||||||
|     return {item.find('ItemCode').text: chain.get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)} |         xml_path: str = xml_file_gen(chain, store_id, category.name) | ||||||
|  |         bs_prices: BeautifulSoup = create_bs_object(chain, store_id, category, load_xml, xml_path) | ||||||
|  |         items_tags = bs_prices.find_all(chain.item_tag_name) | ||||||
|  |         items_dict.update({item.find('ItemCode').text: chain.get_item_info(item) for item in items_tags}) | ||||||
|  |  | ||||||
|  |     return items_dict | ||||||
|  |  | ||||||
|  |  | ||||||
| def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: | def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: | ||||||
| @@ -110,18 +117,12 @@ def log_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, | |||||||
|     :param product_name: A given product name |     :param product_name: A given product name | ||||||
|     :param load_xml: A boolean representing whether to load an existing xml or load an already saved one |     :param load_xml: A boolean representing whether to load an existing xml or load an already saved one | ||||||
|     """ |     """ | ||||||
|     xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name) |     items_dict: Dict[str, Item] = create_items_dict(chain, store_id, load_xml) | ||||||
|     bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull) |     products_by_name = [item for item in items_dict.values() if product_name in item.name] | ||||||
|     prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text] |     products_by_name_sorted_by_price = sorted(products_by_name, key=lambda item: item.price_by_measure) | ||||||
|     prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text)) |  | ||||||
|     for prod in prods: |     for prod in products_by_name_sorted_by_price: | ||||||
|         logging.info( |         logging.info(prod) | ||||||
|             ( |  | ||||||
|                 prod.find('ItemName').text[::-1], |  | ||||||
|                 prod.find('ManufacturerName').text[::-1], |  | ||||||
|                 prod.find('ItemPrice').text |  | ||||||
|             ) |  | ||||||
|         ) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def get_float_from_tag(tag, int_tag) -> int: | def get_float_from_tag(tag, int_tag) -> int: | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user