has added Co-Op chain. Extendability to other chain has significantly improved as well.
This commit is contained in:
97
utils.py
97
utils.py
@@ -1,78 +1,63 @@
|
||||
import gzip
|
||||
from enum import Enum
|
||||
from typing import Dict
|
||||
from typing import AnyStr, Dict
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from pathlib import Path
|
||||
from bs4.element import Tag
|
||||
from os import path
|
||||
from supermarket_chain import SupermarketChain
|
||||
import re
|
||||
|
||||
LOGS_DIRNAME = "logs"
|
||||
XMLS_DIRNAME = "xmls"
|
||||
Path(XMLS_DIRNAME).mkdir(exist_ok=True)
|
||||
|
||||
|
||||
class ShufersalCategories(Enum):
|
||||
All, Prices, PricesFull, Promos, PromosFull, Stores = range(6)
|
||||
|
||||
|
||||
def xml_file_gen(category_name: str, store_id: int) -> str:
|
||||
def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
|
||||
"""
|
||||
This function generate an xml filename given a store id and a category_name
|
||||
This function generate an XML filename given a store id and a category_name
|
||||
If the given store_id is invalid, it is ignored in the returned string.
|
||||
|
||||
:param chain: A given supermarket chain
|
||||
:param store_id: A given store_id
|
||||
:param category_name: A given category name
|
||||
:return: An xml filename
|
||||
"""
|
||||
store_id_str = f"-{str(store_id)}" if is_valid_store_id(store_id) else ""
|
||||
return path.join(XMLS_DIRNAME, f"{category_name}{store_id_str}.xml")
|
||||
store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
|
||||
return path.join(XMLS_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
|
||||
|
||||
|
||||
def get_download_url(store_id: int, cat_id: int) -> str:
|
||||
"""
|
||||
This function scrapes Shufersal's website and returns a url that contains the data for a given store and category.
|
||||
For info about the categories, see ShufersalCategories.
|
||||
|
||||
:param store_id: A given id of a store
|
||||
:param cat_id: A given id of a category
|
||||
:return: A downloadable link of the data for a given store and category
|
||||
"""
|
||||
url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={cat_id}"
|
||||
if is_valid_store_id(store_id):
|
||||
url += f"&storeId={store_id}"
|
||||
req_res = requests.get(url)
|
||||
soup = BeautifulSoup(req_res.text, features='lxml')
|
||||
return soup.find('a', text="לחץ להורדה")['href']
|
||||
|
||||
|
||||
def create_bs_object(xml_path: str, cat_id: int, store_id: int, load_xml: bool) -> BeautifulSoup:
|
||||
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
|
||||
category: SupermarketChain.XMLFilesCategory) -> BeautifulSoup:
|
||||
"""
|
||||
This function creates a BeautifulSoup (BS) object according to the given parameters.
|
||||
In case the given load_xml is True and the XML file exists, the function creates the BS object from the given
|
||||
xml_path, otherwise it uses Shufersal's APIs to download the xml with the relevant content and saves it for
|
||||
future use.
|
||||
|
||||
:param chain: A given supermarket chain
|
||||
:param xml_path: A given path to an xml file to load/save the BS object from/to.
|
||||
:param cat_id: A given id of a category from ShufersalCategories
|
||||
:param category: A given category
|
||||
:param store_id: A given id of a store
|
||||
:param load_xml: A flag representing whether to try loading an existing XML file
|
||||
:return: A BeautifulSoup object with xml content.
|
||||
"""
|
||||
if load_xml and path.isfile(xml_path):
|
||||
return create_bs_object_from_xml(xml_path)
|
||||
return create_bs_object_from_link(xml_path, store_id, cat_id)
|
||||
return create_bs_object_from_link(xml_path, chain, category, store_id)
|
||||
|
||||
|
||||
def create_bs_object_from_link(xml_path: str, store_id: int, cat_id: int) -> BeautifulSoup:
|
||||
def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: SupermarketChain.XMLFilesCategory,
|
||||
store_id: int) -> BeautifulSoup:
|
||||
"""
|
||||
This function creates a BeautifulSoup (BS) object by generating a download link from Shufersal's API.
|
||||
|
||||
:param xml_path: A given path to an xml file to load/save the BS object from/to.
|
||||
:param chain: A given supermarket chain
|
||||
:param xml_path: A given path to an XML file to load/save the BS object from/to.
|
||||
:param store_id: A given id of a store
|
||||
:param cat_id: A given id of a category from ShufersalCategories
|
||||
:param category: A given category
|
||||
:return: A BeautifulSoup object with xml content.
|
||||
"""
|
||||
download_url = get_download_url(store_id, cat_id)
|
||||
xml_content = gzip.decompress(requests.get(download_url).content)
|
||||
download_url: str = chain.get_download_url(store_id, category)
|
||||
xml_content: AnyStr = gzip.decompress(requests.get(download_url).content)
|
||||
with open(xml_path, 'wb') as f_out:
|
||||
f_out.write(xml_content)
|
||||
return BeautifulSoup(xml_content, features='xml')
|
||||
@@ -80,7 +65,7 @@ def create_bs_object_from_link(xml_path: str, store_id: int, cat_id: int) -> Bea
|
||||
|
||||
def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
|
||||
"""
|
||||
This function creates a BeautifulSoup (BS) object from a given xml file.
|
||||
This function creates a BeautifulSoup (BS) object from a given XML file.
|
||||
|
||||
:param xml_path: A given path to an xml file to load/save the BS object from/to.
|
||||
:return: A BeautifulSoup object with xml content.
|
||||
@@ -89,39 +74,41 @@ def create_bs_object_from_xml(xml_path: str) -> BeautifulSoup:
|
||||
return BeautifulSoup(f_in, features='xml')
|
||||
|
||||
|
||||
def create_items_dict(store_id: int, load_xml) -> Dict:
|
||||
def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[str, str]:
|
||||
"""
|
||||
This function creates a dictionary where every key is an item code and its value is the item's name and price.
|
||||
|
||||
:param chain: A given supermarket chain
|
||||
:param load_xml: A boolean representing whether to load an existing prices xml file
|
||||
:param store_id: A given store id
|
||||
:return: A dictionary where the firs
|
||||
"""
|
||||
xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id)
|
||||
bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml)
|
||||
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all('Item')}
|
||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name)
|
||||
bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull)
|
||||
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
|
||||
|
||||
|
||||
def get_item_info(item):
|
||||
return str((item.find('ItemName').text, item.find('ManufacturerName').text, item.find('ItemPrice').text))
|
||||
|
||||
|
||||
def get_products_prices(store_id: int, product_name: str, load_xml: bool):
|
||||
def get_item_info(item: Tag) -> str:
|
||||
"""
|
||||
This function prints the products in a given Shufersal store which contains a given product_name.
|
||||
This function returns a string containing important information about a given supermarket's product.
|
||||
"""
|
||||
return [item.find('ItemName').text, item.find(re.compile(r'Manufacture[r]?Name')).text,
|
||||
item.find('ItemPrice').text, item.find('ItemCode').text]
|
||||
|
||||
:param store_id: A given Shufersal store id
|
||||
|
||||
def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
|
||||
"""
|
||||
This function prints the products in a given store which contains a given product_name.
|
||||
|
||||
:param chain: A given supermarket chain
|
||||
:param store_id: A given store id
|
||||
:param product_name: A given product name
|
||||
:param load_xml: A boolean representing whether to load an existing xml or load an already saved one
|
||||
"""
|
||||
xml_path = xml_file_gen(ShufersalCategories.PricesFull.name, store_id)
|
||||
bs_prices = create_bs_object(xml_path, ShufersalCategories.PricesFull.value, store_id, load_xml)
|
||||
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PricesFull.name)
|
||||
bs_prices: BeautifulSoup = create_bs_object(xml_path, chain, store_id, load_xml, chain.XMLFilesCategory.PricesFull)
|
||||
prods = [item for item in bs_prices.find_all("Item") if product_name in item.find("ItemName").text]
|
||||
prods.sort(key=lambda x: float(x.find("UnitOfMeasurePrice").text))
|
||||
for prod in prods:
|
||||
print((prod.find('ItemName').text[::-1], prod.find('ManufacturerName').text[::-1],
|
||||
prod.find('ItemPrice').text))
|
||||
|
||||
|
||||
def is_valid_store_id(store_id: int):
|
||||
return isinstance(store_id, int) and store_id >= 0
|
||||
|
Reference in New Issue
Block a user