diff --git a/co_op.py b/co_op.py index c20ae82..eba45e9 100644 --- a/co_op.py +++ b/co_op.py @@ -19,7 +19,7 @@ class CoOp(SupermarketChain): req_res: requests.Response = requests.get(url) soup = BeautifulSoup(req_res.text, features='lxml') suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value - and f'-{store_id}-20' in value).attrs['href'] + and f'-{store_id:03d}-20' in value).attrs['href'] down_url = prefix + suffix print(down_url) return down_url diff --git a/main.py b/main.py index da4f68f..3b5319d 100644 --- a/main.py +++ b/main.py @@ -6,8 +6,10 @@ from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices from supermarket_chain import SupermarketChain from shufersal import ShuferSal from co_op import CoOp +from zol_vebegadol import ZolVebegadol from pathlib import Path + # TODO: fix problem of left-to-right printing Path(LOGS_DIRNAME).mkdir(exist_ok=True) @@ -16,6 +18,7 @@ Path(XMLS_DIRNAME).mkdir(exist_ok=True) chain_dict = { 'Shufersal': ShuferSal(), 'Co-Op': CoOp(), + 'Zol-Vebegadol': ZolVebegadol() } if __name__ == '__main__': @@ -57,7 +60,7 @@ if __name__ == '__main__': parser.add_argument('--chain', required=True, help='The name of the requested chain', - choices=['Shufersal', 'Co-Op'], + choices=chain_dict.keys(), ) args = parser.parse_args() @@ -83,3 +86,14 @@ if __name__ == '__main__': arg_store_id = int(args.find_promos_by_name[0]) get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1], load_prices=args.load_prices, load_promos=args.load_promos) + + +# Script for Shufersal: +# store_ids = get_all_deals(chain) +# print(store_ids) +# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300] +# null_items_lists = list() +# for store_id in store_ids[::-1]: +# print(store_id) +# null_items_lists.append(get_all_null_items_in_promos(chain, store_id)) +# print(setintersection(*[set(list) for list in null_items_lists])) diff --git a/promotion.py b/promotion.py index c5f7b3b..c1e9037 100644 --- a/promotion.py +++ b/promotion.py @@ -1,6 +1,5 @@ from datetime import datetime from typing import Dict, List -from bs4 import BeautifulSoup from utils import ( create_items_dict, @@ -33,12 +32,15 @@ class Promotion: return '\n'.join([title, dates_range, update_line, items]) + '\n' def repr_ltr(self): - title = self.content[::-1] + title = self.content dates_range = f"Between {self.start_date} and {self.end_date}" update_line = f"Updated at {self.update_date}" - items = '\n'.join(str(item)[::-1] for item in self.items) + items = '\n'.join(str(item) for item in self.items) return '\n'.join([title, dates_range, update_line, items]) + '\n' + def __eq__(self, other): + return self.content == other.content and self.start_date == other.start_date and self.end_date == other.end_date + def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bool, load_promos) -> List[Promotion]: """ @@ -67,7 +69,10 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo items=chain.get_items(promo, items_dict), ) if is_valid_promo(promo): - promo_objs.append(promo) + if promo_objs and promo_objs[-1] == promo: # Merge equal promos + promo_objs[-1].items.extend(promo.items) + else: + promo_objs.append(promo) return promo_objs @@ -97,7 +102,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: Supermarket """ promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, False) - promotions.sort(key=lambda promo: max(promo.update_date, promo.start_date), reverse=True) + promotions.sort(key=lambda promo: (max(promo.update_date, promo.start_date), promo.start_date), reverse=True) logger.info('\n'.join(str(promotion) for promotion in promotions)) @@ -115,3 +120,15 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, for promo in promotions: if promo_name in promo.content: print(promo.repr_ltr()) + + +def get_all_null_items_in_promos(chain, store_id): + items_dict: Dict[str, str] = create_items_dict(chain, True, store_id) + xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name) + bs_promos = create_bs_object(xml_path, chain, store_id, True, chain.XMLFilesCategory.PromosFull) + + null_items = list() + for promo in bs_promos.find_all(chain.promotion_tag_name): + null_items.extend(chain.get_null_items(promo, items_dict)) + + return null_items diff --git a/shufersal.py b/shufersal.py index bb885a0..d3bd0f7 100644 --- a/shufersal.py +++ b/shufersal.py @@ -15,11 +15,13 @@ class ShuferSal(SupermarketChain): @staticmethod def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str: url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={category.value}" - if SupermarketChain.is_valid_store_id(store_id): + if SupermarketChain.is_valid_store_id(int(store_id)): url += f"&storeId={store_id}" req_res: requests.Response = requests.get(url) soup = BeautifulSoup(req_res.text, features='lxml') - return soup.find('a', text="לחץ להורדה")['href'] + down_url = soup.find('a', text="לחץ להורדה")['href'] + print(down_url) + return down_url class XMLFilesCategory(SupermarketChain.XMLFilesCategory): All, Prices, PricesFull, Promos, PromosFull, Stores = range(6) @@ -29,5 +31,10 @@ class ShuferSal(SupermarketChain): @staticmethod def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]: - return [items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item') - if items_dict.get(item.find('ItemCode').text)] + items = list() + for item in promo.find_all('Item'): + item_code = item.find('ItemCode').text + full_item_info = items_dict.get(item_code) + if full_item_info: + items.append(full_item_info) + return items diff --git a/store_utils.py b/store_utils.py index 69d98d4..92d5e7c 100644 --- a/store_utils.py +++ b/store_utils.py @@ -18,3 +18,11 @@ def get_store_id(city: str, load_xml: bool, chain: SupermarketChain): for store in bs_stores.find_all("STORE"): if store.find("CITY").text == city: print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) + + +def get_all_deals(chain): + xml_path: str = xml_file_gen(chain, -1, chain.XMLFilesCategory.Stores.name) + bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, True, chain.XMLFilesCategory.Stores) + + return [int(store.find("STOREID").text) for store in bs_stores.find_all("STORE") if store.find("SUBCHAINID").text + == "2"] diff --git a/supermarket_chain.py b/supermarket_chain.py index 3adf809..0b0cafc 100644 --- a/supermarket_chain.py +++ b/supermarket_chain.py @@ -9,6 +9,8 @@ class SupermarketChain: """ A class representing a supermarket chain. """ + + @abstractmethod class XMLFilesCategory(Enum): """ An enum class of different XML files produced by a supermarket chain @@ -78,3 +80,12 @@ class SupermarketChain: :param items_dict: A given dictionary of products """ pass + + @staticmethod + def get_null_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]: + return [item.find('ItemCode').text for item in promo.find_all('Item') + if not items_dict.get(item.find('ItemCode').text)] + + @abstractmethod + def __repr__(self): + pass diff --git a/utils.py b/utils.py index 52c2ed9..e897a6e 100644 --- a/utils.py +++ b/utils.py @@ -1,5 +1,7 @@ import gzip -from typing import AnyStr, Dict +import io +import zipfile +from typing import AnyStr, Dict, List import requests from bs4 import BeautifulSoup from bs4.element import Tag @@ -57,7 +59,14 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category: :return: A BeautifulSoup object with xml content. """ download_url: str = chain.get_download_url(store_id, category) - xml_content: AnyStr = gzip.decompress(requests.get(download_url).content) + response_content = requests.get(download_url).content + try: + xml_content: AnyStr = gzip.decompress(response_content) + except gzip.BadGzipFile: + with zipfile.ZipFile(io.BytesIO(response_content)) as the_zip: + zip_info = the_zip.infolist()[0] + with the_zip.open(zip_info) as the_file: + xml_content = the_file.read() with open(xml_path, 'wb') as f_out: f_out.write(xml_content) return BeautifulSoup(xml_content, features='xml') @@ -88,12 +97,12 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[ return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)} -def get_item_info(item: Tag) -> str: +def get_item_info(item: Tag) -> List[str]: """ This function returns a string containing important information about a given supermarket's product. """ - return [item.find('ItemName').text, item.find(re.compile(r'Manufacture[r]?Name')).text, - item.find('ItemPrice').text, item.find('ItemCode').text] + return [item.find(re.compile(r'ItemN[a]?m[e]?')).text, item.find(re.compile(r'Manufacture[r]?Name')).text, + item.find('ItemPrice').text, item.find('ItemCode').text] def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: diff --git a/zol_vebegadol.py b/zol_vebegadol.py new file mode 100644 index 0000000..eb86bb0 --- /dev/null +++ b/zol_vebegadol.py @@ -0,0 +1,42 @@ +import json +from typing import Dict, List +import requests +from bs4.element import Tag + +from supermarket_chain import SupermarketChain + + +class ZolVebegadol(SupermarketChain): + def __repr__(self): + return 'Zol-Vebegadol' + + class XMLFilesCategory(SupermarketChain.XMLFilesCategory): + All, Promos, PromosFull, Prices, PricesFull, Stores = range(6) + + promotion_tag_name = 'Promotion' + promotion_update_tag_name = 'PromotionUpdateDate' + date_format = '%Y-%m-%d' + date_hour_format = '%Y-%m-%d %H:%M:%S' + item_tag_name = 'Item' + + @staticmethod + def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str: + prefix = "http://zolvebegadol.binaprojects.com" + url = prefix + "/MainIO_Hok.aspx" + req_res: requests.Response = requests.get(url) + jsons_files = json.loads(req_res.text) + suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"] + and category.name.replace('s', '') in cur_json["FileNm"]) + down_url = '/'.join([prefix, "Download", suffix]) + print(down_url) + return down_url + + @staticmethod + def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]: + items = list() + for item in promo.find_all('Item'): + item_code = item.find('ItemCode').text + full_item_info = items_dict.get(item_code) + if full_item_info: + items.append(full_item_info) + return items