diff --git a/chains/yeinot_bitan.py b/chains/yeinot_bitan.py
index e69de29..a52d2d3 100644
--- a/chains/yeinot_bitan.py
+++ b/chains/yeinot_bitan.py
@@ -0,0 +1,58 @@
+import re
+from datetime import datetime
+
+import numpy as np
+import requests
+from bs4 import BeautifulSoup
+
+from supermarket_chain import SupermarketChain
+
+
+class YeinotBitan(SupermarketChain):
+    """Chain whose files are listed at publishprice.ybitan.co.il."""
+
+    _date_hour_format = "%Y-%m-%d %H:%M:%S"
+
+    @staticmethod
+    def get_download_url_or_path(
+        store_id: int,
+        category: SupermarketChain.XMLFilesCategory,
+        session: requests.Session,
+    ) -> str:
+        """Return the URL of today's most recent file for a store and category.
+
+        Fetches today's listing page, keeps the links whose ``href`` matches
+        the category and the zero-padded store id, and picks the one with
+        the largest 4-digit number at ``href[-7:-3]`` (presumably an HHMM
+        stamp just before the file extension -- confirm against the site).
+
+        :param store_id: id of the store, zero-padded to 4 digits in file names.
+        :param category: file category; its ``name`` selects the file prefix.
+        :param session: session used to fetch the listing page.
+        :return: full URL of the chosen file.
+        :raises ValueError: if no link matches the category and store.
+        """
+        today_date_suffix = datetime.today().date().strftime("%Y%m%d")
+        url = f"http://publishprice.ybitan.co.il/{today_date_suffix}/"
+        # Go through the caller's session rather than a one-off request.
+        req_res = session.get(url)
+        soup = BeautifulSoup(req_res.text, features="lxml")
+        promo_tags = soup.find_all(
+            "a",
+            attrs={
+                # Every lowercase "s" is dropped from the category name
+                # (presumably turning plural names singular -- confirm).
+                "href": re.compile(
+                    rf"^{category.name.replace('s', '')}.*-{store_id:04d}-"
+                )
+            },
+        )
+        if not promo_tags:
+            # np.argmax would raise an opaque ValueError on an empty list.
+            raise ValueError(
+                f"No {category.name} file for store {store_id} at {url}"
+            )
+        most_recent_tag_ind = np.argmax(
+            [int(promo_tag["href"][-7:-3]) for promo_tag in promo_tags]
+        )
+        return url + promo_tags[most_recent_tag_ind]["href"]
diff --git a/main.py b/main.py
index af5c803..e597cd2 100644
--- a/main.py
+++ b/main.py
@@ -24,6 +24,7 @@ from chains.shuk_hayir import ShukHayir
 from chains.stop_market import StopMarket
 from chains.tiv_taam import TivTaam
 from chains.victory import Victory
+from chains.yeinot_bitan import YeinotBitan
 from chains.yohananof import Yohananof
 from chains.zol_vebegadol import ZolVebegadol
 from promotion import main_latest_promos, log_promos_by_name, get_all_prices
@@ -58,6 +59,7 @@ CHAINS_LIST = [
     ShukHayir,
     KingStore,
     ShefaBirkatHashem,
+    YeinotBitan,
 ]
 Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
 Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
diff --git a/promotion.py b/promotion.py
index 947524e..a7deb87 100644
--- a/promotion.py
+++ b/promotion.py
@@ -229,6 +229,10 @@ def get_available_promos(
                 continue
 
         promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
+        # Too many items -> probably illegal promotion. Guard against a
+        # missing instance first: the check below shows it can be falsy.
+        if promo_inst is not None and len(promo_inst.items) > 1000:
+            continue
         if promo_inst:
             promo_objs.append(promo_inst)
 
@@ -405,17 +409,22 @@ def get_all_prices(
     promo_obj = None
     for promo in tqdm(promo_tags, desc="creating_promotions"):
         promotion_id = int(promo.find(re.compile("PromotionId", re.IGNORECASE)).text)
+
         if promo_obj is None or promo_obj.promotion_id != promotion_id:
             promo_obj = create_new_promo_instance(
                 chain, items_dict, promo, promotion_id
             )
-        for item in promo.find_all("Item"):
-            item_code = item.find("ItemCode").text
-            cur_item = items_dict.get(item_code)
-            if cur_item is not None:
-                discounted_price = promo_obj.promo_func(cur_item)
-                if cur_item.price > discounted_price:
-                    cur_item.final_price = discounted_price
+        if promo_obj is not None and promo_obj.club_id == ClubID.REGULAR:
+            promo_items = promo.find_all("Item")
+            if len(promo_items) > 1000:  # Too many items -> probably illegal promotion
+                continue
+            for item in promo_items:
+                item_code = item.find("ItemCode").text
+                cur_item = items_dict.get(item_code)
+                if cur_item is not None:
+                    discounted_price = promo_obj.promo_func(cur_item)
+                    if cur_item.price > discounted_price:
+                        cur_item.final_price = discounted_price
     return items_dict
 
 
diff --git a/tests/test_scraping.py b/tests/test_scraping.py
index 2929e65..f20665a 100644
--- a/tests/test_scraping.py
+++ b/tests/test_scraping.py
@@ -14,6 +14,7 @@ from chains.keshet import Keshet
 from chains.shuk_hayir import ShukHayir
 from chains.stop_market import StopMarket
 from chains.tiv_taam import TivTaam
+from chains.yeinot_bitan import YeinotBitan
 from chains.zol_vebegadol import ZolVebegadol
 from main import CHAINS_DICT
 from promotion import PROMOTION_COLS_NUM, main_latest_promos
@@ -32,7 +33,6 @@ def test_searching_for_download_urls(chain_tuple):
     Test that get_download_url of each chain returns the correct download url for each category in every chain.
     """
     chain_name, chain = chain_tuple
-    # for chain_name, chain in tqdm(chain_dict.items(), desc='chains'):
 
     logging.info(f"Checking download urls in chain {chain_name}")
     store_id: int = valid_store_id_by_chain(chain_name)
@@ -118,6 +118,8 @@ def valid_store_id_by_chain(chain_name) -> int:
         store_id = 4
     elif chain_name in [repr(StopMarket), repr(Keshet)]:
         store_id = 5
+    elif chain_name == repr(YeinotBitan):
+        store_id = 3700
     else:
         store_id = 1
     return store_id