Added the chain Yeinot Bitan (also to tests).
Changed the price-with-promos calculation to include only regular promotions. Added filtering of promotions that include too many items.
This commit is contained in:
@@ -0,0 +1,35 @@
|
|||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class YeinotBitan(SupermarketChain):
    """Yeinot Bitan chain: resolves file URLs from publishprice.ybitan.co.il."""

    _date_hour_format = "%Y-%m-%d %H:%M:%S"

    @staticmethod
    def get_download_url_or_path(
        store_id: int,
        category: SupermarketChain.XMLFilesCategory,
        session: requests.Session,
    ) -> str:
        """Return the URL of the latest file published today for a store.

        Fetches today's directory listing, keeps the links whose ``href``
        starts with the category prefix and contains the zero-padded store
        id, and picks the one with the largest numeric suffix.

        :param store_id: Store number; matched as a 4-digit, zero-padded field.
        :param category: File category whose name is used as the href prefix.
        :param session: Session used to perform the HTTP request.
        :return: Full URL (listing URL + href) of the selected file.
        :raises ValueError: If no link in the listing matches the category
            and store id.
        """
        today_date_suffix = datetime.today().strftime("%Y%m%d")
        url = f"http://publishprice.ybitan.co.il/{today_date_suffix}/"

        # Use the caller-supplied session rather than a one-off request.
        req_res = session.get(url)
        soup = BeautifulSoup(req_res.text, features="lxml")

        # Every "s" is stripped from the category name before matching;
        # presumably this turns plural category names into the singular
        # file-name prefix -- TODO confirm against the category enum.
        href_pattern = re.compile(
            rf"^{category.name.replace('s', '')}.*-{store_id:04d}-"
        )
        file_tags = soup.find_all("a", attrs={"href": href_pattern})
        if not file_tags:
            raise ValueError(
                f"No {category.name} file found for store {store_id} at {url}"
            )

        # The 4 characters before the last 3 of the href are compared as an
        # integer; assumes hrefs end with a 3-character extension preceded by
        # a time-like digit field -- verify against the actual listing.
        most_recent_tag = max(file_tags, key=lambda tag: int(tag["href"][-7:-3]))
        return url + most_recent_tag["href"]
|
||||||
|
2
main.py
2
main.py
@@ -24,6 +24,7 @@ from chains.shuk_hayir import ShukHayir
|
|||||||
from chains.stop_market import StopMarket
|
from chains.stop_market import StopMarket
|
||||||
from chains.tiv_taam import TivTaam
|
from chains.tiv_taam import TivTaam
|
||||||
from chains.victory import Victory
|
from chains.victory import Victory
|
||||||
|
from chains.yeinot_bitan import YeinotBitan
|
||||||
from chains.yohananof import Yohananof
|
from chains.yohananof import Yohananof
|
||||||
from chains.zol_vebegadol import ZolVebegadol
|
from chains.zol_vebegadol import ZolVebegadol
|
||||||
from promotion import main_latest_promos, log_promos_by_name, get_all_prices
|
from promotion import main_latest_promos, log_promos_by_name, get_all_prices
|
||||||
@@ -58,6 +59,7 @@ CHAINS_LIST = [
|
|||||||
ShukHayir,
|
ShukHayir,
|
||||||
KingStore,
|
KingStore,
|
||||||
ShefaBirkatHashem,
|
ShefaBirkatHashem,
|
||||||
|
YeinotBitan,
|
||||||
]
|
]
|
||||||
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
|
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
|
||||||
Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
|
Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
|
||||||
|
@@ -229,6 +229,8 @@ def get_available_promos(
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
|
promo_inst = create_new_promo_instance(chain, items_dict, promo, promotion_id)
|
||||||
|
if len(promo_inst.items) > 1000: # Too many items -> probably illegal promotion
|
||||||
|
continue
|
||||||
if promo_inst:
|
if promo_inst:
|
||||||
promo_objs.append(promo_inst)
|
promo_objs.append(promo_inst)
|
||||||
|
|
||||||
@@ -405,11 +407,16 @@ def get_all_prices(
|
|||||||
promo_obj = None
|
promo_obj = None
|
||||||
for promo in tqdm(promo_tags, desc="creating_promotions"):
|
for promo in tqdm(promo_tags, desc="creating_promotions"):
|
||||||
promotion_id = int(promo.find(re.compile("PromotionId", re.IGNORECASE)).text)
|
promotion_id = int(promo.find(re.compile("PromotionId", re.IGNORECASE)).text)
|
||||||
|
|
||||||
if promo_obj is None or promo_obj.promotion_id != promotion_id:
|
if promo_obj is None or promo_obj.promotion_id != promotion_id:
|
||||||
promo_obj = create_new_promo_instance(
|
promo_obj = create_new_promo_instance(
|
||||||
chain, items_dict, promo, promotion_id
|
chain, items_dict, promo, promotion_id
|
||||||
)
|
)
|
||||||
for item in promo.find_all("Item"):
|
if promo_obj.club_id == ClubID.REGULAR:
|
||||||
|
promo_items = promo.find_all("Item")
|
||||||
|
if len(promo_items) > 1000: # Too many items -> probably illegal promotion
|
||||||
|
continue
|
||||||
|
for item in promo_items:
|
||||||
item_code = item.find("ItemCode").text
|
item_code = item.find("ItemCode").text
|
||||||
cur_item = items_dict.get(item_code)
|
cur_item = items_dict.get(item_code)
|
||||||
if cur_item is not None:
|
if cur_item is not None:
|
||||||
|
@@ -14,6 +14,7 @@ from chains.keshet import Keshet
|
|||||||
from chains.shuk_hayir import ShukHayir
|
from chains.shuk_hayir import ShukHayir
|
||||||
from chains.stop_market import StopMarket
|
from chains.stop_market import StopMarket
|
||||||
from chains.tiv_taam import TivTaam
|
from chains.tiv_taam import TivTaam
|
||||||
|
from chains.yeinot_bitan import YeinotBitan
|
||||||
from chains.zol_vebegadol import ZolVebegadol
|
from chains.zol_vebegadol import ZolVebegadol
|
||||||
from main import CHAINS_DICT
|
from main import CHAINS_DICT
|
||||||
from promotion import PROMOTION_COLS_NUM, main_latest_promos
|
from promotion import PROMOTION_COLS_NUM, main_latest_promos
|
||||||
@@ -32,7 +33,6 @@ def test_searching_for_download_urls(chain_tuple):
|
|||||||
Test that get_download_url of each chain returns the correct download url for each category in every chain.
|
Test that get_download_url of each chain returns the correct download url for each category in every chain.
|
||||||
"""
|
"""
|
||||||
chain_name, chain = chain_tuple
|
chain_name, chain = chain_tuple
|
||||||
# for chain_name, chain in tqdm(chain_dict.items(), desc='chains'):
|
|
||||||
|
|
||||||
logging.info(f"Checking download urls in chain {chain_name}")
|
logging.info(f"Checking download urls in chain {chain_name}")
|
||||||
store_id: int = valid_store_id_by_chain(chain_name)
|
store_id: int = valid_store_id_by_chain(chain_name)
|
||||||
@@ -118,6 +118,8 @@ def valid_store_id_by_chain(chain_name) -> int:
|
|||||||
store_id = 4
|
store_id = 4
|
||||||
elif chain_name in [repr(StopMarket), repr(Keshet)]:
|
elif chain_name in [repr(StopMarket), repr(Keshet)]:
|
||||||
store_id = 5
|
store_id = 5
|
||||||
|
elif chain_name == repr(YeinotBitan):
|
||||||
|
store_id = 3700
|
||||||
else:
|
else:
|
||||||
store_id = 1
|
store_id = 1
|
||||||
return store_id
|
return store_id
|
||||||
|
Reference in New Issue
Block a user