Files
supermarket-scraping/chains/yeinot_bitan.py
korenlazar 9b6f63a7f0 Added the chain Yeinot Bitan (also to tests).
Changed price with promos to include only regular promotions.
Added filtering of promotions including too many items.
2022-10-04 13:36:29 +03:00

36 lines
1.0 KiB
Python

import re
from datetime import datetime
import numpy as np
import requests
from bs4 import BeautifulSoup
from supermarket_chain import SupermarketChain
class YeinotBitan(SupermarketChain):
    """Supermarket chain whose files are listed on publishprice.ybitan.co.il."""

    _date_hour_format = "%Y-%m-%d %H:%M:%S"
    # Upper bound (in seconds) on waiting for the listing page, so that an
    # unresponsive server cannot hang the scraper forever.
    _request_timeout_seconds = 30

    @staticmethod
    def get_download_url_or_path(
        store_id: int,
        category: SupermarketChain.XMLFilesCategory,
        session: requests.Session,
    ) -> str:
        """Return the URL of the latest file published today for a store.

        The listing page for today's date is fetched and scanned for links
        whose ``href`` starts with the category name stripped of every
        lowercase ``s`` and contains ``-<store_id padded to 4 digits>-``.
        Among the matches, the link with the largest 4-digit number taken
        from ``href[-7:-3]`` is selected (the first one wins on ties).

        NOTE(review): ``href[-7:-3]`` presumably holds the HHMM part of the
        file timestamp right before a 3-character extension such as ``.gz``
        -- confirm against the site's actual file names.

        Args:
            store_id: Numeric identifier of the store.
            category: Category of XML file to look for; only its ``name``
                is used.
            session: Session used to perform the HTTP request. When it is
                ``None`` a plain ``requests.get`` call is made instead.

        Returns:
            The listing URL concatenated with the selected link's ``href``.

        Raises:
            ValueError: If no link on the listing page matches, or if the
                characters at ``href[-7:-3]`` of a matching link are not
                an integer.
        """
        today_date_suffix = datetime.today().date().strftime("%Y%m%d")
        url = f"http://publishprice.ybitan.co.il/{today_date_suffix}/"

        # Honour the caller-supplied session; fall back to a one-off request
        # for callers that pass None.
        http_get = session.get if session is not None else requests.get
        req_res = http_get(url, timeout=YeinotBitan._request_timeout_seconds)

        soup = BeautifulSoup(req_res.text, features="lxml")
        file_tags = soup.find_all(
            "a",
            attrs={
                "href": re.compile(
                    rf"^{category.name.replace('s', '')}.*-{store_id:04d}-"
                )
            },
        )
        if not file_tags:
            # Same exception type np.argmax raised on an empty list, but with
            # enough context to diagnose the failure.
            raise ValueError(
                f"No {category.name} file found for store {store_id:04d} "
                f"at {url} (HTTP status {req_res.status_code})"
            )

        most_recent_tag = max(
            file_tags, key=lambda tag: int(tag["href"][-7:-3])
        )
        return url + most_recent_tag["href"]