Files
supermarket-scraping/chains/binaproject_web_client.py

47 lines
1.7 KiB
Python

import json
import re
import requests
from supermarket_chain import SupermarketChain
FNAME_KEY = "FileNm"
class BinaProjectWebClient:
_date_hour_format = '%Y-%m-%d %H:%M:%S'
_update_date_format = '%Y-%m-%d %H:%M:%S'
_path_prefix = ""
_hostname_suffix = ".binaprojects.com"
def get_download_url(self, store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) \
-> str:
hostname = f"http://{self.hostname_prefix}{self.hostname_suffix}"
url = '/'.join([hostname, self.path_prefix, "MainIO_Hok.aspx"])
req_res: requests.Response = session.get(url)
jsons_files = json.loads(req_res.text)
if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname \
and not re.search('full', fname, re.IGNORECASE)
if not any(filter_func(cur_json[FNAME_KEY]) for cur_json in jsons_files):
return "" # Could not find non-full Promos/Prices file
else:
filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname
suffix = next(
cur_json[FNAME_KEY] for cur_json in jsons_files if filter_func(cur_json[FNAME_KEY]))
down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix])
return down_url
@property
def hostname_prefix(self):
return repr(type(self))
@property
def path_prefix(self):
return type(self)._path_prefix
@property
def hostname_suffix(self):
return type(self)._hostname_suffix