Added specific search for the download URL of non-full promotions and prices files. Changed the return value of get_download_url accordingly.
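In short: for the Promos and Prices categories, each web client now filters out "full" files when selecting a download link, and get_download_url returns an empty string when no non-full file is available, so callers must check for that. A minimal sketch of both behaviors, using hypothetical file names (the predicate mirrors the filter_func added below; real listings follow the same '-NNN-20...' store/date pattern):

import re

store_id = 1
# Hypothetical names: a full dump and an incremental (non-full) file.
names = ["PromoFull-001-202101010000.gz", "Promo-001-202101020000.gz"]

# Same shape as the new filter_func: match store and category,
# then reject any name containing 'full' (case-insensitive).
matches = [n for n in names
           if f'-{store_id:03d}-20' in n and 'Promo' in n
           and not re.search('full', n, re.IGNORECASE)]

# New contract: an empty string signals that no non-full file was found.
download_url = matches[0] if matches else ""
if not download_url:
    pass  # caller handles the missing-file case, as utils.py now does

The utils.py hunk at the end shows the caller side: an empty URL short-circuits to an empty BeautifulSoup object instead of attempting a download.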
@@ -1,8 +1,12 @@
 import json
+import re
 
 import requests
 
 from supermarket_chain import SupermarketChain
 
+FNAME_KEY = "FileNm"
+
+
 class BinaProjectWebClient:
     _date_hour_format = '%Y-%m-%d %H:%M:%S'
@@ -16,8 +20,16 @@ class BinaProjectWebClient:
         url = '/'.join([hostname, self.path_prefix, "MainIO_Hok.aspx"])
         req_res: requests.Response = session.get(url)
         jsons_files = json.loads(req_res.text)
-        suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"]
-                      and category.name.replace('s', '') in cur_json["FileNm"])
+
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname \
+                                        and not re.search('full', fname, re.IGNORECASE)
+            if not any(filter_func(cur_json[FNAME_KEY]) for cur_json in jsons_files):
+                return ""  # Could not find non-full Promos/Prices file
+        else:
+            filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname
+        suffix = next(
+            cur_json[FNAME_KEY] for cur_json in jsons_files if filter_func(cur_json[FNAME_KEY]))
         down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix])
         return down_url
@@ -1,4 +1,6 @@
 import json
+import re
+
 import requests
 
 from supermarket_chain import SupermarketChain
@@ -17,7 +19,13 @@ class CerberusWebClient:
         ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
         s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
         s_json: dict = json.loads(s.text)
-        suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            filter_func = lambda d, id: f'-{id:03d}-20' in d['name'] and not re.search('full', d['name'], re.IGNORECASE)
+            if not any(filter_func(d, store_id) for d in s_json['aaData']):
+                return ""  # Could not find non-full Prices/Promos file
+        else:
+            filter_func = lambda d, id: f'-{id:03d}-20' in d['name']
+        suffix: str = next(d['name'] for d in s_json['aaData'] if filter_func(d, store_id))
 
         download_url: str = hostname + "/file/d/" + suffix
         return download_url
@@ -1,3 +1,4 @@
+import re
 from typing import Dict, List
 import requests
 from bs4 import BeautifulSoup
@@ -21,8 +22,16 @@ class MahsaneiHashook(SupermarketChain):
         url = prefix + "NBCompetitionRegulations.aspx"
         req_res: requests.Response = requests.get(url)
         soup = BeautifulSoup(req_res.text, features='lxml')
-        suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
-                                and f'-{store_id:03d}-20' in value).attrs['href']
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
+                                              and f'-{store_id:03d}-20' in fname \
+                                              and not re.search('full', fname, re.IGNORECASE)
+            if soup.find('a', href=fname_filter_func) is None:
+                return ""  # Could not find non-full Promos/Prices file
+        else:
+            fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
+                                              and f'-{store_id:03d}-20' in fname
+        suffix: str = soup.find('a', href=fname_filter_func).attrs['href']
         down_url: str = prefix + suffix
         return down_url
utils.py
@@ -66,6 +66,8 @@ def get_bs_object_from_link(chain: SupermarketChain, store_id: int, category: Su
     """
     session = requests.Session()
    download_url: str = chain.get_download_url(store_id, category, session)
+    if not download_url:
+        return BeautifulSoup()
     response_content = session.get(download_url).content
     try:
         xml_content: AnyStr = gzip.decompress(response_content)