Added a specific search for the download URL of non-full promotions and prices files. Changed the return value of get_download_url accordingly.

This commit is contained in:
KorenLazar
2021-08-17 13:06:42 +03:00
parent 3770352d04
commit cffdd84086
4 changed files with 36 additions and 5 deletions

View File

@@ -1,8 +1,12 @@
import json
import re
import requests
from supermarket_chain import SupermarketChain
FNAME_KEY = "FileNm"
class BinaProjectWebClient:
_date_hour_format = '%Y-%m-%d %H:%M:%S'
@@ -16,8 +20,16 @@ class BinaProjectWebClient:
url = '/'.join([hostname, self.path_prefix, "MainIO_Hok.aspx"])
req_res: requests.Response = session.get(url)
jsons_files = json.loads(req_res.text)
suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"]
and category.name.replace('s', '') in cur_json["FileNm"])
if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname \
and not re.search('full', fname, re.IGNORECASE)
if not any(filter_func(cur_json[FNAME_KEY]) for cur_json in jsons_files):
return "" # Could not find non-full Promos/Prices file
else:
filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname
suffix = next(
cur_json[FNAME_KEY] for cur_json in jsons_files if filter_func(cur_json[FNAME_KEY]))
down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix])
return down_url

View File

@@ -1,4 +1,6 @@
import json
import re
import requests
from supermarket_chain import SupermarketChain
@@ -17,7 +19,13 @@ class CerberusWebClient:
ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
s_json: dict = json.loads(s.text)
suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
filter_func = lambda d, id: f'-{id:03d}-20' in d['name'] and not re.search('full', d['name'], re.IGNORECASE)
if not any(filter_func(d, store_id) for d in s_json['aaData']):
return "" # Could not find non-full Prices/Promos file
else:
filter_func = lambda d, id: f'-{id:03d}-20' in d['name']
suffix: str = next(d['name'] for d in s_json['aaData'] if filter_func(d, store_id))
download_url: str = hostname + "/file/d/" + suffix
return download_url

View File

@@ -1,3 +1,4 @@
import re
from typing import Dict, List
import requests
from bs4 import BeautifulSoup
@@ -21,8 +22,16 @@ class MahsaneiHashook(SupermarketChain):
url = prefix + "NBCompetitionRegulations.aspx"
req_res: requests.Response = requests.get(url)
soup = BeautifulSoup(req_res.text, features='lxml')
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
and f'-{store_id:03d}-20' in value).attrs['href']
if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
and f'-{store_id:03d}-20' in fname \
and not re.search('full', fname, re.IGNORECASE)
if soup.find('a', href=fname_filter_func) is None:
return "" # Could not find non-full Promos/Prices file
else:
fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
and f'-{store_id:03d}-20' in fname
suffix: str = soup.find('a', href=fname_filter_func).attrs['href']
down_url: str = prefix + suffix
return down_url