Added specific search for the download URL of non-full promotions and prices files. Changed the return value of get_download_url accordingly.
@@ -1,8 +1,12 @@
 import json
+import re
 
 import requests
 
 from supermarket_chain import SupermarketChain
 
+FNAME_KEY = "FileNm"
+
+
 class BinaProjectWebClient:
     _date_hour_format = '%Y-%m-%d %H:%M:%S'
@@ -16,8 +20,16 @@ class BinaProjectWebClient:
         url = '/'.join([hostname, self.path_prefix, "MainIO_Hok.aspx"])
         req_res: requests.Response = session.get(url)
         jsons_files = json.loads(req_res.text)
-        suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"]
-                      and category.name.replace('s', '') in cur_json["FileNm"])
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname \
+                                        and not re.search('full', fname, re.IGNORECASE)
+            if not any(filter_func(cur_json[FNAME_KEY]) for cur_json in jsons_files):
+                return ""  # Could not find non-full Promos/Prices file
+        else:
+            filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname
+        suffix = next(
+            cur_json[FNAME_KEY] for cur_json in jsons_files if filter_func(cur_json[FNAME_KEY]))
         down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix])
         return down_url
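Note on the new filter: the only functional difference from the old next(...) expression is the extra not re.search('full', fname, re.IGNORECASE) condition, which skips any file name containing "full" (the daily full dumps). A minimal, self-contained sketch of that check; the store number and file names below are made up for illustration, and only the -NNN-20 store/date marker and the full/non-full distinction mirror the diff:

import re

store_id = 5                       # hypothetical store number
category_token = "Promo"           # what category.name.replace('s', '') yields for Promos

def is_non_full(fname: str) -> bool:
    # Same three conditions as filter_func in the hunk above: store/date marker,
    # category token, and no "full" (case-insensitive) anywhere in the name.
    return (f'-{store_id:03d}-20' in fname
            and category_token in fname
            and not re.search('full', fname, re.IGNORECASE))

names = ["PromoFull7290000000001-005-202103010200.gz",   # made-up file names
         "Promo7290000000001-005-202103010800.gz"]
print([n for n in names if is_non_full(n)])   # keeps only the non-full promo file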
@@ -1,4 +1,6 @@
 import json
+import re
+
 import requests
 
 from supermarket_chain import SupermarketChain
@@ -17,7 +19,13 @@ class CerberusWebClient:
         ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
         s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
         s_json: dict = json.loads(s.text)
-        suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            filter_func = lambda d, id: f'-{id:03d}-20' in d['name'] and not re.search('full', d['name'], re.IGNORECASE)
+            if not any(filter_func(d, store_id) for d in s_json['aaData']):
+                return ""  # Could not find non-full Prices/Promos file
+        else:
+            filter_func = lambda d, id: f'-{id:03d}-20' in d['name']
+        suffix: str = next(d['name'] for d in s_json['aaData'] if filter_func(d, store_id))
+
         download_url: str = hostname + "/file/d/" + suffix
         return download_url
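A small design note on the Cerberus variant, as a sketch rather than the committed code: the any() pre-check followed by next() walks aaData twice, and the same fall-back to an empty string can be expressed in one pass by giving next() a default. The rows and store id below are made up so the snippet runs on its own:

import re

rows = [{'name': 'PriceFull-001-202103010200.gz'},   # hypothetical entries, shaped like aaData rows
        {'name': 'Price-001-202103010800.gz'}]
store_id = 1
filter_func = lambda d, id: f'-{id:03d}-20' in d['name'] and not re.search('full', d['name'], re.IGNORECASE)

# next() with a default of "" returns the empty string when nothing matches,
# instead of raising StopIteration.
suffix = next((d['name'] for d in rows if filter_func(d, store_id)), "")
print(suffix or "no non-full file found")   # -> Price-001-202103010800.gz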
@@ -1,3 +1,4 @@
+import re
 from typing import Dict, List
 import requests
 from bs4 import BeautifulSoup
@@ -21,8 +22,16 @@ class MahsaneiHashook(SupermarketChain):
         url = prefix + "NBCompetitionRegulations.aspx"
         req_res: requests.Response = requests.get(url)
         soup = BeautifulSoup(req_res.text, features='lxml')
-        suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
-                                 and f'-{store_id:03d}-20' in value).attrs['href']
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
+                                              and f'-{store_id:03d}-20' in fname \
+                                              and not re.search('full', fname, re.IGNORECASE)
+            if soup.find('a', href=fname_filter_func) is None:
+                return ""  # Could not find non-full Promos/Prices file
+        else:
+            fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
+                                              and f'-{store_id:03d}-20' in fname
+        suffix: str = soup.find('a', href=fname_filter_func).attrs['href']
         down_url: str = prefix + suffix
         return down_url
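The MahsaneiHashook version relies on BeautifulSoup's support for passing a callable as an attribute filter: find('a', href=f) calls f on each anchor's href value, and passes None when the attribute is missing, which is why the lambda first checks fname for truthiness. A minimal sketch against made-up HTML (the link names are hypothetical):

import re
from bs4 import BeautifulSoup

html = ('<a href="PromoFull-005-202103010200.xml">full</a>'
        '<a href="Promo-005-202103010800.xml">non-full</a>')   # hypothetical links
soup = BeautifulSoup(html, features='lxml')

# The callable receives each anchor's href value and decides whether it matches.
link = soup.find('a', href=lambda v: v and 'Promo' in v and '-005-20' in v
                                     and not re.search('full', v, re.IGNORECASE))
print(link.attrs['href'])   # -> Promo-005-202103010800.xml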
utils.py
@@ -66,6 +66,8 @@ def get_bs_object_from_link(chain: SupermarketChain, store_id: int, category: Su
     """
     session = requests.Session()
     download_url: str = chain.get_download_url(store_id, category, session)
+    if not download_url:
+        return BeautifulSoup()
     response_content = session.get(download_url).content
     try:
         xml_content: AnyStr = gzip.decompress(response_content)
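For callers of get_bs_object_from_link, the missing-file case now surfaces as an empty BeautifulSoup() instead of a failed download. A caller-side usage sketch, not part of the commit:

from bs4 import BeautifulSoup

def has_data(soup: BeautifulSoup) -> bool:
    # An empty BeautifulSoup() holds no parsed contents, which is how a missing
    # non-full Promos/Prices file now shows up to the caller.
    return bool(soup.contents)

# e.g. skip a store when not has_data(get_bs_object_from_link(chain, store_id, category))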