Added specific search for the download URL of non-full promotions and prices files. Changed the return value of get_download_url accordingly.
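In short: for the Promos and Prices categories, each web client now filters out "full" files when selecting a download link, and get_download_url returns an empty string when no non-full file is available, so callers must check for that. A minimal sketch of both behaviors, using hypothetical file names (the predicate mirrors the filter_func added below; real listings follow the same '-NNN-20...' store/date pattern):

import re

store_id = 1
# Hypothetical names: a full dump and an incremental (non-full) file.
names = ["PromoFull-001-202101010000.gz", "Promo-001-202101020000.gz"]

# Same shape as the new filter_func: match store and category,
# then reject any name containing 'full' (case-insensitive).
matches = [n for n in names
           if f'-{store_id:03d}-20' in n and 'Promo' in n
           and not re.search('full', n, re.IGNORECASE)]

# New contract: an empty string signals that no non-full file was found.
download_url = matches[0] if matches else ""
if not download_url:
    pass  # caller handles the missing-file case, as utils.py now does

The utils.py hunk at the end shows the caller side: an empty URL short-circuits to an empty BeautifulSoup object instead of attempting a download.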
@@ -1,8 +1,12 @@
 import json
+import re
 
 import requests
 
 from supermarket_chain import SupermarketChain
 
+FNAME_KEY = "FileNm"
+
+
 class BinaProjectWebClient:
     _date_hour_format = '%Y-%m-%d %H:%M:%S'
@@ -16,8 +20,16 @@ class BinaProjectWebClient:
         url = '/'.join([hostname, self.path_prefix, "MainIO_Hok.aspx"])
         req_res: requests.Response = session.get(url)
         jsons_files = json.loads(req_res.text)
-        suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"]
-                      and category.name.replace('s', '') in cur_json["FileNm"])
+
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname \
+                                        and not re.search('full', fname, re.IGNORECASE)
+            if not any(filter_func(cur_json[FNAME_KEY]) for cur_json in jsons_files):
+                return ""  # Could not find non-full Promos/Prices file
+        else:
+            filter_func = lambda fname: f'-{store_id:03d}-20' in fname and category.name.replace('s', '') in fname
+        suffix = next(
+            cur_json[FNAME_KEY] for cur_json in jsons_files if filter_func(cur_json[FNAME_KEY]))
         down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix])
         return down_url
@@ -1,4 +1,6 @@
 import json
+import re
+
 import requests
 
 from supermarket_chain import SupermarketChain
@@ -17,7 +19,13 @@ class CerberusWebClient:
         ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
         s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
         s_json: dict = json.loads(s.text)
-        suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            filter_func = lambda d, id: f'-{id:03d}-20' in d['name'] and not re.search('full', d['name'], re.IGNORECASE)
+            if not any(filter_func(d, store_id) for d in s_json['aaData']):
+                return ""  # Could not find non-full Prices/Promos file
+        else:
+            filter_func = lambda d, id: f'-{id:03d}-20' in d['name']
+        suffix: str = next(d['name'] for d in s_json['aaData'] if filter_func(d, store_id))
 
         download_url: str = hostname + "/file/d/" + suffix
         return download_url
@@ -1,3 +1,4 @@
+import re
 from typing import Dict, List
 import requests
 from bs4 import BeautifulSoup
@@ -21,8 +22,16 @@ class MahsaneiHashook(SupermarketChain):
         url = prefix + "NBCompetitionRegulations.aspx"
         req_res: requests.Response = requests.get(url)
         soup = BeautifulSoup(req_res.text, features='lxml')
-        suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
-                                and f'-{store_id:03d}-20' in value).attrs['href']
+        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
+            fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
+                                              and f'-{store_id:03d}-20' in fname \
+                                              and not re.search('full', fname, re.IGNORECASE)
+            if soup.find('a', href=fname_filter_func) is None:
+                return ""  # Could not find non-full Promos/Prices file
+        else:
+            fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
+                                              and f'-{store_id:03d}-20' in fname
+        suffix: str = soup.find('a', href=fname_filter_func).attrs['href']
         down_url: str = prefix + suffix
         return down_url
utils.py
@@ -66,6 +66,8 @@ def get_bs_object_from_link(chain: SupermarketChain, store_id: int, category: Su
     """
     session = requests.Session()
    download_url: str = chain.get_download_url(store_id, category, session)
+    if not download_url:
+        return BeautifulSoup()
     response_content = session.get(download_url).content
     try:
         xml_content: AnyStr = gzip.decompress(response_content)