Fixed the bug in cerberus_web_client.py by using Selenium. Each chain that works with it must have a username for the Selenium login. In this mechanism, a path to a gz file is returned instead of a URL.
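For context, a minimal sketch of what such a Selenium login-and-download flow can look like. The login URL, the form-field name, and the link selection below are illustrative assumptions, not the actual cerberus_web_client.py code:

    import time
    from pathlib import Path

    from selenium import webdriver
    from selenium.webdriver.common.by import By

    LOGIN_URL = "https://example-price-portal.example/login"  # placeholder, not the real host


    def download_gz(username: str, download_dir: Path) -> str:
        """Log in with the chain's username and return a local path to a downloaded gz file."""
        options = webdriver.ChromeOptions()
        # Route browser downloads into download_dir so a file path can be returned instead of a URL.
        options.add_experimental_option(
            "prefs", {"download.default_directory": str(download_dir)}
        )
        driver = webdriver.Chrome(options=options)
        try:
            driver.get(LOGIN_URL)
            # Username-only login, per the commit message; the field name is assumed.
            driver.find_element(By.NAME, "username").send_keys(username)
            driver.find_element(By.NAME, "username").submit()
            # Click the first gz link on the listing page (selector is illustrative).
            driver.find_element(By.PARTIAL_LINK_TEXT, ".gz").click()
            time.sleep(5)  # crude wait for the browser download to finish
            return str(next(download_dir.glob("*.gz")))  # a path to a gz file, not a URL
        finally:
            driver.quit()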
- Added an option to output a prices JSON file in main.py under --prices-with-promos, where the prices are updated by the latest promotions (under the 'final_price' key; 'price' represents the price before promotions). See the sketch after this list.
- Fixed a small bug in BinaWebClient by checking that the filename does not contain 'null'.
- Changed the hierarchy of chains so that it includes the web clients.
- Added the date to the output filenames to start storing the data over time.
- Applied Black formatting (in line with PEP 8 guidelines).
- Changed chains_dict in main to a constant.
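A minimal sketch of the promotion-application step behind --prices-with-promos. The item structure and the 'code' key are assumptions; only the 'price' and 'final_price' keys come from the commit message:

    import json
    from datetime import date
    from typing import Dict, List


    def apply_promos(items: List[Dict], promo_prices: Dict[str, float]) -> List[Dict]:
        """Set each item's 'final_price' to its promo price if one exists, else to its 'price'."""
        for item in items:
            item["final_price"] = promo_prices.get(item["code"], item["price"])
        return items


    # Hypothetical data for illustration.
    items = [{"code": "123", "price": 10.90}, {"code": "456", "price": 7.50}]
    promo_prices = {"123": 8.90}

    # The date goes into the output filename so the data can be stored over time (per the commit).
    out_name = f"prices-with-promos-{date.today().isoformat()}.json"
    with open(out_name, "w") as f:
        json.dump(apply_promos(items, promo_prices), f, ensure_ascii=False, indent=2)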
@@ -1,5 +1,6 @@
 import re
 from typing import Dict, List
 
+import requests
 from bs4 import BeautifulSoup
 from bs4.element import Tag
@@ -9,33 +10,46 @@ from supermarket_chain import SupermarketChain
 
 
 class MahsaneiHashook(SupermarketChain):
-    _promotion_tag_name = 'Sale'
-    _promotion_update_tag_name = 'PriceUpdateDate'
-    _date_format = '%Y/%m/%d'
-    _date_hour_format = '%Y/%m/%d %H:%M:%S'
-    _update_date_format = '%Y/%m/%d %H:%M:%S'
-    _item_tag_name = 'Product'
+    _promotion_tag_name = "Sale"
+    _promotion_update_tag_name = "PriceUpdateDate"
+    _date_format = "%Y/%m/%d"
+    _date_hour_format = "%Y/%m/%d %H:%M:%S"
+    _update_date_format = "%Y/%m/%d %H:%M:%S"
+    _item_tag_name = "Product"
 
     @staticmethod
-    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
+    def get_download_url_or_path(
+        store_id: int,
+        category: SupermarketChain.XMLFilesCategory,
+        session: requests.Session,
+    ) -> str:
         prefix = "http://matrixcatalog.co.il/"
         url = prefix + "NBCompetitionRegulations.aspx"
         req_res: requests.Response = requests.get(url)
-        soup = BeautifulSoup(req_res.text, features='lxml')
-        if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
-            fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
-                and f'-{store_id:03d}-20' in fname \
-                and not re.search('full', fname, re.IGNORECASE)
-            if soup.find('a', href=fname_filter_func) is None:
+        soup = BeautifulSoup(req_res.text, features="lxml")
+        if category in [
+            SupermarketChain.XMLFilesCategory.Promos,
+            SupermarketChain.XMLFilesCategory.Prices,
+        ]:
+            fname_filter_func = (
+                lambda fname: fname
+                and category.name.replace("s", "") in fname
+                and f"-{store_id:03d}-20" in fname
+                and not re.search("full", fname, re.IGNORECASE)
+            )
+            if soup.find("a", href=fname_filter_func) is None:
                 return ""  # Could not find non-full Promos/Prices file
         else:
-            fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
-                and f'-{store_id:03d}-20' in fname
-        suffix: str = soup.find('a', href=fname_filter_func).attrs['href']
+            fname_filter_func = (
+                lambda fname: fname
+                and category.name.replace("s", "") in fname
+                and f"-{store_id:03d}-20" in fname
+            )
+        suffix: str = soup.find("a", href=fname_filter_func).attrs["href"]
         down_url: str = prefix + suffix
         return down_url
 
     @staticmethod
     def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
-        promo_item = items_dict.get(promo.find('ItemCode').text)
+        promo_item = items_dict.get(promo.find("ItemCode").text)
         return [promo_item] if promo_item else []