From 850d3963fef493e04b15a293c41a2322402c9577 Mon Sep 17 00:00:00 2001 From: KorenLazar Date: Sun, 7 Feb 2021 10:46:54 +0200 Subject: [PATCH] Add BinaProject clients --- README.md | 2 +- chains/binaproject_web_client.py | 35 ++++++++++++++++++++++++++++++++ chains/cerberus_web_client.py | 2 -- chains/king_store.py | 7 +++++++ chains/maayan2000.py | 6 ++++++ chains/shefa_birkat_hashem.py | 6 ++++++ chains/shuk_hayir.py | 7 +++++++ chains/stop_market.py | 1 - chains/tiv_taam.py | 2 +- chains/zol_vebegadol.py | 26 +++--------------------- main.py | 4 ++++ 11 files changed, 70 insertions(+), 28 deletions(-) create mode 100644 chains/binaproject_web_client.py create mode 100644 chains/king_store.py create mode 100644 chains/maayan2000.py create mode 100644 chains/shefa_birkat_hashem.py create mode 100644 chains/shuk_hayir.py diff --git a/README.md b/README.md index 7642e6b..b7e47fe 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Supermarket basic scraping -The library supports scraping from Shufersal, CoOp and Zol Vebegadol. +The library supports scraping from Shufersal, CoOp, Rami Levi, Osher Ad, Zol Vebegadol, Tiv Taam, Freshmarket, Mahsanei Hashook, Victory, Maayan2000, Yohananof, Stop Market, Keshet Taamim, Hazi Hinam, Dor Alon supermarkets, Shefa Birkat Hashem, Shuk Hayir, King Store and Super Bareket. 
## Installation clone: diff --git a/chains/binaproject_web_client.py b/chains/binaproject_web_client.py new file mode 100644 index 0000000..7cc72de --- /dev/null +++ b/chains/binaproject_web_client.py @@ -0,0 +1,35 @@ +import json +import requests + +from supermarket_chain import SupermarketChain + + +class BinaProjectWebClient: + _date_hour_format = '%Y-%m-%d %H:%M:%S' + _update_date_format = '%Y-%m-%d %H:%M:%S' + _path_prefix = "" + _hostname_suffix = ".binaprojects.com" + + def get_download_url(self, store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) \ + -> str: + hostname = f"http://{self.hostname_prefix}{self.hostname_suffix}" + url = '/'.join([hostname, self.path_prefix, "MainIO_Hok.aspx"]) + req_res: requests.Response = session.get(url) + jsons_files = json.loads(req_res.text) + suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"] + and category.name.replace('s', '') in cur_json["FileNm"]) + down_url: str = '/'.join([hostname, self.path_prefix, "Download", suffix]) + print(down_url) + return down_url + + @property + def hostname_prefix(self): + return repr(type(self)) + + @property + def path_prefix(self): + return type(self)._path_prefix + + @property + def hostname_suffix(self): + return type(self)._hostname_suffix diff --git a/chains/cerberus_web_client.py b/chains/cerberus_web_client.py index 24cedf3..38dc275 100644 --- a/chains/cerberus_web_client.py +++ b/chains/cerberus_web_client.py @@ -1,6 +1,4 @@ import json -from abc import abstractmethod - import requests from supermarket_chain import SupermarketChain diff --git a/chains/king_store.py b/chains/king_store.py new file mode 100644 index 0000000..882a7ca --- /dev/null +++ b/chains/king_store.py @@ -0,0 +1,7 @@ +from chains.binaproject_web_client import BinaProjectWebClient +from supermarket_chain import SupermarketChain + + +class KingStore(BinaProjectWebClient, SupermarketChain): + _path_prefix = "Food_Law" + 
_hostname_suffix = ".co.il" diff --git a/chains/maayan2000.py b/chains/maayan2000.py new file mode 100644 index 0000000..6d47ae3 --- /dev/null +++ b/chains/maayan2000.py @@ -0,0 +1,6 @@ +from chains.binaproject_web_client import BinaProjectWebClient +from supermarket_chain import SupermarketChain + + +class Maayan2000(BinaProjectWebClient, SupermarketChain): + pass \ No newline at end of file diff --git a/chains/shefa_birkat_hashem.py b/chains/shefa_birkat_hashem.py new file mode 100644 index 0000000..dc39a9f --- /dev/null +++ b/chains/shefa_birkat_hashem.py @@ -0,0 +1,6 @@ +from chains.binaproject_web_client import BinaProjectWebClient +from supermarket_chain import SupermarketChain + + +class ShefaBirkatHashem(BinaProjectWebClient, SupermarketChain): + pass \ No newline at end of file diff --git a/chains/shuk_hayir.py b/chains/shuk_hayir.py new file mode 100644 index 0000000..8a81511 --- /dev/null +++ b/chains/shuk_hayir.py @@ -0,0 +1,7 @@ +from chains.binaproject_web_client import BinaProjectWebClient +from supermarket_chain import SupermarketChain + + +class ShukHayir(BinaProjectWebClient, SupermarketChain): + @property + def hostname_prefix(self): return "shuk-hayir" diff --git a/chains/stop_market.py b/chains/stop_market.py index 5374c7b..baf85dc 100644 --- a/chains/stop_market.py +++ b/chains/stop_market.py @@ -4,7 +4,6 @@ from supermarket_chain import SupermarketChain class StopMarket(CerberusWebClient, SupermarketChain): _date_hour_format = '%Y-%m-%d %H:%M:%S' - @property def username(self): return 'Stop_Market' diff --git a/chains/tiv_taam.py b/chains/tiv_taam.py index faf71b2..9d29d0a 100644 --- a/chains/tiv_taam.py +++ b/chains/tiv_taam.py @@ -3,4 +3,4 @@ from supermarket_chain import SupermarketChain class TivTaam(CerberusWebClient, SupermarketChain): - _class_name = 'TivTaam' + pass diff --git a/chains/zol_vebegadol.py b/chains/zol_vebegadol.py index fca7a19..baa1f15 100644 --- a/chains/zol_vebegadol.py +++ b/chains/zol_vebegadol.py @@ -1,26 +1,6 @@ 
-import json -import requests - +from chains.binaproject_web_client import BinaProjectWebClient from supermarket_chain import SupermarketChain -class ZolVebegadol(SupermarketChain): - _date_hour_format = '%Y-%m-%d %H:%M:%S' - _update_date_format = '%Y-%m-%d %H:%M:%S' - item_tag_name = 'Item' - - @property - def update_date_format(self): - return ZolVebegadol.date_hour_format - - @staticmethod - def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str: - prefix = "http://zolvebegadol.binaprojects.com" - url = prefix + "/MainIO_Hok.aspx" - req_res: requests.Response = session.get(url) - jsons_files = json.loads(req_res.text) - suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"] - and category.name.replace('s', '') in cur_json["FileNm"]) - down_url: str = '/'.join([prefix, "Download", suffix]) - print(down_url) - return down_url +class ZolVebegadol(BinaProjectWebClient, SupermarketChain): + pass diff --git a/main.py b/main.py index 5b450f2..6b974ea 100644 --- a/main.py +++ b/main.py @@ -22,6 +22,10 @@ from chains import ( zol_vebegadol, rami_levi, osher_ad, + maayan2000, + shuk_hayir, + king_store, + shefa_birkat_hashem, ) # TODO: fix problem of left-to-right printing