From 18f3fa32b97cc41a59693bd8b21717d9d4fbee14 Mon Sep 17 00:00:00 2001
From: KorenLazar
Date: Sat, 6 Feb 2021 21:42:31 +0200
Subject: [PATCH] Add support for many new supermarket chains

Introduce a shared CerberusWebClient mixin for the chains served by the
publishedprices.co.il portal, move the matrixcatalog.co.il scraping logic
from CoOp into a new MahsaneiHashook base class, and register all chains in
main.py through a metaclass-based repr().
---
 bareket.py             |  5 +++++
 cerberus_web_client.py | 31 +++++++++++++++++++++++++++++++
 co_op.py               | 37 +++----------------------------------
 dor_alon.py            | 11 +++++++++++
 freshmarket.py         | 11 +++++++++++
 hazi_hinam.py          | 11 +++++++++++
 keshet.py              | 11 +++++++++++
 mahsaneiHashook.py     | 33 +++++++++++++++++++++++++++++++++
 main.py                | 23 +++++++++++++++++------
 osher_ad.py            | 12 ++++++++++++
 promotion.py           |  2 +-
 rami_levi.py           | 29 ++++++-----------------------
 shufersal.py           |  3 ---
 stop_market.py         | 11 +++++++++++
 supermarket_chain.py   | 10 ++++++++--
 tiv_taam.py            | 10 ++++++++++
 utils.py               |  2 +-
 victory.py             |  5 +++++
 yohananof.py           | 11 +++++++++++
 zol_vebegadol.py       |  3 ---
 20 files changed, 198 insertions(+), 73 deletions(-)
 create mode 100644 bareket.py
 create mode 100644 cerberus_web_client.py
 create mode 100644 dor_alon.py
 create mode 100644 freshmarket.py
 create mode 100644 hazi_hinam.py
 create mode 100644 keshet.py
 create mode 100644 mahsaneiHashook.py
 create mode 100644 osher_ad.py
 create mode 100644 stop_market.py
 create mode 100644 tiv_taam.py
 create mode 100644 victory.py
 create mode 100644 yohananof.py

diff --git a/bareket.py b/bareket.py
new file mode 100644
index 0000000..ca1e24f
--- /dev/null
+++ b/bareket.py
@@ -0,0 +1,5 @@
+from mahsaneiHashook import MahsaneiHashook
+
+
+class Bareket(MahsaneiHashook):
+    pass
diff --git a/cerberus_web_client.py b/cerberus_web_client.py
new file mode 100644
index 0000000..47c7670
--- /dev/null
+++ b/cerberus_web_client.py
@@ -0,0 +1,31 @@
+import json
+from abc import abstractmethod
+
+import requests
+
+from supermarket_chain import SupermarketChain
+
+
+class CerberusWebClient:
+
+    def get_download_url(self, store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) \
+            -> str:
+        hostname: str = "https://publishedprices.co.il"
+
+        # Post the payload to the site to log in
+        session.post(hostname + "/login/user", data={'username': self.username})
+
+        # Scrape the data
+        ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
+        s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
+        s_json: dict = json.loads(s.text)
+        suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
+
+        download_url: str = hostname + "/file/d/" + suffix
+        print(download_url)
+        return download_url
+
+    @property
+    @abstractmethod
+    def username(self):
+        pass
diff --git a/co_op.py b/co_op.py
index 81f0eba..8d7e7b4 100644
--- a/co_op.py
+++ b/co_op.py
@@ -1,36 +1,5 @@
-from typing import Dict, List
-import requests
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-
-from item import Item
-from supermarket_chain import SupermarketChain
+from mahsaneiHashook import MahsaneiHashook
 
 
-class CoOp(SupermarketChain):
-    _promotion_tag_name = 'Sale'
-    _promotion_update_tag_name = 'PriceUpdateDate'
-    _date_format = '%Y/%m/%d'
-    _date_hour_format = '%Y/%m/%d %H:%M:%S'
-    _update_date_format = '%Y/%m/%d %H:%M:%S'
-    _item_tag_name = 'Product'
-
-    @staticmethod
-    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
-        prefix = "http://matrixcatalog.co.il/"
-        url = prefix + "NBCompetitionRegulations.aspx"
-        req_res: requests.Response = requests.get(url)
-        soup = BeautifulSoup(req_res.text, features='lxml')
-        suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
-                                                        and f'-{store_id:03d}-20' in value).attrs['href']
-        down_url: str = prefix + suffix
-        print(down_url)
-        return down_url
-
-    def __repr__(self):
-        return 'CoOp'
-
-    @staticmethod
-    def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
-        promo_item = items_dict.get(promo.find('ItemCode').text)
-        return [promo_item] if promo_item else []
+class CoOp(MahsaneiHashook):
+    pass
diff --git a/dor_alon.py b/dor_alon.py
new file mode 100644
index 0000000..f71b4e3
--- /dev/null
+++ b/dor_alon.py
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class DorAlon(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'DorAlon'
+
+    @property
+    def username(self):
+        return self._class_name
diff --git a/freshmarket.py b/freshmarket.py
new file mode 100644
index 0000000..8f9b9cc
--- /dev/null
+++ b/freshmarket.py
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class Freshmarket(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'Freshmarket'
+
+    @property
+    def username(self):
+        return self._class_name
diff --git a/hazi_hinam.py b/hazi_hinam.py
new file mode 100644
index 0000000..0bfa45e
--- /dev/null
+++ b/hazi_hinam.py
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class HaziHinam(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'HaziHinam'
+
+    @property
+    def username(self):
+        return self._class_name
diff --git a/keshet.py b/keshet.py
new file mode 100644
index 0000000..41fe4f2
--- /dev/null
+++ b/keshet.py
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class Keshet(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'Keshet'
+
+    @property
+    def username(self):
+        return self._class_name
diff --git a/mahsaneiHashook.py b/mahsaneiHashook.py
new file mode 100644
index 0000000..cf1d3c3
--- /dev/null
+++ b/mahsaneiHashook.py
@@ -0,0 +1,33 @@
+from typing import Dict, List
+import requests
+from bs4 import BeautifulSoup
+from bs4.element import Tag
+
+from item import Item
+from supermarket_chain import SupermarketChain
+
+
+class MahsaneiHashook(SupermarketChain):
+    _promotion_tag_name = 'Sale'
+    _promotion_update_tag_name = 'PriceUpdateDate'
+    _date_format = '%Y/%m/%d'
+    _date_hour_format = '%Y/%m/%d %H:%M:%S'
+    _update_date_format = '%Y/%m/%d %H:%M:%S'
+    _item_tag_name = 'Product'
+
+    @staticmethod
+    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
+        prefix = "http://matrixcatalog.co.il/"
+        url = prefix + "NBCompetitionRegulations.aspx"
+        req_res: requests.Response = requests.get(url)
+        soup = BeautifulSoup(req_res.text, features='lxml')
+        suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
+                                                        and f'-{store_id:03d}-20' in value).attrs['href']
+        down_url: str = prefix + suffix
+        print(down_url)
+        return down_url
+
+    @staticmethod
+    def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
+        promo_item = items_dict.get(promo.find('ItemCode').text)
+        return [promo_item] if promo_item else []
diff --git a/main.py b/main.py
index 8a408a2..38bb4cc 100644
--- a/main.py
+++ b/main.py
@@ -1,13 +1,25 @@
 from argparse import ArgumentParser
 import logging
+
+from bareket import Bareket
+from mahsaneiHashook import MahsaneiHashook
+from dor_alon import DorAlon
+from freshmarket import Freshmarket
+from hazi_hinam import HaziHinam
+from keshet import Keshet
 from promotion import main_latest_promos, get_promos_by_name
+from stop_market import StopMarket
 from store_utils import get_all_deals, get_store_id
+from tiv_taam import TivTaam
 from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
 from supermarket_chain import SupermarketChain
 from shufersal import ShuferSal
 from co_op import CoOp
+from victory import Victory
+from yohananof import Yohananof
 from zol_vebegadol import ZolVebegadol
 from rami_levi import RamiLevi
+from osher_ad import OsherAd
 from pathlib import Path
 
 # TODO: fix problem of left-to-right printing
@@ -15,12 +27,11 @@ from pathlib import Path
 Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
 Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
 
-chain_dict = {
-    'Shufersal': ShuferSal(),
-    'CoOp': CoOp(),
-    'Zol-Vebegadol': ZolVebegadol(),
-    'RamiLevi': RamiLevi(),
-}
+chains = ShuferSal, CoOp, ZolVebegadol, RamiLevi, OsherAd, Freshmarket, TivTaam, HaziHinam, Keshet, StopMarket, \
+         Yohananof, DorAlon, MahsaneiHashook, Bareket, Victory
+
+chain_dict = {repr(chain): chain() for chain in chains}
+
 
 if __name__ == '__main__':
     parser = ArgumentParser()
diff --git a/osher_ad.py b/osher_ad.py
new file mode 100644
index 0000000..0f1637c
--- /dev/null
+++ b/osher_ad.py
@@ -0,0 +1,12 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class OsherAd(CerberusWebClient, SupermarketChain):
+
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'OsherAd'
+
+    @property
+    def username(self):
+        return self._class_name
diff --git a/promotion.py b/promotion.py
index 2b2ac18..502359d 100644
--- a/promotion.py
+++ b/promotion.py
@@ -140,7 +140,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: Supermarket
     promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()),
                                        promo.start_date - promo.end_date), reverse=True)
     logger.info('\n'.join(str(promotion) for promotion in promotions))
-    write_promotions_to_csv(promotions, f'results/{chain}_promos_{store_id}.csv')
+    write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv')
 
 
 def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):
diff --git a/rami_levi.py b/rami_levi.py
index 6f4b345..1f94f64 100644
--- a/rami_levi.py
+++ b/rami_levi.py
@@ -1,28 +1,11 @@
-import json
-import requests
-
+from cerberus_web_client import CerberusWebClient
 from supermarket_chain import SupermarketChain
 
 
-class RamiLevi(SupermarketChain):
+class RamiLevi(CerberusWebClient, SupermarketChain):
     _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'RamiLevi'
 
-    @staticmethod
-    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
-        hostname: str = "https://publishedprices.co.il"
-
-        # Post the payload to the site to log in
-        session.post(hostname + "/login/user", data={'username': 'ramilevi'})
-
-        # Scrape the data
-        ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
-        s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
-        s_json: dict = json.loads(s.text)
-        suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
-
-        download_url: str = hostname + "/file/d/" + suffix
-        print(download_url)
-        return download_url
-
-    def __repr__(self):
-        return 'RamiLevi'
+    @property
+    def username(self):
+        return self._class_name
diff --git a/shufersal.py b/shufersal.py
index dfc4bfd..0752a41 100644
--- a/shufersal.py
+++ b/shufersal.py
@@ -16,6 +16,3 @@ class ShuferSal(SupermarketChain):
         down_url: str = soup.find('a', text="לחץ להורדה")['href']
         print(down_url)
         return down_url
-
-    def __repr__(self):
-        return 'Shufersal'
diff --git a/stop_market.py b/stop_market.py
new file mode 100644
index 0000000..d086b60
--- /dev/null
+++ b/stop_market.py
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class StopMarket(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'Stop_Market'
+
+    @property
+    def username(self):
+        return self._class_name
diff --git a/supermarket_chain.py b/supermarket_chain.py
index 4520b86..d4d29d8 100644
--- a/supermarket_chain.py
+++ b/supermarket_chain.py
@@ -10,7 +10,12 @@ from bs4.element import Tag
 from item import Item
 
 
-class SupermarketChain:
+class Meta(type):
+    def __repr__(cls):
+        return cls.__name__
+
+
+class SupermarketChain(object, metaclass=Meta):
     """
     A class representing a supermarket chain.
     """
@@ -122,6 +127,7 @@ class SupermarketChain:
             code=item.find('ItemCode').text
         )
 
+    @classmethod
     @abstractmethod
-    def __repr__(self):
+    def __repr__(cls):
         pass
diff --git a/tiv_taam.py b/tiv_taam.py
new file mode 100644
index 0000000..6e64cf9
--- /dev/null
+++ b/tiv_taam.py
@@ -0,0 +1,10 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class TivTaam(CerberusWebClient, SupermarketChain):
+    _class_name = 'TivTaam'
+
+    @property
+    def username(self):
+        return self._class_name
diff --git a/utils.py b/utils.py
index 70726b6..1c6f49a 100644
--- a/utils.py
+++ b/utils.py
@@ -24,7 +24,7 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
     :return: An xml filename
     """
     store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
-    return path.join(RAW_FILES_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
+    return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml")
 
 
 def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
diff --git a/victory.py b/victory.py
new file mode 100644
index 0000000..7fc1134
--- /dev/null
+++ b/victory.py
@@ -0,0 +1,5 @@
+from mahsaneiHashook import MahsaneiHashook
+
+
+class Victory(MahsaneiHashook):
+    pass
diff --git a/yohananof.py b/yohananof.py
new file mode 100644
index 0000000..4a790ea
--- /dev/null
+++ b/yohananof.py
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class Yohananof(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'Yohananof'
+
+    @property
+    def username(self):
+        return self._class_name
diff --git a/zol_vebegadol.py b/zol_vebegadol.py
index 3297c0a..fca7a19 100644
--- a/zol_vebegadol.py
+++ b/zol_vebegadol.py
@@ -5,9 +5,6 @@ from supermarket_chain import SupermarketChain
 
 
 class ZolVebegadol(SupermarketChain):
-    def __repr__(self):
-        return 'Zol-Vebegadol'
-
     _date_hour_format = '%Y-%m-%d %H:%M:%S'
     _update_date_format = '%Y-%m-%d %H:%M:%S'
     item_tag_name = 'Item'
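
Note, placed after the end of the diff so it is not applied with the patch: the chain_dict registry in main.py works because the new Meta metaclass makes repr() of each chain class (not of an instance) return its bare class name, and utils.py / promotion.py reuse the same name via repr(type(chain)). Below is a minimal, runnable sketch of that pattern; DemoChain is a hypothetical class used only for illustration and does not exist in the repository.

    class Meta(type):
        def __repr__(cls):
            # repr() of the class object itself is its bare name, e.g. 'DemoChain'.
            return cls.__name__


    class SupermarketChain(metaclass=Meta):
        # Stand-in for the real base class; only the metaclass matters here.
        pass


    class DemoChain(SupermarketChain):
        # Hypothetical chain, for illustration only.
        pass


    chains = (DemoChain,)
    chain_dict = {repr(chain): chain() for chain in chains}

    print(list(chain_dict))                     # ['DemoChain']
    print(repr(type(chain_dict['DemoChain'])))  # DemoChain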