has added many chains

This commit is contained in:
KorenLazar
2021-02-06 21:42:31 +02:00
parent 5aa4cd734d
commit 18f3fa32b9
20 changed files with 198 additions and 73 deletions

5
bareket.py Normal file
View File

@@ -0,0 +1,5 @@
from mahsaneiHashook import MahsaneiHashook
class Bareket(MahsaneiHashook):
pass

31
cerberus_web_client.py Normal file
View File

@@ -0,0 +1,31 @@
import json
from abc import abstractmethod
import requests
from supermarket_chain import SupermarketChain
class CerberusWebClient:
    """Mixin that finds a chain's file on https://publishedprices.co.il.

    A concrete chain provides the ``username`` property; ``get_download_url``
    logs in with it and returns the URL of the first listed file that matches
    the requested store and category.
    """

    @property
    @abstractmethod
    def username(self):
        """Login name posted to the site; concrete chains override this."""
        # NOTE: this class does not derive from abc.ABC, so @abstractmethod
        # only marks intent and is not enforced at instantiation. Deriving
        # from abc.ABC is not a drop-in fix: the chain classes also inherit
        # SupermarketChain, which declares its own metaclass, and the two
        # metaclasses would conflict.
        pass

    def get_download_url(
        self,
        store_id: int,
        category: SupermarketChain.XMLFilesCategory,
        session: requests.Session,
    ) -> str:
        """Log in and return the download URL for one store's file.

        :param store_id: Store number; matched in file names as
            ``-<id zero-padded to 3 digits>-20``.
        :param category: File category; its ``name`` with every ``'s'``
            character removed is sent as the search term.
        :param session: Session used for both POST requests (presumably so the
            login state carries over to the listing request - confirm).
        :return: ``hostname + "/file/d/" +`` the first matching file name.
        :raises StopIteration: If no listed file name contains the store marker.
        :raises KeyError: If the listing response has no ``'aaData'`` entry.
        """
        hostname: str = "https://publishedprices.co.il"

        # Log in; only the username field is submitted.
        login_form: dict = {'username': self.username}
        session.post(hostname + "/login/user", data=login_form)

        # Request the directory listing, filtered server-side by category.
        search_term: str = category.name.replace('s', '')
        listing_form: dict = {'iDisplayLength': 100000, 'sSearch': search_term}
        listing_response: requests.Response = session.post(hostname + "/file/ajax_dir", data=listing_form)
        listing: dict = json.loads(listing_response.text)

        # Take the first entry whose name carries the store marker.
        matching_names = (
            entry['name']
            for entry in listing['aaData']
            if f'-{store_id:03d}-20' in entry['name']
        )
        suffix: str = next(matching_names)

        download_url: str = hostname + "/file/d/" + suffix
        print(download_url)
        return download_url

View File

@@ -1,36 +1,5 @@
from typing import Dict, List
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
from item import Item
from supermarket_chain import SupermarketChain
from mahsaneiHashook import MahsaneiHashook
class CoOp(SupermarketChain):
    """CoOp chain; its files are listed on matrixcatalog.co.il."""

    # Tag names and date patterns for this chain's XML files
    # (presumably read by SupermarketChain's parsing code - confirm).
    _promotion_tag_name = 'Sale'
    _promotion_update_tag_name = 'PriceUpdateDate'
    _date_format = '%Y/%m/%d'
    _date_hour_format = '%Y/%m/%d %H:%M:%S'
    _update_date_format = '%Y/%m/%d %H:%M:%S'
    _item_tag_name = 'Product'

    @staticmethod
    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
        """Return the download URL of the first link matching store and category.

        :param store_id: Store number; matched in link targets as
            ``-<id zero-padded to 3 digits>-20``.
        :param category: File category; its ``name`` with every ``'s'``
            removed must appear in the link target.
        :param session: NOTE(review): accepted but not used; the page is
            fetched with ``requests.get``.
        :raises AttributeError: If no link matches (``find`` returns ``None``).
        """
        prefix = "http://matrixcatalog.co.il/"
        listing_page: requests.Response = requests.get(prefix + "NBCompetitionRegulations.aspx")
        soup = BeautifulSoup(listing_page.text, features='lxml')

        def href_matches(value):
            # Same truthiness chain as an inline filter: empty/missing hrefs are rejected first.
            return value and category.name.replace('s', '') in value and f'-{store_id:03d}-20' in value

        link = soup.find('a', href=href_matches)
        suffix: str = link.attrs['href']
        down_url: str = prefix + suffix
        print(down_url)
        return down_url

    def __repr__(self):
        """Return the fixed chain name ``'CoOp'``."""
        return 'CoOp'

    @staticmethod
    def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
        """Return the item named by the promotion's ``ItemCode`` tag, if known.

        :return: A one-element list with the looked-up item, or an empty list
            when the code is absent from ``items_dict`` (or maps to a falsy value).
        """
        item_code = promo.find('ItemCode').text
        promo_item = items_dict.get(item_code)
        if promo_item:
            return [promo_item]
        return []
class CoOp(MahsaneiHashook):
    """CoOp supermarket chain.

    Defines nothing of its own: every attribute and method is inherited
    unchanged from MahsaneiHashook.
    """

11
dor_alon.py Normal file
View File

@@ -0,0 +1,11 @@
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class DorAlon(CerberusWebClient, SupermarketChain):
    """DorAlon chain; its download URL is resolved by CerberusWebClient."""

    # Name submitted as the login username (exposed through ``username``).
    _class_name = 'DorAlon'
    # Date-and-time pattern for this chain
    # (presumably read by SupermarketChain when parsing timestamps - confirm).
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self) -> str:
        """Return the login name that CerberusWebClient.get_download_url posts."""
        return self._class_name

11
freshmarket.py Normal file
View File

@@ -0,0 +1,11 @@
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class Freshmarket(CerberusWebClient, SupermarketChain):
    """Freshmarket chain; its download URL is resolved by CerberusWebClient."""

    # Name submitted as the login username (exposed through ``username``).
    _class_name = 'Freshmarket'
    # Date-and-time pattern for this chain
    # (presumably read by SupermarketChain when parsing timestamps - confirm).
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self) -> str:
        """Return the login name that CerberusWebClient.get_download_url posts."""
        return self._class_name

11
hazi_hinam.py Normal file
View File

@@ -0,0 +1,11 @@
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class HaziHinam(CerberusWebClient, SupermarketChain):
    """HaziHinam chain; its download URL is resolved by CerberusWebClient."""

    # Name submitted as the login username (exposed through ``username``).
    _class_name = 'HaziHinam'
    # Date-and-time pattern for this chain
    # (presumably read by SupermarketChain when parsing timestamps - confirm).
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self) -> str:
        """Return the login name that CerberusWebClient.get_download_url posts."""
        return self._class_name

11
keshet.py Normal file
View File

@@ -0,0 +1,11 @@
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class Keshet(CerberusWebClient, SupermarketChain):
    """Keshet chain; its download URL is resolved by CerberusWebClient."""

    # Name submitted as the login username (exposed through ``username``).
    _class_name = 'Keshet'
    # Date-and-time pattern for this chain
    # (presumably read by SupermarketChain when parsing timestamps - confirm).
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self) -> str:
        """Return the login name that CerberusWebClient.get_download_url posts."""
        return self._class_name

33
mahsaneiHashook.py Normal file
View File

@@ -0,0 +1,33 @@
from typing import Dict, List
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
from item import Item
from supermarket_chain import SupermarketChain
class MahsaneiHashook(SupermarketChain):
    """Mahsanei Hashook chain; its files are listed on matrixcatalog.co.il."""

    # Tag names and date patterns for this chain's XML files
    # (presumably read by SupermarketChain's parsing code - confirm).
    _promotion_tag_name = 'Sale'
    _promotion_update_tag_name = 'PriceUpdateDate'
    _date_format = '%Y/%m/%d'
    _date_hour_format = '%Y/%m/%d %H:%M:%S'
    _update_date_format = '%Y/%m/%d %H:%M:%S'
    _item_tag_name = 'Product'

    @staticmethod
    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
        """Return the download URL of the first link matching store and category.

        :param store_id: Store number; matched in link targets as
            ``-<id zero-padded to 3 digits>-20``.
        :param category: File category; its ``name`` with every ``'s'``
            removed must appear in the link target.
        :param session: Session used to fetch the listing page.
        :return: The site prefix joined with the matching link's ``href``.
        :raises AttributeError: If no link matches (``find`` returns ``None``).
        """
        prefix = "http://matrixcatalog.co.il/"
        url = prefix + "NBCompetitionRegulations.aspx"
        # Fetch through the caller's session instead of a one-off
        # requests.get, so the session argument is honoured the same way the
        # Cerberus-based chains honour it.
        req_res: requests.Response = session.get(url)
        soup = BeautifulSoup(req_res.text, features='lxml')

        # Both markers are constant for the whole search, so build them once.
        category_marker: str = category.name.replace('s', '')
        store_marker: str = f'-{store_id:03d}-20'

        def is_wanted(href) -> bool:
            # <a> tags without an href are passed in as None and rejected first.
            return bool(href) and category_marker in href and store_marker in href

        suffix: str = soup.find('a', href=is_wanted).attrs['href']
        down_url: str = prefix + suffix
        print(down_url)
        return down_url

    @staticmethod
    def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
        """Return the item named by the promotion's ``ItemCode`` tag, if known.

        :param promo: Promotion tag containing an ``ItemCode`` child.
        :param items_dict: Items keyed by item code.
        :return: A one-element list with the looked-up item, or an empty list
            when the code is absent from ``items_dict`` (or maps to a falsy value).
        """
        item_code = promo.find('ItemCode').text
        promo_item = items_dict.get(item_code)
        return [promo_item] if promo_item else []

23
main.py
View File

@@ -1,13 +1,25 @@
from argparse import ArgumentParser
import logging
from bareket import Bareket
from mahsaneiHashook import MahsaneiHashook
from dor_alon import DorAlon
from freshmarket import Freshmarket
from hazi_hinam import HaziHinam
from keshet import Keshet
from promotion import main_latest_promos, get_promos_by_name
from stop_market import StopMarket
from store_utils import get_all_deals, get_store_id
from tiv_taam import TivTaam
from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
from supermarket_chain import SupermarketChain
from shufersal import ShuferSal
from co_op import CoOp
from victory import Victory
from yohananof import Yohananof
from zol_vebegadol import ZolVebegadol
from rami_levi import RamiLevi
from osher_ad import OsherAd
from pathlib import Path
# TODO: fix problem of left-to-right printing
@@ -15,12 +27,11 @@ from pathlib import Path
# Create the results and raw-files directories if they do not exist yet.
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
# NOTE(review): this text is a rendered diff, so both sides of the change are
# visible. The literal below looks like the previous hand-written mapping that
# the comprehension at the end replaces - confirm.
chain_dict = {
'Shufersal': ShuferSal(),
'CoOp': CoOp(),
'Zol-Vebegadol': ZolVebegadol(),
'RamiLevi': RamiLevi(),
}
# Every chain class to register.
chains = ShuferSal, CoOp, ZolVebegadol, RamiLevi, OsherAd, Freshmarket, TivTaam, HaziHinam, Keshet, StopMarket, \
Yohananof, DorAlon, MahsaneiHashook, Bareket, Victory
# Map repr(class) to a fresh instance of that class.
# NOTE(review): presumably repr(class) is the class name (via the metaclass in
# supermarket_chain.py), which would make the keys 'ShuferSal' and
# 'ZolVebegadol' rather than 'Shufersal' and 'Zol-Vebegadol' - confirm callers
# expect the new spelling.
chain_dict = {repr(chain): chain() for chain in chains}
if __name__ == '__main__':
parser = ArgumentParser()

12
osher_ad.py Normal file
View File

@@ -0,0 +1,12 @@
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class OsherAd(CerberusWebClient, SupermarketChain):
    """OsherAd chain; its download URL is resolved by CerberusWebClient."""

    # Name submitted as the login username (exposed through ``username``).
    _class_name = 'OsherAd'
    # Date-and-time pattern for this chain
    # (presumably read by SupermarketChain when parsing timestamps - confirm).
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self) -> str:
        """Return the login name that CerberusWebClient.get_download_url posts."""
        return self._class_name

View File

@@ -140,7 +140,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: Supermarket
promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
promo.end_date), reverse=True)
logger.info('\n'.join(str(promotion) for promotion in promotions))
write_promotions_to_csv(promotions, f'results/{chain}_promos_{store_id}.csv')
write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv')
def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):

View File

@@ -1,28 +1,11 @@
import json
import requests
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class RamiLevi(SupermarketChain):
class RamiLevi(CerberusWebClient, SupermarketChain):
_date_hour_format = '%Y-%m-%d %H:%M:%S'
_class_name = 'RamiLevi'
@staticmethod
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
hostname: str = "https://publishedprices.co.il"
# Post the payload to the site to log in
session.post(hostname + "/login/user", data={'username': 'ramilevi'})
# Scrape the data
ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
s_json: dict = json.loads(s.text)
suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
download_url: str = hostname + "/file/d/" + suffix
print(download_url)
return download_url
def __repr__(self):
return 'RamiLevi'
@property
def username(self):
return self._class_name

View File

@@ -16,6 +16,3 @@ class ShuferSal(SupermarketChain):
down_url: str = soup.find('a', text="לחץ להורדה")['href']
print(down_url)
return down_url
def __repr__(self):
return 'Shufersal'

11
stop_market.py Normal file
View File

@@ -0,0 +1,11 @@
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class StopMarket(CerberusWebClient, SupermarketChain):
    """StopMarket chain; its download URL is resolved by CerberusWebClient."""

    # Name submitted as the login username (exposed through ``username``).
    # Note the underscore: this value differs from the Python class name.
    _class_name = 'Stop_Market'
    # Date-and-time pattern for this chain
    # (presumably read by SupermarketChain when parsing timestamps - confirm).
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self) -> str:
        """Return the login name that CerberusWebClient.get_download_url posts."""
        return self._class_name

View File

@@ -10,7 +10,12 @@ from bs4.element import Tag
from item import Item
class SupermarketChain:
class Meta(type):
    """Metaclass whose classes print as their bare class name."""

    def __repr__(cls) -> str:
        """Return the class's ``__name__`` in place of the default ``<class '...'>`` text."""
        class_name: str = cls.__name__
        return class_name
class SupermarketChain(object, metaclass=Meta):
"""
A class representing a supermarket chain.
"""
@@ -122,6 +127,7 @@ class SupermarketChain:
code=item.find('ItemCode').text
)
@classmethod
@abstractmethod
def __repr__(self):
def __repr__(cls):
pass

10
tiv_taam.py Normal file
View File

@@ -0,0 +1,10 @@
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class TivTaam(CerberusWebClient, SupermarketChain):
    """TivTaam chain; its download URL is resolved by CerberusWebClient."""

    # Name submitted as the login username (exposed through ``username``).
    # NOTE(review): unlike the sibling Cerberus-based chains, no
    # _date_hour_format override is defined here, so whatever SupermarketChain
    # provides applies - confirm that is intended.
    _class_name = 'TivTaam'

    @property
    def username(self) -> str:
        """Return the login name that CerberusWebClient.get_download_url posts."""
        return self._class_name

View File

@@ -24,7 +24,7 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
:return: An xml filename
"""
store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
return path.join(RAW_FILES_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml")
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,

5
victory.py Normal file
View File

@@ -0,0 +1,5 @@
from mahsaneiHashook import MahsaneiHashook
class Victory(MahsaneiHashook):
    """Victory supermarket chain.

    Defines nothing of its own: every attribute and method is inherited
    unchanged from MahsaneiHashook.
    """

11
yohananof.py Normal file
View File

@@ -0,0 +1,11 @@
from cerberus_web_client import CerberusWebClient
from supermarket_chain import SupermarketChain
class Yohananof(CerberusWebClient, SupermarketChain):
    """Yohananof chain; its download URL is resolved by CerberusWebClient."""

    # Name submitted as the login username (exposed through ``username``).
    _class_name = 'Yohananof'
    # Date-and-time pattern for this chain
    # (presumably read by SupermarketChain when parsing timestamps - confirm).
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self) -> str:
        """Return the login name that CerberusWebClient.get_download_url posts."""
        return self._class_name

View File

@@ -5,9 +5,6 @@ from supermarket_chain import SupermarketChain
class ZolVebegadol(SupermarketChain):
def __repr__(self):
return 'Zol-Vebegadol'
_date_hour_format = '%Y-%m-%d %H:%M:%S'
_update_date_format = '%Y-%m-%d %H:%M:%S'
item_tag_name = 'Item'