has added many chains

bareket.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+from mahsaneiHashook import MahsaneiHashook
+
+
+class Bareket(MahsaneiHashook):
+    pass

cerberus_web_client.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+import json
+from abc import abstractmethod
+
+import requests
+
+from supermarket_chain import SupermarketChain
+
+
+class CerberusWebClient:
+
+    def get_download_url(self, store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) \
+            -> str:
+        hostname: str = "https://publishedprices.co.il"
+
+        # Post the payload to the site to log in
+        session.post(hostname + "/login/user", data={'username': self.username})
+
+        # Scrape the data
+        ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
+        s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
+        s_json: dict = json.loads(s.text)
+        suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
+
+        download_url: str = hostname + "/file/d/" + suffix
+        print(download_url)
+        return download_url
+
+    @property
+    @abstractmethod
+    def username(self):
+        pass
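
Note: a minimal sketch of how this mixin is meant to be consumed, not part of the commit. The chain name ExampleChain, the store id 5, and the XMLFilesCategory member Promos are assumptions for illustration; the real enum lives in supermarket_chain.py.

    import requests

    from cerberus_web_client import CerberusWebClient
    from supermarket_chain import SupermarketChain


    class ExampleChain(CerberusWebClient, SupermarketChain):
        _class_name = 'ExampleChain'

        @property
        def username(self):
            # The login form above is posted with the username only.
            return self._class_name


    chain = ExampleChain()
    with requests.Session() as session:
        # Logs in via the mixin's `username`, then picks the published file for this store id.
        url = chain.get_download_url(5, SupermarketChain.XMLFilesCategory.Promos, session)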

co_op.py (37 changed lines)
@@ -1,36 +1,5 @@
-from typing import Dict, List
-import requests
-from bs4 import BeautifulSoup
-from bs4.element import Tag
-
-from item import Item
-from supermarket_chain import SupermarketChain
+from mahsaneiHashook import MahsaneiHashook
 
 
-class CoOp(SupermarketChain):
-    _promotion_tag_name = 'Sale'
-    _promotion_update_tag_name = 'PriceUpdateDate'
-    _date_format = '%Y/%m/%d'
-    _date_hour_format = '%Y/%m/%d %H:%M:%S'
-    _update_date_format = '%Y/%m/%d %H:%M:%S'
-    _item_tag_name = 'Product'
-
-    @staticmethod
-    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
-        prefix = "http://matrixcatalog.co.il/"
-        url = prefix + "NBCompetitionRegulations.aspx"
-        req_res: requests.Response = requests.get(url)
-        soup = BeautifulSoup(req_res.text, features='lxml')
-        suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
-                                                        and f'-{store_id:03d}-20' in value).attrs['href']
-        down_url: str = prefix + suffix
-        print(down_url)
-        return down_url
-
-    def __repr__(self):
-        return 'CoOp'
-
-    @staticmethod
-    def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
-        promo_item = items_dict.get(promo.find('ItemCode').text)
-        return [promo_item] if promo_item else []
+class CoOp(MahsaneiHashook):
+    pass

dor_alon.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class DorAlon(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'DorAlon'
+
+    @property
+    def username(self):
+        return self._class_name

freshmarket.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class Freshmarket(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'Freshmarket'
+
+    @property
+    def username(self):
+        return self._class_name

hazi_hinam.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class HaziHinam(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'HaziHinam'
+
+    @property
+    def username(self):
+        return self._class_name

keshet.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class Keshet(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'Keshet'
+
+    @property
+    def username(self):
+        return self._class_name

mahsaneiHashook.py (new file, 33 lines)
@@ -0,0 +1,33 @@
+from typing import Dict, List
+import requests
+from bs4 import BeautifulSoup
+from bs4.element import Tag
+
+from item import Item
+from supermarket_chain import SupermarketChain
+
+
+class MahsaneiHashook(SupermarketChain):
+    _promotion_tag_name = 'Sale'
+    _promotion_update_tag_name = 'PriceUpdateDate'
+    _date_format = '%Y/%m/%d'
+    _date_hour_format = '%Y/%m/%d %H:%M:%S'
+    _update_date_format = '%Y/%m/%d %H:%M:%S'
+    _item_tag_name = 'Product'
+
+    @staticmethod
+    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
+        prefix = "http://matrixcatalog.co.il/"
+        url = prefix + "NBCompetitionRegulations.aspx"
+        req_res: requests.Response = requests.get(url)
+        soup = BeautifulSoup(req_res.text, features='lxml')
+        suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
+                                                        and f'-{store_id:03d}-20' in value).attrs['href']
+        down_url: str = prefix + suffix
+        print(down_url)
+        return down_url
+
+    @staticmethod
+    def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
+        promo_item = items_dict.get(promo.find('ItemCode').text)
+        return [promo_item] if promo_item else []
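
Note: the href filter above keys on two substrings: the category name with every 's' removed (e.g. a hypothetical 'Promos' member becomes 'Promo') and f'-{store_id:03d}-20', i.e. the zero-padded three-digit store number followed by what looks like the start of the timestamp in the published file names. A tiny illustration; the file name below is hypothetical:

    store_id = 5
    pattern = f'-{store_id:03d}-20'   # '-005-20'
    print(pattern in 'PriceFull7290696200003-005-202101010000.xml')   # True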

main.py (23 changed lines)
@@ -1,13 +1,25 @@
 from argparse import ArgumentParser
 import logging
 
+from bareket import Bareket
+from mahsaneiHashook import MahsaneiHashook
+from dor_alon import DorAlon
+from freshmarket import Freshmarket
+from hazi_hinam import HaziHinam
+from keshet import Keshet
 from promotion import main_latest_promos, get_promos_by_name
+from stop_market import StopMarket
 from store_utils import get_all_deals, get_store_id
+from tiv_taam import TivTaam
 from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
 from supermarket_chain import SupermarketChain
 from shufersal import ShuferSal
 from co_op import CoOp
+from victory import Victory
+from yohananof import Yohananof
 from zol_vebegadol import ZolVebegadol
 from rami_levi import RamiLevi
+from osher_ad import OsherAd
 from pathlib import Path
 
 # TODO: fix problem of left-to-right printing
@@ -15,12 +27,11 @@ from pathlib import Path
 Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
 Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
 
-chain_dict = {
-    'Shufersal': ShuferSal(),
-    'CoOp': CoOp(),
-    'Zol-Vebegadol': ZolVebegadol(),
-    'RamiLevi': RamiLevi(),
-}
+chains = ShuferSal, CoOp, ZolVebegadol, RamiLevi, OsherAd, Freshmarket, TivTaam, HaziHinam, Keshet, StopMarket, \
+    Yohananof, DorAlon, MahsaneiHashook, Bareket, Victory
+
+chain_dict = {repr(chain): chain() for chain in chains}
+
 
 if __name__ == '__main__':
     parser = ArgumentParser()
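
Note: with the metaclass __repr__ introduced in supermarket_chain.py below, repr() of each class in `chains` is its bare class name, so the comprehension expands to roughly the following. Illustration only; observe that the keys now follow the class names, e.g. 'ShuferSal' and 'ZolVebegadol' rather than the old hand-written 'Shufersal' and 'Zol-Vebegadol':

    chain_dict = {
        'ShuferSal': ShuferSal(),
        'CoOp': CoOp(),
        'ZolVebegadol': ZolVebegadol(),
        'RamiLevi': RamiLevi(),
        # ... one entry per class listed in `chains`
    }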

osher_ad.py (new file, 12 lines)
@@ -0,0 +1,12 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class OsherAd(CerberusWebClient, SupermarketChain):
+
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'OsherAd'
+
+    @property
+    def username(self):
+        return self._class_name

promotion.py
@@ -140,7 +140,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: SupermarketChain
     promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
                                        promo.end_date), reverse=True)
     logger.info('\n'.join(str(promotion) for promotion in promotions))
-    write_promotions_to_csv(promotions, f'results/{chain}_promos_{store_id}.csv')
+    write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv')
 
 
 def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):

rami_levi.py (29 changed lines)
@@ -1,28 +1,11 @@
-import json
-import requests
-
+from cerberus_web_client import CerberusWebClient
 from supermarket_chain import SupermarketChain
 
 
-class RamiLevi(SupermarketChain):
+class RamiLevi(CerberusWebClient, SupermarketChain):
     _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'RamiLevi'
 
-    @staticmethod
-    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
-        hostname: str = "https://publishedprices.co.il"
-
-        # Post the payload to the site to log in
-        session.post(hostname + "/login/user", data={'username': 'ramilevi'})
-
-        # Scrape the data
-        ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
-        s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
-        s_json: dict = json.loads(s.text)
-        suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
-
-        download_url: str = hostname + "/file/d/" + suffix
-        print(download_url)
-        return download_url
-
-    def __repr__(self):
-        return 'RamiLevi'
+    @property
+    def username(self):
+        return self._class_name

shufersal.py
@@ -16,6 +16,3 @@ class ShuferSal(SupermarketChain):
         down_url: str = soup.find('a', text="לחץ להורדה")['href']
         print(down_url)
         return down_url
-
-    def __repr__(self):
-        return 'Shufersal'

stop_market.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class StopMarket(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'Stop_Market'
+
+    @property
+    def username(self):
+        return self._class_name

supermarket_chain.py
@@ -10,7 +10,12 @@ from bs4.element import Tag
 from item import Item
 
 
-class SupermarketChain:
+class Meta(type):
+    def __repr__(cls):
+        return cls.__name__
+
+
+class SupermarketChain(object, metaclass=Meta):
     """
     A class representing a supermarket chain.
     """
@@ -122,6 +127,7 @@ class SupermarketChain:
             code=item.find('ItemCode').text
         )
 
+    @classmethod
     @abstractmethod
-    def __repr__(self):
+    def __repr__(cls):
         pass
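
Note: a short standalone sketch of what the new metaclass buys, not part of the commit. repr() of any chain class, or of type(instance), now resolves through Meta.__repr__ and returns the bare class name, which is what the updated f-strings in promotion.py and utils.py rely on.

    class Meta(type):
        def __repr__(cls):
            return cls.__name__


    class SupermarketChain(object, metaclass=Meta):
        pass


    class DorAlon(SupermarketChain):
        pass


    print(repr(DorAlon))           # DorAlon
    print(repr(type(DorAlon())))   # DorAlon  -- the f'{repr(type(chain))}' form used elsewhere in this commit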

tiv_taam.py (new file, 10 lines)
@@ -0,0 +1,10 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class TivTaam(CerberusWebClient, SupermarketChain):
+    _class_name = 'TivTaam'
+
+    @property
+    def username(self):
+        return self._class_name

utils.py (2 changed lines)
@@ -24,7 +24,7 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
     :return: An xml filename
     """
     store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
-    return path.join(RAW_FILES_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
+    return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml")
 
 
 def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
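
Note: a worked example of the new filename. The chain, category name, and store id are illustrative and assume is_valid_store_id(5) holds:

    # xml_file_gen(DorAlon(), 5, 'Promos') now evaluates to
    path.join(RAW_FILES_DIRNAME, "DorAlon-Promos-5.xml")
    # whereas the old f'{chain}' form depended on each chain's hand-written __repr__.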

victory.py (new file, 5 lines)
@@ -0,0 +1,5 @@
+from mahsaneiHashook import MahsaneiHashook
+
+
+class Victory(MahsaneiHashook):
+    pass

yohananof.py (new file, 11 lines)
@@ -0,0 +1,11 @@
+from cerberus_web_client import CerberusWebClient
+from supermarket_chain import SupermarketChain
+
+
+class Yohananof(CerberusWebClient, SupermarketChain):
+    _date_hour_format = '%Y-%m-%d %H:%M:%S'
+    _class_name = 'Yohananof'
+
+    @property
+    def username(self):
+        return self._class_name

zol_vebegadol.py
@@ -5,9 +5,6 @@ from supermarket_chain import SupermarketChain
 
 
 class ZolVebegadol(SupermarketChain):
-    def __repr__(self):
-        return 'Zol-Vebegadol'
-
     _date_hour_format = '%Y-%m-%d %H:%M:%S'
     _update_date_format = '%Y-%m-%d %H:%M:%S'
     item_tag_name = 'Item'