has added many chains
This commit is contained in:
5
bareket.py
Normal file
5
bareket.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
from mahsaneiHashook import MahsaneiHashook
|
||||||
|
|
||||||
|
|
||||||
|
class Bareket(MahsaneiHashook):
    """The Bareket chain; inherits every setting and method from MahsaneiHashook unchanged."""
|
31
cerberus_web_client.py
Normal file
31
cerberus_web_client.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import json
|
||||||
|
from abc import abstractmethod
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class CerberusWebClient:
    """
    Mixin for supermarket chains whose XML files are listed on publishedprices.co.il.

    Subclasses supply the site login name through the ``username`` property.
    """

    def get_download_url(self, store_id: int, category: 'SupermarketChain.XMLFilesCategory',
                         session: 'requests.Session') -> str:
        """
        Log in to the site and build the download URL of a store's file in the given category.

        :param store_id: Numeric store id; it is matched zero-padded to three digits
        :param category: The wanted files category; its ``name`` is used as the search term
        :param session: The session used for both the login and the directory query
        :return: The full download URL of the first listed file that matches the store
        :raises ValueError: If no listed file matches the store and category
        """
        hostname: str = "https://publishedprices.co.il"

        # Post the payload to the site to log in
        session.post(hostname + "/login/user", data={'username': self.username})

        # Scrape the data.
        # The search term is the category name with every lowercase 's' removed (e.g. 'Promos' -> 'Promo').
        ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
        s: 'requests.Response' = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
        s_json: dict = json.loads(s.text)

        # File names are expected to contain '-<store id>-20'; the '20' is presumably the start of a
        # 20xx timestamp - verify against the site's naming scheme.
        store_marker: str = f'-{store_id:03d}-20'
        suffix = next((d['name'] for d in s_json['aaData'] if store_marker in d['name']), None)
        if suffix is None:
            # A bare StopIteration escaping from next() is easy to misread (or to swallow silently
            # inside a generator), so fail with an explicit message instead.
            raise ValueError(f"No {category.name} file found for store {store_id} (user {self.username!r})")

        download_url: str = hostname + "/file/d/" + suffix
        print(download_url)
        return download_url

    @property
    @abstractmethod
    def username(self):
        """
        The user name posted to the site's login form; every concrete chain must override it.

        This class does not use ABCMeta (it is mixed with SupermarketChain, which declares its own
        metaclass), so ``abstractmethod`` is not enforced at instantiation. Raising here makes a
        missing override fail loudly instead of logging in with ``None``.
        """
        raise NotImplementedError(f"{type(self).__name__} must define the 'username' property")
|
37
co_op.py
37
co_op.py
@@ -1,36 +1,5 @@
|
|||||||
from typing import Dict, List
|
from mahsaneiHashook import MahsaneiHashook
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from bs4.element import Tag
|
|
||||||
|
|
||||||
from item import Item
|
|
||||||
from supermarket_chain import SupermarketChain
|
|
||||||
|
|
||||||
|
|
||||||
class CoOp(SupermarketChain):
|
class CoOp(MahsaneiHashook):
|
||||||
_promotion_tag_name = 'Sale'
|
pass
|
||||||
_promotion_update_tag_name = 'PriceUpdateDate'
|
|
||||||
_date_format = '%Y/%m/%d'
|
|
||||||
_date_hour_format = '%Y/%m/%d %H:%M:%S'
|
|
||||||
_update_date_format = '%Y/%m/%d %H:%M:%S'
|
|
||||||
_item_tag_name = 'Product'
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
|
|
||||||
prefix = "http://matrixcatalog.co.il/"
|
|
||||||
url = prefix + "NBCompetitionRegulations.aspx"
|
|
||||||
req_res: requests.Response = requests.get(url)
|
|
||||||
soup = BeautifulSoup(req_res.text, features='lxml')
|
|
||||||
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
|
|
||||||
and f'-{store_id:03d}-20' in value).attrs['href']
|
|
||||||
down_url: str = prefix + suffix
|
|
||||||
print(down_url)
|
|
||||||
return down_url
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return 'CoOp'
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
|
|
||||||
promo_item = items_dict.get(promo.find('ItemCode').text)
|
|
||||||
return [promo_item] if promo_item else []
|
|
||||||
|
11
dor_alon.py
Normal file
11
dor_alon.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from cerberus_web_client import CerberusWebClient
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class DorAlon(CerberusWebClient, SupermarketChain):
    """The Dor Alon chain: a SupermarketChain combined with CerberusWebClient."""

    # Value exposed through the ``username`` property
    _class_name = 'DorAlon'
    # Date-and-hour format string; presumably read by SupermarketChain's parsing code - verify
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self):
        """Return this chain's user name, i.e. the value of ``_class_name``."""
        return self._class_name
|
11
freshmarket.py
Normal file
11
freshmarket.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from cerberus_web_client import CerberusWebClient
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class Freshmarket(CerberusWebClient, SupermarketChain):
    """The Freshmarket chain: a SupermarketChain combined with CerberusWebClient."""

    # Value exposed through the ``username`` property
    _class_name = 'Freshmarket'
    # Date-and-hour format string; presumably read by SupermarketChain's parsing code - verify
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self):
        """Return this chain's user name, i.e. the value of ``_class_name``."""
        return self._class_name
|
11
hazi_hinam.py
Normal file
11
hazi_hinam.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from cerberus_web_client import CerberusWebClient
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class HaziHinam(CerberusWebClient, SupermarketChain):
    """The Hazi Hinam chain: a SupermarketChain combined with CerberusWebClient."""

    # Value exposed through the ``username`` property
    _class_name = 'HaziHinam'
    # Date-and-hour format string; presumably read by SupermarketChain's parsing code - verify
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self):
        """Return this chain's user name, i.e. the value of ``_class_name``."""
        return self._class_name
|
11
keshet.py
Normal file
11
keshet.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from cerberus_web_client import CerberusWebClient
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class Keshet(CerberusWebClient, SupermarketChain):
    """The Keshet chain: a SupermarketChain combined with CerberusWebClient."""

    # Value exposed through the ``username`` property
    _class_name = 'Keshet'
    # Date-and-hour format string; presumably read by SupermarketChain's parsing code - verify
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self):
        """Return this chain's user name, i.e. the value of ``_class_name``."""
        return self._class_name
|
33
mahsaneiHashook.py
Normal file
33
mahsaneiHashook.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
from typing import Dict, List
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from bs4.element import Tag
|
||||||
|
|
||||||
|
from item import Item
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class MahsaneiHashook(SupermarketChain):
    """
    A supermarket chain whose XML files are linked from matrixcatalog.co.il.

    The class attributes override tag names and date formats; they are presumably consumed by
    SupermarketChain's parsing code, which is not visible here - verify against the base class.
    """
    _promotion_tag_name = 'Sale'
    _promotion_update_tag_name = 'PriceUpdateDate'
    _date_format = '%Y/%m/%d'
    _date_hour_format = '%Y/%m/%d %H:%M:%S'
    _update_date_format = '%Y/%m/%d %H:%M:%S'
    _item_tag_name = 'Product'

    @staticmethod
    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
        """
        Find the download URL of a store's file in the given category on the site's listing page.

        :param store_id: Numeric store id; it is matched zero-padded to three digits
        :param category: The wanted files category; its ``name`` is matched against link targets
        :param session: The session used for the request (plain ``requests`` is used if it is None)
        :return: The site prefix joined with the href of the first matching link
        :raises ValueError: If the listing page has no link matching the store and category
        """
        prefix = "http://matrixcatalog.co.il/"
        url = prefix + "NBCompetitionRegulations.aspx"

        # Use the caller's session rather than a one-off request, falling back for callers passing None.
        http = session if session is not None else requests
        req_res: requests.Response = http.get(url)
        soup = BeautifulSoup(req_res.text, features='lxml')

        # The category name with every lowercase 's' removed (e.g. 'Promos' -> 'Promo') must appear in the href
        category_key: str = category.name.replace('s', '')
        # '-<store id>-20'; the '20' is presumably the start of a 20xx timestamp - verify
        store_key: str = f'-{store_id:03d}-20'

        link = soup.find('a', href=lambda value: value and category_key in value and store_key in value)
        if link is None:
            # find() returns None when nothing matches; report that instead of failing on ``.attrs``.
            raise ValueError(f"No {category.name} file found for store {store_id} at {url}")

        down_url: str = prefix + link.attrs['href']
        print(down_url)
        return down_url

    @staticmethod
    def get_items(promo: Tag, items_dict: Dict[str, Item]) -> List[Item]:
        """
        Return the item a promotion refers to, looked up by the promotion's ``ItemCode`` tag.

        :param promo: The promotion tag; it must contain an ``ItemCode`` child
        :param items_dict: A mapping from item code to Item
        :return: A one-element list with the matching item, or an empty list if the code is unknown
        """
        promo_item = items_dict.get(promo.find('ItemCode').text)
        return [promo_item] if promo_item else []
|
23
main.py
23
main.py
@@ -1,13 +1,25 @@
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from bareket import Bareket
|
||||||
|
from mahsaneiHashook import MahsaneiHashook
|
||||||
|
from dor_alon import DorAlon
|
||||||
|
from freshmarket import Freshmarket
|
||||||
|
from hazi_hinam import HaziHinam
|
||||||
|
from keshet import Keshet
|
||||||
from promotion import main_latest_promos, get_promos_by_name
|
from promotion import main_latest_promos, get_promos_by_name
|
||||||
|
from stop_market import StopMarket
|
||||||
from store_utils import get_all_deals, get_store_id
|
from store_utils import get_all_deals, get_store_id
|
||||||
|
from tiv_taam import TivTaam
|
||||||
from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
|
from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
|
||||||
from supermarket_chain import SupermarketChain
|
from supermarket_chain import SupermarketChain
|
||||||
from shufersal import ShuferSal
|
from shufersal import ShuferSal
|
||||||
from co_op import CoOp
|
from co_op import CoOp
|
||||||
|
from victory import Victory
|
||||||
|
from yohananof import Yohananof
|
||||||
from zol_vebegadol import ZolVebegadol
|
from zol_vebegadol import ZolVebegadol
|
||||||
from rami_levi import RamiLevi
|
from rami_levi import RamiLevi
|
||||||
|
from osher_ad import OsherAd
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# TODO: fix problem of left-to-right printing
|
# TODO: fix problem of left-to-right printing
|
||||||
@@ -15,12 +27,11 @@ from pathlib import Path
|
|||||||
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
|
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
|
||||||
Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
|
Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
|
||||||
|
|
||||||
chain_dict = {
|
chains = ShuferSal, CoOp, ZolVebegadol, RamiLevi, OsherAd, Freshmarket, TivTaam, HaziHinam, Keshet, StopMarket, \
|
||||||
'Shufersal': ShuferSal(),
|
Yohananof, DorAlon, MahsaneiHashook, Bareket, Victory
|
||||||
'CoOp': CoOp(),
|
|
||||||
'Zol-Vebegadol': ZolVebegadol(),
|
chain_dict = {repr(chain): chain() for chain in chains}
|
||||||
'RamiLevi': RamiLevi(),
|
|
||||||
}
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
parser = ArgumentParser()
|
parser = ArgumentParser()
|
||||||
|
12
osher_ad.py
Normal file
12
osher_ad.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from cerberus_web_client import CerberusWebClient
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class OsherAd(CerberusWebClient, SupermarketChain):
    """The Osher Ad chain: a SupermarketChain combined with CerberusWebClient."""

    # Value exposed through the ``username`` property
    _class_name = 'OsherAd'
    # Date-and-hour format string; presumably read by SupermarketChain's parsing code - verify
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self):
        """Return this chain's user name, i.e. the value of ``_class_name``."""
        return self._class_name
|
@@ -140,7 +140,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: Supermarket
|
|||||||
promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
|
promotions.sort(key=lambda promo: (max(promo.update_date.date(), promo.start_date.date()), promo.start_date -
|
||||||
promo.end_date), reverse=True)
|
promo.end_date), reverse=True)
|
||||||
logger.info('\n'.join(str(promotion) for promotion in promotions))
|
logger.info('\n'.join(str(promotion) for promotion in promotions))
|
||||||
write_promotions_to_csv(promotions, f'results/{chain}_promos_{store_id}.csv')
|
write_promotions_to_csv(promotions, f'results/{repr(type(chain))}_promos_{store_id}.csv')
|
||||||
|
|
||||||
|
|
||||||
def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):
|
def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str, load_prices: bool, load_promos: bool):
|
||||||
|
29
rami_levi.py
29
rami_levi.py
@@ -1,28 +1,11 @@
|
|||||||
import json
|
from cerberus_web_client import CerberusWebClient
|
||||||
import requests
|
|
||||||
|
|
||||||
from supermarket_chain import SupermarketChain
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
class RamiLevi(SupermarketChain):
|
class RamiLevi(CerberusWebClient, SupermarketChain):
|
||||||
_date_hour_format = '%Y-%m-%d %H:%M:%S'
|
_date_hour_format = '%Y-%m-%d %H:%M:%S'
|
||||||
|
_class_name = 'RamiLevi'
|
||||||
|
|
||||||
@staticmethod
|
@property
|
||||||
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory, session: requests.Session) -> str:
|
def username(self):
|
||||||
hostname: str = "https://publishedprices.co.il"
|
return self._class_name
|
||||||
|
|
||||||
# Post the payload to the site to log in
|
|
||||||
session.post(hostname + "/login/user", data={'username': 'ramilevi'})
|
|
||||||
|
|
||||||
# Scrape the data
|
|
||||||
ajax_dir_payload: dict = {'iDisplayLength': 100000, 'sSearch': category.name.replace('s', '')}
|
|
||||||
s: requests.Response = session.post(hostname + "/file/ajax_dir", data=ajax_dir_payload)
|
|
||||||
s_json: dict = json.loads(s.text)
|
|
||||||
suffix: str = next(d['name'] for d in s_json['aaData'] if f'-{store_id:03d}-20' in d['name'])
|
|
||||||
|
|
||||||
download_url: str = hostname + "/file/d/" + suffix
|
|
||||||
print(download_url)
|
|
||||||
return download_url
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return 'RamiLevi'
|
|
||||||
|
@@ -16,6 +16,3 @@ class ShuferSal(SupermarketChain):
|
|||||||
down_url: str = soup.find('a', text="לחץ להורדה")['href']
|
down_url: str = soup.find('a', text="לחץ להורדה")['href']
|
||||||
print(down_url)
|
print(down_url)
|
||||||
return down_url
|
return down_url
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return 'Shufersal'
|
|
||||||
|
11
stop_market.py
Normal file
11
stop_market.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from cerberus_web_client import CerberusWebClient
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class StopMarket(CerberusWebClient, SupermarketChain):
    """The Stop Market chain: a SupermarketChain combined with CerberusWebClient."""

    # Value exposed through the ``username`` property. Note that it contains an underscore and so
    # differs from the Python class name; presumably this is the account name on the site - verify.
    _class_name = 'Stop_Market'
    # Date-and-hour format string; presumably read by SupermarketChain's parsing code - verify
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self):
        """Return this chain's user name, i.e. the value of ``_class_name``."""
        return self._class_name
|
@@ -10,7 +10,12 @@ from bs4.element import Tag
|
|||||||
from item import Item
|
from item import Item
|
||||||
|
|
||||||
|
|
||||||
class SupermarketChain:
|
class Meta(type):
    """Metaclass whose classes are represented by their bare name, so ``repr(SomeClass)`` is ``'SomeClass'``."""

    def __repr__(cls) -> str:
        class_name: str = cls.__name__
        return class_name
|
||||||
|
|
||||||
|
|
||||||
|
class SupermarketChain(object, metaclass=Meta):
|
||||||
"""
|
"""
|
||||||
A class representing a supermarket chain.
|
A class representing a supermarket chain.
|
||||||
"""
|
"""
|
||||||
@@ -122,6 +127,7 @@ class SupermarketChain:
|
|||||||
code=item.find('ItemCode').text
|
code=item.find('ItemCode').text
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def __repr__(self):
|
def __repr__(cls):
|
||||||
pass
|
pass
|
||||||
|
10
tiv_taam.py
Normal file
10
tiv_taam.py
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
from cerberus_web_client import CerberusWebClient
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class TivTaam(CerberusWebClient, SupermarketChain):
    """The Tiv Taam chain: a SupermarketChain combined with CerberusWebClient."""

    # NOTE(review): this class defines no ``_date_hour_format`` of its own, so whatever the base
    # classes provide applies - confirm that this is intended.

    # Value exposed through the ``username`` property
    _class_name = 'TivTaam'

    @property
    def username(self):
        """Return this chain's user name, i.e. the value of ``_class_name``."""
        return self._class_name
|
2
utils.py
2
utils.py
@@ -24,7 +24,7 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
|
|||||||
:return: An xml filename
|
:return: An xml filename
|
||||||
"""
|
"""
|
||||||
store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
|
store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
|
||||||
return path.join(RAW_FILES_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
|
return path.join(RAW_FILES_DIRNAME, f"{repr(type(chain))}-{category_name}{store_id_str}.xml")
|
||||||
|
|
||||||
|
|
||||||
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
|
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
|
||||||
|
5
victory.py
Normal file
5
victory.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
from mahsaneiHashook import MahsaneiHashook
|
||||||
|
|
||||||
|
|
||||||
|
class Victory(MahsaneiHashook):
    """The Victory chain; inherits every setting and method from MahsaneiHashook unchanged."""
|
11
yohananof.py
Normal file
11
yohananof.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
from cerberus_web_client import CerberusWebClient
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class Yohananof(CerberusWebClient, SupermarketChain):
    """The Yohananof chain: a SupermarketChain combined with CerberusWebClient."""

    # Value exposed through the ``username`` property
    _class_name = 'Yohananof'
    # Date-and-hour format string; presumably read by SupermarketChain's parsing code - verify
    _date_hour_format = '%Y-%m-%d %H:%M:%S'

    @property
    def username(self):
        """Return this chain's user name, i.e. the value of ``_class_name``."""
        return self._class_name
|
@@ -5,9 +5,6 @@ from supermarket_chain import SupermarketChain
|
|||||||
|
|
||||||
|
|
||||||
class ZolVebegadol(SupermarketChain):
|
class ZolVebegadol(SupermarketChain):
|
||||||
def __repr__(self):
|
|
||||||
return 'Zol-Vebegadol'
|
|
||||||
|
|
||||||
_date_hour_format = '%Y-%m-%d %H:%M:%S'
|
_date_hour_format = '%Y-%m-%d %H:%M:%S'
|
||||||
_update_date_format = '%Y-%m-%d %H:%M:%S'
|
_update_date_format = '%Y-%m-%d %H:%M:%S'
|
||||||
item_tag_name = 'Item'
|
item_tag_name = 'Item'
|
||||||
|
Reference in New Issue
Block a user