Added Zol-Vebegadol. Fixed a small bug in Co-Op.
This commit is contained in:
2
co_op.py
2
co_op.py
@@ -19,7 +19,7 @@ class CoOp(SupermarketChain):
|
||||
req_res: requests.Response = requests.get(url)
|
||||
soup = BeautifulSoup(req_res.text, features='lxml')
|
||||
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
|
||||
and f'-{store_id}-20' in value).attrs['href']
|
||||
and f'-{store_id:03d}-20' in value).attrs['href']
|
||||
down_url = prefix + suffix
|
||||
print(down_url)
|
||||
return down_url
|
||||
|
16
main.py
16
main.py
@@ -6,8 +6,10 @@ from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices
|
||||
from supermarket_chain import SupermarketChain
|
||||
from shufersal import ShuferSal
|
||||
from co_op import CoOp
|
||||
from zol_vebegadol import ZolVebegadol
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# TODO: fix problem of left-to-right printing
|
||||
|
||||
Path(LOGS_DIRNAME).mkdir(exist_ok=True)
|
||||
@@ -16,6 +18,7 @@ Path(XMLS_DIRNAME).mkdir(exist_ok=True)
|
||||
chain_dict = {
|
||||
'Shufersal': ShuferSal(),
|
||||
'Co-Op': CoOp(),
|
||||
'Zol-Vebegadol': ZolVebegadol()
|
||||
}
|
||||
|
||||
if __name__ == '__main__':
|
||||
@@ -57,7 +60,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument('--chain',
|
||||
required=True,
|
||||
help='The name of the requested chain',
|
||||
choices=['Shufersal', 'Co-Op'],
|
||||
choices=chain_dict.keys(),
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -83,3 +86,14 @@ if __name__ == '__main__':
|
||||
arg_store_id = int(args.find_promos_by_name[0])
|
||||
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
|
||||
load_prices=args.load_prices, load_promos=args.load_promos)
|
||||
|
||||
|
||||
# Script for Shufersal:
|
||||
# store_ids = get_all_deals(chain)
|
||||
# print(store_ids)
|
||||
# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300]
|
||||
# null_items_lists = list()
|
||||
# for store_id in store_ids[::-1]:
|
||||
# print(store_id)
|
||||
# null_items_lists.append(get_all_null_items_in_promos(chain, store_id))
|
||||
# print(setintersection(*[set(list) for list in null_items_lists]))
|
||||
|
25
promotion.py
25
promotion.py
@@ -1,6 +1,5 @@
|
||||
from datetime import datetime
|
||||
from typing import Dict, List
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from utils import (
|
||||
create_items_dict,
|
||||
@@ -33,12 +32,15 @@ class Promotion:
|
||||
return '\n'.join([title, dates_range, update_line, items]) + '\n'
|
||||
|
||||
def repr_ltr(self):
|
||||
title = self.content[::-1]
|
||||
title = self.content
|
||||
dates_range = f"Between {self.start_date} and {self.end_date}"
|
||||
update_line = f"Updated at {self.update_date}"
|
||||
items = '\n'.join(str(item)[::-1] for item in self.items)
|
||||
items = '\n'.join(str(item) for item in self.items)
|
||||
return '\n'.join([title, dates_range, update_line, items]) + '\n'
|
||||
|
||||
def __eq__(self, other):
    """
    Two promotions are equal when their content, start date and end date are all equal.

    :param other: The object to compare with
    :return: True/False for another Promotion, NotImplemented for any other object
    """
    # Without this guard, comparing with an unrelated object would raise AttributeError on other.content;
    # NotImplemented lets Python try the reflected comparison and fall back to its default behavior.
    if not isinstance(other, Promotion):
        return NotImplemented
    return self.content == other.content and self.start_date == other.start_date and self.end_date == other.end_date
|
||||
|
||||
|
||||
def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bool, load_promos) -> List[Promotion]:
|
||||
"""
|
||||
@@ -67,6 +69,9 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
|
||||
items=chain.get_items(promo, items_dict),
|
||||
)
|
||||
if is_valid_promo(promo):
|
||||
if promo_objs and promo_objs[-1] == promo: # Merge equal promos
|
||||
promo_objs[-1].items.extend(promo.items)
|
||||
else:
|
||||
promo_objs.append(promo)
|
||||
return promo_objs
|
||||
|
||||
@@ -97,7 +102,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: Supermarket
|
||||
"""
|
||||
|
||||
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, False)
|
||||
promotions.sort(key=lambda promo: max(promo.update_date, promo.start_date), reverse=True)
|
||||
promotions.sort(key=lambda promo: (max(promo.update_date, promo.start_date), promo.start_date), reverse=True)
|
||||
logger.info('\n'.join(str(promotion) for promotion in promotions))
|
||||
|
||||
|
||||
@@ -115,3 +120,15 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str,
|
||||
for promo in promotions:
|
||||
if promo_name in promo.content:
|
||||
print(promo.repr_ltr())
|
||||
|
||||
|
||||
def get_all_null_items_in_promos(chain, store_id):
    """
    Collect the null items of every promotion in a store's full promotions file.

    Each promotion element found in the PromosFull XML is passed, together with the store's items dictionary,
    to the chain's get_null_items, and all the results are concatenated into a single list.

    :param chain: A given supermarket chain
    :param store_id: A given store id
    :return: A flat list of everything chain.get_null_items returned, in document order
    """
    # The second argument of create_items_dict is its load_xml flag.
    items_dict: Dict[str, str] = create_items_dict(chain, True, store_id)
    promos_category = chain.XMLFilesCategory.PromosFull
    xml_path: str = xml_file_gen(chain, store_id, promos_category.name)
    bs_promos = create_bs_object(xml_path, chain, store_id, True, promos_category)

    return [
        null_item
        for promo in bs_promos.find_all(chain.promotion_tag_name)
        for null_item in chain.get_null_items(promo, items_dict)
    ]
|
||||
|
15
shufersal.py
15
shufersal.py
@@ -15,11 +15,13 @@ class ShuferSal(SupermarketChain):
|
||||
@staticmethod
|
||||
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
|
||||
url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={category.value}"
|
||||
if SupermarketChain.is_valid_store_id(store_id):
|
||||
if SupermarketChain.is_valid_store_id(int(store_id)):
|
||||
url += f"&storeId={store_id}"
|
||||
req_res: requests.Response = requests.get(url)
|
||||
soup = BeautifulSoup(req_res.text, features='lxml')
|
||||
return soup.find('a', text="לחץ להורדה")['href']
|
||||
down_url = soup.find('a', text="לחץ להורדה")['href']
|
||||
print(down_url)
|
||||
return down_url
|
||||
|
||||
class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
|
||||
All, Prices, PricesFull, Promos, PromosFull, Stores = range(6)
|
||||
@@ -29,5 +31,10 @@ class ShuferSal(SupermarketChain):
|
||||
|
||||
@staticmethod
|
||||
def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
|
||||
return [items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item')
|
||||
if items_dict.get(item.find('ItemCode').text)]
|
||||
items = list()
|
||||
for item in promo.find_all('Item'):
|
||||
item_code = item.find('ItemCode').text
|
||||
full_item_info = items_dict.get(item_code)
|
||||
if full_item_info:
|
||||
items.append(full_item_info)
|
||||
return items
|
||||
|
@@ -18,3 +18,11 @@ def get_store_id(city: str, load_xml: bool, chain: SupermarketChain):
|
||||
for store in bs_stores.find_all("STORE"):
|
||||
if store.find("CITY").text == city:
|
||||
print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
|
||||
|
||||
|
||||
def get_all_deals(chain, sub_chain_id: str = "2"):
    """
    Return the ids of all the stores of a chain that belong to a given sub-chain.

    Despite its name, this function returns store ids (not deals): it scans the chain's stores XML and keeps
    every STORE element whose SUBCHAINID text equals sub_chain_id.

    :param chain: A given supermarket chain
    :param sub_chain_id: The SUBCHAINID text a store must have to be included; defaults to "2", the value that
                         used to be hard-coded here
    :return: A list with the STOREID of every matching store, converted to int
    """
    stores_category = chain.XMLFilesCategory.Stores
    # NOTE(review): -1 is passed where a store id is expected; presumably it means "no specific store" - confirm.
    xml_path: str = xml_file_gen(chain, -1, stores_category.name)
    bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, True, stores_category)

    return [
        int(store.find("STOREID").text)
        for store in bs_stores.find_all("STORE")
        if store.find("SUBCHAINID").text == sub_chain_id
    ]
|
||||
|
@@ -9,6 +9,8 @@ class SupermarketChain:
|
||||
"""
|
||||
A class representing a supermarket chain.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
class XMLFilesCategory(Enum):
|
||||
"""
|
||||
An enum class of different XML files produced by a supermarket chain
|
||||
@@ -78,3 +80,12 @@ class SupermarketChain:
|
||||
:param items_dict: A given dictionary of products
|
||||
"""
|
||||
pass
|
||||
|
||||
@staticmethod
def get_null_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
    """
    Return the codes of a promotion's items that have no usable entry in the items dictionary.

    :param promo: A given promotion element
    :param items_dict: A given dictionary of products
    :return: The ItemCode text of every Item in promo whose lookup in items_dict is missing or falsy
    """
    # Extract each item's code once, lazily, and keep only the codes the dictionary cannot resolve.
    item_codes = (item.find('ItemCode').text for item in promo.find_all('Item'))
    return [item_code for item_code in item_codes if not items_dict.get(item_code)]
|
||||
|
||||
@abstractmethod
|
||||
def __repr__(self):
|
||||
pass
|
||||
|
17
utils.py
17
utils.py
@@ -1,5 +1,7 @@
|
||||
import gzip
|
||||
from typing import AnyStr, Dict
|
||||
import io
|
||||
import zipfile
|
||||
from typing import AnyStr, Dict, List
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import Tag
|
||||
@@ -57,7 +59,14 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
|
||||
:return: A BeautifulSoup object with xml content.
|
||||
"""
|
||||
download_url: str = chain.get_download_url(store_id, category)
|
||||
xml_content: AnyStr = gzip.decompress(requests.get(download_url).content)
|
||||
response_content = requests.get(download_url).content
|
||||
try:
|
||||
xml_content: AnyStr = gzip.decompress(response_content)
|
||||
except gzip.BadGzipFile:
|
||||
with zipfile.ZipFile(io.BytesIO(response_content)) as the_zip:
|
||||
zip_info = the_zip.infolist()[0]
|
||||
with the_zip.open(zip_info) as the_file:
|
||||
xml_content = the_file.read()
|
||||
with open(xml_path, 'wb') as f_out:
|
||||
f_out.write(xml_content)
|
||||
return BeautifulSoup(xml_content, features='xml')
|
||||
@@ -88,11 +97,11 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[
|
||||
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
|
||||
|
||||
|
||||
def get_item_info(item: Tag) -> str:
|
||||
def get_item_info(item: Tag) -> List[str]:
|
||||
"""
|
||||
This function returns a string containing important information about a given supermarket's product.
|
||||
"""
|
||||
return [item.find('ItemName').text, item.find(re.compile(r'Manufacture[r]?Name')).text,
|
||||
return [item.find(re.compile(r'ItemN[a]?m[e]?')).text, item.find(re.compile(r'Manufacture[r]?Name')).text,
|
||||
item.find('ItemPrice').text, item.find('ItemCode').text]
|
||||
|
||||
|
||||
|
42
zol_vebegadol.py
Normal file
42
zol_vebegadol.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import json
|
||||
from typing import Dict, List
|
||||
import requests
|
||||
from bs4.element import Tag
|
||||
|
||||
from supermarket_chain import SupermarketChain
|
||||
|
||||
|
||||
class ZolVebegadol(SupermarketChain):
    """
    A class representing the Zol-Vebegadol supermarket chain.
    """

    def __repr__(self):
        return 'Zol-Vebegadol'

    # The XML file categories of this chain; the member names are also used to match published file names.
    class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
        All, Promos, PromosFull, Prices, PricesFull, Stores = range(6)

    # Tag names and date formats of this chain's XML files.
    # NOTE(review): presumably consumed by the shared parsing code - confirm against the callers.
    promotion_tag_name = 'Promotion'
    promotion_update_tag_name = 'PromotionUpdateDate'
    date_format = '%Y-%m-%d'
    date_hour_format = '%Y-%m-%d %H:%M:%S'
    item_tag_name = 'Item'

    @staticmethod
    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
        """
        Find the download link of the XML file of a given category for a given store.

        The chain's listing endpoint is fetched and parsed as JSON, and the first listed file whose name contains
        both the store marker and the category marker is chosen. The resulting URL is also printed.

        :param store_id: A given store id (formatted as a zero-padded, three-digit number)
        :param category: A given category of XML files
        :return: The full download URL of the first matching file
        :raises ValueError: If no listed file matches the given store and category
        """
        prefix = "http://zolvebegadol.binaprojects.com"
        url = prefix + "/MainIO_Hok.aspx"
        req_res: requests.Response = requests.get(url)
        jsons_files = json.loads(req_res.text)

        store_marker = f'-{store_id:03d}-20'
        # Dropping every 's' turns e.g. 'Promos' into 'Promo' and 'PricesFull' into 'PriceFull'.
        # NOTE(review): 'Promo'/'Price' are also substrings of 'PromoFull'/'PriceFull', so a non-full category
        # may pick up a full file if one is listed first - confirm against the real file names.
        category_marker = category.name.replace('s', '')

        # A default is supplied so that a missing file surfaces as a clear error instead of a bare StopIteration.
        suffix = next((cur_json["FileNm"] for cur_json in jsons_files
                       if store_marker in cur_json["FileNm"] and category_marker in cur_json["FileNm"]), None)
        if suffix is None:
            raise ValueError(f"No file of category {category.name} was found for store {store_id}")

        down_url = '/'.join([prefix, "Download", suffix])
        print(down_url)
        return down_url

    @staticmethod
    def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
        """
        Return the known information of the items taking part in a given promotion.

        :param promo: A given promotion element
        :param items_dict: A given dictionary of products
        :return: The items_dict entry of every Item in promo; items with a missing or falsy entry are skipped
        """
        items_info = (items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item'))
        return [full_item_info for full_item_info in items_info if full_item_info]
|
Reference in New Issue
Block a user