Added Zol Vebegadol. Fixed a small bug in Co-Op.

This commit is contained in:
KorenLazar
2020-12-26 17:18:21 +02:00
parent be47a5ad81
commit ee35e3436c
8 changed files with 124 additions and 16 deletions

View File

@@ -19,7 +19,7 @@ class CoOp(SupermarketChain):
req_res: requests.Response = requests.get(url)
soup = BeautifulSoup(req_res.text, features='lxml')
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
and f'-{store_id}-20' in value).attrs['href']
and f'-{store_id:03d}-20' in value).attrs['href']
down_url = prefix + suffix
print(down_url)
return down_url

16
main.py
View File

@@ -6,8 +6,10 @@ from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices
from supermarket_chain import SupermarketChain
from shufersal import ShuferSal
from co_op import CoOp
from zol_vebegadol import ZolVebegadol
from pathlib import Path
# TODO: fix problem of left-to-right printing
Path(LOGS_DIRNAME).mkdir(exist_ok=True)
@@ -16,6 +18,7 @@ Path(XMLS_DIRNAME).mkdir(exist_ok=True)
chain_dict = {
'Shufersal': ShuferSal(),
'Co-Op': CoOp(),
'Zol-Vebegadol': ZolVebegadol()
}
if __name__ == '__main__':
@@ -57,7 +60,7 @@ if __name__ == '__main__':
parser.add_argument('--chain',
required=True,
help='The name of the requested chain',
choices=['Shufersal', 'Co-Op'],
choices=chain_dict.keys(),
)
args = parser.parse_args()
@@ -83,3 +86,14 @@ if __name__ == '__main__':
arg_store_id = int(args.find_promos_by_name[0])
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
load_prices=args.load_prices, load_promos=args.load_promos)
# Script for Shufersal:
# store_ids = get_all_deals(chain)
# print(store_ids)
# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300]
# null_items_lists = list()
# for store_id in store_ids[::-1]:
# print(store_id)
# null_items_lists.append(get_all_null_items_in_promos(chain, store_id))
# print(set.intersection(*[set(null_items) for null_items in null_items_lists]))

View File

@@ -1,6 +1,5 @@
from datetime import datetime
from typing import Dict, List
from bs4 import BeautifulSoup
from utils import (
create_items_dict,
@@ -33,12 +32,15 @@ class Promotion:
return '\n'.join([title, dates_range, update_line, items]) + '\n'
def repr_ltr(self):
    """
    Build a multi-line description of the promotion for left-to-right output.

    The content and the items are emitted as they are, without reversing their characters.

    :return: The content, the dates range, the update date and one line per item,
             joined by newlines and terminated by a newline
    """
    lines = [
        self.content,
        f"Between {self.start_date} and {self.end_date}",
        f"Updated at {self.update_date}",
        '\n'.join(str(item) for item in self.items),
    ]
    return '\n'.join(lines) + '\n'
def __eq__(self, other):
    """
    Two promotions are equal when their content, start date and end date are all equal.

    :param other: The object to compare with
    :return: The comparison result for an object of the same type, NotImplemented otherwise
    """
    # Returning NotImplemented lets Python fall back to its default comparison
    # instead of failing with AttributeError on an unrelated object.
    if not isinstance(other, type(self)):
        return NotImplemented
    return self.content == other.content and self.start_date == other.start_date and self.end_date == other.end_date
def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bool, load_promos) -> List[Promotion]:
"""
@@ -67,7 +69,10 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
items=chain.get_items(promo, items_dict),
)
if is_valid_promo(promo):
promo_objs.append(promo)
if promo_objs and promo_objs[-1] == promo: # Merge equal promos
promo_objs[-1].items.extend(promo.items)
else:
promo_objs.append(promo)
return promo_objs
@@ -97,7 +102,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: Supermarket
"""
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, False)
promotions.sort(key=lambda promo: max(promo.update_date, promo.start_date), reverse=True)
promotions.sort(key=lambda promo: (max(promo.update_date, promo.start_date), promo.start_date), reverse=True)
logger.info('\n'.join(str(promotion) for promotion in promotions))
@@ -115,3 +120,15 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str,
for promo in promotions:
if promo_name in promo.content:
print(promo.repr_ltr())
def get_all_null_items_in_promos(chain, store_id):
    """
    Gather the results of chain.get_null_items over every promotion
    in the full promotions file of the given store.

    :param chain: A given supermarket chain
    :param store_id: A given store id
    :return: One flat list holding the null items of all the promotions
    """
    items_dict: Dict[str, str] = create_items_dict(chain, True, store_id)
    promos_category = chain.XMLFilesCategory.PromosFull
    xml_path: str = xml_file_gen(chain, store_id, promos_category.name)
    bs_promos = create_bs_object(xml_path, chain, store_id, True, promos_category)
    return [
        null_item
        for promo in bs_promos.find_all(chain.promotion_tag_name)
        for null_item in chain.get_null_items(promo, items_dict)
    ]

View File

@@ -15,11 +15,13 @@ class ShuferSal(SupermarketChain):
@staticmethod
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
    """
    Request the files page of the given category (narrowed to the store when its id is valid)
    and return the href of the download link found in it.

    :param store_id: A given store id
    :param category: The category of the requested XML file
    :return: The download url, which is also printed
    """
    url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={category.value}"
    if SupermarketChain.is_valid_store_id(int(store_id)):
        url = url + f"&storeId={store_id}"
    page_html: str = requests.get(url).text
    # The link is located by its Hebrew caption
    download_link = BeautifulSoup(page_html, features='lxml').find('a', text="לחץ להורדה")
    down_url = download_link['href']
    print(down_url)
    return down_url
class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
All, Prices, PricesFull, Promos, PromosFull, Stores = range(6)
@@ -29,5 +31,10 @@ class ShuferSal(SupermarketChain):
@staticmethod
def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
return [items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item')
if items_dict.get(item.find('ItemCode').text)]
items = list()
for item in promo.find_all('Item'):
item_code = item.find('ItemCode').text
full_item_info = items_dict.get(item_code)
if full_item_info:
items.append(full_item_info)
return items

View File

@@ -18,3 +18,11 @@ def get_store_id(city: str, load_xml: bool, chain: SupermarketChain):
for store in bs_stores.find_all("STORE"):
if store.find("CITY").text == city:
print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
def get_all_deals(chain):
    """
    Return the ids of all the stores in the chain's stores file whose SUBCHAINID is "2".

    :param chain: A given supermarket chain
    :return: A list of store ids as integers, in order of appearance in the file
    """
    stores_category = chain.XMLFilesCategory.Stores
    # -1 is passed as the store id, as in the original call (the stores file is not per-store, presumably)
    xml_path: str = xml_file_gen(chain, -1, stores_category.name)
    bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, True, stores_category)
    store_ids = []
    for store in bs_stores.find_all("STORE"):
        if store.find("SUBCHAINID").text == "2":
            store_ids.append(int(store.find("STOREID").text))
    return store_ids

View File

@@ -9,6 +9,8 @@ class SupermarketChain:
"""
A class representing a supermarket chain.
"""
@abstractmethod
class XMLFilesCategory(Enum):
"""
An enum class of different XML files produced by a supermarket chain
@@ -78,3 +80,12 @@ class SupermarketChain:
:param items_dict: A given dictionary of products
"""
pass
@staticmethod
def get_null_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
return [item.find('ItemCode').text for item in promo.find_all('Item')
if not items_dict.get(item.find('ItemCode').text)]
@abstractmethod
def __repr__(self):
    """
    The textual representation of the chain; left for each chain class to implement.
    """

View File

@@ -1,5 +1,7 @@
import gzip
from typing import AnyStr, Dict
import io
import zipfile
from typing import AnyStr, Dict, List
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
@@ -57,7 +59,14 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
:return: A BeautifulSoup object with xml content.
"""
download_url: str = chain.get_download_url(store_id, category)
xml_content: AnyStr = gzip.decompress(requests.get(download_url).content)
response_content = requests.get(download_url).content
try:
xml_content: AnyStr = gzip.decompress(response_content)
except gzip.BadGzipFile:
with zipfile.ZipFile(io.BytesIO(response_content)) as the_zip:
zip_info = the_zip.infolist()[0]
with the_zip.open(zip_info) as the_file:
xml_content = the_file.read()
with open(xml_path, 'wb') as f_out:
f_out.write(xml_content)
return BeautifulSoup(xml_content, features='xml')
@@ -88,12 +97,12 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
def get_item_info(item: Tag) -> List[str]:
    """
    This function returns a list containing important information about a given supermarket's product.

    :param item: A tag of a single product
    :return: The product's name, manufacturer name, price and code, in this order
    """
    # The name tags are matched by regex, presumably because their spelling varies between files
    # (e.g. ItemName/ItemNm, ManufacturerName/ManufactureName) - verify against the chains' XMLs.
    name_tag = item.find(re.compile(r'ItemN[a]?m[e]?'))
    manufacturer_tag = item.find(re.compile(r'Manufacture[r]?Name'))
    return [name_tag.text, manufacturer_tag.text, item.find('ItemPrice').text, item.find('ItemCode').text]
def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:

42
zol_vebegadol.py Normal file
View File

@@ -0,0 +1,42 @@
import json
from typing import Dict, List
import requests
from bs4.element import Tag
from supermarket_chain import SupermarketChain
class ZolVebegadol(SupermarketChain):
    """
    The Zol-Vebegadol supermarket chain, whose files are listed at zolvebegadol.binaprojects.com.
    """

    def __repr__(self):
        return 'Zol-Vebegadol'

    class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
        """
        The categories of XML files of the chain.
        """
        All, Promos, PromosFull, Prices, PricesFull, Stores = range(6)

    # Tag names and date formats used when reading the chain's XML files
    promotion_tag_name = 'Promotion'
    promotion_update_tag_name = 'PromotionUpdateDate'
    date_format = '%Y-%m-%d'
    date_hour_format = '%Y-%m-%d %H:%M:%S'
    item_tag_name = 'Item'

    @staticmethod
    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
        """
        Fetch the chain's files listing and return the download url of the first listed file
        whose name matches both the given store and the given category.

        :param store_id: A given store id (formatted as a zero-padded, three digits number)
        :param category: The category of the requested XML file
        :return: The download url, which is also printed
        :raises ValueError: If no listed file matches the store and the category
        """
        prefix = "http://zolvebegadol.binaprojects.com"
        url = prefix + "/MainIO_Hok.aspx"
        req_res: requests.Response = requests.get(url)
        jsons_files = json.loads(req_res.text)
        # Both markers do not depend on the inspected file, so they are built once
        store_marker = f'-{store_id:03d}-20'
        category_marker = category.name.replace('s', '')
        suffix = next((cur_json["FileNm"] for cur_json in jsons_files
                       if store_marker in cur_json["FileNm"] and category_marker in cur_json["FileNm"]), None)
        if suffix is None:
            # Without a default, next() would leak a bare StopIteration that says nothing about the cause
            raise ValueError(f"No {category.name} file was found for store {store_id} at {url}")
        down_url = '/'.join([prefix, "Download", suffix])
        print(down_url)
        return down_url

    @staticmethod
    def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
        """
        Return the items_dict entries of the promotion's items, in order of appearance,
        leaving out items whose code has no (truthy) entry.

        :param promo: A given promotion tag
        :param items_dict: A given dictionary of products, keyed by item code
        """
        found_infos = (items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item'))
        return [full_item_info for full_item_info in found_infos if full_item_info]