Added Zol-Vebegadol. Fixed a small bug in Co-Op.

This commit is contained in:
KorenLazar
2020-12-26 17:18:21 +02:00
parent be47a5ad81
commit ee35e3436c
8 changed files with 124 additions and 16 deletions

View File

@@ -19,7 +19,7 @@ class CoOp(SupermarketChain):
req_res: requests.Response = requests.get(url) req_res: requests.Response = requests.get(url)
soup = BeautifulSoup(req_res.text, features='lxml') soup = BeautifulSoup(req_res.text, features='lxml')
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
and f'-{store_id}-20' in value).attrs['href'] and f'-{store_id:03d}-20' in value).attrs['href']
down_url = prefix + suffix down_url = prefix + suffix
print(down_url) print(down_url)
return down_url return down_url

16
main.py
View File

@@ -6,8 +6,10 @@ from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices
from supermarket_chain import SupermarketChain from supermarket_chain import SupermarketChain
from shufersal import ShuferSal from shufersal import ShuferSal
from co_op import CoOp from co_op import CoOp
from zol_vebegadol import ZolVebegadol
from pathlib import Path from pathlib import Path
# TODO: fix problem of left-to-right printing # TODO: fix problem of left-to-right printing
Path(LOGS_DIRNAME).mkdir(exist_ok=True) Path(LOGS_DIRNAME).mkdir(exist_ok=True)
@@ -16,6 +18,7 @@ Path(XMLS_DIRNAME).mkdir(exist_ok=True)
chain_dict = { chain_dict = {
'Shufersal': ShuferSal(), 'Shufersal': ShuferSal(),
'Co-Op': CoOp(), 'Co-Op': CoOp(),
'Zol-Vebegadol': ZolVebegadol()
} }
if __name__ == '__main__': if __name__ == '__main__':
@@ -57,7 +60,7 @@ if __name__ == '__main__':
parser.add_argument('--chain', parser.add_argument('--chain',
required=True, required=True,
help='The name of the requested chain', help='The name of the requested chain',
choices=['Shufersal', 'Co-Op'], choices=chain_dict.keys(),
) )
args = parser.parse_args() args = parser.parse_args()
@@ -83,3 +86,14 @@ if __name__ == '__main__':
arg_store_id = int(args.find_promos_by_name[0]) arg_store_id = int(args.find_promos_by_name[0])
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1], get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
load_prices=args.load_prices, load_promos=args.load_promos) load_prices=args.load_prices, load_promos=args.load_promos)
# Script for Shufersal:
# store_ids = get_all_deals(chain)
# print(store_ids)
# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300]
# null_items_lists = list()
# for store_id in store_ids[::-1]:
# print(store_id)
# null_items_lists.append(get_all_null_items_in_promos(chain, store_id))
# print(setintersection(*[set(list) for list in null_items_lists]))

View File

@@ -1,6 +1,5 @@
from datetime import datetime from datetime import datetime
from typing import Dict, List from typing import Dict, List
from bs4 import BeautifulSoup
from utils import ( from utils import (
create_items_dict, create_items_dict,
@@ -33,12 +32,15 @@ class Promotion:
return '\n'.join([title, dates_range, update_line, items]) + '\n' return '\n'.join([title, dates_range, update_line, items]) + '\n'
def repr_ltr(self): def repr_ltr(self):
title = self.content[::-1] title = self.content
dates_range = f"Between {self.start_date} and {self.end_date}" dates_range = f"Between {self.start_date} and {self.end_date}"
update_line = f"Updated at {self.update_date}" update_line = f"Updated at {self.update_date}"
items = '\n'.join(str(item)[::-1] for item in self.items) items = '\n'.join(str(item) for item in self.items)
return '\n'.join([title, dates_range, update_line, items]) + '\n' return '\n'.join([title, dates_range, update_line, items]) + '\n'
def __eq__(self, other):
return self.content == other.content and self.start_date == other.start_date and self.end_date == other.end_date
def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bool, load_promos) -> List[Promotion]: def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bool, load_promos) -> List[Promotion]:
""" """
@@ -67,7 +69,10 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
items=chain.get_items(promo, items_dict), items=chain.get_items(promo, items_dict),
) )
if is_valid_promo(promo): if is_valid_promo(promo):
promo_objs.append(promo) if promo_objs and promo_objs[-1] == promo: # Merge equal promos
promo_objs[-1].items.extend(promo.items)
else:
promo_objs.append(promo)
return promo_objs return promo_objs
@@ -97,7 +102,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: Supermarket
""" """
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, False) promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, False)
promotions.sort(key=lambda promo: max(promo.update_date, promo.start_date), reverse=True) promotions.sort(key=lambda promo: (max(promo.update_date, promo.start_date), promo.start_date), reverse=True)
logger.info('\n'.join(str(promotion) for promotion in promotions)) logger.info('\n'.join(str(promotion) for promotion in promotions))
@@ -115,3 +120,15 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str,
for promo in promotions: for promo in promotions:
if promo_name in promo.content: if promo_name in promo.content:
print(promo.repr_ltr()) print(promo.repr_ltr())
def get_all_null_items_in_promos(chain, store_id):
    """
    This function gathers, over every promotion in the full promotions file of a given store,
    the values returned by chain.get_null_items.

    :param chain: A given supermarket chain
    :param store_id: A given store id
    :return: A flat list of the values returned by chain.get_null_items for all the promotions
    """
    items_dict: Dict[str, str] = create_items_dict(chain, True, store_id)
    promos_category = chain.XMLFilesCategory.PromosFull
    xml_path: str = xml_file_gen(chain, store_id, promos_category.name)
    bs_promos = create_bs_object(xml_path, chain, store_id, True, promos_category)
    return [null_item
            for promo in bs_promos.find_all(chain.promotion_tag_name)
            for null_item in chain.get_null_items(promo, items_dict)]

View File

@@ -15,11 +15,13 @@ class ShuferSal(SupermarketChain):
@staticmethod @staticmethod
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str: def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={category.value}" url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={category.value}"
if SupermarketChain.is_valid_store_id(store_id): if SupermarketChain.is_valid_store_id(int(store_id)):
url += f"&storeId={store_id}" url += f"&storeId={store_id}"
req_res: requests.Response = requests.get(url) req_res: requests.Response = requests.get(url)
soup = BeautifulSoup(req_res.text, features='lxml') soup = BeautifulSoup(req_res.text, features='lxml')
return soup.find('a', text="לחץ להורדה")['href'] down_url = soup.find('a', text="לחץ להורדה")['href']
print(down_url)
return down_url
class XMLFilesCategory(SupermarketChain.XMLFilesCategory): class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
All, Prices, PricesFull, Promos, PromosFull, Stores = range(6) All, Prices, PricesFull, Promos, PromosFull, Stores = range(6)
@@ -29,5 +31,10 @@ class ShuferSal(SupermarketChain):
@staticmethod @staticmethod
def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]: def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
return [items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item') items = list()
if items_dict.get(item.find('ItemCode').text)] for item in promo.find_all('Item'):
item_code = item.find('ItemCode').text
full_item_info = items_dict.get(item_code)
if full_item_info:
items.append(full_item_info)
return items

View File

@@ -18,3 +18,11 @@ def get_store_id(city: str, load_xml: bool, chain: SupermarketChain):
for store in bs_stores.find_all("STORE"): for store in bs_stores.find_all("STORE"):
if store.find("CITY").text == city: if store.find("CITY").text == city:
print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text)) print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
def get_all_deals(chain):
    """
    This function returns the ids of all the stores in the chain's stores file whose SUBCHAINID is "2".

    :param chain: A given supermarket chain
    :return: A list of the matching store ids, as integers
    """
    stores_category = chain.XMLFilesCategory.Stores
    xml_path: str = xml_file_gen(chain, -1, stores_category.name)
    bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, True, stores_category)
    store_ids = []
    for store in bs_stores.find_all("STORE"):
        # NOTE(review): sub-chain "2" is presumably the "deal" stores the function is named after - confirm
        if store.find("SUBCHAINID").text == "2":
            store_ids.append(int(store.find("STOREID").text))
    return store_ids

View File

@@ -9,6 +9,8 @@ class SupermarketChain:
""" """
A class representing a supermarket chain. A class representing a supermarket chain.
""" """
@abstractmethod
class XMLFilesCategory(Enum): class XMLFilesCategory(Enum):
""" """
An enum class of different XML files produced by a supermarket chain An enum class of different XML files produced by a supermarket chain
@@ -78,3 +80,12 @@ class SupermarketChain:
:param items_dict: A given dictionary of products :param items_dict: A given dictionary of products
""" """
pass pass
@staticmethod
def get_null_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
    """
    This function returns the codes of the items in a given promotion that have no (truthy) value
    in a given dictionary of products.

    :param promo: A given promotion
    :param items_dict: A given dictionary of products
    :return: The item codes found in the promotion for which items_dict holds no value
    """
    null_items = []
    for item in promo.find_all('Item'):
        if items_dict.get(item.find('ItemCode').text):
            continue  # The item is known, hence it is not a null item
        null_items.append(item.find('ItemCode').text)
    return null_items
@abstractmethod
def __repr__(self):
    """
    The string representation of the chain, to be provided by every concrete chain class.

    NOTE(review): @abstractmethod is only enforced when the class uses ABCMeta (e.g. inherits from ABC);
    the class header is not visible here - confirm.
    """
    pass

View File

@@ -1,5 +1,7 @@
import gzip import gzip
from typing import AnyStr, Dict import io
import zipfile
from typing import AnyStr, Dict, List
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag
@@ -57,7 +59,14 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
:return: A BeautifulSoup object with xml content. :return: A BeautifulSoup object with xml content.
""" """
download_url: str = chain.get_download_url(store_id, category) download_url: str = chain.get_download_url(store_id, category)
xml_content: AnyStr = gzip.decompress(requests.get(download_url).content) response_content = requests.get(download_url).content
try:
xml_content: AnyStr = gzip.decompress(response_content)
except gzip.BadGzipFile:
with zipfile.ZipFile(io.BytesIO(response_content)) as the_zip:
zip_info = the_zip.infolist()[0]
with the_zip.open(zip_info) as the_file:
xml_content = the_file.read()
with open(xml_path, 'wb') as f_out: with open(xml_path, 'wb') as f_out:
f_out.write(xml_content) f_out.write(xml_content)
return BeautifulSoup(xml_content, features='xml') return BeautifulSoup(xml_content, features='xml')
@@ -88,12 +97,12 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)} return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
def get_item_info(item: Tag) -> str: def get_item_info(item: Tag) -> List[str]:
""" """
This function returns a string containing important information about a given supermarket's product. This function returns a string containing important information about a given supermarket's product.
""" """
return [item.find('ItemName').text, item.find(re.compile(r'Manufacture[r]?Name')).text, return [item.find(re.compile(r'ItemN[a]?m[e]?')).text, item.find(re.compile(r'Manufacture[r]?Name')).text,
item.find('ItemPrice').text, item.find('ItemCode').text] item.find('ItemPrice').text, item.find('ItemCode').text]
def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None: def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:

42
zol_vebegadol.py Normal file
View File

@@ -0,0 +1,42 @@
import json
from typing import Dict, List
import requests
from bs4.element import Tag
from supermarket_chain import SupermarketChain
class ZolVebegadol(SupermarketChain):
    """
    A class representing the Zol-Vebegadol supermarket chain.
    """

    # Tag names and date formats used when parsing the chain's XML files
    promotion_tag_name = 'Promotion'
    promotion_update_tag_name = 'PromotionUpdateDate'
    date_format = '%Y-%m-%d'
    date_hour_format = '%Y-%m-%d %H:%M:%S'
    item_tag_name = 'Item'

    class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
        """
        The categories of XML files of the chain.
        """
        All, Promos, PromosFull, Prices, PricesFull, Stores = range(6)

    def __repr__(self):
        return 'Zol-Vebegadol'

    @staticmethod
    def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
        """
        This function returns the download url of the first listed file that matches a given store
        and a given category. The url is also printed.

        :param store_id: A given store id (matched zero-padded to three digits, e.g. '-007-20')
        :param category: A given category of XML files
        :return: The download url of the matching file
        :raises StopIteration: If no listed file matches both the store and the category
        """
        prefix = "http://zolvebegadol.binaprojects.com"
        req_res: requests.Response = requests.get(f"{prefix}/MainIO_Hok.aspx")

        def is_requested_file(file_name) -> bool:
            # The category name is matched with every 's' removed (e.g. 'Promos' -> 'Promo').
            # NOTE(review): these are plain substring checks, and 'Promo' is also contained in
            # 'PromoFull' - a full file listed first would presumably be picked; confirm.
            return f'-{store_id:03d}-20' in file_name and category.name.replace('s', '') in file_name

        listed_names = (cur_json["FileNm"] for cur_json in json.loads(req_res.text))
        suffix = next(filter(is_requested_file, listed_names))
        down_url = '/'.join((prefix, "Download", suffix))
        print(down_url)
        return down_url

    @staticmethod
    def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
        """
        This function returns the information of the items in a given promotion that are found
        in a given dictionary of products.

        :param promo: A given promotion
        :param items_dict: A given dictionary of products
        :return: The (truthy) values of items_dict for the item codes of the promotion, in order
        """
        item_codes = (item.find('ItemCode').text for item in promo.find_all('Item'))
        return [full_item_info for full_item_info in map(items_dict.get, item_codes) if full_item_info]