Added Zol Vebegadol. Fixed a small bug in Co-Op.
This commit is contained in:
2
co_op.py
2
co_op.py
@@ -19,7 +19,7 @@ class CoOp(SupermarketChain):
|
|||||||
req_res: requests.Response = requests.get(url)
|
req_res: requests.Response = requests.get(url)
|
||||||
soup = BeautifulSoup(req_res.text, features='lxml')
|
soup = BeautifulSoup(req_res.text, features='lxml')
|
||||||
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
|
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
|
||||||
and f'-{store_id}-20' in value).attrs['href']
|
and f'-{store_id:03d}-20' in value).attrs['href']
|
||||||
down_url = prefix + suffix
|
down_url = prefix + suffix
|
||||||
print(down_url)
|
print(down_url)
|
||||||
return down_url
|
return down_url
|
||||||
|
16
main.py
16
main.py
@@ -6,8 +6,10 @@ from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices
|
|||||||
from supermarket_chain import SupermarketChain
|
from supermarket_chain import SupermarketChain
|
||||||
from shufersal import ShuferSal
|
from shufersal import ShuferSal
|
||||||
from co_op import CoOp
|
from co_op import CoOp
|
||||||
|
from zol_vebegadol import ZolVebegadol
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
# TODO: fix problem of left-to-right printing
|
# TODO: fix problem of left-to-right printing
|
||||||
|
|
||||||
Path(LOGS_DIRNAME).mkdir(exist_ok=True)
|
Path(LOGS_DIRNAME).mkdir(exist_ok=True)
|
||||||
@@ -16,6 +18,7 @@ Path(XMLS_DIRNAME).mkdir(exist_ok=True)
|
|||||||
chain_dict = {
|
chain_dict = {
|
||||||
'Shufersal': ShuferSal(),
|
'Shufersal': ShuferSal(),
|
||||||
'Co-Op': CoOp(),
|
'Co-Op': CoOp(),
|
||||||
|
'Zol-Vebegadol': ZolVebegadol()
|
||||||
}
|
}
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@@ -57,7 +60,7 @@ if __name__ == '__main__':
|
|||||||
parser.add_argument('--chain',
|
parser.add_argument('--chain',
|
||||||
required=True,
|
required=True,
|
||||||
help='The name of the requested chain',
|
help='The name of the requested chain',
|
||||||
choices=['Shufersal', 'Co-Op'],
|
choices=chain_dict.keys(),
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
@@ -83,3 +86,14 @@ if __name__ == '__main__':
|
|||||||
arg_store_id = int(args.find_promos_by_name[0])
|
arg_store_id = int(args.find_promos_by_name[0])
|
||||||
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
|
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
|
||||||
load_prices=args.load_prices, load_promos=args.load_promos)
|
load_prices=args.load_prices, load_promos=args.load_promos)
|
||||||
|
|
||||||
|
|
||||||
|
# Script for Shufersal:
|
||||||
|
# store_ids = get_all_deals(chain)
|
||||||
|
# print(store_ids)
|
||||||
|
# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300]
|
||||||
|
# null_items_lists = list()
|
||||||
|
# for store_id in store_ids[::-1]:
|
||||||
|
# print(store_id)
|
||||||
|
# null_items_lists.append(get_all_null_items_in_promos(chain, store_id))
|
||||||
|
# print(setintersection(*[set(list) for list in null_items_lists]))
|
||||||
|
27
promotion.py
27
promotion.py
@@ -1,6 +1,5 @@
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
from utils import (
|
from utils import (
|
||||||
create_items_dict,
|
create_items_dict,
|
||||||
@@ -33,12 +32,15 @@ class Promotion:
|
|||||||
return '\n'.join([title, dates_range, update_line, items]) + '\n'
|
return '\n'.join([title, dates_range, update_line, items]) + '\n'
|
||||||
|
|
||||||
def repr_ltr(self):
|
def repr_ltr(self):
|
||||||
title = self.content[::-1]
|
title = self.content
|
||||||
dates_range = f"Between {self.start_date} and {self.end_date}"
|
dates_range = f"Between {self.start_date} and {self.end_date}"
|
||||||
update_line = f"Updated at {self.update_date}"
|
update_line = f"Updated at {self.update_date}"
|
||||||
items = '\n'.join(str(item)[::-1] for item in self.items)
|
items = '\n'.join(str(item) for item in self.items)
|
||||||
return '\n'.join([title, dates_range, update_line, items]) + '\n'
|
return '\n'.join([title, dates_range, update_line, items]) + '\n'
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return self.content == other.content and self.start_date == other.start_date and self.end_date == other.end_date
|
||||||
|
|
||||||
|
|
||||||
def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bool, load_promos) -> List[Promotion]:
|
def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bool, load_promos) -> List[Promotion]:
|
||||||
"""
|
"""
|
||||||
@@ -67,7 +69,10 @@ def get_available_promos(chain: SupermarketChain, store_id: int, load_prices: bo
|
|||||||
items=chain.get_items(promo, items_dict),
|
items=chain.get_items(promo, items_dict),
|
||||||
)
|
)
|
||||||
if is_valid_promo(promo):
|
if is_valid_promo(promo):
|
||||||
promo_objs.append(promo)
|
if promo_objs and promo_objs[-1] == promo: # Merge equal promos
|
||||||
|
promo_objs[-1].items.extend(promo.items)
|
||||||
|
else:
|
||||||
|
promo_objs.append(promo)
|
||||||
return promo_objs
|
return promo_objs
|
||||||
|
|
||||||
|
|
||||||
@@ -97,7 +102,7 @@ def main_latest_promos(store_id: int, load_xml: bool, logger, chain: Supermarket
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, False)
|
promotions: List[Promotion] = get_available_promos(chain, store_id, load_xml, False)
|
||||||
promotions.sort(key=lambda promo: max(promo.update_date, promo.start_date), reverse=True)
|
promotions.sort(key=lambda promo: (max(promo.update_date, promo.start_date), promo.start_date), reverse=True)
|
||||||
logger.info('\n'.join(str(promotion) for promotion in promotions))
|
logger.info('\n'.join(str(promotion) for promotion in promotions))
|
||||||
|
|
||||||
|
|
||||||
@@ -115,3 +120,15 @@ def get_promos_by_name(store_id: int, chain: SupermarketChain, promo_name: str,
|
|||||||
for promo in promotions:
|
for promo in promotions:
|
||||||
if promo_name in promo.content:
|
if promo_name in promo.content:
|
||||||
print(promo.repr_ltr())
|
print(promo.repr_ltr())
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_null_items_in_promos(chain, store_id):
|
||||||
|
items_dict: Dict[str, str] = create_items_dict(chain, True, store_id)
|
||||||
|
xml_path: str = xml_file_gen(chain, store_id, chain.XMLFilesCategory.PromosFull.name)
|
||||||
|
bs_promos = create_bs_object(xml_path, chain, store_id, True, chain.XMLFilesCategory.PromosFull)
|
||||||
|
|
||||||
|
null_items = list()
|
||||||
|
for promo in bs_promos.find_all(chain.promotion_tag_name):
|
||||||
|
null_items.extend(chain.get_null_items(promo, items_dict))
|
||||||
|
|
||||||
|
return null_items
|
||||||
|
15
shufersal.py
15
shufersal.py
@@ -15,11 +15,13 @@ class ShuferSal(SupermarketChain):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
|
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
|
||||||
url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={category.value}"
|
url = f"http://prices.shufersal.co.il/FileObject/UpdateCategory?catID={category.value}"
|
||||||
if SupermarketChain.is_valid_store_id(store_id):
|
if SupermarketChain.is_valid_store_id(int(store_id)):
|
||||||
url += f"&storeId={store_id}"
|
url += f"&storeId={store_id}"
|
||||||
req_res: requests.Response = requests.get(url)
|
req_res: requests.Response = requests.get(url)
|
||||||
soup = BeautifulSoup(req_res.text, features='lxml')
|
soup = BeautifulSoup(req_res.text, features='lxml')
|
||||||
return soup.find('a', text="לחץ להורדה")['href']
|
down_url = soup.find('a', text="לחץ להורדה")['href']
|
||||||
|
print(down_url)
|
||||||
|
return down_url
|
||||||
|
|
||||||
class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
|
class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
|
||||||
All, Prices, PricesFull, Promos, PromosFull, Stores = range(6)
|
All, Prices, PricesFull, Promos, PromosFull, Stores = range(6)
|
||||||
@@ -29,5 +31,10 @@ class ShuferSal(SupermarketChain):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
|
def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
|
||||||
return [items_dict.get(item.find('ItemCode').text) for item in promo.find_all('Item')
|
items = list()
|
||||||
if items_dict.get(item.find('ItemCode').text)]
|
for item in promo.find_all('Item'):
|
||||||
|
item_code = item.find('ItemCode').text
|
||||||
|
full_item_info = items_dict.get(item_code)
|
||||||
|
if full_item_info:
|
||||||
|
items.append(full_item_info)
|
||||||
|
return items
|
||||||
|
@@ -18,3 +18,11 @@ def get_store_id(city: str, load_xml: bool, chain: SupermarketChain):
|
|||||||
for store in bs_stores.find_all("STORE"):
|
for store in bs_stores.find_all("STORE"):
|
||||||
if store.find("CITY").text == city:
|
if store.find("CITY").text == city:
|
||||||
print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
|
print((store.find("ADDRESS").text, store.find("STOREID").text, store.find("SUBCHAINNAME").text))
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_deals(chain):
|
||||||
|
xml_path: str = xml_file_gen(chain, -1, chain.XMLFilesCategory.Stores.name)
|
||||||
|
bs_stores: BeautifulSoup = create_bs_object(xml_path, chain, -1, True, chain.XMLFilesCategory.Stores)
|
||||||
|
|
||||||
|
return [int(store.find("STOREID").text) for store in bs_stores.find_all("STORE") if store.find("SUBCHAINID").text
|
||||||
|
== "2"]
|
||||||
|
@@ -9,6 +9,8 @@ class SupermarketChain:
|
|||||||
"""
|
"""
|
||||||
A class representing a supermarket chain.
|
A class representing a supermarket chain.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
class XMLFilesCategory(Enum):
|
class XMLFilesCategory(Enum):
|
||||||
"""
|
"""
|
||||||
An enum class of different XML files produced by a supermarket chain
|
An enum class of different XML files produced by a supermarket chain
|
||||||
@@ -78,3 +80,12 @@ class SupermarketChain:
|
|||||||
:param items_dict: A given dictionary of products
|
:param items_dict: A given dictionary of products
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_null_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
|
||||||
|
return [item.find('ItemCode').text for item in promo.find_all('Item')
|
||||||
|
if not items_dict.get(item.find('ItemCode').text)]
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def __repr__(self):
|
||||||
|
pass
|
||||||
|
19
utils.py
19
utils.py
@@ -1,5 +1,7 @@
|
|||||||
import gzip
|
import gzip
|
||||||
from typing import AnyStr, Dict
|
import io
|
||||||
|
import zipfile
|
||||||
|
from typing import AnyStr, Dict, List
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bs4.element import Tag
|
from bs4.element import Tag
|
||||||
@@ -57,7 +59,14 @@ def create_bs_object_from_link(xml_path: str, chain: SupermarketChain, category:
|
|||||||
:return: A BeautifulSoup object with xml content.
|
:return: A BeautifulSoup object with xml content.
|
||||||
"""
|
"""
|
||||||
download_url: str = chain.get_download_url(store_id, category)
|
download_url: str = chain.get_download_url(store_id, category)
|
||||||
xml_content: AnyStr = gzip.decompress(requests.get(download_url).content)
|
response_content = requests.get(download_url).content
|
||||||
|
try:
|
||||||
|
xml_content: AnyStr = gzip.decompress(response_content)
|
||||||
|
except gzip.BadGzipFile:
|
||||||
|
with zipfile.ZipFile(io.BytesIO(response_content)) as the_zip:
|
||||||
|
zip_info = the_zip.infolist()[0]
|
||||||
|
with the_zip.open(zip_info) as the_file:
|
||||||
|
xml_content = the_file.read()
|
||||||
with open(xml_path, 'wb') as f_out:
|
with open(xml_path, 'wb') as f_out:
|
||||||
f_out.write(xml_content)
|
f_out.write(xml_content)
|
||||||
return BeautifulSoup(xml_content, features='xml')
|
return BeautifulSoup(xml_content, features='xml')
|
||||||
@@ -88,12 +97,12 @@ def create_items_dict(chain: SupermarketChain, load_xml, store_id: int) -> Dict[
|
|||||||
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
|
return {item.find('ItemCode').text: get_item_info(item) for item in bs_prices.find_all(chain.item_tag_name)}
|
||||||
|
|
||||||
|
|
||||||
def get_item_info(item: Tag) -> str:
|
def get_item_info(item: Tag) -> List[str]:
|
||||||
"""
|
"""
|
||||||
This function returns a list containing important information about a given supermarket's product.
|
This function returns a list containing important information about a given supermarket's product.
|
||||||
"""
|
"""
|
||||||
return [item.find('ItemName').text, item.find(re.compile(r'Manufacture[r]?Name')).text,
|
return [item.find(re.compile(r'ItemN[a]?m[e]?')).text, item.find(re.compile(r'Manufacture[r]?Name')).text,
|
||||||
item.find('ItemPrice').text, item.find('ItemCode').text]
|
item.find('ItemPrice').text, item.find('ItemCode').text]
|
||||||
|
|
||||||
|
|
||||||
def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
|
def get_products_prices(chain: SupermarketChain, store_id: int, load_xml: bool, product_name: str) -> None:
|
||||||
|
42
zol_vebegadol.py
Normal file
42
zol_vebegadol.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
import json
|
||||||
|
from typing import Dict, List
|
||||||
|
import requests
|
||||||
|
from bs4.element import Tag
|
||||||
|
|
||||||
|
from supermarket_chain import SupermarketChain
|
||||||
|
|
||||||
|
|
||||||
|
class ZolVebegadol(SupermarketChain):
|
||||||
|
def __repr__(self):
|
||||||
|
return 'Zol-Vebegadol'
|
||||||
|
|
||||||
|
class XMLFilesCategory(SupermarketChain.XMLFilesCategory):
|
||||||
|
All, Promos, PromosFull, Prices, PricesFull, Stores = range(6)
|
||||||
|
|
||||||
|
promotion_tag_name = 'Promotion'
|
||||||
|
promotion_update_tag_name = 'PromotionUpdateDate'
|
||||||
|
date_format = '%Y-%m-%d'
|
||||||
|
date_hour_format = '%Y-%m-%d %H:%M:%S'
|
||||||
|
item_tag_name = 'Item'
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_download_url(store_id: int, category: SupermarketChain.XMLFilesCategory) -> str:
|
||||||
|
prefix = "http://zolvebegadol.binaprojects.com"
|
||||||
|
url = prefix + "/MainIO_Hok.aspx"
|
||||||
|
req_res: requests.Response = requests.get(url)
|
||||||
|
jsons_files = json.loads(req_res.text)
|
||||||
|
suffix = next(cur_json["FileNm"] for cur_json in jsons_files if f'-{store_id:03d}-20' in cur_json["FileNm"]
|
||||||
|
and category.name.replace('s', '') in cur_json["FileNm"])
|
||||||
|
down_url = '/'.join([prefix, "Download", suffix])
|
||||||
|
print(down_url)
|
||||||
|
return down_url
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_items(promo: Tag, items_dict: Dict[str, str]) -> List[str]:
|
||||||
|
items = list()
|
||||||
|
for item in promo.find_all('Item'):
|
||||||
|
item_code = item.find('ItemCode').text
|
||||||
|
full_item_info = items_dict.get(item_code)
|
||||||
|
if full_item_info:
|
||||||
|
items.append(full_item_info)
|
||||||
|
return items
|
Reference in New Issue
Block a user