Added the option to output a prices JSON file in main.py via --prices-with-promos, where the prices are updated by the latest promotions (stored under the 'final_price' key, while 'price' holds the price before promotions). Fixed a small bug in BinaWebClient by checking that the filename does not contain 'null'. Changed the hierarchy of chains so that it includes the web clients. Added the date to the output filenames to start storing the data over time. Applied Black formatting (according to PEP 8 guidelines). Changed chains_dict in main to a constant.
124 lines
3.9 KiB
Python
124 lines
3.9 KiB
Python
import logging
|
|
import os
|
|
import re
|
|
import tempfile
|
|
|
|
import pandas as pd
|
|
import pytest
|
|
import requests
|
|
|
|
from chains.bareket import Bareket
|
|
from chains.co_op import CoOp
|
|
from chains.dor_alon import DorAlon
|
|
from chains.keshet import Keshet
|
|
from chains.shuk_hayir import ShukHayir
|
|
from chains.stop_market import StopMarket
|
|
from chains.tiv_taam import TivTaam
|
|
from chains.zol_vebegadol import ZolVebegadol
|
|
from main import CHAINS_DICT
|
|
from promotion import PROMOTION_COLS_NUM, main_latest_promos
|
|
from supermarket_chain import SupermarketChain
|
|
|
|
if __name__ == "__main__":
    # Only launch pytest when this file is executed directly as a script.
    # Calling pytest.main unconditionally at import time would start a nested
    # pytest run every time pytest itself imports this module for collection.
    pytest.main(args=["-s", os.path.abspath(__file__)])

# HTTP session shared by the download-url tests in this module.
session = requests.Session()

# A scraped promotions sheet must contain more rows than this to be accepted.
MIN_NUM_OF_PROMOS = 3
|
|
|
|
|
|
@pytest.mark.parametrize("chain_tuple", CHAINS_DICT.items())
def test_searching_for_download_urls(chain_tuple):
    """
    Check the download url that a chain resolves for each XML file category.

    Every category (full/non-full promos and prices) is looked up for one
    valid store of the chain, and the result is validated against the
    filename pattern expected for that category.

    :param chain_tuple: A (chain name, chain) pair taken from CHAINS_DICT.
    """
    chain_name, chain = chain_tuple
    logging.info(f"Checking download urls in chain {chain_name}")
    store_id: int = valid_store_id_by_chain(chain_name)

    categories = chain.XMLFilesCategory
    # (category, case-insensitive pattern its download url must contain)
    expected_patterns = (
        (categories.PromosFull, r"promo[s]?full"),
        (categories.Promos, r"promo[s]?"),
        (categories.PricesFull, r"price[s]?full"),
        (categories.Prices, r"price[s]?"),
    )
    for category, pattern in expected_patterns:
        _test_download_url_helper(chain, store_id, category, pattern, session)
|
|
|
|
|
|
def _test_download_url_helper(
    chain: SupermarketChain,
    store_id: int,
    category: SupermarketChain.XMLFilesCategory,
    regex_pat: str,
    session: requests.Session,
) -> None:
    """
    Assert that the download url a chain returns for a category looks valid.

    :param chain: The chain whose get_download_url_or_path is queried.
    :param store_id: The store ID passed to the lookup.
    :param category: The XML file category to look up.
    :param regex_pat: A pattern (matched case-insensitively) that the returned url must contain.
    :param session: The requests session handed to the chain for the lookup.
    :raises AssertionError: If the url does not match regex_pat, or if a non-full
        category (Prices/Promos) resolved to a url containing "full".
    """
    download_url: str = chain.get_download_url_or_path(store_id, category, session)
    if not download_url:
        # Nothing was found for this category (e.g. no non-full Promos/Prices
        # file), so there is nothing to validate.
        return
    logging.debug(download_url)

    assert re.search(
        regex_pat, download_url, re.IGNORECASE
    ), f"Invalid {category.name} url in {repr(type(chain))}"

    # The non-full patterns also match the full file names, so explicitly make
    # sure a non-full category did not resolve to a full file.
    if category in (chain.XMLFilesCategory.Prices, chain.XMLFilesCategory.Promos):
        assert not re.search(
            "full", download_url, re.IGNORECASE
        ), f"Downloaded the full {category.name} file mistakenly in {repr(type(chain))}"
|
|
|
|
|
|
@pytest.mark.parametrize("chain_tuple", CHAINS_DICT.items())
def test_promotions_scraping(chain_tuple):
    """
    Test scraping of promotions is completed successfully and a valid xlsx file is generated as an output.

    :param chain_tuple: A (chain name, chain) pair taken from CHAINS_DICT.
    """
    chain_name, chain = chain_tuple
    logging.info(f"Test scraping promotions from {chain_name}")
    store_id: int = valid_store_id_by_chain(chain_name)

    # The context manager closes (and thereby removes) the temporary file even
    # when scraping or loading fails, instead of leaving it to garbage collection.
    with tempfile.NamedTemporaryFile(suffix=".xlsx") as tf:
        try:
            main_latest_promos(
                store_id=store_id,
                output_filename=tf.name,
                chain=chain,
                load_promos=False,
                load_prices=False,
            )
            # Read the sheet back while the temporary file still exists.
            df = pd.read_excel(tf.name)
        except Exception as e:
            logging.error(e)
            logging.error(f"Failed loading excel of {chain_name}")
            raise

    assert (
        df.shape[0] > MIN_NUM_OF_PROMOS and df.shape[1] == PROMOTION_COLS_NUM
    ), f"Failed scraping {chain_name}"
|
|
|
|
|
|
def valid_store_id_by_chain(chain_name) -> int:
    """
    This function returns a valid store ID for a given chain.

    :param chain_name: The name of a chain as returned by repr(ChainClassName).
    :return: An integer representing a valid store ID in the given chain
        (1 for any chain without a dedicated entry below).
    """
    if chain_name == repr(DorAlon):
        store_id = 501
    elif chain_name in (repr(TivTaam), repr(Bareket)):
        store_id = 2
    elif chain_name == repr(CoOp):
        store_id = 202
    elif chain_name in (repr(ShukHayir), repr(ZolVebegadol)):
        # Must be a membership test: comparing the name to the whole collection
        # with `==` is never true, which sent these chains to the default ID.
        store_id = 4
    elif chain_name in (repr(StopMarket), repr(Keshet)):
        store_id = 5
    else:
        store_id = 1
    return store_id
|