Added a test for searching for the download URLs of the different file categories. Specifically, it asserts that searching for non-full files does not yield URLs of full files.

This commit is contained in:
KorenLazar
2021-08-17 13:08:39 +03:00
parent cffdd84086
commit 294dee8cc2

View File

@@ -1,9 +1,10 @@
import logging import logging
import os import os
import pytest import pytest
import requests
from tqdm import tqdm from tqdm import tqdm
import pandas as pd import pandas as pd
import re
from chains.bareket import Bareket from chains.bareket import Bareket
from chains.co_op import CoOp from chains.co_op import CoOp
@@ -44,23 +45,43 @@ chain_dict = {repr(chain): chain() if callable(chain) else None for chain in Sup
MIN_NUM_OF_PROMOS = 3 MIN_NUM_OF_PROMOS = 3
def test_searching_for_download_urls():
    """
    Test that get_download_url of each chain returns a url matching the requested category,
    for each of the categories PromosFull, Promos, PricesFull and Prices.
    """
    # Use the session as a context manager so its connections are released
    # when the test finishes, even if one of the assertions fails.
    with requests.Session() as session:
        for chain_name, chain in tqdm(chain_dict.items(), desc='chains'):
            logging.info(f'Finding download url in chain {chain_name}')
            store_id: int = valid_store_id_by_chain(chain_name)

            _test_download_url_helper(chain, store_id, chain.XMLFilesCategory.PromosFull, r'promo[s]?full', session)
            _test_download_url_helper(chain, store_id, chain.XMLFilesCategory.Promos, r'promo[s]?', session)
            _test_download_url_helper(chain, store_id, chain.XMLFilesCategory.PricesFull, r'price[s]?full', session)
            _test_download_url_helper(chain, store_id, chain.XMLFilesCategory.Prices, r'price[s]?', session)
def _test_download_url_helper(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory,
                              regex_pat: str, session: requests.Session) -> None:
    """
    Check that the download url a chain reports for one file category looks like a url of that category.

    :param chain: The chain whose get_download_url method is called.
    :param store_id: A store ID, passed through to get_download_url.
    :param category: The XML files category to search a download url for.
    :param regex_pat: A regex (matched case-insensitively) that the returned url must contain.
    :param session: The requests session, passed through to get_download_url.
    :raises AssertionError: If the url does not match regex_pat, or if a url containing 'full'
        is returned for the non-full Prices/Promos categories.
    """
    download_url: str = chain.get_download_url(store_id, category, session)
    logging.debug(download_url)
    if not download_url:  # Not found non-full Promos/Prices file
        # NOTE(review): this early return applies to every category, so an empty url for the
        # full categories is also accepted silently - confirm that this is intended.
        return

    assert re.search(regex_pat, download_url, re.IGNORECASE), f'Invalid {category.name} url in {repr(type(chain))}'

    # The non-full patterns also match full-file urls (e.g. 'promo[s]?' matches 'PromoFull'),
    # so full files are ruled out explicitly for the non-full categories.
    if category in [chain.XMLFilesCategory.Prices, chain.XMLFilesCategory.Promos]:
        assert not re.search('full', download_url, re.IGNORECASE), \
            f'Downloaded the full {category.name} file mistakenly in {repr(type(chain))}'
def test_promotions_scraping():
"""
Test scraping of promotions is completed successfully and a valid xlsx file is generated as an output.
"""
filename = 'temp.xlsx' filename = 'temp.xlsx'
for chain_name, chain in tqdm(chain_dict.items(), desc='chains'): for chain_name, chain in tqdm(chain_dict.items(), desc='chains'):
logging.info(f'Test scraping promotions from {chain_name}') logging.info(f'Test scraping promotions from {chain_name}')
if chain_name == repr(DorAlon):
store_id = 501
elif chain_name in [repr(Keshet), repr(TivTaam), repr(Bareket), repr(ZolVebegadol)]:
store_id = 2
elif chain_name == repr(CoOp):
store_id = 202
elif chain_name == repr(ShukHayir):
store_id = 4
elif chain_name == repr(StopMarket):
store_id = 5
else:
store_id = 1
store_id: int = valid_store_id_by_chain(chain_name)
try: try:
main_latest_promos( main_latest_promos(
store_id=store_id, store_id=store_id,
@@ -76,3 +97,25 @@ def test_scraping():
raise raise
assert df.shape[0] > MIN_NUM_OF_PROMOS and df.shape[1] == PROMOTION_COLS_NUM, f"Failed scraping {chain_name}" assert df.shape[0] > MIN_NUM_OF_PROMOS and df.shape[1] == PROMOTION_COLS_NUM, f"Failed scraping {chain_name}"
def valid_store_id_by_chain(chain_name) -> int:
    """
    Look up a store ID that is known to be valid for the given chain.

    :param chain_name: The name of a chain as returned by repr(ChainClassName).
    :return: An integer representing a valid store ID in the given chain (1 for chains not listed below).
    """
    # Each entry pairs a group of chain classes with the store ID they share.
    known_store_ids = (
        ((DorAlon,), 501),
        ((TivTaam, Bareket, ZolVebegadol), 2),
        ((CoOp,), 202),
        ((ShukHayir,), 4),
        ((StopMarket, Keshet), 5),
    )
    for chain_classes, known_id in known_store_ids:
        if any(chain_name == repr(chain_class) for chain_class in chain_classes):
            return known_id
    # Every other chain is queried with store 1.
    return 1