Added a test for searching for the download URLs of the different file categories. Specifically, it asserts that searching for non-full files does not yield the URLs of full files.

2021-08-17 13:08:39 +03:00
parent cffdd84086
commit 294dee8cc2
1 changed files with 57 additions and 14 deletions
--- a/tests/test_scraping.py
+++ b/tests/test_scraping.py
@@ -1,9 +1,10 @@
 import logging
 import os
-
 import pytest
+import requests
 from tqdm import tqdm
 import pandas as pd
+import re

 from chains.bareket import Bareket
 from chains.co_op import CoOp
@@ -44,23 +45,43 @@ chain_dict = {repr(chain): chain() if callable(chain) else None for chain in Sup
 MIN_NUM_OF_PROMOS = 3


-def test_scraping():
+def test_searching_for_download_urls():
+    """
+    Test that get_download_url of each chain returns the correct download url for each category:
+    """
+    session = requests.Session()
+    for chain_name, chain in tqdm(chain_dict.items(), desc='chains'):
+
+        logging.info(f'Finding download url in chain {chain_name}')
+        store_id: int = valid_store_id_by_chain(chain_name)
+
+        _test_download_url_helper(chain, store_id, chain.XMLFilesCategory.PromosFull, r'promo[s]?full', session)
+        _test_download_url_helper(chain, store_id, chain.XMLFilesCategory.Promos, r'promo[s]?', session)
+        _test_download_url_helper(chain, store_id, chain.XMLFilesCategory.PricesFull, r'price[s]?full', session)
+        _test_download_url_helper(chain, store_id, chain.XMLFilesCategory.Prices, r'price[s]?', session)
+
+
+def _test_download_url_helper(chain: SupermarketChain, store_id: int, category: SupermarketChain.XMLFilesCategory,
+                              regex_pat: str, session: requests.session):
+    download_url: str = chain.get_download_url(store_id, category, session)
+    logging.debug(download_url)
+    if not download_url:  # Not found non-full Promos/Prices file
+        return
+    assert re.search(regex_pat, download_url, re.IGNORECASE), f'Invalid {category.name} url in {repr(type(chain))}'
+    if category in [chain.XMLFilesCategory.Prices, chain.XMLFilesCategory.Promos]:
+        assert not re.search('full', download_url, re.IGNORECASE), \
+            f'Downloaded the full {category.name} file mistakenly in {repr(type(chain))}'
+
+
+def test_promotions_scraping():
+    """
+    Test scraping of promotions is completed successfully and a valid xlsx file is generated as an output.
+    """
    filename = 'temp.xlsx'
    for chain_name, chain in tqdm(chain_dict.items(), desc='chains'):
        logging.info(f'Test scraping promotions from {chain_name}')
-        if chain_name == repr(DorAlon):
-            store_id = 501
-        elif chain_name in [repr(Keshet), repr(TivTaam), repr(Bareket), repr(ZolVebegadol)]:
-            store_id = 2
-        elif chain_name == repr(CoOp):
-            store_id = 202
-        elif chain_name == repr(ShukHayir):
-            store_id = 4
-        elif chain_name == repr(StopMarket):
-            store_id = 5
-        else:
-            store_id = 1

+        store_id: int = valid_store_id_by_chain(chain_name)
        try:
            main_latest_promos(
                store_id=store_id,
@@ -76,3 +97,25 @@ def test_scraping():
            raise

        assert df.shape[0] > MIN_NUM_OF_PROMOS and df.shape[1] == PROMOTION_COLS_NUM, f"Failed scraping {chain_name}"
+
+
+def valid_store_id_by_chain(chain_name) -> int:
+    """
+    This function returns a valid store ID for a given chain.
+
+    :param chain_name: The name of a chain as returned by repr(ChainClassName).
+    :return: An integer representing a valid store ID in the given chain
+    """
+    if chain_name == repr(DorAlon):
+        store_id = 501
+    elif chain_name in [repr(TivTaam), repr(Bareket), repr(ZolVebegadol)]:
+        store_id = 2
+    elif chain_name == repr(CoOp):
+        store_id = 202
+    elif chain_name == repr(ShukHayir):
+        store_id = 4
+    elif chain_name in [repr(StopMarket), repr(Keshet)]:
+        store_id = 5
+    else:
+        store_id = 1
+    return store_id