Added a dedicated search for the download URL of non-full promotions and prices files. Changed the return value of get_download_url accordingly: it now returns an empty string when no non-full Promos/Prices file is found.

This commit is contained in:
KorenLazar
2021-08-17 13:06:42 +03:00
parent 3770352d04
commit cffdd84086
4 changed files with 36 additions and 5 deletions

View File

@@ -1,3 +1,4 @@
import re
from typing import Dict, List
import requests
from bs4 import BeautifulSoup
@@ -21,8 +22,16 @@ class MahsaneiHashook(SupermarketChain):
url = prefix + "NBCompetitionRegulations.aspx"
req_res: requests.Response = requests.get(url)
soup = BeautifulSoup(req_res.text, features='lxml')
suffix: str = soup.find('a', href=lambda value: value and category.name.replace('s', '') in value
and f'-{store_id:03d}-20' in value).attrs['href']
if category in [SupermarketChain.XMLFilesCategory.Promos, SupermarketChain.XMLFilesCategory.Prices]:
fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
and f'-{store_id:03d}-20' in fname \
and not re.search('full', fname, re.IGNORECASE)
if soup.find('a', href=fname_filter_func) is None:
return "" # Could not find non-full Promos/Prices file
else:
fname_filter_func = lambda fname: fname and category.name.replace('s', '') in fname \
and f'-{store_id:03d}-20' in fname
suffix: str = soup.find('a', href=fname_filter_func).attrs['href']
down_url: str = prefix + suffix
return down_url