Updated README.md with the latest changes and renamed directories to be more meaningful

This commit is contained in:
KorenLazar
2021-01-28 14:13:34 +02:00
parent 6bcbdee486
commit 47c0d04ce4
4 changed files with 25 additions and 30 deletions

11
.gitignore vendored
View File

@@ -1,11 +1,8 @@
promos*
Prices*
products_prices.log
.idea/ .idea/
Stores.xml
grading_check.py grading_check.py
stores_*
venv/ venv/
__pycache__/ __pycache__/
xmls/ raw_files/
logs/ results/
all_deals.py
unknown_items.csv

View File

@@ -1,4 +1,5 @@
# Supermarket basic scraping # Supermarket basic scraping
The library supports scraping from Shufersal, Co-Op, and Zol Vebegadol.
## Installation ## Installation
clone: clone:
@@ -20,18 +21,24 @@ First, to find your Shufersal store's ID, you can run the following command (ass
```cmd script ```cmd script
python main.py --find_store ירושלים --chain Shufersal python main.py --find_store ירושלים --chain Shufersal
``` ```
After running the command, you'll be able to see the different stores in Jerusalem with their IDs on the screen. In case you want a different supermarket chain, just change 'Shufersal' to a different name (the options will be
printed in case of misspelling).
Now, that we have the store's ID, we can get its promotions sorted by their update date by running After running the command, you'll be able to see the different stores in Jerusalem with their IDs in "results\Shufersal-Stores.xml".
Now that we have the store's ID, we can get the store's relevant promotions sorted by their start date, last update,
and length, by running:
```cmd script ```cmd script
python main.py --promos 5 --chain Shufersal python main.py --promos 5 --chain Shufersal
``` ```
* We assumed that the store's ID is 5. * We assumed that the store's ID is 5.
Now, you can find the promos in "promos_5.log". Now, you can find the promos in "results\Shufersal_promos_5.log".
For other documentation and commands, you can run For other documentation and commands, you can run
```cmd script ```cmd script
python main.py --h python main.py --h
``` ```
Any file that was downloaded in the process will be located in the "raw_files" directory.
Good luck! Good luck!

25
main.py
View File

@@ -1,19 +1,18 @@
from argparse import ArgumentParser from argparse import ArgumentParser
import logging import logging
from promotion import main_latest_promos, get_promos_by_name from promotion import main_latest_promos, get_promos_by_name
from store_utils import get_store_id from store_utils import get_all_deals, get_store_id
from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
from supermarket_chain import SupermarketChain from supermarket_chain import SupermarketChain
from shufersal import ShuferSal from shufersal import ShuferSal
from co_op import CoOp from co_op import CoOp
from zol_vebegadol import ZolVebegadol from zol_vebegadol import ZolVebegadol
from pathlib import Path from pathlib import Path
# TODO: fix problem of left-to-right printing # TODO: fix problem of left-to-right printing
Path(LOGS_DIRNAME).mkdir(exist_ok=True) Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
Path(XMLS_DIRNAME).mkdir(exist_ok=True) Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
chain_dict = { chain_dict = {
'Shufersal': ShuferSal(), 'Shufersal': ShuferSal(),
@@ -45,6 +44,9 @@ if __name__ == '__main__':
metavar='city', metavar='city',
nargs=1, nargs=1,
) )
# parser.add_argument('--all_deals',
# action='store_true',
# )
parser.add_argument('--load_prices', parser.add_argument('--load_prices',
help='boolean flag representing whether to load an existing price XML file', help='boolean flag representing whether to load an existing price XML file',
action='store_true', action='store_true',
@@ -70,7 +72,7 @@ if __name__ == '__main__':
logger = logging.getLogger() logger = logging.getLogger()
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
handler = logging.FileHandler(filename=f'logs/{args.chain}_promos_{arg_store_id}.log', mode='w', handler = logging.FileHandler(filename=f'{RESULTS_DIRNAME}/{args.chain}_promos_{arg_store_id}.log', mode='w',
encoding='utf-8') encoding='utf-8')
logger.addHandler(handler) logger.addHandler(handler)
main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain) main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain)
@@ -86,14 +88,3 @@ if __name__ == '__main__':
arg_store_id = int(args.find_promos_by_name[0]) arg_store_id = int(args.find_promos_by_name[0])
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1], get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
load_prices=args.load_prices, load_promos=args.load_promos) load_prices=args.load_prices, load_promos=args.load_promos)
# Script for Shufersal:
# store_ids = get_all_deals(chain)
# print(store_ids)
# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300]
# null_items_lists = list()
# for store_id in store_ids[::-1]:
# print(store_id)
# null_items_lists.append(get_all_null_items_in_promos(chain, store_id))
# print(setintersection(*[set(list) for list in null_items_lists]))

View File

@@ -9,8 +9,8 @@ from os import path
from supermarket_chain import SupermarketChain from supermarket_chain import SupermarketChain
import re import re
LOGS_DIRNAME = "logs" RESULTS_DIRNAME = "results"
XMLS_DIRNAME = "xmls" RAW_FILES_DIRNAME = "raw_files"
def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str: def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
@@ -24,7 +24,7 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
:return: An xml filename :return: An xml filename
""" """
store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else "" store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
return path.join(XMLS_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml") return path.join(RAW_FILES_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool, def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,