From 47c0d04ce456f501d242960d3b95ad6d4b3a00d6 Mon Sep 17 00:00:00 2001 From: KorenLazar Date: Thu, 28 Jan 2021 14:13:34 +0200 Subject: [PATCH] Updated README.md with latest changes and directory names to be more meaningful --- .gitignore | 11 ++++------- README.md | 13 ++++++++++--- main.py | 25 ++++++++----------------- utils.py | 6 +++--- 4 files changed, 25 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index 3c44073..bc3f4dc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,8 @@ -promos* -Prices* -products_prices.log .idea/ -Stores.xml grading_check.py -stores_* venv/ __pycache__/ -xmls/ -logs/ +raw_files/ +results/ +all_deals.py +unknown_items.csv diff --git a/README.md b/README.md index 24054fc..5481f49 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # Supermarket basic scraping +The library supports scraping from Shufersal, Co-Op and Zol Vebegadol. ## Installation clone: @@ -20,18 +21,24 @@ First, to find your Shufersal store's ID, you can run the following command (ass ```cmd script python main.py --find_store ירושלים --chain Shufersal ``` -After running the command, you'll be able to see the different stores in Jerusalem with their IDs on the screen. +In case you want a different supermarket chain, just change 'Shufersal' to a different name (the options will be + printed in case of misspelling). -Now, that we have the store's ID, we can get its promotions sorted by their update date by running +After running the command, you'll be able to see the different stores in Jerusalem with their IDs in "results\Shufersal-Stores.xml". + +Now that we have the store's ID, we can get the store's relevant promotions sorted by their start date, last update, +and length. ```cmd script python main.py --promos 5 --chain Shufersal ``` * We assumed that the store's ID is 5. -Now, you can find the promos in "promos_5.log". +Now, you can find the promos in "results\Shufersal_promos_5.log". 
For other documentation and commands, you can run ```cmd script python main.py --h ``` +Any file that was downloaded in the process will be located in the "raw_files" directory. + Good luck! diff --git a/main.py b/main.py index 3b5319d..b42d965 100644 --- a/main.py +++ b/main.py @@ -1,19 +1,18 @@ from argparse import ArgumentParser import logging from promotion import main_latest_promos, get_promos_by_name -from store_utils import get_store_id -from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices +from store_utils import get_all_deals, get_store_id +from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices from supermarket_chain import SupermarketChain from shufersal import ShuferSal from co_op import CoOp from zol_vebegadol import ZolVebegadol from pathlib import Path - # TODO: fix problem of left-to-right printing -Path(LOGS_DIRNAME).mkdir(exist_ok=True) -Path(XMLS_DIRNAME).mkdir(exist_ok=True) +Path(RESULTS_DIRNAME).mkdir(exist_ok=True) +Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True) chain_dict = { 'Shufersal': ShuferSal(), @@ -45,6 +44,9 @@ if __name__ == '__main__': metavar='city', nargs=1, ) + # parser.add_argument('--all_deals', + # action='store_true', + # ) parser.add_argument('--load_prices', help='boolean flag representing whether to load an existing price XML file', action='store_true', @@ -70,7 +72,7 @@ if __name__ == '__main__': logger = logging.getLogger() logger.setLevel(logging.INFO) - handler = logging.FileHandler(filename=f'logs/{args.chain}_promos_{arg_store_id}.log', mode='w', + handler = logging.FileHandler(filename=f'{RESULTS_DIRNAME}/{args.chain}_promos_{arg_store_id}.log', mode='w', encoding='utf-8') logger.addHandler(handler) main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain) @@ -86,14 +88,3 @@ if __name__ == '__main__': arg_store_id = int(args.find_promos_by_name[0]) get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1], 
load_prices=args.load_prices, load_promos=args.load_promos) - - -# Script for Shufersal: -# store_ids = get_all_deals(chain) -# print(store_ids) -# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300] -# null_items_lists = list() -# for store_id in store_ids[::-1]: -# print(store_id) -# null_items_lists.append(get_all_null_items_in_promos(chain, store_id)) -# print(setintersection(*[set(list) for list in null_items_lists])) diff --git a/utils.py b/utils.py index e897a6e..6d7c684 100644 --- a/utils.py +++ b/utils.py @@ -9,8 +9,8 @@ from os import path from supermarket_chain import SupermarketChain import re -LOGS_DIRNAME = "logs" -XMLS_DIRNAME = "xmls" +RESULTS_DIRNAME = "results" +RAW_FILES_DIRNAME = "raw_files" def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str: @@ -24,7 +24,7 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> :return: An xml filename """ store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else "" - return path.join(XMLS_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml") + return path.join(RAW_FILES_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml") def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,