Updated README.md with latest changes and directory names to be more meaningful
This commit is contained in:
11
.gitignore
vendored
11
.gitignore
vendored
@@ -1,11 +1,8 @@
|
||||
promos*
|
||||
Prices*
|
||||
products_prices.log
|
||||
.idea/
|
||||
Stores.xml
|
||||
grading_check.py
|
||||
stores_*
|
||||
venv/
|
||||
__pycache__/
|
||||
xmls/
|
||||
logs/
|
||||
raw_files/
|
||||
results/
|
||||
all_deals.py
|
||||
unknown_items.csv
|
||||
|
13
README.md
13
README.md
@@ -1,4 +1,5 @@
|
||||
# Supermarket basic scraping
|
||||
The library supports scraping from Shufersal, Co-Op and Zol Vebegadol.
|
||||
|
||||
## Installation
|
||||
clone:
|
||||
@@ -20,18 +21,24 @@ First, to find your Shufersal store's ID, you can run the following command (ass
|
||||
```cmd script
|
||||
python main.py --find_store ירושלים --chain Shufersal
|
||||
```
|
||||
After running the command, you'll be able to see the different stores in Jerusalem with their IDs on the screen.
|
||||
In case you want a different supermarket chain, just change 'Shufersal' to a different name (the options will be
|
||||
printed in case of misspelling).
|
||||
|
||||
Now that we have the store's ID, we can get its promotions sorted by their update date by running:
|
||||
After running the command, you'll be able to see the different stores in Jerusalem with their IDs in "results\Shufersal-Stores.xml".
|
||||
|
||||
Now that we have the store's ID, we can get the store's relevant promotions sorted by their start date, last update,
|
||||
and length:
|
||||
```cmd script
|
||||
python main.py --promos 5 --chain Shufersal
|
||||
```
|
||||
* We assumed that the store's ID is 5.
|
||||
Now, you can find the promos in "promos_5.log".
|
||||
Now, you can find the promos in "results\Shufersal_promos_5.log".
|
||||
|
||||
For other documentation and commands, you can run
|
||||
```cmd script
|
||||
python main.py --h
|
||||
```
|
||||
|
||||
Any file that was downloaded in the process will be located in the "raw_files" directory.
|
||||
|
||||
Good luck!
|
||||
|
25
main.py
25
main.py
@@ -1,19 +1,18 @@
|
||||
from argparse import ArgumentParser
|
||||
import logging
|
||||
from promotion import main_latest_promos, get_promos_by_name
|
||||
from store_utils import get_store_id
|
||||
from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices
|
||||
from store_utils import get_all_deals, get_store_id
|
||||
from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
|
||||
from supermarket_chain import SupermarketChain
|
||||
from shufersal import ShuferSal
|
||||
from co_op import CoOp
|
||||
from zol_vebegadol import ZolVebegadol
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# TODO: fix problem of left-to-right printing
|
||||
|
||||
Path(LOGS_DIRNAME).mkdir(exist_ok=True)
|
||||
Path(XMLS_DIRNAME).mkdir(exist_ok=True)
|
||||
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
|
||||
Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
|
||||
|
||||
chain_dict = {
|
||||
'Shufersal': ShuferSal(),
|
||||
@@ -45,6 +44,9 @@ if __name__ == '__main__':
|
||||
metavar='city',
|
||||
nargs=1,
|
||||
)
|
||||
# parser.add_argument('--all_deals',
|
||||
# action='store_true',
|
||||
# )
|
||||
parser.add_argument('--load_prices',
|
||||
help='boolean flag representing whether to load an existing price XML file',
|
||||
action='store_true',
|
||||
@@ -70,7 +72,7 @@ if __name__ == '__main__':
|
||||
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(logging.INFO)
|
||||
handler = logging.FileHandler(filename=f'logs/{args.chain}_promos_{arg_store_id}.log', mode='w',
|
||||
handler = logging.FileHandler(filename=f'{RESULTS_DIRNAME}/{args.chain}_promos_{arg_store_id}.log', mode='w',
|
||||
encoding='utf-8')
|
||||
logger.addHandler(handler)
|
||||
main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain)
|
||||
@@ -86,14 +88,3 @@ if __name__ == '__main__':
|
||||
arg_store_id = int(args.find_promos_by_name[0])
|
||||
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
|
||||
load_prices=args.load_prices, load_promos=args.load_promos)
|
||||
|
||||
|
||||
# Script for Shufersal:
|
||||
# store_ids = get_all_deals(chain)
|
||||
# print(store_ids)
|
||||
# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300]
|
||||
# null_items_lists = list()
|
||||
# for store_id in store_ids[::-1]:
|
||||
# print(store_id)
|
||||
# null_items_lists.append(get_all_null_items_in_promos(chain, store_id))
|
||||
# print(setintersection(*[set(list) for list in null_items_lists]))
|
||||
|
6
utils.py
6
utils.py
@@ -9,8 +9,8 @@ from os import path
|
||||
from supermarket_chain import SupermarketChain
|
||||
import re
|
||||
|
||||
LOGS_DIRNAME = "logs"
|
||||
XMLS_DIRNAME = "xmls"
|
||||
RESULTS_DIRNAME = "results"
|
||||
RAW_FILES_DIRNAME = "raw_files"
|
||||
|
||||
|
||||
def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
|
||||
@@ -24,7 +24,7 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
|
||||
:return: An xml filename
|
||||
"""
|
||||
store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
|
||||
return path.join(XMLS_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
|
||||
return path.join(RAW_FILES_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
|
||||
|
||||
|
||||
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
|
||||
|
Reference in New Issue
Block a user