Updated README.md with the latest changes and renamed directories to be more meaningful
This commit is contained in:
11
.gitignore
vendored
11
.gitignore
vendored
@@ -1,11 +1,8 @@
|
|||||||
promos*
|
|
||||||
Prices*
|
|
||||||
products_prices.log
|
|
||||||
.idea/
|
.idea/
|
||||||
Stores.xml
|
|
||||||
grading_check.py
|
grading_check.py
|
||||||
stores_*
|
|
||||||
venv/
|
venv/
|
||||||
__pycache__/
|
__pycache__/
|
||||||
xmls/
|
raw_files/
|
||||||
logs/
|
results/
|
||||||
|
all_deals.py
|
||||||
|
unknown_items.csv
|
||||||
|
13
README.md
13
README.md
@@ -1,4 +1,5 @@
|
|||||||
# Supermarket basic scraping
|
# Supermarket basic scraping
|
||||||
|
The library supports scraping from Shufersal, Co-Op, and Zol Vebegadol.
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
clone:
|
clone:
|
||||||
@@ -20,18 +21,24 @@ First, to find your Shufersal store's ID, you can run the following command (ass
|
|||||||
```cmd script
|
```cmd script
|
||||||
python main.py --find_store ירושלים --chain Shufersal
|
python main.py --find_store ירושלים --chain Shufersal
|
||||||
```
|
```
|
||||||
After running the command, you'll be able to see the different stores in Jerusalem with their IDs on the screen.
|
To query a different supermarket chain, replace 'Shufersal' with that chain's name (the valid options will be
|
||||||
|
printed if the name is misspelled).
|
||||||
|
|
||||||
Now, that we have the store's ID, we can get its promotions sorted by their update date by running
|
After running the command, you'll be able to see the different stores in Jerusalem with their IDs in "results\Shufersal-Stores.xml".
|
||||||
|
|
||||||
|
Now that we have the store's ID, we can get the store's relevant promotions, sorted by their start date, last update,
|
||||||
|
and length, by running:
|
||||||
```cmd script
|
```cmd script
|
||||||
python main.py --promos 5 --chain Shufersal
|
python main.py --promos 5 --chain Shufersal
|
||||||
```
|
```
|
||||||
* We assumed that the store's ID is 5.
|
* We assumed that the store's ID is 5.
|
||||||
Now, you can find the promos in "promos_5.log".
|
Now, you can find the promos in "results\Shufersal_promos_5.log".
|
||||||
|
|
||||||
For other documentation and commands, you can run
|
For other documentation and commands, you can run
|
||||||
```cmd script
|
```cmd script
|
||||||
python main.py --h
|
python main.py --h
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Any file that was downloaded in the process will be located in the "raw_files" directory.
|
||||||
|
|
||||||
Good luck!
|
Good luck!
|
||||||
|
25
main.py
25
main.py
@@ -1,19 +1,18 @@
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
import logging
|
import logging
|
||||||
from promotion import main_latest_promos, get_promos_by_name
|
from promotion import main_latest_promos, get_promos_by_name
|
||||||
from store_utils import get_store_id
|
from store_utils import get_all_deals, get_store_id
|
||||||
from utils import LOGS_DIRNAME, XMLS_DIRNAME, get_products_prices
|
from utils import RESULTS_DIRNAME, RAW_FILES_DIRNAME, get_products_prices
|
||||||
from supermarket_chain import SupermarketChain
|
from supermarket_chain import SupermarketChain
|
||||||
from shufersal import ShuferSal
|
from shufersal import ShuferSal
|
||||||
from co_op import CoOp
|
from co_op import CoOp
|
||||||
from zol_vebegadol import ZolVebegadol
|
from zol_vebegadol import ZolVebegadol
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
# TODO: fix problem of left-to-right printing
|
# TODO: fix problem of left-to-right printing
|
||||||
|
|
||||||
Path(LOGS_DIRNAME).mkdir(exist_ok=True)
|
Path(RESULTS_DIRNAME).mkdir(exist_ok=True)
|
||||||
Path(XMLS_DIRNAME).mkdir(exist_ok=True)
|
Path(RAW_FILES_DIRNAME).mkdir(exist_ok=True)
|
||||||
|
|
||||||
chain_dict = {
|
chain_dict = {
|
||||||
'Shufersal': ShuferSal(),
|
'Shufersal': ShuferSal(),
|
||||||
@@ -45,6 +44,9 @@ if __name__ == '__main__':
|
|||||||
metavar='city',
|
metavar='city',
|
||||||
nargs=1,
|
nargs=1,
|
||||||
)
|
)
|
||||||
|
# parser.add_argument('--all_deals',
|
||||||
|
# action='store_true',
|
||||||
|
# )
|
||||||
parser.add_argument('--load_prices',
|
parser.add_argument('--load_prices',
|
||||||
help='boolean flag representing whether to load an existing price XML file',
|
help='boolean flag representing whether to load an existing price XML file',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
@@ -70,7 +72,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO)
|
||||||
handler = logging.FileHandler(filename=f'logs/{args.chain}_promos_{arg_store_id}.log', mode='w',
|
handler = logging.FileHandler(filename=f'{RESULTS_DIRNAME}/{args.chain}_promos_{arg_store_id}.log', mode='w',
|
||||||
encoding='utf-8')
|
encoding='utf-8')
|
||||||
logger.addHandler(handler)
|
logger.addHandler(handler)
|
||||||
main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain)
|
main_latest_promos(store_id=arg_store_id, load_xml=args.load_prices, logger=logger, chain=chain)
|
||||||
@@ -86,14 +88,3 @@ if __name__ == '__main__':
|
|||||||
arg_store_id = int(args.find_promos_by_name[0])
|
arg_store_id = int(args.find_promos_by_name[0])
|
||||||
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
|
get_promos_by_name(store_id=arg_store_id, chain=chain, promo_name=args.find_promos_by_name[1],
|
||||||
load_prices=args.load_prices, load_promos=args.load_promos)
|
load_prices=args.load_prices, load_promos=args.load_promos)
|
||||||
|
|
||||||
|
|
||||||
# Script for Shufersal:
|
|
||||||
# store_ids = get_all_deals(chain)
|
|
||||||
# print(store_ids)
|
|
||||||
# # store_ids = [133, 234, 73, 62, 607, 610, 111, 219, 81, 606, 609, 295, 349, 496, 611, 812, 608, 300]
|
|
||||||
# null_items_lists = list()
|
|
||||||
# for store_id in store_ids[::-1]:
|
|
||||||
# print(store_id)
|
|
||||||
# null_items_lists.append(get_all_null_items_in_promos(chain, store_id))
|
|
||||||
# print(setintersection(*[set(list) for list in null_items_lists]))
|
|
||||||
|
6
utils.py
6
utils.py
@@ -9,8 +9,8 @@ from os import path
|
|||||||
from supermarket_chain import SupermarketChain
|
from supermarket_chain import SupermarketChain
|
||||||
import re
|
import re
|
||||||
|
|
||||||
LOGS_DIRNAME = "logs"
|
RESULTS_DIRNAME = "results"
|
||||||
XMLS_DIRNAME = "xmls"
|
RAW_FILES_DIRNAME = "raw_files"
|
||||||
|
|
||||||
|
|
||||||
def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
|
def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) -> str:
|
||||||
@@ -24,7 +24,7 @@ def xml_file_gen(chain: SupermarketChain, store_id: int, category_name: str) ->
|
|||||||
:return: An xml filename
|
:return: An xml filename
|
||||||
"""
|
"""
|
||||||
store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
|
store_id_str: str = f"-{str(store_id)}" if SupermarketChain.is_valid_store_id(store_id) else ""
|
||||||
return path.join(XMLS_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
|
return path.join(RAW_FILES_DIRNAME, f"{chain}-{category_name}{store_id_str}.xml")
|
||||||
|
|
||||||
|
|
||||||
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
|
def create_bs_object(xml_path: str, chain: SupermarketChain, store_id: int, load_xml: bool,
|
||||||
|
Reference in New Issue
Block a user