Merge pull request #3 from 1kamma/master

Fixed a mistake in the requirements
This commit is contained in:
Koren Lazar
2022-10-05 07:51:58 +03:00
committed by GitHub
4 changed files with 35 additions and 15 deletions

3
.gitignore vendored
View File

@@ -6,3 +6,6 @@ raw_files/
results/ results/
all_deals.py all_deals.py
unknown_items.csv unknown_items.csv
helper_*
.vscode/
desktop.ini

View File

@@ -1,6 +1,8 @@
import logging import logging
import os import os
import shutil import shutil
import platform
import sys
import time import time
from abc import abstractmethod from abc import abstractmethod
@@ -18,6 +20,26 @@ class CerberusWebClient(SupermarketChain):
@abstractmethod @abstractmethod
def username(self): def username(self):
pass pass
# Absolute path of the "raw_files" directory under the current working
# directory, computed once when this assignment is executed. Read elsewhere
# via self.download_dir to locate files saved by the browser.
download_dir = f"{os.path.abspath(os.path.curdir)}/raw_files"
def is_system_headless(self) -> bool:
    """Report whether the process runs on Linux without a DISPLAY set.

    Returns:
        True only when ``sys.platform`` is ``"linux"`` and the ``DISPLAY``
        environment variable is missing or empty; False otherwise.
    """
    # Only Linux is ever considered headless by this check.
    if sys.platform != "linux":
        return False
    display = os.environ.get("DISPLAY")
    return not display
def set_browser_options(self) -> webdriver.ChromeOptions:
    """Build the Chrome options used when launching the browser.

    The options direct downloads to ``self.download_dir``, disable
    certificate/SSL error checks, and enable headless mode when
    ``is_system_headless()`` reports that no display is available.

    Returns:
        A configured ``webdriver.ChromeOptions`` instance.
    """
    options = webdriver.ChromeOptions()
    # "download.default_directory" is a Chrome preference, not a WebDriver
    # capability: it only takes effect when passed through the "prefs"
    # experimental option. Reusing self.download_dir keeps the browser's
    # target directory in sync with the path the file is later read from.
    options.add_experimental_option(
        "prefs", {"download.default_directory": self.download_dir}
    )
    options.add_argument("ignore-certificate-errors")
    options.add_argument("--ignore-ssl-errors=yes")
    # Pass the flag explicitly instead of using the deprecated
    # ``options.headless`` setter, which newer Selenium releases removed.
    if self.is_system_headless():
        options.add_argument("--headless")
    return options
def set_browser(self, options: webdriver.ChromeOptions) -> webdriver.Chrome:
    """Create a Chrome WebDriver using the given options.

    On a headless machine whose architecture is ``aarch64`` the driver
    binary at ``/usr/bin/chromedriver`` is used; in every other case the
    driver is obtained through ``ChromeDriverManager().install()``.

    Args:
        options: Chrome options to launch the browser with.

    Returns:
        The started ``webdriver.Chrome`` instance.
    """
    use_system_driver = (
        self.is_system_headless() and platform.machine() == 'aarch64'
    )
    if use_system_driver:
        driver_service = Service('/usr/bin/chromedriver')
    else:
        # Only resolve/download a driver when the system one is not used.
        driver_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=driver_service, options=options)
def get_download_url_or_path( def get_download_url_or_path(
self, self,
@@ -25,25 +47,17 @@ class CerberusWebClient(SupermarketChain):
category: SupermarketChain.XMLFilesCategory, category: SupermarketChain.XMLFilesCategory,
session: requests.Session, session: requests.Session,
) -> str: ) -> str:
options = webdriver.ChromeOptions() options=self.set_browser_options()
options.add_argument("ignore-certificate-errors") driver = self.set_browser(options)
options.add_argument("--ignore-ssl-errors=yes")
driver = webdriver.Chrome(
service=Service(ChromeDriverManager().install()), options=options
)
driver.get("https://url.retail.publishedprices.co.il/login#") driver.get("https://url.retail.publishedprices.co.il/login#")
time.sleep(2) time.sleep(2)
userElem = driver.find_element(By.NAME, "username") userElem = driver.find_element(By.NAME, "username")
userElem.send_keys(self.username) userElem.send_keys(self.username)
driver.find_element(By.NAME, "Submit").click() driver.find_element(By.NAME, "Submit").click()
time.sleep(2) time.sleep(2)
searchElem = driver.find_element(By.CLASS_NAME, "form-control") searchElem = driver.find_element(By.CLASS_NAME, "form-control")
searchElem.send_keys(category.value.lower().replace('s', '')) searchElem.send_keys(category.name.lower().replace('s', ''))
time.sleep(5) time.sleep(5)
conns = driver.find_elements(By.CLASS_NAME, "f") conns = driver.find_elements(By.CLASS_NAME, "f")
best_link = "" best_link = ""
for conn in conns: for conn in conns:
@@ -85,9 +99,9 @@ class CerberusWebClient(SupermarketChain):
return "" return ""
driver.get(best_link) driver.get(best_link)
time.sleep(3) time.sleep(3)
download_dir = "/Users/korenlazar/Downloads" filename = best_link.split("/")[-1] # don't be an idiot. it is stupid to count letters
filename = best_link[48:] # split and grab, or rename it by yourself.
path_download = os.path.join(download_dir, filename) path_download = os.path.join(self.download_dir, filename)
logging.info(f"{path_download=}") logging.info(f"{path_download=}")
path_to_save = f"raw_files/{self.username}-{filename}" path_to_save = f"raw_files/{self.username}-{filename}"
try: try:

View File

@@ -1,5 +1,6 @@
import logging import logging
import re import re
import sys
from datetime import datetime from datetime import datetime
from typing import Dict, List, Union from typing import Dict, List, Union
from bs4.element import Tag from bs4.element import Tag
@@ -19,6 +20,8 @@ from utils import (
xml_file_gen, xml_file_gen,
) )
from supermarket_chain import SupermarketChain from supermarket_chain import SupermarketChain
from utils import (create_bs_object, create_items_dict, get_float_from_tag,
xml_file_gen)
XML_FILES_PROMOTIONS_CATEGORIES = [ XML_FILES_PROMOTIONS_CATEGORIES = [
SupermarketChain.XMLFilesCategory.PromosFull, SupermarketChain.XMLFilesCategory.PromosFull,

View File

@@ -14,4 +14,4 @@ argparse~=1.4.0
XlsxWriter~=1.4.3 XlsxWriter~=1.4.3
aenum aenum
selenium selenium
webdriver-manager webdriver-manager