Merge branch 'master' of https://git.saret.tk/Mooooooooo/BulkBooks
This commit is contained in:
12
README.md
12
README.md
@@ -1,2 +1,12 @@
|
|||||||
# BulkBooks
|
# Bulk Books:
|
||||||
|
|
||||||
|
This script helps you download books from [Kotar](https://kotar.cet.ac.il/).
|
||||||
|
|
||||||
|
## How To?
|
||||||
|
1. You need academic access to Kotar.
|
||||||
|
1. You need Python >= 3.9.
|
||||||
|
1. Download the requirements. (Using a venv is preferred.)
|
||||||
|
1. Add the links to the __BooksToDownload__ file.
|
||||||
|
1. Run the script.
|
||||||
|
|
||||||
|
Enjoy.
|
@@ -21,6 +21,8 @@ Books = []
|
|||||||
THREADS = []
|
THREADS = []
|
||||||
PATHS = []
|
PATHS = []
|
||||||
OLD_REMOVE = []
|
OLD_REMOVE = []
|
||||||
|
BROWSER_PREFENCES = {"browser.download.folderList": 2, "browser.download.manager.showWhenStarting": False, "browser.download.dir": "ignore", "browser.helperApps.neverAsk.saveToDisk": "attachment/csv, text/plain, application/octet-stream, application/binary, text/csv, application/csv, application/excel, text/comma-separated-values, text/xml, application/xml, application/xls, excel/xls, application/excel 97-2003,application/Microsoft Excel 97-2003 Worksheet, application/vnd.ms-excel", "browser.helperApps.neverAsk.openFile":
|
||||||
|
"application/PDF, application/FDF, application/XFDF, application/LSL, application/LSO, application/LSS, application/IQY, application/RQY, application/XLK, application/XLS, application/XLT, application/POT application/PPS, application/PPT, application/DOS, application/DOT, application/WKS, application/BAT, application/PS, application/EPS, application/WCH, application/WCM, application/WB1, application/WB3, application/RTF, application/DOC, application/MDB, application/MDE, application/WBK, application/WB1, application/WCH, application/WCM, application/AD, application/ADP, application/vnd.ms-excel", "browser.download.panel.shown": False}
|
||||||
|
|
||||||
|
|
||||||
def remove_text(text: str):
|
def remove_text(text: str):
|
||||||
@@ -77,7 +79,7 @@ def do_action_now(index: int):
|
|||||||
for s in SOURCES[index]:
|
for s in SOURCES[index]:
|
||||||
if SOURCES[index][s]:
|
if SOURCES[index][s]:
|
||||||
if not os.path.exists(f"ignore/{name}/{files.index(s):04}.jpg"):
|
if not os.path.exists(f"ignore/{name}/{files.index(s):04}.jpg"):
|
||||||
with open(f"ignore/{name}/{files.index(s):04}.jpg","wb") as F:
|
with open(f"ignore/{name}/{files.index(s):04}.jpg", "wb") as F:
|
||||||
F.write(urllib3.PoolManager().request("GET", SOURCES[index][s]).data)
|
F.write(urllib3.PoolManager().request("GET", SOURCES[index][s]).data)
|
||||||
# files[files.index(s)] = urllib3.PoolManager().request("GET", SOURCES[index][s]).data
|
# files[files.index(s)] = urllib3.PoolManager().request("GET", SOURCES[index][s]).data
|
||||||
return files
|
return files
|
||||||
@@ -153,46 +155,37 @@ def open_firefox(url: str):
|
|||||||
def give_me_web():
    """Build the Firefox profile, driver name and options.

    Every entry of the module-level ``BROWSER_PREFENCES`` mapping is applied
    to a fresh Firefox profile; the options request the ``EN`` language and
    headless mode.

    Returns:
        A ``(profile, driver_name, options)`` tuple, where ``driver_name``
        is the string ``"geckodriver"``.
    """
    options = webdriver.FirefoxOptions()
    fp = webdriver.FirefoxProfile()
    # Iterate over .items(): looping over the dict itself yields only the
    # keys, and unpacking a key string into (key, val) raises ValueError.
    for key, val in BROWSER_PREFENCES.items():
        fp.set_preference(key, val)
    options.add_argument('--lang=EN')
    options.headless = True
    fire = "geckodriver"
    return (fp, fire, options)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
with open("BooksToDownload", "r", encoding="utf_8") as file:
|
with open("BooksToDownload", "r", encoding="utf_8") as file:
|
||||||
books = file.read().split("\n")
|
books = file.read().split("\n")
|
||||||
|
for b in books:
|
||||||
for b in books:
|
if b.find('````') > -1 and not b.startswith("#"):
|
||||||
if b.find('````') > -1 and not b.startswith("#"):
|
OLD_REMOVE.append(b)
|
||||||
OLD_REMOVE.append(b)
|
PATHS.append(b[b.rfind('`')+1:])
|
||||||
PATHS.append(b[b.rfind('`')+1:])
|
b = b[:b.find('`')]
|
||||||
b = b[:b.find('`')]
|
elif not b.startswith("#"):
|
||||||
elif not b.startswith("#"):
|
OLD_REMOVE.append(b)
|
||||||
OLD_REMOVE.append(b)
|
PATHS.append(None)
|
||||||
PATHS.append(None)
|
t1 = threading.Thread(None, open_firefox, args=(b,))
|
||||||
t1 = threading.Thread(None, open_firefox, args=(b,))
|
t1.start()
|
||||||
t1.start()
|
THREADS.append(t1)
|
||||||
THREADS.append(t1)
|
for t in THREADS:
|
||||||
for t in THREADS:
|
t.join()
|
||||||
t.join()
|
lasts = []
|
||||||
lasts = []
|
for i in range(len(ACTS)):
|
||||||
for i in range(len(ACTS)):
|
SOURCES[i].update({key.attrs["id"]: ""
|
||||||
SOURCES[i].update({key.attrs["id"]: ""
|
for key in bs4.BeautifulSoup(ACTS[i]._driver.page_source, "html.parser").find_all(
|
||||||
for key in bs4.BeautifulSoup(ACTS[i]._driver.page_source, "html.parser").find_all(
|
"div", attrs={"class": "BV_oImage"})})
|
||||||
"div", attrs={"class": "BV_oImage"})})
|
lasts.append(list(SOURCES[i].keys())[-1])
|
||||||
lasts.append(list(SOURCES[i].keys())[-1])
|
couters = 0
|
||||||
couters = 0
|
treads = len(ACTS)-1
|
||||||
treads = len(ACTS)-1
|
for i in range(len(ACTS)):
|
||||||
for i in range(len(ACTS)):
|
T = threading.Thread(None, act_now, args=(i, PATHS[i]))
|
||||||
T = threading.Thread(None, act_now, args=(i, PATHS[i]))
|
T.start()
|
||||||
T.start()
|
|
Reference in New Issue
Block a user