Compare commits
14 Commits
955a7ed6f5
...
master
Author | SHA1 | Date | |
---|---|---|---|
520303cd03 | |||
14bd12ee8f | |||
|
6e0446af3f | ||
|
18f4876291 | ||
8163dd903d | |||
6d19d28f6e | |||
|
d73c5c579f | ||
|
4eefe79bab | ||
864ab7ad1e | |||
3055dc05f9 | |||
29f6324bc2 | |||
|
7fc8519e4a | ||
|
8cbdcee256 | ||
d9dfaaa68c |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,2 +1,3 @@
|
||||
.vscode/
|
||||
venv/
|
||||
ignore/
|
@@ -1 +1,33 @@
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=105179261
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110994706
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110918661
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110930833
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110933810
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110938002
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110938002
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110942621
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110942621
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110948032
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110948032
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=110959329
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=105256337
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=105018830
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=109444642
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=109400325
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=109392561
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=107884166
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=97645077
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=97645077
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=102594097
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=102591827
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=102588217
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=102589202
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=101052334
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=101048613
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=101986400
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=100976710
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=100974786
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=108426718
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=108236946
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=106246523
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=104502712
|
||||
https://kotar.cet.ac.il/KotarApp/Viewer.aspx?nBookID=103818662
|
||||
|
12
README.md
12
README.md
@@ -1,2 +1,12 @@
|
||||
# BulkBooks
|
||||
# Bulk Books:
|
||||
|
||||
This script helps you download books from [Kotar](https://kotar.cet.ac.il/).
|
||||
|
||||
## How To?
|
||||
1. You need academic access to Kotar.
|
||||
1. You need Python >= 3.9.
|
||||
1. Install the requirements with `pip install -r requirements.txt` (preferably inside a venv).
|
||||
1. Add the book links, one per line, to the __BooksToDownload__ file.
|
||||
1. Run the script.
|
||||
|
||||
Enjoy.
|
@@ -21,6 +21,8 @@ Books = []
|
||||
THREADS = []
|
||||
PATHS = []
|
||||
OLD_REMOVE = []
|
||||
BROWSER_PREFENCES = {"browser.download.folderList": 2, "browser.download.manager.showWhenStarting": False, "browser.download.dir": "ignore", "browser.helperApps.neverAsk.saveToDisk": "attachment/csv, text/plain, application/octet-stream, application/binary, text/csv, application/csv, application/excel, text/comma-separated-values, text/xml, application/xml, application/xls, excel/xls, application/excel 97-2003,application/Microsoft Excel 97-2003 Worksheet, application/vnd.ms-excel", "browser.helperApps.neverAsk.openFile":
|
||||
"application/PDF, application/FDF, application/XFDF, application/LSL, application/LSO, application/LSS, application/IQY, application/RQY, application/XLK, application/XLS, application/XLT, application/POT application/PPS, application/PPT, application/DOS, application/DOT, application/WKS, application/BAT, application/PS, application/EPS, application/WCH, application/WCM, application/WB1, application/WB3, application/RTF, application/DOC, application/MDB, application/MDE, application/WBK, application/WB1, application/WCH, application/WCM, application/AD, application/ADP, application/vnd.ms-excel", "browser.download.panel.shown": False}
|
||||
|
||||
|
||||
def remove_text(text: str):
|
||||
@@ -76,7 +78,10 @@ def do_action_now(index: int):
|
||||
files = list(SOURCES[index])
|
||||
for s in SOURCES[index]:
|
||||
if SOURCES[index][s]:
|
||||
files[files.index(s)] = urllib3.PoolManager().request("GET", SOURCES[index][s]).data
|
||||
if not os.path.exists(f"ignore/{name}/{files.index(s):04}.jpg"):
|
||||
with open(f"ignore/{name}/{files.index(s):04}.jpg", "wb") as F:
|
||||
F.write(urllib3.PoolManager().request("GET", SOURCES[index][s]).data)
|
||||
# files[files.index(s)] = urllib3.PoolManager().request("GET", SOURCES[index][s]).data
|
||||
return files
|
||||
# def check_and_act(index: int,last):
|
||||
|
||||
@@ -110,15 +115,15 @@ def act_now(index: int, path: str = None):
|
||||
url_now = url_now[:url_now.find("#")+1] + save_first
|
||||
if SOURCES[index][last] and "" in SOURCES[index].values():
|
||||
ACTS[index]._driver.get(url_now)
|
||||
pages = do_action_now(index)
|
||||
do_action_now(index)
|
||||
SOURCES[index][last] = ""
|
||||
else:
|
||||
pages = do_action_now(index)
|
||||
do_action_now(index)
|
||||
if SOURCES[index] and "" not in SOURCES[index].values():
|
||||
couters += 1
|
||||
pathus = f'{path}/{name}.pdf' if path else f"ignore/{name}/{name}.pdf"
|
||||
with open(pathus, "wb") as file:
|
||||
file.write(img2pdf.convert(pages))
|
||||
file.write(img2pdf.convert(glob.glob(f"ignore/{name}/*.jpg")))
|
||||
ACTS[index]._driver.quit()
|
||||
remove_text(OLD_REMOVE[index])
|
||||
treads -= 1
|
||||
@@ -150,45 +155,37 @@ def open_firefox(url: str):
|
||||
def give_me_web():
|
||||
options = webdriver.FirefoxOptions()
|
||||
fp = webdriver.FirefoxProfile()
|
||||
fp.set_preference("browser.download.folderList", 2)
|
||||
fp.set_preference("browser.download.manager.showWhenStarting", False)
|
||||
fp.set_preference("browser.download.dir", "ignore")
|
||||
fp.set_preference(
|
||||
"browser.helperApps.neverAsk.saveToDisk",
|
||||
"attachment/csv, text/plain, application/octet-stream, application/binary, text/csv, application/csv, application/excel, text/comma-separated-values, text/xml, application/xml, application/xls, excel/xls, application/excel 97-2003,application/Microsoft Excel 97-2003 Worksheet, application/vnd.ms-excel")
|
||||
fp.set_preference(
|
||||
"browser.helperApps.neverAsk.openFile",
|
||||
"application/PDF, application/FDF, application/XFDF, application/LSL, application/LSO, application/LSS, application/IQY, application/RQY, application/XLK, application/XLS, application/XLT, application/POT application/PPS, application/PPT, application/DOS, application/DOT, application/WKS, application/BAT, application/PS, application/EPS, application/WCH, application/WCM, application/WB1, application/WB3, application/RTF, application/DOC, application/MDB, application/MDE, application/WBK, application/WB1, application/WCH, application/WCM, application/AD, application/ADP, application/vnd.ms-excel")
|
||||
fp.set_preference("browser.download.panel.shown", False)
|
||||
for key,val in BROWSER_PREFENCES:
|
||||
fp.set_preference(key, val)
|
||||
options.add_argument('--lang=EN')
|
||||
options.headless = True
|
||||
fire = "geckodriver"
|
||||
return (fp, fire, options)
|
||||
|
||||
|
||||
with open("BooksToDownload", "r", encoding="utf_8") as file:
|
||||
books = file.read().split("\n")
|
||||
|
||||
for b in books:
|
||||
if b.find('````') > -1 and not b.startswith("#"):
|
||||
OLD_REMOVE.append(b)
|
||||
PATHS.append(b[b.rfind('`')+1:])
|
||||
b = b[:b.find('`')]
|
||||
elif not b.startswith("#"):
|
||||
OLD_REMOVE.append(b)
|
||||
PATHS.append(None)
|
||||
t1 = threading.Thread(None, open_firefox, args=(b,))
|
||||
t1.start()
|
||||
THREADS.append(t1)
|
||||
for t in THREADS:
|
||||
t.join()
|
||||
lasts = []
|
||||
for i in range(len(ACTS)):
|
||||
SOURCES[i].update({key.attrs["id"]: ""
|
||||
for key in bs4.BeautifulSoup(ACTS[i]._driver.page_source, "html.parser").find_all(
|
||||
"div", attrs={"class": "BV_oImage"})})
|
||||
lasts.append(list(SOURCES[i].keys())[-1])
|
||||
couters = 0
|
||||
treads = len(ACTS)-1
|
||||
for i in range(len(ACTS)):
|
||||
T = threading.Thread(None, act_now, args=(i, PATHS[i]))
|
||||
T.start()
|
||||
if __name__ == "__main__":
|
||||
with open("BooksToDownload", "r", encoding="utf_8") as file:
|
||||
books = file.read().split("\n")
|
||||
for b in books:
|
||||
if b.find('````') > -1 and not b.startswith("#"):
|
||||
OLD_REMOVE.append(b)
|
||||
PATHS.append(b[b.rfind('`')+1:])
|
||||
b = b[:b.find('`')]
|
||||
elif not b.startswith("#"):
|
||||
OLD_REMOVE.append(b)
|
||||
PATHS.append(None)
|
||||
t1 = threading.Thread(None, open_firefox, args=(b,))
|
||||
t1.start()
|
||||
THREADS.append(t1)
|
||||
for t in THREADS:
|
||||
t.join()
|
||||
lasts = []
|
||||
for i in range(len(ACTS)):
|
||||
SOURCES[i].update({key.attrs["id"]: ""
|
||||
for key in bs4.BeautifulSoup(ACTS[i]._driver.page_source, "html.parser").find_all(
|
||||
"div", attrs={"class": "BV_oImage"})})
|
||||
lasts.append(list(SOURCES[i].keys())[-1])
|
||||
couters = 0
|
||||
treads = len(ACTS)-1
|
||||
for i in range(len(ACTS)):
|
||||
T = threading.Thread(None, act_now, args=(i, PATHS[i]))
|
||||
T.start()
|
25
requirements.txt
Normal file
25
requirements.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
async-generator==1.10
|
||||
attrs==23.1.0
|
||||
beautifulsoup4==4.12.2
|
||||
bs4==0.0.1
|
||||
certifi==2022.12.7
|
||||
chromedriver-autoinstaller==0.4.0
|
||||
deprecation==2.1.0
|
||||
exceptiongroup==1.1.1
|
||||
h11==0.14.0
|
||||
idna==3.4
|
||||
img2pdf==0.4.4
|
||||
lxml==4.9.2
|
||||
outcome==1.2.0
|
||||
packaging==23.1
|
||||
pikepdf==7.2.0
|
||||
Pillow==9.5.0
|
||||
PySocks==1.7.1
|
||||
selenium==4.9.0
|
||||
sniffio==1.3.0
|
||||
sortedcontainers==2.4.0
|
||||
soupsieve==2.4.1
|
||||
trio==0.22.0
|
||||
trio-websocket==0.10.2
|
||||
urllib3==1.26.15
|
||||
wsproto==1.2.0
|
Reference in New Issue
Block a user