Learn not to swear so much at the screen Cover icons in the tab bar Shift Scan Library button from the Library tab to the Library toolbar
321 lines
11 KiB
Python
321 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
|
|
# This file is a part of Lector, a Qt based ebook reader
|
|
# Copyright (C) 2017 BasioMeusPuga
|
|
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
# INSTRUCTIONS
|
|
# Every parser is supposed to have the following methods, even if they return None:
|
|
# read_book()
|
|
# get_title()
|
|
# get_author()
|
|
# get_year()
|
|
# get_cover_image()
|
|
# get_isbn()
|
|
# get_tags()
|
|
# get_contents() - Should return a tuple with 0: TOC 1: special_settings (dict)
|
|
# Parsers for files containing only images need to return only images_only = True
|
|
|
|
# TODO
|
|
# Maybe shift to insert or replace instead of hash checking
|
|
# See if you want to include a hash of the book's name and author
|
|
# Change thread niceness
|
|
|
|
import io
|
|
import os
|
|
import time
|
|
import pickle
|
|
import hashlib
|
|
import threading
|
|
|
|
from multiprocessing import Pool, Manager
|
|
from PyQt5 import QtCore, QtGui
|
|
from lector import database
|
|
|
|
from lector.parsers.pdf import ParsePDF
|
|
from lector.parsers.epub import ParseEPUB
|
|
from lector.parsers.mobi import ParseMOBI
|
|
from lector.parsers.comicbooks import ParseCOMIC
|
|
|
|
sorter = {
|
|
'pdf': ParsePDF,
|
|
'epub': ParseEPUB,
|
|
'mobi': ParseMOBI,
|
|
'azw': ParseMOBI,
|
|
'azw3': ParseMOBI,
|
|
'azw4': ParseMOBI,
|
|
'prc': ParseMOBI,
|
|
'cbz': ParseCOMIC,
|
|
'cbr': ParseCOMIC}
|
|
|
|
available_parsers = [i for i in sorter]
|
|
progressbar = None # This is populated by __main__
|
|
progress_emitter = None # This is to be made into a global variable
|
|
|
|
|
|
class UpdateProgress(QtCore.QObject):
|
|
# This is for thread safety
|
|
update_signal = QtCore.pyqtSignal(int)
|
|
|
|
def connect_to_progressbar(self):
|
|
self.update_signal.connect(progressbar.setValue)
|
|
|
|
def update_progress(self, progress_percent):
|
|
self.update_signal.emit(progress_percent)
|
|
|
|
|
|
class BookSorter:
|
|
def __init__(self, file_list, mode, database_path, auto_tags=True, temp_dir=None):
|
|
# Have the GUI pass a list of files straight to here
|
|
# Then, on the basis of what is needed, pass the
|
|
# filenames to the requisite functions
|
|
# This includes getting file info for the database
|
|
# Parsing for the reader proper
|
|
# Caching upon closing
|
|
self.file_list = [i for i in file_list if os.path.exists(i)]
|
|
self.statistics = [0, (len(file_list))]
|
|
self.hashes_and_paths = {}
|
|
self.work_mode = mode[0]
|
|
self.addition_mode = mode[1]
|
|
self.database_path = database_path
|
|
self.auto_tags = auto_tags
|
|
self.temp_dir = temp_dir
|
|
if database_path:
|
|
self.database_hashes()
|
|
|
|
self.threading_completed = []
|
|
self.queue = Manager().Queue()
|
|
self.processed_books = []
|
|
|
|
if self.work_mode == 'addition':
|
|
progress_object_generator()
|
|
|
|
def database_hashes(self):
|
|
all_hashes_and_paths = database.DatabaseFunctions(
|
|
self.database_path).fetch_data(
|
|
('Hash', 'Path'),
|
|
'books',
|
|
{'Hash': ''},
|
|
'LIKE')
|
|
|
|
if all_hashes_and_paths:
|
|
# self.hashes = [i[0] for i in all_hashes]
|
|
self.hashes_and_paths = {
|
|
i[0]: i[1] for i in all_hashes_and_paths}
|
|
|
|
def database_entry_for_book(self, file_hash):
|
|
database_return = database.DatabaseFunctions(
|
|
self.database_path).fetch_data(
|
|
('Title', 'Author', 'Year', 'ISBN', 'Tags', 'Position', 'Bookmarks', 'CoverImage'),
|
|
'books',
|
|
{'Hash': file_hash},
|
|
'EQUALS')[0]
|
|
|
|
book_data = []
|
|
|
|
for count, i in enumerate(database_return):
|
|
if count in (5, 6):
|
|
if i:
|
|
book_data.append(pickle.loads(i))
|
|
else:
|
|
book_data.append(None)
|
|
else:
|
|
book_data.append(i)
|
|
|
|
return book_data
|
|
|
|
def read_book(self, filename):
|
|
# filename is expected as a string containg the
|
|
# full path of the ebook file
|
|
|
|
with open(filename, 'rb') as current_book:
|
|
# This should speed up addition for larger files
|
|
# without compromising the integrity of the process
|
|
first_bytes = current_book.read(1024 * 32) # First 32KB of the file
|
|
file_md5 = hashlib.md5(first_bytes).hexdigest()
|
|
|
|
# Update the progress queue
|
|
self.queue.put(filename)
|
|
|
|
# This should not get triggered in reading mode
|
|
# IF the file is NOT being loaded into the reader,
|
|
|
|
# Do not allow addition in case the file
|
|
# is already in the database and it remains at its original path
|
|
if self.work_mode == 'addition' and file_md5 in self.hashes_and_paths:
|
|
if (self.hashes_and_paths[file_md5] == filename
|
|
or os.path.exists(self.hashes_and_paths[file_md5])):
|
|
|
|
if not self.hashes_and_paths[file_md5] == filename:
|
|
print(f'{os.path.basename(filename)} is already in database')
|
|
return
|
|
|
|
file_extension = os.path.splitext(filename)[1][1:]
|
|
try:
|
|
# Get the requisite parser from the sorter dict
|
|
book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)
|
|
except KeyError:
|
|
print(filename + ' has an unsupported extension')
|
|
return
|
|
|
|
# Everything following this is standard
|
|
# None values are accounted for here
|
|
book_ref.read_book()
|
|
if book_ref.book:
|
|
|
|
|
|
this_book = {}
|
|
this_book[file_md5] = {
|
|
'hash': file_md5,
|
|
'path': filename}
|
|
|
|
# Different modes require different values
|
|
if self.work_mode == 'addition':
|
|
# Reduce the size of the incoming image
|
|
# if one is found
|
|
title = book_ref.get_title()
|
|
|
|
author = book_ref.get_author()
|
|
if not author:
|
|
author = 'Unknown'
|
|
|
|
year = book_ref.get_year()
|
|
|
|
isbn = book_ref.get_isbn()
|
|
|
|
tags = None
|
|
if self.auto_tags:
|
|
tags = book_ref.get_tags()
|
|
|
|
cover_image_raw = book_ref.get_cover_image()
|
|
if cover_image_raw:
|
|
cover_image = resize_image(cover_image_raw)
|
|
else:
|
|
cover_image = None
|
|
|
|
this_book[file_md5]['cover_image'] = cover_image
|
|
this_book[file_md5]['addition_mode'] = self.addition_mode
|
|
|
|
if self.work_mode == 'reading':
|
|
all_content = book_ref.get_contents()
|
|
|
|
# get_contents() returns a tuple. Index 1 is a collection of
|
|
# special settings that depend on the kind of data being parsed.
|
|
# Currently, this includes:
|
|
# Only images included images_only BOOL Specify only paths to images
|
|
# File will not be cached on exit
|
|
|
|
content = all_content[0]
|
|
images_only = all_content[1]['images_only']
|
|
|
|
if not content:
|
|
content = [('Invalid', 'Something went horribly wrong')]
|
|
|
|
book_data = self.database_entry_for_book(file_md5)
|
|
title = book_data[0]
|
|
author = book_data[1]
|
|
year = book_data[2]
|
|
isbn = book_data[3]
|
|
tags = book_data[4]
|
|
position = book_data[5]
|
|
bookmarks = book_data[6]
|
|
cover = book_data[7]
|
|
|
|
this_book[file_md5]['position'] = position
|
|
this_book[file_md5]['bookmarks'] = bookmarks
|
|
this_book[file_md5]['content'] = content
|
|
this_book[file_md5]['images_only'] = images_only
|
|
this_book[file_md5]['cover'] = cover
|
|
|
|
this_book[file_md5]['title'] = title
|
|
this_book[file_md5]['author'] = author
|
|
this_book[file_md5]['year'] = year
|
|
this_book[file_md5]['isbn'] = isbn
|
|
this_book[file_md5]['tags'] = tags
|
|
|
|
return this_book
|
|
|
|
def read_progress(self):
|
|
while True:
|
|
processed_file = self.queue.get()
|
|
self.threading_completed.append(processed_file)
|
|
|
|
total_number = len(self.file_list)
|
|
completed_number = len(self.threading_completed)
|
|
|
|
if progress_emitter: # Skip update in reading mode
|
|
progress_emitter.update_progress(
|
|
completed_number * 100 // total_number)
|
|
|
|
if total_number == completed_number:
|
|
break
|
|
|
|
def initiate_threads(self):
|
|
if not self.file_list:
|
|
return None
|
|
|
|
def pool_creator():
|
|
_pool = Pool(5)
|
|
self.processed_books = _pool.map(
|
|
self.read_book, self.file_list)
|
|
|
|
_pool.close()
|
|
_pool.join()
|
|
|
|
start_time = time.time()
|
|
|
|
worker_thread = threading.Thread(target=pool_creator)
|
|
progress_thread = threading.Thread(target=self.read_progress)
|
|
worker_thread.start()
|
|
progress_thread.start()
|
|
|
|
worker_thread.join()
|
|
progress_thread.join(timeout=.5)
|
|
|
|
return_books = {}
|
|
# Exclude None returns generated in case of duplication / parse errors
|
|
self.processed_books = [i for i in self.processed_books if i]
|
|
for i in self.processed_books:
|
|
for j in i:
|
|
return_books[j] = i[j]
|
|
|
|
del self.processed_books
|
|
print('Finished processing in', time.time() - start_time)
|
|
return return_books
|
|
|
|
|
|
def progress_object_generator():
|
|
# This has to be kept separate from the BookSorter class because
|
|
# the QtObject inheritance disallows pickling
|
|
global progress_emitter
|
|
progress_emitter = UpdateProgress()
|
|
progress_emitter.connect_to_progressbar()
|
|
|
|
|
|
def resize_image(cover_image_raw):
|
|
cover_image = QtGui.QImage()
|
|
cover_image.loadFromData(cover_image_raw)
|
|
cover_image = cover_image.scaled(
|
|
420, 600, QtCore.Qt.IgnoreAspectRatio)
|
|
|
|
byte_array = QtCore.QByteArray()
|
|
buffer = QtCore.QBuffer(byte_array)
|
|
buffer.open(QtCore.QIODevice.WriteOnly)
|
|
cover_image.save(buffer, 'jpg', 75)
|
|
|
|
cover_image_final = io.BytesIO(byte_array)
|
|
cover_image_final.seek(0)
|
|
return cover_image_final.getvalue()
|