From 164450a888bc4f5120024e871c02fd0759532eac Mon Sep 17 00:00:00 2001 From: BasioMeusPuga Date: Tue, 22 Jan 2019 22:36:32 +0530 Subject: [PATCH] Shift to MuPDF backend for pdf rendering --- TODO | 1 + lector/__main__.py | 4 +- lector/contentwidgets.py | 18 +++--- lector/parsers/pdf.py | 121 +++++++++++++++++++++++++-------------- lector/sorter.py | 8 +-- lector/threaded.py | 16 +++--- 6 files changed, 102 insertions(+), 66 deletions(-) diff --git a/TODO b/TODO index 3e9073b..770fc0a 100644 --- a/TODO +++ b/TODO @@ -93,6 +93,7 @@ TODO Clean up 'switch' page layout Colors aren't loaded properly for annotation previews Last line in QTextBrowser should never be cut off + Something is wrong with image alignment Secondary: Graphical themes diff --git a/lector/__main__.py b/lector/__main__.py index 8d4f5d8..4badc8f 100755 --- a/lector/__main__.py +++ b/lector/__main__.py @@ -1051,9 +1051,9 @@ def main(): QtCore.QLocale.system(), ':/translations/translations_bin/Lector_') app.installTranslator(translator) - translations_out_string = '(Translations found)' + translations_out_string = ' (Translations found)' if not translations_found: - translations_out_string = '(No translations found)' + translations_out_string = ' (No translations found)' print(f'Locale: {QtCore.QLocale.system().name()}' + translations_out_string) form = MainUI() diff --git a/lector/contentwidgets.py b/lector/contentwidgets.py index 702f5e2..9fe7b6b 100644 --- a/lector/contentwidgets.py +++ b/lector/contentwidgets.py @@ -20,13 +20,14 @@ import logging import webbrowser try: - import popplerqt5 + import fitz except ImportError: pass from PyQt5 import QtWidgets, QtGui, QtCore from lector.rarfile import rarfile +from lector.parsers.pdf import render_pdf_page from lector.threaded import BackGroundCacheRefill from lector.annotations import AnnotationPlacement @@ -40,7 +41,6 @@ class PliantQGraphicsView(QtWidgets.QGraphicsView): self.parent = parent self.main_window = main_window - self.qimage = None # Will be needed to resize pdf self.image_pixmap = None self.image_cache = [None for _ in range(4)] @@ -58,10 +58,7 @@ class PliantQGraphicsView(QtWidgets.QGraphicsView): self.book = rarfile.RarFile(self.filepath) elif self.filetype == 'pdf': - self.book = popplerqt5.Poppler.Document.load(self.filepath) - self.book.setRenderHint( - popplerqt5.Poppler.Document.Antialiasing - and popplerqt5.Poppler.Document.TextAntialiasing) + self.book = fitz.open(self.filepath) self.common_functions = PliantWidgetsCommonFunctions( self, self.main_window) @@ -86,15 +83,16 @@ class PliantQGraphicsView(QtWidgets.QGraphicsView): def load_page(current_page): def page_loader(page): - # TODO Maybe pdf image res needs a setting? pixmap = QtGui.QPixmap() + if self.filetype in ('cbz', 'cbr'): page_data = self.book.read(page) pixmap.loadFromData(page_data) + elif self.filetype == 'pdf': - page_data = self.book.page(current_page) - page_qimage = page_data.renderToImage(400, 400) - pixmap.convertFromImage(page_qimage) + page_data = self.book.loadPage(page) + pixmap = render_pdf_page(page_data) + return pixmap firstPixmap = page_loader(current_page) diff --git a/lector/parsers/pdf.py b/lector/parsers/pdf.py index eb38067..804acbe 100644 --- a/lector/parsers/pdf.py +++ b/lector/parsers/pdf.py @@ -20,77 +20,85 @@ import io import os -import logging -from PyQt5 import QtCore -from bs4 import BeautifulSoup - -import popplerqt5 - -logger = logging.getLogger(__name__) +import fitz +from PyQt5 import QtCore, QtGui class ParsePDF: def __init__(self, filename, *args): self.filename = filename self.book = None - self.metadata = None def read_book(self): - self.book = popplerqt5.Poppler.Document.load(self.filename) - if not self.book: + try: + self.book = fitz.open(self.filename) + return True + except RuntimeError: return False - self.metadata = BeautifulSoup(self.book.metadata(), 'xml') - return True - def get_title(self): - try: - title = self.metadata.find('title').text - return title.replace('\n', '') - except AttributeError: - return os.path.splitext(os.path.basename(self.filename))[0] + title = self.book.metadata['title'] + if not title: + title = os.path.splitext(os.path.basename(self.filename))[0] + return title def get_author(self): - try: - author = self.metadata.find('creator').text - return author.replace('\n', '') - except AttributeError: - return 'Unknown' + author = self.book.metadata['author'] + if not author: + author = 'Unknown' + return author def get_year(self): + creation_date = self.book.metadata['creationDate'] try: - year = self.metadata.find('MetadataDate').text - return int(year.replace('\n', '')[:4]) - except (AttributeError, ValueError): - return 9999 + year = creation_date.split(':')[1][:4] + except (ValueError, AttributeError): + year = 9999 + return year def get_cover_image(self): - self.book.setRenderHint( - popplerqt5.Poppler.Document.Antialiasing - and popplerqt5.Poppler.Document.TextAntialiasing) + # TODO + # See if there's any way to stop this roundabout way of + # getting a smaller QImage from a larger Pixmap + cover_page = self.book.loadPage(0) + coverPixmap = cover_page.getPixmap() + imageFormat = QtGui.QImage.Format_RGB888 + if coverPixmap.alpha: + imageFormat = QtGui.QImage.Format_RGBA8888 + coverQImage = QtGui.QImage( + coverPixmap.samples, + coverPixmap.width, + coverPixmap.height, + coverPixmap.stride, + imageFormat) - try: - cover_page = self.book.page(0) - cover_image = cover_page.renderToImage(300, 300) - return resize_image(cover_image) - except AttributeError: - return None + return resize_image(coverQImage) def get_isbn(self): return None def get_tags(self): - try: - tags = self.metadata.find('Keywords').text - return tags.replace('\n', '') - except AttributeError: - return None + tags = self.book.metadata['keywords'] + return tags # Fine if it returns None def get_contents(self): - file_settings = {'images_only': True} - contents = [(f'Page {i + 1}', i) for i in range(self.book.numPages())] + # Contents are to be returned as: + # Level, Title, Page Number + # Increasing the level number means the + # title is one level up in the tree + # TODO + # Better parsing of TOC + # contents = self.book.getToC() + # if not contents: + # contents = [ + # (1, f'Page {i + 1}', i) for i in range(self.book.pageCount)] + + # return contents, file_settings + + file_settings = {'images_only': True} + contents = [(f'Page {i + 1}', i) for i in range(self.book.pageCount)] return contents, file_settings @@ -106,3 +114,30 @@ def resize_image(cover_image): cover_image_final = io.BytesIO(byte_array) cover_image_final.seek(0) return cover_image_final.getvalue() + + +def render_pdf_page(page_data): + # Draw page contents on to a pixmap + pixmap = QtGui.QPixmap() + zoom_matrix = fitz.Matrix(4, 4) # Sets render quality + pagePixmap = page_data.getPixmap( + matrix=zoom_matrix) + imageFormat = QtGui.QImage.Format_RGB888 + if pagePixmap.alpha: + imageFormat = QtGui.QImage.Format_RGBA8888 + pageQImage = QtGui.QImage( + pagePixmap.samples, + pagePixmap.width, + pagePixmap.height, + pagePixmap.stride, + imageFormat) + pixmap.convertFromImage(pageQImage) + + # Draw page background + # Currently going with White - any color should be possible + finalPixmap = QtGui.QPixmap(pixmap.size()) + finalPixmap.fill(QtGui.QColor(QtCore.Qt.white)) + imagePainter = QtGui.QPainter(finalPixmap) + imagePainter.drawPixmap(0, 0, pixmap) + + return finalPixmap diff --git a/lector/sorter.py b/lector/sorter.py index 674b708..4d0ccce 100644 --- a/lector/sorter.py +++ b/lector/sorter.py @@ -62,13 +62,13 @@ sorter = { 'cbr': ParseCOMIC} # Check what dependencies are installed -# python-poppler-qt5 - Optional -poppler_check = importlib.util.find_spec('popplerqt5') -if poppler_check: +# pymupdf - Optional +mupdf_check = importlib.util.find_spec('fitz') +if mupdf_check: from lector.parsers.pdf import ParsePDF sorter['pdf'] = ParsePDF else: - error_string = 'python-poppler-qt5 is not installed. Will be unable to load PDFs.' + error_string = 'pymupdf is not installed. Will be unable to load PDFs.' print(error_string) logger.error(error_string) diff --git a/lector/threaded.py b/lector/threaded.py index 96de0ab..07fa21d 100644 --- a/lector/threaded.py +++ b/lector/threaded.py @@ -18,12 +18,13 @@ import os import re import logging import pathlib - from multiprocessing.dummy import Pool + from PyQt5 import QtCore, QtGui from lector import sorter from lector import database +from lector.parsers.pdf import render_pdf_page logger = logging.getLogger(__name__) @@ -149,16 +150,17 @@ class BackGroundCacheRefill(QtCore.QThread): def run(self): def load_page(current_page): - image_pixmap = QtGui.QPixmap() + pixmap = QtGui.QPixmap() if self.filetype in ('cbz', 'cbr'): page_data = self.book.read(current_page) - image_pixmap.loadFromData(page_data) + pixmap.loadFromData(page_data) + elif self.filetype == 'pdf': - page_data = self.book.page(current_page) - page_qimage = page_data.renderToImage(400, 400) # TODO Readjust - image_pixmap.convertFromImage(page_qimage) - return image_pixmap + page_data = self.book.loadPage(current_page) + pixmap = render_pdf_page(page_data) + + return pixmap remove_index = self.image_cache.index(self.remove_value)