Shift to MuPDF backend for pdf rendering

This commit is contained in:
BasioMeusPuga
2019-01-22 22:36:32 +05:30
parent 191ea7ef3a
commit 164450a888
6 changed files with 102 additions and 66 deletions

1
TODO
View File

@@ -93,6 +93,7 @@ TODO
Clean up 'switch' page layout Clean up 'switch' page layout
Colors aren't loaded properly for annotation previews Colors aren't loaded properly for annotation previews
Last line in QTextBrowser should never be cut off Last line in QTextBrowser should never be cut off
Something is wrong with image alignment
Secondary: Secondary:
Graphical themes Graphical themes

View File

@@ -20,13 +20,14 @@ import logging
import webbrowser import webbrowser
try: try:
import popplerqt5 import fitz
except ImportError: except ImportError:
pass pass
from PyQt5 import QtWidgets, QtGui, QtCore from PyQt5 import QtWidgets, QtGui, QtCore
from lector.rarfile import rarfile from lector.rarfile import rarfile
from lector.parsers.pdf import render_pdf_page
from lector.threaded import BackGroundCacheRefill from lector.threaded import BackGroundCacheRefill
from lector.annotations import AnnotationPlacement from lector.annotations import AnnotationPlacement
@@ -40,7 +41,6 @@ class PliantQGraphicsView(QtWidgets.QGraphicsView):
self.parent = parent self.parent = parent
self.main_window = main_window self.main_window = main_window
self.qimage = None # Will be needed to resize pdf
self.image_pixmap = None self.image_pixmap = None
self.image_cache = [None for _ in range(4)] self.image_cache = [None for _ in range(4)]
@@ -58,10 +58,7 @@ class PliantQGraphicsView(QtWidgets.QGraphicsView):
self.book = rarfile.RarFile(self.filepath) self.book = rarfile.RarFile(self.filepath)
elif self.filetype == 'pdf': elif self.filetype == 'pdf':
self.book = popplerqt5.Poppler.Document.load(self.filepath) self.book = fitz.open(self.filepath)
self.book.setRenderHint(
popplerqt5.Poppler.Document.Antialiasing
and popplerqt5.Poppler.Document.TextAntialiasing)
self.common_functions = PliantWidgetsCommonFunctions( self.common_functions = PliantWidgetsCommonFunctions(
self, self.main_window) self, self.main_window)
@@ -86,15 +83,16 @@ class PliantQGraphicsView(QtWidgets.QGraphicsView):
def load_page(current_page): def load_page(current_page):
def page_loader(page): def page_loader(page):
# TODO Maybe pdf image res needs a setting?
pixmap = QtGui.QPixmap() pixmap = QtGui.QPixmap()
if self.filetype in ('cbz', 'cbr'): if self.filetype in ('cbz', 'cbr'):
page_data = self.book.read(page) page_data = self.book.read(page)
pixmap.loadFromData(page_data) pixmap.loadFromData(page_data)
elif self.filetype == 'pdf': elif self.filetype == 'pdf':
page_data = self.book.page(current_page) page_data = self.book.loadPage(page)
page_qimage = page_data.renderToImage(400, 400) pixmap = render_pdf_page(page_data)
pixmap.convertFromImage(page_qimage)
return pixmap return pixmap
firstPixmap = page_loader(current_page) firstPixmap = page_loader(current_page)

View File

@@ -20,77 +20,85 @@
import io import io
import os import os
import logging
from PyQt5 import QtCore import fitz
from bs4 import BeautifulSoup from PyQt5 import QtCore, QtGui
import popplerqt5
logger = logging.getLogger(__name__)
class ParsePDF: class ParsePDF:
def __init__(self, filename, *args): def __init__(self, filename, *args):
self.filename = filename self.filename = filename
self.book = None self.book = None
self.metadata = None
def read_book(self): def read_book(self):
self.book = popplerqt5.Poppler.Document.load(self.filename) try:
if not self.book: self.book = fitz.open(self.filename)
return True
except RuntimeError:
return False return False
self.metadata = BeautifulSoup(self.book.metadata(), 'xml')
return True
def get_title(self): def get_title(self):
try: title = self.book.metadata['title']
title = self.metadata.find('title').text if not title:
return title.replace('\n', '') title = os.path.splitext(os.path.basename(self.filename))[0]
except AttributeError: return title
return os.path.splitext(os.path.basename(self.filename))[0]
def get_author(self): def get_author(self):
try: author = self.book.metadata['author']
author = self.metadata.find('creator').text if not author:
return author.replace('\n', '') author = 'Unknown'
except AttributeError: return author
return 'Unknown'
def get_year(self): def get_year(self):
creation_date = self.book.metadata['creationDate']
try: try:
year = self.metadata.find('MetadataDate').text year = creation_date.split(':')[1][:4]
return int(year.replace('\n', '')[:4]) except (ValueError, AttributeError):
except (AttributeError, ValueError): year = 9999
return 9999 return year
def get_cover_image(self): def get_cover_image(self):
self.book.setRenderHint( # TODO
popplerqt5.Poppler.Document.Antialiasing # See if there's any way to stop this roundabout way of
and popplerqt5.Poppler.Document.TextAntialiasing) # getting a smaller QImage from a larger Pixmap
cover_page = self.book.loadPage(0)
coverPixmap = cover_page.getPixmap()
imageFormat = QtGui.QImage.Format_RGB888
if coverPixmap.alpha:
imageFormat = QtGui.QImage.Format_RGBA8888
coverQImage = QtGui.QImage(
coverPixmap.samples,
coverPixmap.width,
coverPixmap.height,
coverPixmap.stride,
imageFormat)
try: return resize_image(coverQImage)
cover_page = self.book.page(0)
cover_image = cover_page.renderToImage(300, 300)
return resize_image(cover_image)
except AttributeError:
return None
def get_isbn(self): def get_isbn(self):
return None return None
def get_tags(self): def get_tags(self):
try: tags = self.book.metadata['keywords']
tags = self.metadata.find('Keywords').text return tags # Fine if it returns None
return tags.replace('\n', '')
except AttributeError:
return None
def get_contents(self): def get_contents(self):
file_settings = {'images_only': True} # Contents are to be returned as:
contents = [(f'Page {i + 1}', i) for i in range(self.book.numPages())] # Level, Title, Page Number
# Increasing the level number means the
# title is one level up in the tree
# TODO
# Better parsing of TOC
# contents = self.book.getToC()
# if not contents:
# contents = [
# (1, f'Page {i + 1}', i) for i in range(self.book.pageCount)]
# return contents, file_settings
file_settings = {'images_only': True}
contents = [(f'Page {i + 1}', i) for i in range(self.book.pageCount)]
return contents, file_settings return contents, file_settings
@@ -106,3 +114,30 @@ def resize_image(cover_image):
cover_image_final = io.BytesIO(byte_array) cover_image_final = io.BytesIO(byte_array)
cover_image_final.seek(0) cover_image_final.seek(0)
return cover_image_final.getvalue() return cover_image_final.getvalue()
def render_pdf_page(page_data, zoom=4):
    """Rasterize a MuPDF page and return it as a white-backed QPixmap.

    Args:
        page_data: The page object to draw; callers obtain it from
            ``loadPage`` on a fitz document.
        zoom: Scale factor applied to both axes of the render matrix.
            Higher values give a sharper (and larger) image. Defaults
            to 4, the value that used to be hard-coded here.

    Returns:
        A QtGui.QPixmap the same size as the rendered page, with the page
        contents painted on top of a solid white background.
    """
    # Draw page contents on to a pixmap
    zoom_matrix = fitz.Matrix(zoom, zoom)  # Sets render quality
    pagePixmap = page_data.getPixmap(matrix=zoom_matrix)

    # MuPDF hands back 3 bytes per pixel, or 4 when an alpha channel exists
    imageFormat = QtGui.QImage.Format_RGB888
    if pagePixmap.alpha:
        imageFormat = QtGui.QImage.Format_RGBA8888

    # Keep a named reference to the raw samples: Qt documents that a QImage
    # built from an external buffer requires that buffer to stay valid for
    # the lifetime of the image, so don't rely on a temporary here.
    samples = pagePixmap.samples
    pageQImage = QtGui.QImage(
        samples,
        pagePixmap.width,
        pagePixmap.height,
        pagePixmap.stride,
        imageFormat)

    pixmap = QtGui.QPixmap()
    pixmap.convertFromImage(pageQImage)

    # Draw page background
    # Currently going with White - any color should be possible
    finalPixmap = QtGui.QPixmap(pixmap.size())
    finalPixmap.fill(QtGui.QColor(QtCore.Qt.white))

    imagePainter = QtGui.QPainter(finalPixmap)
    imagePainter.drawPixmap(0, 0, pixmap)
    # Finish painting explicitly before the pixmap leaves this function
    # instead of depending on garbage collection to tear the painter down.
    imagePainter.end()

    return finalPixmap

View File

@@ -62,13 +62,13 @@ sorter = {
'cbr': ParseCOMIC} 'cbr': ParseCOMIC}
# Check what dependencies are installed # Check what dependencies are installed
# python-poppler-qt5 - Optional # pymupdf - Optional
poppler_check = importlib.util.find_spec('popplerqt5') mupdf_check = importlib.util.find_spec('fitz')
if poppler_check: if mupdf_check:
from lector.parsers.pdf import ParsePDF from lector.parsers.pdf import ParsePDF
sorter['pdf'] = ParsePDF sorter['pdf'] = ParsePDF
else: else:
error_string = 'python-poppler-qt5 is not installed. Will be unable to load PDFs.' error_string = 'pymupdf is not installed. Will be unable to load PDFs.'
print(error_string) print(error_string)
logger.error(error_string) logger.error(error_string)

View File

@@ -18,12 +18,13 @@ import os
import re import re
import logging import logging
import pathlib import pathlib
from multiprocessing.dummy import Pool from multiprocessing.dummy import Pool
from PyQt5 import QtCore, QtGui from PyQt5 import QtCore, QtGui
from lector import sorter from lector import sorter
from lector import database from lector import database
from lector.parsers.pdf import render_pdf_page
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -149,16 +150,17 @@ class BackGroundCacheRefill(QtCore.QThread):
def run(self): def run(self):
def load_page(current_page): def load_page(current_page):
image_pixmap = QtGui.QPixmap() pixmap = QtGui.QPixmap()
if self.filetype in ('cbz', 'cbr'): if self.filetype in ('cbz', 'cbr'):
page_data = self.book.read(current_page) page_data = self.book.read(current_page)
image_pixmap.loadFromData(page_data) pixmap.loadFromData(page_data)
elif self.filetype == 'pdf': elif self.filetype == 'pdf':
page_data = self.book.page(current_page) page_data = self.book.loadPage(current_page)
page_qimage = page_data.renderToImage(400, 400) # TODO Readjust pixmap = render_pdf_page(page_data)
image_pixmap.convertFromImage(page_qimage)
return image_pixmap return pixmap
remove_index = self.image_cache.index(self.remove_value) remove_index = self.image_cache.index(self.remove_value)