Preliminary pdf support

Consolidate comic book modules
No longer write comics to the temp dir
This commit is contained in:
BasioMeusPuga
2018-03-16 18:46:38 +05:30
parent fc2fcb5361
commit 5b3759afe6
9 changed files with 242 additions and 289 deletions

6
TODO
View File

@@ -24,7 +24,7 @@ TODO
✓ Context menu: Cache, Read, Edit database, delete, Mark read/unread
✓ Information dialog widget
✓ Allow editing of database data through the UI + for Bookmarks
Include (action) icons with the applications
Include (action) icons with the applications
Set focus to newly added file
Reading:
✓ Drop down for TOC
@@ -54,6 +54,8 @@ TODO
Search document using QTextCursor?
Comic view keyboard shortcuts
Filetypes:
✓ pdf support
Parse TOC
✓ epub support
✓ Homegrown solution please
✓ cbz, cbr support
@@ -67,9 +69,9 @@ TODO
If there are files open and the database is deleted, TypeErrors result
Cover culling does not occur if some other tab has initial focus
Slider position change might be acting up too
Take metadata from the database when opening the file
Secondary:
pdf support
Annotations
Graphical themes
Change focus rectangle dimensions

View File

@@ -44,20 +44,21 @@ from PyQt5 import QtCore, QtGui
from lector import database
from parsers.cbz import ParseCBZ
from parsers.cbr import ParseCBR
from parsers.pdf import ParsePDF
from parsers.epub import ParseEPUB
from parsers.mobi import ParseMOBI
from parsers.comicbooks import ParseCOMIC
sorter = {
'pdf': ParsePDF,
'epub': ParseEPUB,
'mobi': ParseMOBI,
'azw': ParseMOBI,
'azw3': ParseMOBI,
'azw4': ParseMOBI,
'prc': ParseMOBI,
'cbz': ParseCBZ,
'cbr': ParseCBR,}
'cbz': ParseCOMIC,
'cbr': ParseCOMIC}
available_parsers = [i for i in sorter]
progressbar = None # This is populated by __main__

View File

@@ -24,8 +24,12 @@
import os
import uuid
import zipfile
from PyQt5 import QtWidgets, QtGui, QtCore
import popplerqt5
from rarfile import rarfile
from lector.models import BookmarkProxyModel
from lector.sorter import resize_image
from lector.delegates import BookmarkDelegate
@@ -61,7 +65,8 @@ class Tab(QtWidgets.QWidget):
# we want a QGraphicsView widget doing all the heavy lifting
# instead of a QTextBrowser
if self.are_we_doing_images_only: # Boolean
self.contentView = PliantQGraphicsView(self.window(), self)
self.contentView = PliantQGraphicsView(
self.metadata['path'], self.window(), self)
self.contentView.loadImage(chapter_content)
else:
self.contentView = PliantQTextBrowser(self.window(), self)
@@ -465,78 +470,57 @@ class Tab(QtWidgets.QWidget):
class PliantQGraphicsView(QtWidgets.QGraphicsView):
def __init__(self, main_window, parent=None):
def __init__(self, filepath, main_window, parent=None):
super(PliantQGraphicsView, self).__init__(parent)
self.main_window = main_window
self.parent = parent
self.qimage = None # Will be needed to resize pdf
self.image_pixmap = None
self.ignore_wheel_event = False
self.ignore_wheel_event_number = 0
self.setDragMode(QtWidgets.QGraphicsView.ScrollHandDrag)
self.viewport().setCursor(QtCore.Qt.ArrowCursor)
self.filepath = filepath
self.filetype = os.path.splitext(self.filepath)[1][1:]
if self.filetype == 'cbz':
self.book = zipfile.ZipFile(self.filepath)
elif self.filetype == 'cbr':
self.book = rarfile.RarFile(self.filepath)
elif self.filetype == 'pdf':
self.book = popplerqt5.Poppler.Document.load(self.filepath)
self.book.setRenderHint(
popplerqt5.Poppler.Document.Antialiasing
and popplerqt5.Poppler.Document.TextAntialiasing)
self.common_functions = PliantWidgetsCommonFunctions(
self, self.main_window)
self.setMouseTracking(True)
self.image_cache = [None for _ in range(4)]
def loadImage(self, current_image):
# TODO
# For double page view: 1 before, 1 after
# Image panning with mouse
self.ignore_wheel_event = False
self.ignore_wheel_event_number = 0
self.setMouseTracking(True)
self.setDragMode(QtWidgets.QGraphicsView.ScrollHandDrag)
self.viewport().setCursor(QtCore.Qt.ArrowCursor)
content = self.parent.metadata['content']
image_paths = [i[1] for i in content]
def loadImage(self, current_page):
# TODO
# Threaded caching will still work here
# Look at a commit where it's not been deleted
# For double page view: 1 before, 1 after
def generate_image_cache(current_image):
print('Building image cache')
current_image_index = image_paths.index(current_image)
self.image_pixmap = QtGui.QPixmap()
for i in (-1, 0, 1, 2):
try:
this_path = image_paths[current_image_index + i]
this_pixmap = QtGui.QPixmap()
this_pixmap.load(this_path)
self.image_cache[i + 1] = (this_path, this_pixmap)
except IndexError:
self.image_cache[i + 1] = None
if self.filetype in ('cbz', 'cbr'):
page_data = self.book.read(current_page)
self.image_pixmap.loadFromData(page_data)
def refill_cache(remove_value):
remove_index = self.image_cache.index(remove_value)
refill_pixmap = QtGui.QPixmap()
if self.filetype == 'pdf':
page_data = self.book.page(current_page)
page_qimage = page_data.renderToImage(350, 350)
self.image_pixmap.convertFromImage(page_qimage)
if remove_index == 1:
first_path = self.image_cache[0][0]
self.image_cache.pop(3)
previous_path = image_paths[image_paths.index(first_path) - 1]
refill_pixmap.load(previous_path)
self.image_cache.insert(0, (previous_path, refill_pixmap))
else:
self.image_cache[0] = self.image_cache[1]
self.image_cache.pop(1)
try:
last_path = self.image_cache[2][0]
next_path = image_paths[image_paths.index(last_path) + 1]
refill_pixmap.load(next_path)
self.image_cache.append((next_path, refill_pixmap))
except (IndexError, TypeError):
self.image_cache.append(None)
def check_cache(current_image):
for i in self.image_cache:
if i:
if i[0] == current_image:
return_pixmap = i[1]
refill_cache(i)
return return_pixmap
# No return happened so the image isn't in the cache
generate_image_cache(current_image)
return_pixmap = None
while not return_pixmap:
return_pixmap = check_cache(current_image)
self.image_pixmap = return_pixmap
self.resizeEvent()
def resizeEvent(self, *args):

View File

@@ -1,106 +0,0 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# TODO
# Account for files with passwords
import os
import time
import collections
from rarfile import rarfile
class ParseCBR:
    """Metadata and page extractor for RAR based comic archives (.cbr).

    Args:
        filename: Path of the archive on disk.
        temp_dir: Directory below which the archive is extracted.
        file_md5: Name of the per-file sub directory created in temp_dir.
    """

    def __init__(self, filename, temp_dir, file_md5):
        self.filename = filename
        self.book = None  # Stays None until read_book() succeeds
        self.temp_dir = temp_dir
        self.file_md5 = file_md5

    def read_book(self):
        """Open the archive; on failure report it and leave self.book as None."""
        try:
            self.book = rarfile.RarFile(self.filename)
        except Exception:
            # Deliberately broad: any failure to open just means the file
            # is skipped. Exception (rather than a bare except) keeps
            # KeyboardInterrupt and SystemExit propagating.
            print('Cannot parse ' + self.filename)

    def get_title(self):
        """Return the file name without its directory and extension."""
        filename = os.path.basename(self.filename)
        return os.path.splitext(filename)[0]

    def get_author(self):
        """Comic archives carry no author information."""
        return None

    def get_year(self):
        """Return the year of the file's ctime as a string."""
        creation_time = time.ctime(os.path.getctime(self.filename))
        return creation_time.split()[-1]

    def get_cover_image(self):
        """Return the raw bytes of the cover, or None if there is none.

        The first image in the archive may not be the cover. It is implied,
        however, that the first image in (sorted) order will be the cover.
        """
        if self.book is None:
            return None

        members = [i for i in self.book.infolist() if not i.isdir()]
        if not members:
            # Archive holds directories only (or nothing at all)
            return None

        cover_member = min(members, key=lambda member: member.filename)
        return self.book.read(cover_member)

    def get_isbn(self):
        return None

    def get_tags(self):
        return None

    def get_contents(self):
        """Extract the archive and list its files as pages.

        Returns:
            A (contents, file_settings) tuple. contents is a list of
            ('Page N', path) tuples sorted by path, or None when the archive
            could not be opened or no files were extracted.
        """
        file_settings = {
            'images_only': True}

        if self.book is None:
            return None, file_settings

        extract_path = os.path.join(self.temp_dir, self.file_md5)

        # I'm currently choosing not to keep multiple files in memory
        self.book.extractall(extract_path)

        found_images = []
        for image_dir, _, file_names in os.walk(extract_path):
            # os.walk already yields directories prefixed with extract_path.
            # Joining onto extract_path a second time would duplicate the
            # prefix whenever temp_dir is a relative path.
            found_images.extend(
                os.path.join(image_dir, name) for name in file_names)

        if not found_images:
            print('Found nothing in ' + self.filename)
            return None, file_settings

        found_images.sort()
        contents = [
            ('Page ' + str(number), image_path)
            for number, image_path in enumerate(found_images, 1)]

        return contents, file_settings

View File

@@ -1,109 +0,0 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# TODO
# Account for files with passwords
import os
import time
import zipfile
import collections
class ParseCBZ:
    """Metadata and page extractor for ZIP based comic archives (.cbz).

    Args:
        filename: Path of the archive on disk.
        temp_dir: Directory below which the archive is extracted.
        file_md5: Name of the per-file sub directory created in temp_dir.
    """

    def __init__(self, filename, temp_dir, file_md5):
        self.filename = filename
        self.book = None  # Stays None until read_book() succeeds
        self.temp_dir = temp_dir
        self.file_md5 = file_md5

    def read_book(self):
        """Open the archive; on failure report it and leave self.book as None."""
        try:
            self.book = zipfile.ZipFile(self.filename, mode='r', allowZip64=True)
        except FileNotFoundError:
            print('Invalid path for ' + self.filename)
        except (KeyError, AttributeError, OSError, zipfile.BadZipFile):
            # OSError covers unreadable files and directories, which would
            # otherwise abort the caller instead of skipping this file
            print('Cannot parse ' + self.filename)

    def get_title(self):
        """Return the file name without its directory and extension."""
        # Uses self.filename (the path handed to ZipFile) so that the title
        # is still available when the archive could not be opened
        filename = os.path.basename(self.filename)
        return os.path.splitext(filename)[0]

    def get_author(self):
        """Comic archives carry no author information."""
        return None

    def get_year(self):
        """Return the year of the file's ctime as a string."""
        creation_time = time.ctime(os.path.getctime(self.filename))
        return creation_time.split()[-1]

    def get_cover_image(self):
        """Return the raw bytes of the cover, or None if there is none.

        The first image in the archive may not be the cover. It is implied,
        however, that the first image in (sorted) order will be the cover.
        """
        if self.book is None:
            return None

        members = [i for i in self.book.infolist() if not i.is_dir()]
        if not members:
            # Archive holds directories only (or nothing at all)
            return None

        cover_member = min(members, key=lambda member: member.filename)
        return self.book.read(cover_member)

    def get_isbn(self):
        return None

    def get_tags(self):
        return None

    def get_contents(self):
        """Extract the archive and list its files as pages.

        Returns:
            A (contents, file_settings) tuple. contents is a list of
            ('Page N', path) tuples sorted by path, or None when the archive
            could not be opened or no files were extracted.
        """
        file_settings = {
            'images_only': True}

        if self.book is None:
            return None, file_settings

        extract_path = os.path.join(self.temp_dir, self.file_md5)

        # I'm currently choosing not to keep multiple files in memory
        self.book.extractall(extract_path)

        found_images = []
        for image_dir, _, file_names in os.walk(extract_path):
            # os.walk already yields directories prefixed with extract_path.
            # Joining onto extract_path a second time would duplicate the
            # prefix whenever temp_dir is a relative path.
            found_images.extend(
                os.path.join(image_dir, name) for name in file_names)

        if not found_images:
            print('Found nothing in ' + self.filename)
            return None, file_settings

        found_images.sort()
        contents = [
            ('Page ' + str(number), image_path)
            for number, image_path in enumerate(found_images, 1)]

        return contents, file_settings

78
parsers/comicbooks.py Normal file
View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017-18 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# TODO
# Account for files with passwords
import os
import time
import zipfile
from rarfile import rarfile
class ParseCOMIC:
    """Parser for comic book archives (.cbz via zipfile, .cbr via rarfile).

    Pages are referred to by their member name inside the archive; nothing
    is extracted to disk by this class.

    Args:
        filename: Path of the archive on disk.
        *args: Ignored. Accepted so that the constructor can be called with
            the same arguments as the other parsers.
    """

    def __init__(self, filename, *args):
        self.filename = filename
        self.book = None  # Stays None until read_book() succeeds
        self.image_list = None  # Sorted member names, set by read_book()
        # (path without extension, extension including the leading dot)
        self.book_extension = os.path.splitext(self.filename)

    def read_book(self):
        """Open the archive and collect the sorted names of its files.

        On any failure the problem is reported on stdout and image_list is
        left as None.
        """
        # Match the extension case-insensitively so that e.g. '.CBZ' works
        extension = self.book_extension[1].lower()

        try:
            if extension == '.cbz':
                self.book = zipfile.ZipFile(
                    self.filename, mode='r', allowZip64=True)
                self.image_list = [
                    i.filename for i in self.book.infolist()
                    if not i.is_dir()]

            elif extension == '.cbr':
                self.book = rarfile.RarFile(self.filename)
                self.image_list = [
                    i.filename for i in self.book.infolist()
                    if not i.isdir()]

            else:
                print('Cannot parse ' + self.filename)
                return

            self.image_list.sort()

        except Exception:
            # Deliberately broad: any failure just means the file is
            # skipped. Exception (rather than a bare except) keeps
            # KeyboardInterrupt and SystemExit propagating.
            print('Cannot parse ' + self.filename)
            return

    def get_title(self):
        """Return the file name without its directory and extension."""
        # book_extension[0] still contains the directory part of the path
        return os.path.basename(self.book_extension[0])

    def get_author(self):
        """Comic archives carry no author information."""
        return None

    def get_year(self):
        """Return the year of the file's ctime as a string."""
        creation_time = time.ctime(os.path.getctime(self.filename))
        creation_year = creation_time.split()[-1]
        return creation_year

    def get_cover_image(self):
        """Return the raw bytes of the cover, or None if there is none.

        The first image in the archive may not be the cover. It is implied,
        however, that the first image in order will be the cover.
        """
        if not self.image_list:
            # Archive could not be read, or it contains no files
            return None
        return self.book.read(self.image_list[0])

    def get_isbn(self):
        return None

    def get_tags(self):
        return None

    def get_contents(self):
        """Return ([('Page N', member_name), ...], file_settings).

        The list is empty when the archive could not be read.
        """
        file_settings = {'images_only': True}
        page_names = self.image_list or []
        contents = [
            (f'Page {number}', name)
            for number, name in enumerate(page_names, 1)]

        return contents, file_settings

View File

@@ -28,9 +28,8 @@ class ParseEPUB:
# Maybe also include book description
self.book_ref = None
self.book = None
self.temp_dir = temp_dir
self.filename = filename
self.file_md5 = file_md5
self.extract_path = os.path.join(temp_dir, file_md5)
def read_book(self):
self.book_ref = EPUB(self.filename)
@@ -59,10 +58,9 @@ class ParseEPUB:
return self.book['tags']
def get_contents(self):
extract_path = os.path.join(self.temp_dir, self.file_md5)
zipfile.ZipFile(self.filename).extractall(extract_path)
zipfile.ZipFile(self.filename).extractall(self.extract_path)
self.book_ref.parse_chapters(temp_dir=self.temp_dir)
self.book_ref.parse_chapters(temp_dir=self.extract_path)
file_settings = {
'images_only': False}
return self.book['book_list'], file_settings

104
parsers/pdf.py Normal file
View File

@@ -0,0 +1,104 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2018 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import io
from PyQt5 import QtCore
from bs4 import BeautifulSoup
# The poppler bindings are looked up once at import time. When they are
# missing, the flag below is False and ParsePDF.read_book() does nothing.
try:
    import popplerqt5
except ImportError:
    print('python-poppler-qt5 is not installed. Pdf files will not work.')
    proceed = False
else:
    proceed = True
class ParsePDF:
    """Parser for pdf files, backed by the poppler Qt5 bindings.

    Args:
        filename: Path of the pdf on disk.
        *args: Ignored. Accepted so that the constructor can be called with
            the same arguments as the other parsers.
    """

    def __init__(self, filename, *args):
        self.filename = filename
        self.book = None  # Poppler document; None until read_book() succeeds
        self.metadata = None  # Parsed document metadata; None until loaded

    def read_book(self):
        """Load the document and parse its metadata.

        Does nothing when popplerqt5 is unavailable. self.metadata is only
        set when the document loads.
        """
        if not proceed:
            return

        self.book = popplerqt5.Poppler.Document.load(self.filename)
        if not self.book:
            return

        self.metadata = BeautifulSoup(self.book.metadata(), 'xml')

    def _metadata_text(self, tag_name):
        """Return the newline-free text of a metadata tag, or None if absent."""
        try:
            return self.metadata.find(tag_name).text.replace('\n', '')
        except AttributeError:
            # Either no metadata was loaded or the tag does not exist
            return None

    def get_title(self):
        """Return the 'title' metadata entry, or 'Unknown'."""
        title = self._metadata_text('title')
        return 'Unknown' if title is None else title

    def get_author(self):
        """Return the 'creator' metadata entry, or 'Unknown'."""
        author = self._metadata_text('creator')
        return 'Unknown' if author is None else author

    def get_year(self):
        """Return the four digit year of 'MetadataDate', or 9999.

        NOTE(review): assumes MetadataDate starts with a four digit year
        (e.g. 2018-03-16T18:46:38+05:30) - confirm against real files.
        """
        metadata_date = self._metadata_text('MetadataDate')
        if metadata_date is None:
            return 9999

        # Keep only the year; the tag holds a full timestamp
        year = metadata_date.strip()[:4]
        if len(year) == 4 and year.isdigit():
            return year
        return 9999

    def get_cover_image(self):
        """Render the first page and return it as resized image data.

        Returns None when the document was not loaded or has no first page.
        """
        if self.book is None:
            return None

        # Each hint has to be enabled separately. 'A and B' evaluates to B
        # alone, which left plain antialiasing switched off.
        self.book.setRenderHint(popplerqt5.Poppler.Document.Antialiasing)
        self.book.setRenderHint(popplerqt5.Poppler.Document.TextAntialiasing)

        cover_page = self.book.page(0)
        if cover_page is None:
            # presumably what poppler returns for a document without pages;
            # TODO confirm
            return None

        cover_image = cover_page.renderToImage(300, 300)
        return resize_image(cover_image)

    def get_isbn(self):
        return None

    def get_tags(self):
        """Return the 'Keywords' metadata entry, or None."""
        return self._metadata_text('Keywords')

    def get_contents(self):
        """Return ([('Page N', page_index), ...], file_settings).

        Page indices are zero based. The contents are None when the
        document was not loaded.
        """
        file_settings = {'images_only': True}
        if self.book is None:
            return None, file_settings

        contents = [(f'Page {i + 1}', i) for i in range(self.book.numPages())]

        return contents, file_settings
def resize_image(cover_image):
    """Scale an image to 420x600 and return it encoded as jpg bytes.

    The aspect ratio is not preserved. cover_image is expected to offer
    scaled() and save() the way a QImage does.

    Returns:
        The encoded image as bytes. Nothing is written to the buffer if
        saving fails, so the result is empty in that case.
    """
    cover_image = cover_image.scaled(
        420, 600, QtCore.Qt.IgnoreAspectRatio)

    byte_array = QtCore.QByteArray()
    buffer = QtCore.QBuffer(byte_array)
    buffer.open(QtCore.QIODevice.WriteOnly)
    try:
        cover_image.save(buffer, 'jpg', 75)
    finally:
        # Release the device even if encoding raises
        buffer.close()

    # QByteArray.data() already yields bytes; no BytesIO detour required
    return byte_array.data()

View File

@@ -5,8 +5,8 @@ from setuptools import setup, find_packages
HERE = path.abspath(path.dirname(__file__))
MAJOR_VERSION = '0'
MINOR_VERSION = '1'
MICRO_VERSION = '2'
MINOR_VERSION = '2'
MICRO_VERSION = '0'
VERSION = "{}.{}.{}".format(MAJOR_VERSION, MINOR_VERSION, MICRO_VERSION)
# Get the long description from the README file
@@ -15,7 +15,8 @@ with codecs.open(path.join(HERE, 'README.md'), encoding='utf-8') as f:
INSTALL_DEPS = ['PyQt5>=5.10.1',
'requests>=2.18.4',
'beautifulsoup4>=4.6.0']
'beautifulsoup4>=4.6.0',
'python-poppler-qt5>=0.24.2']
TEST_DEPS = ['pytest',
'unittest2']
DEV_DEPS = []
@@ -46,7 +47,7 @@ setup(
],
# What does your project relate to?
keywords='qt ebook epub kindle mobi',
keywords='qt ebook epub kindle mobi comic cbz cbr pdf',
packages=find_packages(),