Cleanup parsers
This commit is contained in:
@@ -21,6 +21,7 @@ import os
|
||||
import time
|
||||
import logging
|
||||
import zipfile
|
||||
import collections
|
||||
|
||||
from lector.rarfile import rarfile
|
||||
|
||||
@@ -35,54 +36,36 @@ class ParseCOMIC:
|
||||
self.book_extension = os.path.splitext(self.filename)
|
||||
|
||||
def read_book(self):
|
||||
try:
|
||||
if self.book_extension[1] == '.cbz':
|
||||
self.book = zipfile.ZipFile(
|
||||
self.filename, mode='r', allowZip64=True)
|
||||
self.image_list = [
|
||||
i.filename for i in self.book.infolist()
|
||||
if not i.is_dir() and is_image(i.filename)]
|
||||
if self.book_extension[1] == '.cbz':
|
||||
self.book = zipfile.ZipFile(
|
||||
self.filename, mode='r', allowZip64=True)
|
||||
self.image_list = [
|
||||
i.filename for i in self.book.infolist()
|
||||
if not i.is_dir() and is_image(i.filename)]
|
||||
|
||||
elif self.book_extension[1] == '.cbr':
|
||||
self.book = rarfile.RarFile(self.filename)
|
||||
self.image_list = [
|
||||
i.filename for i in self.book.infolist()
|
||||
if not i.isdir() and is_image(i.filename)]
|
||||
elif self.book_extension[1] == '.cbr':
|
||||
self.book = rarfile.RarFile(self.filename)
|
||||
self.image_list = [
|
||||
i.filename for i in self.book.infolist()
|
||||
if not i.isdir() and is_image(i.filename)]
|
||||
|
||||
self.image_list.sort()
|
||||
if not self.image_list:
|
||||
return False
|
||||
self.image_list.sort()
|
||||
|
||||
return True
|
||||
|
||||
except: # Specifying no exception here is warranted
|
||||
return False
|
||||
|
||||
def get_title(self):
|
||||
def generate_metadata(self):
|
||||
title = os.path.basename(self.book_extension[0]).strip(' ')
|
||||
return title
|
||||
author = '<Unknown>'
|
||||
isbn = None
|
||||
tags = []
|
||||
cover = self.book.read(self.image_list[0])
|
||||
|
||||
def get_author(self):
|
||||
return 'Unknown'
|
||||
|
||||
def get_year(self):
|
||||
creation_time = time.ctime(os.path.getctime(self.filename))
|
||||
creation_year = creation_time.split()[-1]
|
||||
return creation_year
|
||||
year = creation_time.split()[-1]
|
||||
|
||||
def get_cover_image(self):
|
||||
# The first image in the archive may not be the cover
|
||||
# It is implied, however, that the first image in order
|
||||
# will be the cover
|
||||
return self.book.read(self.image_list[0])
|
||||
Metadata = collections.namedtuple(
|
||||
'Metadata', ['title', 'author', 'year', 'isbn', 'tags', 'cover'])
|
||||
return Metadata(title, author, year, isbn, tags, cover)
|
||||
|
||||
def get_isbn(self):
|
||||
return None
|
||||
|
||||
def get_tags(self):
|
||||
return None
|
||||
|
||||
def get_contents(self):
|
||||
def generate_content(self):
|
||||
image_number = len(self.image_list)
|
||||
toc = [(1, f'Page {i + 1}', i + 1) for i in range(image_number)]
|
||||
|
||||
|
@@ -14,6 +14,9 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# TODO
|
||||
# Maybe also include book description
|
||||
|
||||
import os
|
||||
import zipfile
|
||||
import logging
|
||||
@@ -25,47 +28,27 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class ParseEPUB:
|
||||
def __init__(self, filename, temp_dir, file_md5):
|
||||
# TODO
|
||||
# Maybe also include book description
|
||||
self.book_ref = None
|
||||
self.book = None
|
||||
self.temp_dir = temp_dir
|
||||
self.filename = filename
|
||||
self.temp_dir = temp_dir
|
||||
self.extract_path = os.path.join(temp_dir, file_md5)
|
||||
|
||||
def read_book(self):
|
||||
self.book_ref = EPUB(self.filename, self.temp_dir)
|
||||
self.book_ref.generate_metadata()
|
||||
self.book = self.book_ref.book
|
||||
return True
|
||||
self.book = EPUB(self.filename, self.temp_dir)
|
||||
|
||||
def get_title(self):
|
||||
return self.book['title']
|
||||
def generate_metadata(self):
|
||||
self.book.generate_metadata()
|
||||
return self.book.metadata
|
||||
|
||||
def get_author(self):
|
||||
return self.book['author']
|
||||
|
||||
def get_year(self):
|
||||
return self.book['year']
|
||||
|
||||
def get_cover_image(self):
|
||||
return self.book['cover']
|
||||
|
||||
def get_isbn(self):
|
||||
return self.book['isbn']
|
||||
|
||||
def get_tags(self):
|
||||
return self.book['tags']
|
||||
|
||||
def get_contents(self):
|
||||
def generate_content(self):
|
||||
zipfile.ZipFile(self.filename).extractall(self.extract_path)
|
||||
|
||||
self.book_ref.generate_toc()
|
||||
self.book_ref.generate_content()
|
||||
self.book.generate_toc()
|
||||
self.book.generate_content()
|
||||
|
||||
toc = []
|
||||
content = []
|
||||
for count, i in enumerate(self.book['content']):
|
||||
for count, i in enumerate(self.book.content):
|
||||
toc.append((i[0], i[1], count + 1))
|
||||
content.append(i[2])
|
||||
|
||||
|
@@ -14,6 +14,9 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# TODO
|
||||
# Maybe also include book description
|
||||
|
||||
import os
|
||||
import logging
|
||||
|
||||
@@ -24,46 +27,24 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class ParseFB2:
|
||||
def __init__(self, filename, temp_dir, file_md5):
|
||||
# TODO
|
||||
# Maybe also include book description
|
||||
self.book_ref = None
|
||||
self.book = None
|
||||
self.filename = filename
|
||||
self.extract_path = os.path.join(temp_dir, file_md5)
|
||||
|
||||
def read_book(self):
|
||||
self.book_ref = FB2(self.filename)
|
||||
contents_found = self.book_ref.read_fb2()
|
||||
if not contents_found:
|
||||
return False
|
||||
self.book = self.book_ref.book
|
||||
return True
|
||||
self.book = FB2(self.filename)
|
||||
|
||||
def get_title(self):
|
||||
return self.book['title']
|
||||
def generate_metadata(self):
|
||||
self.book.generate_metadata()
|
||||
return self.book.metadata
|
||||
|
||||
def get_author(self):
|
||||
return self.book['author']
|
||||
|
||||
def get_year(self):
|
||||
return self.book['year']
|
||||
|
||||
def get_cover_image(self):
|
||||
return self.book['cover']
|
||||
|
||||
def get_isbn(self):
|
||||
return self.book['isbn']
|
||||
|
||||
def get_tags(self):
|
||||
return self.book['tags']
|
||||
|
||||
def get_contents(self):
|
||||
def generate_content(self):
|
||||
os.makedirs(self.extract_path, exist_ok=True) # Manual creation is required here
|
||||
self.book_ref.parse_chapters(temp_dir=self.extract_path)
|
||||
self.book.generate_content(temp_dir=self.extract_path)
|
||||
|
||||
toc = []
|
||||
content = []
|
||||
for count, i in enumerate(self.book['book_list']):
|
||||
for count, i in enumerate(self.book.content):
|
||||
toc.append((i[0], i[1], count + 1))
|
||||
content.append(i[2])
|
||||
|
||||
|
@@ -14,11 +14,8 @@
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# TODO
|
||||
# Error handling
|
||||
# TOC parsing
|
||||
|
||||
import os
|
||||
import collections
|
||||
|
||||
import fitz
|
||||
from PyQt5 import QtGui
|
||||
@@ -36,43 +33,39 @@ class ParsePDF:
|
||||
except RuntimeError:
|
||||
return False
|
||||
|
||||
def get_title(self):
|
||||
def generate_metadata(self):
|
||||
title = self.book.metadata['title']
|
||||
if not title:
|
||||
title = os.path.splitext(os.path.basename(self.filename))[0]
|
||||
return title
|
||||
|
||||
def get_author(self):
|
||||
author = self.book.metadata['author']
|
||||
if not author:
|
||||
author = 'Unknown'
|
||||
return author
|
||||
|
||||
def get_year(self):
|
||||
creation_date = self.book.metadata['creationDate']
|
||||
try:
|
||||
year = creation_date.split(':')[1][:4]
|
||||
except (ValueError, AttributeError):
|
||||
year = 9999
|
||||
return year
|
||||
|
||||
def get_cover_image(self):
|
||||
isbn = None
|
||||
|
||||
tags = self.book.metadata['keywords']
|
||||
if not tags:
|
||||
tags = []
|
||||
|
||||
# This is a little roundabout for the cover
|
||||
# and I'm sure it's taking a performance hit
|
||||
# But it is simple. So there's that.
|
||||
cover_page = self.book.loadPage(0)
|
||||
|
||||
# Disabling scaling gets the covers much faster
|
||||
return render_pdf_page(cover_page, True)
|
||||
cover = render_pdf_page(cover_page, True)
|
||||
|
||||
def get_isbn(self):
|
||||
return None
|
||||
Metadata = collections.namedtuple(
|
||||
'Metadata', ['title', 'author', 'year', 'isbn', 'tags', 'cover'])
|
||||
return Metadata(title, author, year, isbn, tags, cover)
|
||||
|
||||
def get_tags(self):
|
||||
tags = self.book.metadata['keywords']
|
||||
return tags # Fine if it returns None
|
||||
|
||||
def get_contents(self):
|
||||
def generate_content(self):
|
||||
content = list(range(self.book.pageCount))
|
||||
toc = self.book.getToC()
|
||||
if not toc:
|
||||
|
Reference in New Issue
Block a user