maintenance: use modules properly
This commit is contained in:
0
parsers/__init__.py
Normal file
0
parsers/__init__.py
Normal file
106
parsers/cbr.py
Normal file
106
parsers/cbr.py
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# This file is a part of Lector, a Qt based ebook reader
|
||||
# Copyright (C) 2017 BasioMeusPuga
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# TODO
|
||||
# Account for files with passwords
|
||||
|
||||
import os
|
||||
import time
|
||||
import collections
|
||||
from rarfile import rarfile
|
||||
|
||||
|
||||
class ParseCBR:
|
||||
def __init__(self, filename, temp_dir, file_md5):
|
||||
self.filename = filename
|
||||
self.book = None
|
||||
self.temp_dir = temp_dir
|
||||
self.file_md5 = file_md5
|
||||
|
||||
def read_book(self):
|
||||
try:
|
||||
self.book = rarfile.RarFile(self.filename)
|
||||
except: # Specifying no exception types might be warranted here
|
||||
print('Cannot parse ' + self.filename)
|
||||
return
|
||||
|
||||
def get_title(self):
|
||||
filename = os.path.basename(self.filename)
|
||||
filename_proper = os.path.splitext(filename)[0]
|
||||
return filename_proper
|
||||
|
||||
def get_author(self):
|
||||
return None
|
||||
|
||||
def get_year(self):
|
||||
creation_time = time.ctime(os.path.getctime(self.filename))
|
||||
creation_year = creation_time.split()[-1]
|
||||
return creation_year
|
||||
|
||||
def get_cover_image(self):
|
||||
# The first image in the archive may not be the cover
|
||||
# It is implied, however, that the first image in order
|
||||
# will be the cover
|
||||
|
||||
image_list = [i.filename for i in self.book.infolist() if not i.isdir()]
|
||||
image_list.sort()
|
||||
cover_image_filename = image_list[0]
|
||||
|
||||
for i in self.book.infolist():
|
||||
if not i.isdir():
|
||||
if i.filename == cover_image_filename:
|
||||
cover_image = self.book.read(i)
|
||||
return cover_image
|
||||
|
||||
def get_isbn(self):
|
||||
return
|
||||
|
||||
def get_tags(self):
|
||||
return
|
||||
|
||||
def get_contents(self):
|
||||
file_settings = {
|
||||
'images_only': True}
|
||||
|
||||
extract_path = os.path.join(self.temp_dir, self.file_md5)
|
||||
contents = []
|
||||
|
||||
# I'm currently choosing not to keep multiple files in memory
|
||||
self.book.extractall(extract_path)
|
||||
|
||||
found_images = []
|
||||
for i in os.walk(extract_path):
|
||||
if i[2]: # Implies files were found
|
||||
image_dir = i[0]
|
||||
add_path_to_file = [
|
||||
os.path.join(image_dir, j) for j in i[2]]
|
||||
found_images.extend(add_path_to_file)
|
||||
|
||||
if not found_images:
|
||||
print('Found nothing in ' + self.filename)
|
||||
return None, file_settings
|
||||
|
||||
found_images.sort()
|
||||
|
||||
for count, i in enumerate(found_images):
|
||||
page_name = 'Page ' + str(count + 1)
|
||||
image_path = os.path.join(extract_path, i)
|
||||
|
||||
contents.append((page_name, image_path))
|
||||
|
||||
return contents, file_settings
|
109
parsers/cbz.py
Normal file
109
parsers/cbz.py
Normal file
@@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# This file is a part of Lector, a Qt based ebook reader
|
||||
# Copyright (C) 2017 BasioMeusPuga
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# TODO
|
||||
# Account for files with passwords
|
||||
|
||||
import os
|
||||
import time
|
||||
import zipfile
|
||||
import collections
|
||||
|
||||
|
||||
class ParseCBZ:
|
||||
def __init__(self, filename, temp_dir, file_md5):
|
||||
self.filename = filename
|
||||
self.book = None
|
||||
self.temp_dir = temp_dir
|
||||
self.file_md5 = file_md5
|
||||
|
||||
def read_book(self):
|
||||
try:
|
||||
self.book = zipfile.ZipFile(self.filename, mode='r', allowZip64=True)
|
||||
except FileNotFoundError:
|
||||
print('Invalid path for ' + self.filename)
|
||||
return
|
||||
except (KeyError, AttributeError, zipfile.BadZipFile):
|
||||
print('Cannot parse ' + self.filename)
|
||||
return
|
||||
|
||||
def get_title(self):
|
||||
filename = os.path.basename(self.book.filename)
|
||||
filename_proper = os.path.splitext(filename)[0]
|
||||
return filename_proper
|
||||
|
||||
def get_author(self):
|
||||
return None
|
||||
|
||||
def get_year(self):
|
||||
creation_time = time.ctime(os.path.getctime(self.filename))
|
||||
creation_year = creation_time.split()[-1]
|
||||
return creation_year
|
||||
|
||||
def get_cover_image(self):
|
||||
# The first image in the archive may not be the cover
|
||||
# It is implied, however, that the first image in order
|
||||
# will be the cover
|
||||
|
||||
image_list = [i.filename for i in self.book.infolist() if not i.is_dir()]
|
||||
image_list.sort()
|
||||
cover_image_filename = image_list[0]
|
||||
|
||||
for i in self.book.infolist():
|
||||
if not i.is_dir():
|
||||
if i.filename == cover_image_filename:
|
||||
cover_image = self.book.read(i)
|
||||
return cover_image
|
||||
|
||||
def get_isbn(self):
|
||||
return
|
||||
|
||||
def get_tags(self):
|
||||
return
|
||||
|
||||
def get_contents(self):
|
||||
file_settings = {
|
||||
'images_only': True}
|
||||
|
||||
extract_path = os.path.join(self.temp_dir, self.file_md5)
|
||||
contents = []
|
||||
|
||||
# I'm currently choosing not to keep multiple files in memory
|
||||
self.book.extractall(extract_path)
|
||||
|
||||
found_images = []
|
||||
for i in os.walk(extract_path):
|
||||
if i[2]: # Implies files were found
|
||||
image_dir = i[0]
|
||||
add_path_to_file = [
|
||||
os.path.join(image_dir, j) for j in i[2]]
|
||||
found_images.extend(add_path_to_file)
|
||||
|
||||
if not found_images:
|
||||
print('Found nothing in ' + self.filename)
|
||||
return None, file_settings
|
||||
|
||||
found_images.sort()
|
||||
|
||||
for count, i in enumerate(found_images):
|
||||
page_name = 'Page ' + str(count + 1)
|
||||
image_path = os.path.join(extract_path, i)
|
||||
|
||||
contents.append((page_name, image_path))
|
||||
|
||||
return contents, file_settings
|
68
parsers/epub.py
Normal file
68
parsers/epub.py
Normal file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# This file is a part of Lector, a Qt based ebook reader
|
||||
# Copyright (C) 2017 BasioMeusPuga
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
import zipfile
|
||||
|
||||
from ePub.read_epub import EPUB
|
||||
|
||||
|
||||
class ParseEPUB:
|
||||
def __init__(self, filename, temp_dir, file_md5):
|
||||
# TODO
|
||||
# Maybe also include book description
|
||||
self.book_ref = None
|
||||
self.book = None
|
||||
self.temp_dir = temp_dir
|
||||
self.filename = filename
|
||||
self.file_md5 = file_md5
|
||||
|
||||
def read_book(self):
|
||||
self.book_ref = EPUB(self.filename)
|
||||
contents_found = self.book_ref.read_epub()
|
||||
if not contents_found:
|
||||
print('Cannot process: ' + self.filename)
|
||||
return
|
||||
self.book = self.book_ref.book
|
||||
|
||||
def get_title(self):
|
||||
return self.book['title']
|
||||
|
||||
def get_author(self):
|
||||
return self.book['author']
|
||||
|
||||
def get_year(self):
|
||||
return self.book['year']
|
||||
|
||||
def get_cover_image(self):
|
||||
return self.book['cover']
|
||||
|
||||
def get_isbn(self):
|
||||
return self.book['isbn']
|
||||
|
||||
def get_tags(self):
|
||||
return self.book['tags']
|
||||
|
||||
def get_contents(self):
|
||||
extract_path = os.path.join(self.temp_dir, self.file_md5)
|
||||
zipfile.ZipFile(self.filename).extractall(extract_path)
|
||||
|
||||
self.book_ref.parse_chapters(temp_dir=self.temp_dir)
|
||||
file_settings = {
|
||||
'images_only': False}
|
||||
return self.book['book_list'], file_settings
|
98
parsers/mobi.py
Normal file
98
parsers/mobi.py
Normal file
@@ -0,0 +1,98 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# This file is a part of Lector, a Qt based ebook reader
|
||||
# Copyright (C) 2017 BasioMeusPuga
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
# This module parses Amazon ebooks using KindleUnpack to first create an
|
||||
# epub that is then read the usual way
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import zipfile
|
||||
|
||||
from ePub.read_epub import EPUB
|
||||
import KindleUnpack.kindleunpack as KindleUnpack
|
||||
|
||||
|
||||
class ParseMOBI:
|
||||
def __init__(self, filename, temp_dir, file_md5):
|
||||
self.book_ref = None
|
||||
self.book = None
|
||||
self.filename = filename
|
||||
self.epub_filepath = None
|
||||
self.split_large_xml = False
|
||||
self.temp_dir = temp_dir
|
||||
self.extract_dir = os.path.join(temp_dir, file_md5)
|
||||
|
||||
def read_book(self):
|
||||
with HidePrinting():
|
||||
KindleUnpack.unpackBook(self.filename, self.extract_dir)
|
||||
|
||||
epub_filename = os.path.splitext(
|
||||
os.path.basename(self.filename))[0] + '.epub'
|
||||
|
||||
self.epub_filepath = os.path.join(
|
||||
self.extract_dir, 'mobi8', epub_filename)
|
||||
if not os.path.exists(self.epub_filepath):
|
||||
zip_dir = os.path.join(self.extract_dir, 'mobi7')
|
||||
zip_file = os.path.join(
|
||||
self.extract_dir, epub_filename)
|
||||
self.epub_filepath = shutil.make_archive(zip_file, 'zip', zip_dir)
|
||||
self.split_large_xml = True
|
||||
|
||||
self.book_ref = EPUB(self.epub_filepath)
|
||||
contents_found = self.book_ref.read_epub()
|
||||
if not contents_found:
|
||||
print('Cannot process: ' + self.filename)
|
||||
return
|
||||
self.book = self.book_ref.book
|
||||
|
||||
def get_title(self):
|
||||
return self.book['title']
|
||||
|
||||
def get_author(self):
|
||||
return self.book['author']
|
||||
|
||||
def get_year(self):
|
||||
return self.book['year']
|
||||
|
||||
def get_cover_image(self):
|
||||
return self.book['cover']
|
||||
|
||||
def get_isbn(self):
|
||||
return self.book['isbn']
|
||||
|
||||
def get_tags(self):
|
||||
return self.book['tags']
|
||||
|
||||
def get_contents(self):
|
||||
extract_path = os.path.join(self.extract_dir)
|
||||
zipfile.ZipFile(self.epub_filepath).extractall(extract_path)
|
||||
|
||||
self.book_ref.parse_chapters(
|
||||
temp_dir=self.temp_dir, split_large_xml=self.split_large_xml)
|
||||
file_settings = {
|
||||
'images_only': False}
|
||||
return self.book['book_list'], file_settings
|
||||
|
||||
class HidePrinting:
|
||||
def __enter__(self):
|
||||
self._original_stdout = sys.stdout
|
||||
sys.stdout = None
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
sys.stdout = self._original_stdout
|
Reference in New Issue
Block a user