maintenance: use modules properly

2018-03-10 19:09:19 +00:00
parent 79180885b5
commit 1cd6ff6b58
60 changed files with 26 additions and 27 deletions
--- a/parsers/__init__.py
+++ b/parsers/__init__.py
--- a/parsers/cbr.py
+++ b/parsers/cbr.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+
+# This file is a part of Lector, a Qt based ebook reader
+# Copyright (C) 2017 BasioMeusPuga
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# TODO
+# Account for files with passwords
+
+import os
+import time
+import collections
+from rarfile import rarfile
+
+
+class ParseCBR:
+    """Read metadata and pages from a RAR comic book archive (.cbr).
+
+    read_book() has to be called first: it opens the archive and stores
+    the handle in self.book, which get_cover_image() and get_contents()
+    both rely on.
+    """
+
+    def __init__(self, filename, temp_dir, file_md5):
+        """Store the archive location and the extraction target.
+
+        filename -- path of the archive on disk
+        temp_dir -- directory under which the archive gets extracted
+        file_md5 -- name of the extraction subdirectory inside temp_dir
+                    (presumably the MD5 hash of the file; confirm
+                    against the caller)
+        """
+        self.filename = filename
+        self.book = None  # Archive handle, set by read_book()
+        self.temp_dir = temp_dir
+        self.file_md5 = file_md5
+
+    def read_book(self):
+        """Open the archive; on failure print a message and leave
+        self.book as None."""
+        try:
+            self.book = rarfile.RarFile(self.filename)
+        except Exception:
+            # Deliberately broad: any failure to open means the file is
+            # unusable. Exception (instead of a bare except) still lets
+            # KeyboardInterrupt and SystemExit through.
+            print('Cannot parse ' + self.filename)
+            return
+
+    def get_title(self):
+        """Return the archive's file name without directory or extension."""
+        filename = os.path.basename(self.filename)
+        filename_proper = os.path.splitext(filename)[0]
+        return filename_proper
+
+    def get_author(self):
+        """Comic archives carry no author metadata; always None."""
+        return None
+
+    def get_year(self):
+        """Return the year of the file's ctime as a string.
+
+        Note that ctime is the creation time on Windows but the time of
+        the last metadata change on Unix.
+        """
+        creation_time = time.ctime(os.path.getctime(self.filename))
+        # The year is the last whitespace separated field of ctime()
+        creation_year = creation_time.split()[-1]
+        return creation_year
+
+    def get_cover_image(self):
+        """Return the raw bytes of the cover, or None for an archive
+        that contains no files."""
+        # The first image in the archive may not be the cover
+        # It is implied, however, that the first image in order
+        # will be the cover
+
+        entries = [i for i in self.book.infolist() if not i.isdir()]
+        if not entries:
+            return None
+
+        # min() gives the first entry with the smallest file name, which
+        # is the entry a sort followed by a lookup would end up with
+        cover_entry = min(entries, key=lambda entry: entry.filename)
+        return self.book.read(cover_entry)
+
+    def get_isbn(self):
+        """Comic archives carry no ISBN; always None."""
+        return None
+
+    def get_tags(self):
+        """Comic archives carry no tags; always None."""
+        return None
+
+    def get_contents(self):
+        """Extract the archive and list the extracted files as pages.
+
+        Returns a (contents, file_settings) tuple. contents is a list of
+        ('Page N', path) tuples ordered by path, or None when extraction
+        produced no files. file_settings is {'images_only': True}.
+        """
+        file_settings = {
+            'images_only': True}
+
+        extract_path = os.path.join(self.temp_dir, self.file_md5)
+
+        # I'm currently choosing not to keep multiple files in memory
+        self.book.extractall(extract_path)
+
+        # os.walk yields directory paths that already start with
+        # extract_path, so the joined names are usable as they are
+        found_images = []
+        for image_dir, _, file_names in os.walk(extract_path):
+            found_images.extend(
+                os.path.join(image_dir, j) for j in file_names)
+
+        if not found_images:
+            print('Found nothing in ' + self.filename)
+            return None, file_settings
+
+        found_images.sort()
+
+        # Joining with extract_path a second time would duplicate the
+        # prefix whenever temp_dir is a relative path
+        contents = [
+            ('Page ' + str(count), image_path)
+            for count, image_path in enumerate(found_images, 1)]
+
+        return contents, file_settings
--- a/parsers/cbz.py
+++ b/parsers/cbz.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+
+# This file is a part of Lector, a Qt based ebook reader
+# Copyright (C) 2017 BasioMeusPuga
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# TODO
+# Account for files with passwords
+
+import os
+import time
+import zipfile
+import collections
+
+
+class ParseCBZ:
+    """Read metadata and pages from a ZIP comic book archive (.cbz).
+
+    read_book() has to be called first: it opens the archive and stores
+    the handle in self.book, which get_cover_image() and get_contents()
+    both rely on.
+    """
+
+    def __init__(self, filename, temp_dir, file_md5):
+        """Store the archive location and the extraction target.
+
+        filename -- path of the archive on disk
+        temp_dir -- directory under which the archive gets extracted
+        file_md5 -- name of the extraction subdirectory inside temp_dir
+                    (presumably the MD5 hash of the file; confirm
+                    against the caller)
+        """
+        self.filename = filename
+        self.book = None  # zipfile.ZipFile handle, set by read_book()
+        self.temp_dir = temp_dir
+        self.file_md5 = file_md5
+
+    def read_book(self):
+        """Open the archive; on failure print a message and leave
+        self.book as None."""
+        try:
+            self.book = zipfile.ZipFile(
+                self.filename, mode='r', allowZip64=True)
+        except FileNotFoundError:
+            print('Invalid path for ' + self.filename)
+            return
+        except (KeyError, AttributeError, zipfile.BadZipFile):
+            print('Cannot parse ' + self.filename)
+            return
+
+    def get_title(self):
+        """Return the archive's file name without directory or extension."""
+        # self.filename is the same path the ZipFile was opened with, and
+        # unlike self.book.filename it is available even when read_book()
+        # could not open the archive
+        filename = os.path.basename(self.filename)
+        filename_proper = os.path.splitext(filename)[0]
+        return filename_proper
+
+    def get_author(self):
+        """Comic archives carry no author metadata; always None."""
+        return None
+
+    def get_year(self):
+        """Return the year of the file's ctime as a string.
+
+        Note that ctime is the creation time on Windows but the time of
+        the last metadata change on Unix.
+        """
+        creation_time = time.ctime(os.path.getctime(self.filename))
+        # The year is the last whitespace separated field of ctime()
+        creation_year = creation_time.split()[-1]
+        return creation_year
+
+    def get_cover_image(self):
+        """Return the raw bytes of the cover, or None for an archive
+        that contains no files."""
+        # The first image in the archive may not be the cover
+        # It is implied, however, that the first image in order
+        # will be the cover
+
+        entries = [i for i in self.book.infolist() if not i.is_dir()]
+        if not entries:
+            return None
+
+        # min() gives the first entry with the smallest file name, which
+        # is the entry a sort followed by a lookup would end up with
+        cover_entry = min(entries, key=lambda entry: entry.filename)
+        return self.book.read(cover_entry)
+
+    def get_isbn(self):
+        """Comic archives carry no ISBN; always None."""
+        return None
+
+    def get_tags(self):
+        """Comic archives carry no tags; always None."""
+        return None
+
+    def get_contents(self):
+        """Extract the archive and list the extracted files as pages.
+
+        Returns a (contents, file_settings) tuple. contents is a list of
+        ('Page N', path) tuples ordered by path, or None when extraction
+        produced no files. file_settings is {'images_only': True}.
+        """
+        file_settings = {
+            'images_only': True}
+
+        extract_path = os.path.join(self.temp_dir, self.file_md5)
+
+        # I'm currently choosing not to keep multiple files in memory
+        self.book.extractall(extract_path)
+
+        # os.walk yields directory paths that already start with
+        # extract_path, so the joined names are usable as they are
+        found_images = []
+        for image_dir, _, file_names in os.walk(extract_path):
+            found_images.extend(
+                os.path.join(image_dir, j) for j in file_names)
+
+        if not found_images:
+            print('Found nothing in ' + self.filename)
+            return None, file_settings
+
+        found_images.sort()
+
+        # Joining with extract_path a second time would duplicate the
+        # prefix whenever temp_dir is a relative path
+        contents = [
+            ('Page ' + str(count), image_path)
+            for count, image_path in enumerate(found_images, 1)]
+
+        return contents, file_settings
--- a/parsers/epub.py
+++ b/parsers/epub.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+
+# This file is a part of Lector, a Qt based ebook reader
+# Copyright (C) 2017 BasioMeusPuga
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import zipfile
+
+from ePub.read_epub import EPUB
+
+
+class ParseEPUB:
+    """Expose the metadata and chapters of an epub through the same
+    getter interface as the other parsers.
+
+    The parsing itself is delegated to ePub.read_epub.EPUB; read_book()
+    has to succeed before any getter is used, because the getters index
+    into self.book.
+    """
+
+    def __init__(self, filename, temp_dir, file_md5):
+        """Store the book location and the extraction target.
+
+        filename -- path of the epub on disk
+        temp_dir -- directory under which the epub gets extracted
+        file_md5 -- name of the extraction subdirectory inside temp_dir
+                    (presumably the MD5 hash of the file; confirm
+                    against the caller)
+        """
+        # TODO
+        # Maybe also include book description
+        self.book_ref = None  # EPUB reader object, set by read_book()
+        self.book = None  # Metadata mapping taken from the reader
+        self.temp_dir = temp_dir
+        self.filename = filename
+        self.file_md5 = file_md5
+
+    def read_book(self):
+        """Parse the epub; when the reader reports no contents, print a
+        message and leave self.book as None."""
+        self.book_ref = EPUB(self.filename)
+        contents_found = self.book_ref.read_epub()
+        if not contents_found:
+            print('Cannot process: ' + self.filename)
+            return
+        self.book = self.book_ref.book
+
+    def get_title(self):
+        """Return the 'title' entry of the parsed book."""
+        return self.book['title']
+
+    def get_author(self):
+        """Return the 'author' entry of the parsed book."""
+        return self.book['author']
+
+    def get_year(self):
+        """Return the 'year' entry of the parsed book."""
+        return self.book['year']
+
+    def get_cover_image(self):
+        """Return the 'cover' entry of the parsed book."""
+        return self.book['cover']
+
+    def get_isbn(self):
+        """Return the 'isbn' entry of the parsed book."""
+        return self.book['isbn']
+
+    def get_tags(self):
+        """Return the 'tags' entry of the parsed book."""
+        return self.book['tags']
+
+    def get_contents(self):
+        """Extract the epub and have the reader parse its chapters.
+
+        Returns a (book_list, file_settings) tuple, where book_list is
+        the 'book_list' entry of the parsed book and file_settings is
+        {'images_only': False}.
+        """
+        extract_path = os.path.join(self.temp_dir, self.file_md5)
+
+        # The with block closes the archive handle once extraction is
+        # done instead of leaving that to the garbage collector
+        with zipfile.ZipFile(self.filename) as epub_archive:
+            epub_archive.extractall(extract_path)
+
+        self.book_ref.parse_chapters(temp_dir=self.temp_dir)
+        file_settings = {
+            'images_only': False}
+        return self.book['book_list'], file_settings
--- a/parsers/mobi.py
+++ b/parsers/mobi.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+
+# This file is a part of Lector, a Qt based ebook reader
+# Copyright (C) 2017 BasioMeusPuga
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This module parses Amazon ebooks using KindleUnpack to first create an
+# epub that is then read the usual way
+
+import os
+import sys
+import shutil
+import zipfile
+
+from ePub.read_epub import EPUB
+import KindleUnpack.kindleunpack as KindleUnpack
+
+
+class ParseMOBI:
+    """Expose the metadata and chapters of an Amazon ebook through the
+    same getter interface as the other parsers.
+
+    read_book() unpacks the file with KindleUnpack and then reads the
+    resulting epub with ePub.read_epub.EPUB. It has to succeed before
+    any getter is used, because the getters index into self.book.
+    """
+
+    def __init__(self, filename, temp_dir, file_md5):
+        """Store the book location and the extraction target.
+
+        filename -- path of the ebook on disk
+        temp_dir -- directory under which the ebook gets unpacked
+        file_md5 -- name of the extraction subdirectory inside temp_dir
+                    (presumably the MD5 hash of the file; confirm
+                    against the caller)
+        """
+        self.book_ref = None  # EPUB reader object, set by read_book()
+        self.book = None  # Metadata mapping taken from the reader
+        self.filename = filename
+        self.epub_filepath = None  # Epub (or zip) produced by read_book()
+        self.split_large_xml = False  # Set when falling back to mobi7
+        self.temp_dir = temp_dir
+        self.extract_dir = os.path.join(temp_dir, file_md5)
+
+    def read_book(self):
+        """Unpack the ebook and parse the epub that results from it.
+
+        When the reader reports no contents, print a message and leave
+        self.book as None.
+        """
+        with HidePrinting():
+            KindleUnpack.unpackBook(self.filename, self.extract_dir)
+
+        epub_filename = os.path.splitext(
+            os.path.basename(self.filename))[0] + '.epub'
+
+        self.epub_filepath = os.path.join(
+            self.extract_dir, 'mobi8', epub_filename)
+        if not os.path.exists(self.epub_filepath):
+            # No epub under mobi8: zip up the mobi7 directory instead.
+            # make_archive appends '.zip' to the base name it is given
+            # and returns the path of the archive it created
+            zip_dir = os.path.join(self.extract_dir, 'mobi7')
+            zip_file = os.path.join(
+                self.extract_dir, epub_filename)
+            self.epub_filepath = shutil.make_archive(zip_file, 'zip', zip_dir)
+            self.split_large_xml = True
+
+        self.book_ref = EPUB(self.epub_filepath)
+        contents_found = self.book_ref.read_epub()
+        if not contents_found:
+            print('Cannot process: ' + self.filename)
+            return
+        self.book = self.book_ref.book
+
+    def get_title(self):
+        """Return the 'title' entry of the parsed book."""
+        return self.book['title']
+
+    def get_author(self):
+        """Return the 'author' entry of the parsed book."""
+        return self.book['author']
+
+    def get_year(self):
+        """Return the 'year' entry of the parsed book."""
+        return self.book['year']
+
+    def get_cover_image(self):
+        """Return the 'cover' entry of the parsed book."""
+        return self.book['cover']
+
+    def get_isbn(self):
+        """Return the 'isbn' entry of the parsed book."""
+        return self.book['isbn']
+
+    def get_tags(self):
+        """Return the 'tags' entry of the parsed book."""
+        return self.book['tags']
+
+    def get_contents(self):
+        """Extract the generated epub and have the reader parse its
+        chapters.
+
+        Returns a (book_list, file_settings) tuple, where book_list is
+        the 'book_list' entry of the parsed book and file_settings is
+        {'images_only': False}.
+        """
+        # The with block closes the archive handle once extraction is
+        # done instead of leaving that to the garbage collector
+        with zipfile.ZipFile(self.epub_filepath) as epub_archive:
+            epub_archive.extractall(self.extract_dir)
+
+        self.book_ref.parse_chapters(
+            temp_dir=self.temp_dir, split_large_xml=self.split_large_xml)
+        file_settings = {
+            'images_only': False}
+        return self.book['book_list'], file_settings
+
+
+class HidePrinting:
+    """Context manager that discards everything written to sys.stdout
+    and puts the previous stream back on exit."""
+
+    def __enter__(self):
+        self._original_stdout = sys.stdout
+        # A real stream on the null device, rather than None, so that
+        # direct sys.stdout.write() / flush() calls keep working too.
+        # errors='replace' keeps unencodable text from raising.
+        self._devnull = open(os.devnull, 'w', errors='replace')
+        sys.stdout = self._devnull
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Restore first so that stdout is usable again even if closing
+        # the null device fails; exceptions from the body propagate
+        sys.stdout = self._original_stdout
+        self._devnull.close()