From a0e463bc58b1b6a5be95c08775cf354eae4321b7 Mon Sep 17 00:00:00 2001
From: BasioMeusPuga <disgruntled.mob@gmail.com>
Date: Thu, 14 Jun 2018 16:10:27 -0400
Subject: [PATCH] Speed up file addition; improve fb2 parser; fix extension
 checking

---
 TODO                        |  5 ++--
 lector/parsers/epub.py      |  1 +
 lector/parsers/fb2.py       |  9 ++-----
 lector/readers/read_epub.py |  1 -
 lector/readers/read_fb2.py  | 54 ++++++++++++++++++++++++++-----------
 lector/sorter.py            | 18 ++++++++-----
 6 files changed, 56 insertions(+), 32 deletions(-)

diff --git a/TODO b/TODO
index 9c09326..9dfd4cb 100644
--- a/TODO
+++ b/TODO
@@ -77,6 +77,8 @@ TODO
         ✓ mobi, azw support
             Limit the extra files produced by KindleUnpack
                 Have them save to memory
+        ✓ fb2 support
+            Images need to show up in their placeholders
     Other:
         ✓ Define every widget in code
     Bugs:
@@ -84,7 +86,6 @@ TODO
         Clean up 'switch' page layout
         Colors aren't loaded properly for annotation previews
         Cover page shouldn't be scolled midway
-        It's possible the addition function is also parsing the whole book.
 
     Secondary:
         Graphical themes
@@ -98,7 +99,7 @@ TODO
         Use embedded fonts + CSS
         Scrolling: Smooth / By Line
         Shift to logging instead of print statements
-        txt, doc, chm, djvu, fb2 support
+        txt, doc, chm, djvu support
         Include icons for filetype emblems
         Comic view modes
             Continuous paging
diff --git a/lector/parsers/epub.py b/lector/parsers/epub.py
index 39da0d9..58eaa8c 100644
--- a/lector/parsers/epub.py
+++ b/lector/parsers/epub.py
@@ -58,6 +58,7 @@ class ParseEPUB:
     def get_contents(self):
         zipfile.ZipFile(self.filename).extractall(self.extract_path)
 
+        self.book_ref.parse_toc()
         self.book_ref.parse_chapters(temp_dir=self.extract_path)
         file_settings = {
             'images_only': False}
diff --git a/lector/parsers/fb2.py b/lector/parsers/fb2.py
index f777a6b..ffb5a8f 100644
--- a/lector/parsers/fb2.py
+++ b/lector/parsers/fb2.py
@@ -15,7 +15,6 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import os
-import zipfile
 
 from lector.readers.read_fb2 import FB2
 
@@ -56,12 +55,8 @@ class ParseFB2:
         return self.book['tags']
 
     def get_contents(self):
-        # TODO
-        # Make this save images to the temp path
-        # Relative file paths should then point there
-        # zipfile.ZipFile(self.filename).extractall(self.extract_path)
-
-        # self.book_ref.parse_chapters(temp_dir=self.extract_path)
+        os.makedirs(self.extract_path, exist_ok=True)  # parse_chapters writes images here
+        self.book_ref.parse_chapters(temp_dir=self.extract_path)
         file_settings = {
             'images_only': False}
         return self.book['book_list'], file_settings
diff --git a/lector/readers/read_epub.py b/lector/readers/read_epub.py
index 537d6a1..62b7113 100644
--- a/lector/readers/read_epub.py
+++ b/lector/readers/read_epub.py
@@ -40,7 +40,6 @@ class EPUB:
                 return False  # No (valid) opf was found so processing cannot continue
 
             self.generate_book_metadata(contents_path)
-            self.parse_toc()
         except:  # Not specifying an exception type here may be justified
             return False
 
diff --git a/lector/readers/read_fb2.py b/lector/readers/read_fb2.py
index e54a7ea..62a4d44 100644
--- a/lector/readers/read_fb2.py
+++ b/lector/readers/read_fb2.py
@@ -48,25 +48,29 @@ class FB2:
         return True
 
     def generate_book_metadata(self):
-        self.book['title'] = os.path.splitext(
-            os.path.basename(self.filename))[0]
-        self.book['author'] = 'Unknown'
         self.book['isbn'] = None
         self.book['tags'] = None
         self.book['cover'] = None
-        self.book['year'] = 9999
         self.book['book_list'] = []
 
-        # TODO
-        # Look for other components of book metadata here
-        for i in self.xml.find_all():
+        # Metadata lives in <description>; search the whole document if it is absent
+        all_tags = self.xml.find('description') or self.xml
 
-            if i.name == 'section':
-                for j in i:
-                    if j.name == 'title':
-                        this_title = j.text
-                self.book['book_list'].append(
-                    (this_title, str(i)))
+        # find() returns None for a missing tag, so guard before reading its text
+        title_tag = all_tags.find('book-title')
+        self.book['title'] = title_tag.text if title_tag is not None else ''
+        if not self.book['title']:  # Fall back to the file name without its extension
+            self.book['title'] = os.path.splitext(os.path.basename(self.filename))[0]
+
+        author_tag = all_tags.find('author')
+        author = '' if author_tag is None else author_tag.getText(separator=' ')
+        self.book['author'] = author.replace('\n', ' ') or 'Unknown'
+
+        # TODO: Account for other date formats
+        try:
+            self.book['year'] = int(all_tags.find('date').text)
+        except (AttributeError, ValueError):  # No <date> tag, or not a bare year
+            self.book['year'] = 9999
 
         # Cover Image
         cover_image_xml = self.xml.find('coverpage')
@@ -75,8 +79,26 @@ class FB2:
 
         cover_image_data = self.xml.find_all('binary')
         for i in cover_image_data:
-
-            # TODO
-            # Account for other images as well
             if cover_image_name.endswith(i.get('id')):
                 self.book['cover'] = base64.decodebytes(i.text.encode())
+
+    def parse_chapters(self, temp_dir):
+        # Sections are linear, so the TOC needs no separate pass
+        for i in self.xml.find_all('section'):
+            this_title = '<No title>'  # Reset: never unbound or left over from the last section
+            for j in i:
+                if j.name == 'title':
+                    this_title = j.getText(separator=' ')
+            self.book['book_list'].append((this_title, str(i)))
+
+        # Extract all images to temp_dir; basename() keeps a crafted id inside it
+        for i in self.xml.find_all('binary'):
+            this_image_name = os.path.basename(i.get('id') or '')
+            if this_image_name in ('', '.', '..'):
+                continue  # No usable file name for this image
+            try:
+                this_image_data = base64.decodebytes(i.text.encode())
+                with open(os.path.join(temp_dir, this_image_name), 'wb') as outimage:
+                    outimage.write(this_image_data)
+            except (AttributeError, ValueError):  # binascii.Error is a ValueError
+                pass
diff --git a/lector/sorter.py b/lector/sorter.py
index b016798..077344b 100644
--- a/lector/sorter.py
+++ b/lector/sorter.py
@@ -175,15 +175,21 @@ class BookSorter:
                     print(f'{os.path.basename(filename)} is already in database')
                 return
 
-        # Using os.extsep like so allows for file extensions with multiple dots
-        file_extension = os.path.basename(filename).split(os.extsep, 1)[1]
-        try:
-            # Get the requisite parser from the sorter dict
-            book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)
-        except KeyError:
+        # Match '<extsep><extension>' as a suffix: this copes with dots in the
+        # filename and multi-dot extensions, but rejects names like 'myepub'.
+        valid_extension = False
+        for i in sorter:
+            if os.path.basename(filename).endswith(os.extsep + i):
+                file_extension = i
+                valid_extension = True
+                break
+
+        if not valid_extension:
             print(filename + ' has an unsupported extension')
             return
 
+        book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)
+
         # Everything following this is standard
         # None values are accounted for here
         book_ref.read_book()