Speed up file addition

Improve fb2 parser. Fix extension checking.
2018-06-14 16:10:27 -04:00
parent 4a2da61b51
commit a0e463bc58
6 changed files with 56 additions and 32 deletions
--- a/5
+++ b/5
@@ -77,6 +77,8 @@ TODO
        ✓ mobi, azw support
            Limit the extra files produced by KindleUnpack
                Have them save to memory
        ✓ fb2 support
            Images need to show up in their placeholders
    Other:
        ✓ Define every widget in code
    Bugs:
@@ -84,7 +86,6 @@ TODO
        Clean up 'switch' page layout
        Colors aren't loaded properly for annotation previews
        Cover page shouldn't be scrolled midway
        It's possible the addition function is also parsing the whole book.
    Secondary:
        Graphical themes
@@ -98,7 +99,7 @@ TODO
        Use embedded fonts + CSS
        Scrolling: Smooth / By Line
        Shift to logging instead of print statements
-        txt, doc, chm, djvu, fb2 support
+        txt, doc, chm, djvu support
        Include icons for filetype emblems
        Comic view modes
            Continuous paging
--- a/lector/parsers/epub.py
+++ b/lector/parsers/epub.py
@@ -58,6 +58,7 @@ class ParseEPUB:
    def get_contents(self):
        zipfile.ZipFile(self.filename).extractall(self.extract_path)
        self.book_ref.parse_toc()
        self.book_ref.parse_chapters(temp_dir=self.extract_path)
        file_settings = {
            'images_only': False}
--- a/lector/parsers/fb2.py
+++ b/lector/parsers/fb2.py
@@ -15,7 +15,6 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import os
 import zipfile
 from lector.readers.read_fb2 import FB2
@@ -56,12 +55,8 @@ class ParseFB2:
        return self.book['tags']
    def get_contents(self):
-        # TODO
+        os.makedirs(self.extract_path, exist_ok=True)  # Manual creation is required here
-        # Make this save images to the temp path
+        self.book_ref.parse_chapters(temp_dir=self.extract_path)
        # Relative file paths should then point there
        # zipfile.ZipFile(self.filename).extractall(self.extract_path)
        # self.book_ref.parse_chapters(temp_dir=self.extract_path)
        file_settings = {
            'images_only': False}
        return self.book['book_list'], file_settings
--- a/lector/readers/read_epub.py
+++ b/lector/readers/read_epub.py
@@ -40,7 +40,6 @@ class EPUB:
                return False  # No (valid) opf was found so processing cannot continue
            self.generate_book_metadata(contents_path)
            self.parse_toc()
        except:  # Not specifying an exception type here may be justified
            return False
--- a/lector/readers/read_fb2.py
+++ b/lector/readers/read_fb2.py
@@ -48,25 +48,29 @@ class FB2:
        return True
    def generate_book_metadata(self):
        self.book['title'] = os.path.splitext(
            os.path.basename(self.filename))[0]
        self.book['author'] = 'Unknown'
        self.book['isbn'] = None
        self.book['tags'] = None
        self.book['cover'] = None
        self.book['year'] = 9999
        self.book['book_list'] = []
-        # TODO
+        # All metadata can be parsed in one pass
-        # Look for other components of book metadata here
+        all_tags = self.xml.find('description')
        for i in self.xml.find_all():
-            if i.name == 'section':
+        self.book['title'] = all_tags.find('book-title').text
-                for j in i:
+        if self.book['title'] == '' or self.book['title'] is None:
-                    if j.name == 'title':
+            self.book['title'] = os.path.splitext(
-                        this_title = j.text
+                os.path.basename(self.filename))[0]
-                self.book['book_list'].append(
+
-                    (this_title, str(i)))
+        self.book['author'] = all_tags.find('author').getText(separator=' ').replace('\n', ' ')
        if self.book['author'] == '' or self.book['author'] is None:
            self.book['author'] = 'Unknown'
        # TODO
        # Account for other date formats
        try:
            self.book['year'] = int(all_tags.find('date').text)
        except ValueError:
            self.book['year'] = 9999
        # Cover Image
        cover_image_xml = self.xml.find('coverpage')
@@ -75,8 +79,26 @@ class FB2:
        cover_image_data = self.xml.find_all('binary')
        for i in cover_image_data:
            # TODO
            # Account for other images as well
            if cover_image_name.endswith(i.get('id')):
                self.book['cover'] = base64.decodebytes(i.text.encode())
    def parse_chapters(self, temp_dir):
        """Collect chapter content and write embedded images to disk.

        Every <section> element found in self.xml is appended to
        self.book['book_list'] as a (title, markup) tuple, in the order
        find_all() returns them. Every <binary> element is base64-decoded
        and written into temp_dir under a file name taken from its 'id'
        attribute.

        Args:
            temp_dir: Directory that receives the decoded images. This
                method does not create it; it must already exist.
        """
        # There's no need to parse the TOC separately because
        # everything is linear
        for section in self.xml.find_all('section'):
            # Reset for every section so that an untitled one neither hits
            # an unbound name nor inherits the previous section's title
            section_title = 'Untitled'
            for child in section:
                if child.name == 'title':
                    section_title = child.getText(separator=' ')
            self.book['book_list'].append((section_title, str(section)))

        # Extract all images to the temp_dir
        for binary in self.xml.find_all('binary'):
            # The id comes from the book file itself, so keep only its last
            # path component to avoid writing anywhere outside temp_dir
            image_name = os.path.basename(binary.get('id') or '')
            if image_name in ('', '.', '..'):
                continue  # No usable file name for this image

            try:
                image_data = base64.decodebytes(binary.text.encode())
            except (AttributeError, ValueError):
                # ValueError also covers binascii.Error, which is what
                # malformed base64 raises; skip just this image
                continue

            with open(os.path.join(temp_dir, image_name), 'wb') as outimage:
                outimage.write(image_data)
--- a/lector/sorter.py
+++ b/lector/sorter.py
@@ -175,15 +175,21 @@ class BookSorter:
                    print(f'{os.path.basename(filename)} is already in database')
                return
-        # Using os.extsep like so allows for file extensions with multiple dots
+        # This allows for eliminating issues with filenames that have
-        file_extension = os.path.basename(filename).split(os.extsep, 1)[1]
+        # a dot in them. All hail the roundabout fix.
-        try:
+        valid_extension = False
-            # Get the requisite parser from the sorter dict
+        for i in sorter:
-            book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)
+            if os.path.basename(filename).endswith(i):
-        except KeyError:
+                file_extension = i
                valid_extension = True
                break
        if not valid_extension:
            print(filename + ' has an unsupported extension')
            return
        book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)
        # Everything following this is standard
        # None values are accounted for here
        book_ref.read_book()