Initial file loading

epub content parsing is horribly borked
2017-11-11 23:21:49 +05:30
parent 5d495cfde3
commit 7fbea194c0
6 changed files with 236 additions and 120 deletions
--- a/sorter.py
+++ b/sorter.py
@@ -1,16 +1,20 @@
 #!/usr/bin/env python3

 # TODO
-# Methods that return None must be quantified here if needed
+# Methods that return None must be quantified within the parsing module
+# See if tags can be generated from book content
+# See if you want to include a hash of the book's name and author

+import os
 import hashlib
 from multiprocessing.dummy import Pool

 import database
 from parsers.epub import ParseEPUB

+
 class BookSorter:
-    def __init__(self, file_list, database_path):
+    def __init__(self, file_list, mode, database_path):
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
@@ -21,7 +25,9 @@ class BookSorter:
        self.all_books = {}
        self.database_path = database_path
        self.hashes = []
-        self.database_hashes()
+        self.mode = mode
+        if database_path:
+            self.database_hashes()

    def database_hashes(self):
        all_hashes = database.DatabaseFunctions(
@@ -34,6 +40,16 @@ class BookSorter:
        if all_hashes:
            self.hashes = [i[0] for i in all_hashes]

+    def database_position(self, file_hash):
+        position = database.DatabaseFunctions(
+            self.database_path).fetch_data(
+                ('Position',),
+                'books',
+                {'Hash': file_hash},
+                'EQUALS',
+                True)
+        return position
+
    def read_book(self, filename):
        # filename is expected as a string containg the
        # full path of the ebook file
@@ -41,17 +57,21 @@ class BookSorter:
        with open(filename, 'rb') as current_book:
            file_md5 = hashlib.md5(current_book.read()).hexdigest()

+        # IF the file is NOT being loaded into the reader,
        # Do not allow addition in case the file is dupicated in the directory
        # OR is already in the database
-        # TODO
-        # See if you want to include a hash of the book's name and author
-        if file_md5 in self.all_books.items() or file_md5 in self.hashes:
+        # This should not get triggered in reading mode
+        if (self.mode == 'addition'
+                and (file_md5 in self.all_books.items() or file_md5 in self.hashes)):
            return

-        # TODO
-        # See if tags can be generated from book content
-        # Sort according to to file extension here
-        book_ref = ParseEPUB(filename)
+        # Select sorter by file extension
+        try:
+            file_extension = os.path.splitext(filename)[1][1:]
+            if file_extension == 'epub':
+                book_ref = ParseEPUB(filename)
+        except IndexError:
+            return

        # Everything following this is standard
        # Some of the None returns will have to have
@@ -61,16 +81,32 @@ class BookSorter:
            title = book_ref.get_title()
            author = book_ref.get_author()
            year = book_ref.get_year()
-            cover_image = book_ref.get_cover_image()
            isbn = book_ref.get_isbn()

-            self.all_books[file_md5] = {
-                'title': title,
-                'author': author,
-                'year': year,
-                'isbn': isbn,
-                'path': filename,
-                'cover_image': cover_image}
+            # Different modes require different values
+            if self.mode == 'addition':
+                cover_image = book_ref.get_cover_image()
+                self.all_books[file_md5] = {
+                    'title': title,
+                    'author': author,
+                    'year': year,
+                    'isbn': isbn,
+                    'path': filename,
+                    'cover_image': cover_image}
+
+            if self.mode == 'reading':
+                content = book_ref.get_contents()
+                position = self.database_position(file_md5)
+                self.all_books = {
+                    'title': title,
+                    'author': author,
+                    'year': year,
+                    'isbn': isbn,
+                    'hash': file_md5,
+                    'path': filename,
+                    'position': position,
+                    'content': content}
+

    def initiate_threads(self):
        _pool = Pool(5)