Parser functional for initial epub details

2017-11-06 14:43:01 +05:30
parent 8ec2b6e02c
commit ea399ff2c5
3 changed files with 84 additions and 39 deletions
--- a/main.py
+++ b/main.py
@@ -26,6 +26,7 @@ import sys
 from PyQt5 import QtWidgets, QtGui, QtCore
 import mainwindow
 import database
+import parser


 class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
@@ -65,10 +66,13 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
    def open_file(self):
+        # Ask the user for one or more eBook files, starting in the last
+        # directory used, and hand the selection to parser.BookSorter
        # TODO
        # Maybe expand this to traverse directories recursively
-        home_dir = os.path.expanduser('~')
        my_file = QtWidgets.QFileDialog.getOpenFileNames(
-            self, 'Open file', home_dir, "eBooks (*.epub *.mobi *.txt)")
-        print(my_file[0])
+            self, 'Open file', self.last_open_path, "eBooks (*.epub *.mobi *.txt)")
+        # my_file[0] holds the selected paths; nothing is done when it is empty
+        if my_file[0]:
+            # Remember the directory of the first selected file for next time
+            self.last_open_path = os.path.dirname(my_file[0][0])
+            print(self.last_open_path)
+            books = parser.BookSorter(my_file[0])
+            books.add_to_database()

    def close_tab_class(self, tab_index):
        this_tab = Tabs(self, None)
@@ -157,12 +161,22 @@ class Settings:
            QtCore.QPoint(286, 141)))
        self.settings.endGroup()

+        self.settings.beginGroup('path')
+        self.parent_window.last_open_path = self.settings.value(
+            'path', os.path.expanduser('~'))
+        print(self.parent_window.last_open_path)
+        self.settings.endGroup()
+
    def save_settings(self):
+        """Store the window geometry and the last opened directory in self.settings."""
        self.settings.beginGroup('mainWindow')
        self.settings.setValue('windowSize', self.parent_window.size())
        self.settings.setValue('windowPosition', self.parent_window.pos())
        self.settings.endGroup()

+        # Must use the same group and key ('path' / 'path') that the settings
+        # loader reads; writing to a different group means the saved
+        # directory is never restored on the next start
+        self.settings.beginGroup('path')
+        self.settings.setValue('path', self.parent_window.last_open_path)
+        self.settings.endGroup()
+

 class Toolbars:
    # TODO
--- a/database.py
+++ b/database.py
@@ -17,7 +17,7 @@ class DatabaseFunctions:
    def create_database(self):
        self.database.execute(
            "CREATE TABLE books \
-            (id INTEGER PRIMARY KEY, Name TEXT, Path TEXT, ISBN TEXT, CoverImage BLOB)")
+            (id INTEGER PRIMARY KEY, Name TEXT, Path TEXT, ISBN TEXT, Tags TEXT, CoverImage BLOB)")
        self.database.execute(
            "CREATE TABLE cache \
            (id INTEGER PRIMARY KEY, Name TEXT, Path TEXT, CachedDict BLOB)")
@@ -25,6 +25,6 @@ class DatabaseFunctions:
        # database at time of closing

        self.database.commit()
-    
+
    def add_to_database(self, book_data, image_data):
+        # Placeholder: accepts book_data and image_data but does nothing yet
        pass
--- a/parser.py
+++ b/parser.py
@@ -6,59 +6,90 @@ import collections
 import ebooklib.epub


-def get_book_essentials(filename):
-    book = ebooklib.epub.read_epub(filename)
+class ParseEPUB:
+    """Read an EPUB with ebooklib and expose its title, cover image and ISBN.
+
+    If the file cannot be parsed, ``self.book`` stays ``None`` and every
+    getter returns ``None`` instead of raising.
+    """
+
+    def __init__(self, filename):
+        self.filename = filename
+        self.book_title = None
+        # Assigned before parsing so the attribute exists even when
+        # read_epub fails; the getters check it before use
+        self.book = None
+        try:
+            self.book = ebooklib.epub.read_epub(filename)
+        except (KeyError, AttributeError):
+            print('Cannot parse ' + self.filename)

-    # Get book title
-    title = book.title.strip()
+    def get_title(self):
+        """Return the book title without surrounding whitespace.
+
+        Returns None when the book could not be parsed.
+        """
+        if self.book is None:
+            return None
+        return self.book.title.strip()

-    # Get cover image
-    # This seems hack-ish, but that's never stopped me before
-    image_path = None
-    try:
-        cover = book.metadata['http://www.idpf.org/2007/opf']['cover'][0][1]['content']
-        cover_item = book.get_item_with_id(cover)
+    def get_cover_image(self):
+        """Return the content of the cover image, or None if none is found.
+
+        Lookup order:
+        1. the item whose id is named by the OPF 'cover' metadata entry;
+        2. the href of a guide entry titled like a cover, or of type
+           'coverimagestandard';
+        3. the first ``src="..."/`` reference found in an XHTML item.
+        For 2 and 3 the image item with the same base name is returned.
+
+        A KeyError while reading the metadata prints a message and
+        yields None; the fallbacks are not tried in that case.
+        """
+        if self.book is None:
+            return None
+
+        # This seems hack-ish, but that's never stopped me before
+        image_path = None
+        try:
+            cover = self.book.metadata['http://www.idpf.org/2007/opf']['cover'][0][1]['content']
+            cover_item = self.book.get_item_with_id(cover)
+            if cover_item:
+                return cover_item.get_content()

-        # In case no cover_item is returned, we search the items
-        # in the book and get the first referenced image
-        if not cover_item:
-            for j in book.guide:
+            # In case no cover_item is returned,
+            # we look for a cover in the guide
+            for j in self.book.guide:
                try:
-                    if (j['title'].lower in ['cover', 'cover-image', 'coverimage'] or j['type'] == 'coverimagestandard'):
+                    # .lower() must be called; comparing the bound method
+                    # itself against the list is always False
+                    if (j['title'].lower() in ['cover', 'cover-image', 'coverimage'] or
+                            j['type'] == 'coverimagestandard'):
                        image_path = j['href']
-                    break
+                        # Stop only once a cover entry has been found
+                        break
                except KeyError:
                    pass

+            # And if all else fails, we find
+            # the first image referenced in the book
            if not image_path:
-                for j in book.items:
+                for j in self.book.items:
                    if j.media_type == 'application/xhtml+xml':
                        _regex = re.search(r"src=\"(.*)\"\/", j.content.decode('utf-8'))
                        if _regex:
                            image_path = _regex[1]
-                        break
+                            # Keep scanning later documents until one
+                            # actually references an image
+                            break

+            if not image_path:
+                # Nothing to look up; basename(None) would raise TypeError
+                return None
+
-            for k in book.get_items_of_type(ebooklib.ITEM_IMAGE):
+            for k in self.book.get_items_of_type(ebooklib.ITEM_IMAGE):
                if os.path.basename(k.file_name) == os.path.basename(image_path):
-                    image_content = k.get_content()
+                    return k.get_content()

-        else:
-            image_content = cover_item.get_content()
+            # No image item matched the path that was found
+            return None

-    except KeyError:
-        print('Cannot parse ' + filename)
+        except KeyError:
+            print('Cannot parse ' + self.filename)
+            return None

-    # Get ISBN ID
-    isbn_id = None
-    try:
-        identifier = book.metadata['http://purl.org/dc/elements/1.1/']['identifier']
-        for i in identifier:
-            identifier_provider = i[1]['{http://www.idpf.org/2007/opf}scheme']
-            if identifier_provider.lower() == 'isbn':
-                isbn_id = i[0]
-                break
-    except KeyError:
-        pass
+    def get_isbn(self):
+        """Return the identifier whose scheme is 'isbn', or None.
+
+        Returns None when the book could not be parsed, has no
+        identifier metadata, or has no identifier with an ISBN scheme.
+        """
+        if self.book is None:
+            return None
+        try:
+            identifier = self.book.metadata['http://purl.org/dc/elements/1.1/']['identifier']
+        except KeyError:
+            return None
+        for i in identifier:
+            try:
+                identifier_provider = i[1]['{http://www.idpf.org/2007/opf}scheme']
+            except KeyError:
+                # An identifier without a scheme must not end the
+                # search; a later one may still be the ISBN
+                continue
+            if identifier_provider.lower() == 'isbn':
+                return i[0]
+        return None

-    with open('/home/akhil/aa.jpg', 'bw') as myimg:
-        myimg.write(image_content)
+
+class BookSorter:
+    """Take the list of files chosen in the GUI and pass them to the parser.
+
+    Only the extraction of book details from EPUB files is implemented;
+    see the notes in __init__ for what else is planned.
+    """
+
+    def __init__(self, file_list):
+        # Have the GUI pass a list of files straight to here
+        # Then, on the basis of what is needed, pass the
+        # filenames to the requisite functions
+        # This includes getting file info for the database
+        # Parsing for the reader proper
+        # Caching upon closing
+        self.file_list = file_list
+
+    def add_to_database(self):
+        """Parse every file in the list and print its title and ISBN.
+
+        Nothing is written to the database yet. Files that ParseEPUB
+        could not read are skipped so that one bad file does not abort
+        the rest of the batch with an AttributeError.
+        """
+        # Consider multithreading this
+        for i in self.file_list:
+            book_ref = ParseEPUB(i)
+            if getattr(book_ref, 'book', None) is None:
+                # ParseEPUB has already printed the failure for this file
+                continue
+
+            title = book_ref.get_title()
+            # Fetched but not used yet
+            cover_image = book_ref.get_cover_image()
+            isbn = book_ref.get_isbn()
+
+            print(title, isbn)