Parser functional for initial epub details
This commit is contained in:
20
__main__.py
20
__main__.py
@@ -26,6 +26,7 @@ import sys
|
||||
from PyQt5 import QtWidgets, QtGui, QtCore
|
||||
import mainwindow
|
||||
import database
|
||||
import parser
|
||||
|
||||
|
||||
class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
|
||||
@@ -65,10 +66,13 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
|
||||
def open_file(self):
    """Ask the user for one or more ebook files and hand them to the parser.

    The file dialog starts in ``self.last_open_path``. When at least one
    file is chosen, the directory of the first selected file becomes the
    new ``self.last_open_path`` and the selected paths are passed to
    ``parser.BookSorter``, whose ``add_to_database`` is then called.
    Nothing happens if the dialog is dismissed without a selection.
    """
    # TODO
    # Maybe expand this to traverse directories recursively
    my_file = QtWidgets.QFileDialog.getOpenFileNames(
        self, 'Open file', self.last_open_path, "eBooks (*.epub *.mobi *.txt)")

    # getOpenFileNames returns (list_of_paths, selected_filter);
    # the list is empty when the user cancels.
    if my_file[0]:
        self.last_open_path = os.path.dirname(my_file[0][0])
        print(self.last_open_path)
        books = parser.BookSorter(my_file[0])
        books.add_to_database()
|
||||
|
||||
def close_tab_class(self, tab_index):
|
||||
this_tab = Tabs(self, None)
|
||||
@@ -157,12 +161,22 @@ class Settings:
|
||||
QtCore.QPoint(286, 141)))
|
||||
self.settings.endGroup()
|
||||
|
||||
self.settings.beginGroup('path')
|
||||
self.parent_window.last_open_path = self.settings.value(
|
||||
'path', os.path.expanduser('~'))
|
||||
print(self.parent_window.last_open_path)
|
||||
self.settings.endGroup()
|
||||
|
||||
def save_settings(self):
    """Persist the main window geometry and the last opened directory.

    Writes ``windowSize`` and ``windowPosition`` under the ``mainWindow``
    group, and the parent window's ``last_open_path`` as key ``path``
    under the ``path`` group.
    """
    self.settings.beginGroup('mainWindow')
    self.settings.setValue('windowSize', self.parent_window.size())
    self.settings.setValue('windowPosition', self.parent_window.pos())
    self.settings.endGroup()

    # The settings loader in this class reads the value back with
    # beginGroup('path') / value('path', ...). Saving under a different
    # group ('lastOpen') meant the stored directory was never restored,
    # so the group name here must match the one used when loading.
    self.settings.beginGroup('path')
    self.settings.setValue('path', self.parent_window.last_open_path)
    self.settings.endGroup()
|
||||
|
||||
|
||||
class Toolbars:
|
||||
# TODO
|
||||
|
@@ -17,7 +17,7 @@ class DatabaseFunctions:
|
||||
def create_database(self):
|
||||
self.database.execute(
|
||||
"CREATE TABLE books \
|
||||
(id INTEGER PRIMARY KEY, Name TEXT, Path TEXT, ISBN TEXT, CoverImage BLOB)")
|
||||
(id INTEGER PRIMARY KEY, Name TEXT, Path TEXT, ISBN TEXT, Tags TEXT, CoverImage BLOB)")
|
||||
self.database.execute(
|
||||
"CREATE TABLE cache \
|
||||
(id INTEGER PRIMARY KEY, Name TEXT, Path TEXT, CachedDict BLOB)")
|
||||
@@ -25,6 +25,6 @@ class DatabaseFunctions:
|
||||
# database at time of closing
|
||||
|
||||
self.database.commit()
|
||||
|
||||
|
||||
def add_to_database(self, book_data, image_data):
    """Placeholder for inserting a book into the database.

    Not implemented yet: both arguments are accepted and ignored, and
    the method returns ``None`` without touching the database.
    """
    return None
|
||||
|
99
parser.py
99
parser.py
@@ -6,59 +6,90 @@ import collections
|
||||
import ebooklib.epub
|
||||
|
||||
|
||||
def get_book_essentials(filename):
|
||||
book = ebooklib.epub.read_epub(filename)
|
||||
class ParseEPUB:
|
||||
def __init__(self, filename):
    """Read *filename* as an EPUB and keep the parsed book on the instance.

    Args:
        filename: Path passed to ``ebooklib.epub.read_epub``.

    If reading raises ``KeyError`` or ``AttributeError``, a message is
    printed and ``self.book`` stays ``None``.
    """
    self.filename = filename
    self.book_title = None
    # Define the attribute up front so that a failed parse leaves a
    # well-defined "no book" state instead of a missing attribute.
    self.book = None
    try:
        self.book = ebooklib.epub.read_epub(filename)
    except (KeyError, AttributeError):
        print('Cannot parse ' + self.filename)
|
||||
|
||||
# Get book title
|
||||
title = book.title.strip()
|
||||
def get_title(self):
    """Return the book title with surrounding whitespace stripped.

    Returns:
        The stripped title, or ``None`` when no book is available
        (the constructor returns early if the file cannot be parsed).
    """
    book = getattr(self, 'book', None)
    if book is None:
        return None
    return book.title.strip()
|
||||
|
||||
# Get cover image
|
||||
# This seems hack-ish, but that's never stopped me before
|
||||
image_path = None
|
||||
try:
|
||||
cover = book.metadata['http://www.idpf.org/2007/opf']['cover'][0][1]['content']
|
||||
cover_item = book.get_item_with_id(cover)
|
||||
def get_cover_image(self):
    """Return the content of the book's cover image, or ``None``.

    Lookup order:
      1. The item whose id is named by the OPF ``cover`` metadata entry.
      2. A guide entry whose title is a known cover name or whose type
         is ``coverimagestandard``.
      3. The first ``src="..."`` reference found in an XHTML item.
    For 2 and 3 the image item whose base file name matches the
    referenced path is returned.

    Returns:
        Whatever ``get_content()`` of the chosen item returns, or
        ``None`` when no cover can be determined or no book is loaded.
    """
    book = getattr(self, 'book', None)
    if book is None:
        return None

    # This seems hack-ish, but that's never stopped me before
    image_path = None
    try:
        cover = book.metadata['http://www.idpf.org/2007/opf']['cover'][0][1]['content']
        cover_item = book.get_item_with_id(cover)
        if cover_item:
            return cover_item.get_content()

        # In case no cover_item is returned,
        # we look for a cover in the guide
        cover_titles = ('cover', 'cover-image', 'coverimage')
        for entry in book.guide:
            # .lower must be *called*; comparing the bound method itself
            # against the names never matched anything.
            title = (entry.get('title') or '').lower()
            if title in cover_titles or entry.get('type') == 'coverimagestandard':
                href = entry.get('href')
                if href:
                    image_path = href
                    break

        # And if all else fails, we find
        # the first image referenced in the book
        if not image_path:
            for item in book.items:
                if item.media_type == 'application/xhtml+xml':
                    match = re.search(r"src=\"(.*)\"\/", item.content.decode('utf-8'))
                    if match:
                        image_path = match.group(1)
                        break

        # Nothing referenced a cover: stop here rather than calling
        # os.path.basename(None) below.
        if not image_path:
            return None

        wanted_name = os.path.basename(image_path)
        for image in book.get_items_of_type(ebooklib.ITEM_IMAGE):
            if os.path.basename(image.file_name) == wanted_name:
                return image.get_content()

        # The referenced file is not among the image items.
        return None

    except KeyError:
        # NOTE(review): a missing 'cover' metadata entry also lands here,
        # so the guide/XHTML fallbacks are skipped in that case - confirm
        # whether that is intended.
        print('Cannot parse ' + self.filename)
        return None
|
||||
|
||||
# Get ISBN ID
|
||||
isbn_id = None
|
||||
try:
|
||||
identifier = book.metadata['http://purl.org/dc/elements/1.1/']['identifier']
|
||||
for i in identifier:
|
||||
identifier_provider = i[1]['{http://www.idpf.org/2007/opf}scheme']
|
||||
if identifier_provider.lower() == 'isbn':
|
||||
isbn_id = i[0]
|
||||
break
|
||||
except KeyError:
|
||||
pass
|
||||
def get_isbn(self):
    """Return the book's ISBN from its Dublin Core identifiers, or ``None``.

    Each ``identifier`` metadata entry is read as ``(value, attributes)``.
    The first entry whose ``opf:scheme`` attribute equals ``isbn``
    (case-insensitive) supplies the return value.

    Returns:
        The identifier value, or ``None`` when the book has no identifier
        metadata, none of the identifiers is an ISBN, or no book is loaded.
    """
    book = getattr(self, 'book', None)
    if book is None:
        return None

    try:
        identifiers = book.metadata['http://purl.org/dc/elements/1.1/']['identifier']
    except KeyError:
        return None

    for entry in identifiers:
        attributes = entry[1] or {}
        # Look the scheme up per identifier: an entry without the
        # attribute (e.g. a plain UUID) must not stop the search for a
        # later ISBN entry.
        scheme = attributes.get('{http://www.idpf.org/2007/opf}scheme')
        if scheme and scheme.lower() == 'isbn':
            return entry[0]

    return None
|
||||
|
||||
with open('/home/akhil/aa.jpg', 'bw') as myimg:
|
||||
myimg.write(image_content)
|
||||
|
||||
class BookSorter:
    """Take a list of file paths from the GUI and process each one.

    At present every file is parsed as an EPUB via ``ParseEPUB`` and its
    title and ISBN are printed.
    """

    def __init__(self, file_list):
        """Store the list of file paths to process.

        Args:
            file_list: Iterable of file paths selected in the GUI.
        """
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = file_list

    def add_to_database(self):
        """Parse every file in ``file_list`` and print its title and ISBN.

        Files that ``ParseEPUB`` could not read are skipped so that one
        bad file does not abort the rest of the batch.
        """
        # TODO: nothing is written to a database yet; the extracted
        # details are only printed.
        # Consider multithreading this
        for i in self.file_list:
            book_ref = ParseEPUB(i)

            # ParseEPUB returns from its constructor without a usable
            # ``book`` when parsing fails (it prints the reason itself);
            # calling the accessors on it would raise AttributeError.
            if getattr(book_ref, 'book', None) is None:
                continue

            title = book_ref.get_title()
            cover_image = book_ref.get_cover_image()
            isbn = book_ref.get_isbn()

            print(title, isbn)
|
||||
|
Reference in New Issue
Block a user