diff --git a/TODO b/TODO
index a7c2b02..e244e1a 100644
--- a/TODO
+++ b/TODO
@@ -58,6 +58,7 @@ TODO
Limit the extra files produced by KindleUnpack
Have them save to memory
epub support
+ Homegrown solution please
Other:
✓ Define every widget in code
Bugs:
diff --git a/ePub/__init__.py b/ePub/__init__.py
new file mode 100644
index 0000000..8d1c8b6
--- /dev/null
+++ b/ePub/__init__.py
@@ -0,0 +1 @@
+
diff --git a/ePub/read_epub.py b/ePub/read_epub.py
new file mode 100644
index 0000000..3df4048
--- /dev/null
+++ b/ePub/read_epub.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+
+# This file is a part of Lector, a Qt based ebook reader
+# Copyright (C) 2017 BasioMeusPuga
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import sys
+import zipfile
+
+import pprint
+import inspect
+
+import bs4
+from bs4 import BeautifulSoup
+
+
+class EPUB:
+    """Minimal ePub reader; parsed data is collected in ``self.book``."""
+
+    def __init__(self, filename):
+        self.filename = filename
+        self.zip_file = None
+        self.book = {}
+
+    def read_book(self):
+        """Open the archive, then parse its metadata and its TOC."""
+        # This is the function that should error out in
+        # case the module cannot process the file
+        self.load_zip()
+        contents_path = self.get_file_path('content.opf')
+        self.generate_book_metadata(contents_path)
+        self.parse_toc()
+
+    def load_zip(self):
+        """Open ``self.filename``; ``zip_file`` stays None on failure."""
+        try:
+            self.zip_file = zipfile.ZipFile(
+                self.filename, mode='r', allowZip64=True)
+        except (KeyError, AttributeError, zipfile.BadZipFile):
+            print('Cannot parse ' + self.filename)
+
+    def parse_xml(self, filename, parser):
+        """Return archive member ``filename`` as soup, or None if absent."""
+        try:
+            this_xml = self.zip_file.read(filename).decode()
+        except KeyError:
+            print('File not found in zip')
+            return None
+
+        return BeautifulSoup(this_xml, parser)
+
+    def get_file_path(self, filename):
+        """Return the first member whose basename is ``filename``, or None."""
+        for i in self.zip_file.filelist:
+            if os.path.basename(i.filename) == filename:
+                return i.filename
+        return None
+
+    def _resolve_href(self, base_path, href):
+        """Return the archive member that ``href`` points to, or None.
+
+        hrefs are relative to the file declaring them, so the directory
+        of ``base_path`` is tried first. The bare ``href`` is kept as a
+        fallback for archives that store full member paths.
+        """
+        import posixpath  # zip member names always use '/'
+        if not href:
+            return None
+        base_dir = posixpath.dirname(base_path or '')
+        joined = posixpath.normpath(posixpath.join(base_dir, href))
+        for candidate in (joined, href):
+            try:
+                self.zip_file.getinfo(candidate)
+            except KeyError:
+                continue
+            return candidate
+        return None
+
+    def generate_book_metadata(self, contents_path):
+        """Fill ``self.book`` from the OPF file at ``contents_path``."""
+        item_dict = {
+            'title': 'dc:title',
+            'author': 'dc:creator',
+            'date': 'dc:date'}
+
+        # Parse metadata
+        xml = self.parse_xml(contents_path, 'lxml')
+
+        for key, tag_name in item_dict.items():
+            item = xml.find(tag_name)
+            if item:
+                self.book[key] = item.text
+
+        # Get identifier
+        xml = self.parse_xml(contents_path, 'xml')
+
+        # Define the key up front so that book['isbn'] can always be read
+        self.book['isbn'] = None
+        metadata_items = xml.find('metadata')
+        for i in metadata_items.children:
+            if isinstance(i, bs4.element.Tag):
+                scheme = i.get('opf:scheme')
+                if scheme and scheme.lower() == 'isbn':
+                    self.book['isbn'] = i.text
+                    break
+
+        # Get items; their hrefs are relative to the OPF file
+        for i in xml.find_all('item'):
+            if i.get('media-type') == 'application/x-dtbncx+xml':
+                toc_path = self._resolve_href(contents_path, i.get('href'))
+                if toc_path is not None:
+                    self.book['toc_file'] = toc_path
+            if i.get('id') == 'cover':
+                cover_path = self._resolve_href(contents_path, i.get('href'))
+                if cover_path is not None:
+                    self.book['cover'] = self.zip_file.read(cover_path)
+        # TODO: order the chapters by the spine (itemref / idref)
+
+    def parse_toc(self):
+        """Map chapter titles to archive paths in ``navpoint_dict``."""
+        # Try to get chapter names from the toc
+        try:
+            toc_file = self.book['toc_file']
+        except KeyError:
+            toc_file = self.get_file_path('toc.ncx')
+
+        self.book['navpoint_dict'] = {}
+        xml = self.parse_xml(toc_file, 'xml')
+        if xml is None:
+            # No readable TOC: leave the chapter map empty
+            return
+
+        for i in xml.find_all('navPoint'):
+            chapter_title = i.find('text').text
+            chapter_source = i.find('content').get('src').split('#')[0]
+            # src is relative to the TOC file; keep it as is if unresolved
+            resolved = self._resolve_href(toc_file, chapter_source)
+            self.book['navpoint_dict'][chapter_title] = (
+                resolved or chapter_source)
+
+    def parse_chapters(self):
+        """Replace each path in ``navpoint_dict`` with that file's text."""
+        for title, source in self.book['navpoint_dict'].items():
+            try:
+                self.book['navpoint_dict'][title] = self.zip_file.read(
+                    source).decode()
+            except KeyError:
+                print(source + ' skipped')
+
+def main():
+    """Parse the ePub file named by the first command line argument."""
+    epub = EPUB(sys.argv[1])
+    epub.read_book()
+    epub.parse_chapters()
+
+if __name__ == '__main__':
+    main()
diff --git a/parsers/epub.py b/parsers/epub.py
new file mode 100644
index 0000000..42a8122
--- /dev/null
+++ b/parsers/epub.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+
+# This file is a part of Lector, a Qt based ebook reader
+# Copyright (C) 2017 BasioMeusPuga
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import sys
+import zipfile
+
+from ePub.read_epub import EPUB
+
+
+class ParseEPUB:
+    """Parser that the sorter uses for files with the 'epub' extension."""
+
+    def __init__(self, filename, temp_dir, file_md5):
+        # TODO: Maybe also include book description
+        self.book_ref = None
+        self.book = None
+        self.temp_dir = temp_dir
+        self.filename = filename
+        self.file_md5 = file_md5
+
+    def read_book(self):
+        """Parse the file via EPUB.read_book(); run before the getters."""
+        self.book_ref = EPUB(self.filename)
+        self.book_ref.read_book()
+        self.book = self.book_ref.book
+
+    def get_title(self):
+        return self.book['title']
+
+    def get_author(self):
+        return self.book['author']
+
+    def get_year(self):
+        # Placeholder: the parsed 'date' entry is not used yet
+        return 9999
+
+    def get_cover_image(self):
+        """Return the cover image bytes, or None without a cover."""
+        return self.book.get('cover')
+
+    def get_isbn(self):
+        """Return the ISBN, or None when the book does not declare one."""
+        return self.book.get('isbn')
+
+    def get_tags(self):
+        return None
+
+    def get_contents(self):
+        """Extract the archive and return (chapters, file settings)."""
+        extract_path = os.path.join(self.temp_dir, self.file_md5)
+        with zipfile.ZipFile(self.filename) as archive:
+            archive.extractall(extract_path)
+
+        self.book_ref.parse_chapters()
+        return self.book['navpoint_dict'], {'images_only': False}
+
+class HidePrinting:
+    """Context manager: sets sys.stdout to None, restores it on exit."""
+    def __enter__(self):
+        self._original_stdout, sys.stdout = sys.stdout, None
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        sys.stdout = self._original_stdout
diff --git a/sorter.py b/sorter.py
index 5ed3f24..f4bbe0d 100644
--- a/sorter.py
+++ b/sorter.py
@@ -47,9 +47,10 @@ import database
from parsers.ebook import ParseEBook
from parsers.cbz import ParseCBZ
from parsers.cbr import ParseCBR
+from parsers.epub import ParseEPUB
sorter = {
- 'epub': ParseEBook,
+ 'epub': ParseEPUB,
'mobi': ParseEBook,
'azw': ParseEBook,
'cbz': ParseCBZ,