Begin work on ePub parser

This commit is contained in:
BasioMeusPuga
2018-03-10 09:43:41 +05:30
parent 5605ad69b8
commit 4a30c8bdc7
5 changed files with 253 additions and 1 deletions

79
parsers/epub.py Normal file
View File

@@ -0,0 +1,79 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import zipfile
from ePub.read_epub import EPUB
class ParseEPUB:
    """Adapter exposing metadata and contents of an ePub file.

    The heavy lifting is delegated to ``EPUB`` (imported from
    ``ePub.read_epub``); this class stores the paths it is given and
    forwards lookups to the ``book`` mapping produced by that reader.
    ``read_book`` must be called before any of the ``get_*`` methods,
    because ``self.book`` is ``None`` until then.
    """

    def __init__(self, filename, temp_dir, file_md5):
        """Store the book location and extraction parameters.

        Args:
            filename: Path of the ePub archive to read.
            temp_dir: Directory under which the archive is extracted.
            file_md5: Name of the sub-directory of ``temp_dir`` that
                receives the extracted files.
        """
        # TODO
        # Maybe also include book description
        self.book_ref = None
        self.book = None
        self.temp_dir = temp_dir
        self.filename = filename
        self.file_md5 = file_md5

    def read_book(self):
        """Create the ``EPUB`` reader and populate ``self.book``."""
        self.book_ref = EPUB(self.filename)
        contents_path = self.book_ref.get_file_path('content.opf')
        self.book_ref.generate_book(contents_path)
        self.book_ref.parse_toc()
        self.book = self.book_ref.book

    def get_title(self):
        """Return the ``'title'`` entry of the parsed book."""
        return self.book['title']

    def get_author(self):
        """Return the ``'author'`` entry of the parsed book."""
        return self.book['author']

    def get_year(self):
        """Return 9999; the year is not read from the book."""
        return 9999

    def get_cover_image(self):
        """Return the ``'cover'`` entry, or ``None`` when it is absent."""
        try:
            return self.book['cover']
        except KeyError:
            return None

    def get_isbn(self):
        """Return the ``'isbn'`` entry of the parsed book.

        Unlike ``get_cover_image`` a missing entry is not handled here;
        the lookup error from ``self.book`` propagates to the caller.
        """
        return self.book['isbn']

    def get_tags(self):
        """Return ``None``; tags are not extracted."""
        return None

    def get_contents(self):
        """Extract the archive and return its chapters with settings.

        The whole archive is unpacked into ``temp_dir/file_md5`` before
        the reader's ``parse_chapters`` is invoked.

        Returns:
            A ``(navpoint_dict, file_settings)`` tuple, where
            ``navpoint_dict`` is ``self.book['navpoint_dict']`` and
            ``file_settings`` is ``{'images_only': False}``.
        """
        extract_path = os.path.join(self.temp_dir, self.file_md5)

        # Use the archive as a context manager so its file handle is
        # closed as soon as extraction finishes (or fails) instead of
        # lingering until garbage collection.
        with zipfile.ZipFile(self.filename) as archive:
            archive.extractall(extract_path)

        self.book_ref.parse_chapters()

        file_settings = {
            'images_only': False}
        return self.book['navpoint_dict'], file_settings
class HidePrinting:
    """Context manager that sets ``sys.stdout`` to ``None`` while active.

    The previous stream is remembered on entry and put back on exit.
    Note that code calling ``sys.stdout.write`` directly will fail inside
    the block, because ``None`` has no ``write`` attribute.
    """

    def __enter__(self):
        """Remember the current ``sys.stdout`` and replace it with ``None``."""
        self._original_stdout, sys.stdout = sys.stdout, None
        return None

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the remembered stream; exceptions are not suppressed."""
        sys.stdout = self._original_stdout
        return None