diff --git a/TODO b/TODO index b0c2546..9c09326 100644 --- a/TODO +++ b/TODO @@ -84,6 +84,7 @@ TODO Clean up 'switch' page layout Colors aren't loaded properly for annotation previews Cover page shouldn't be scolled midway + It's possible the addition function is also parsing the whole book. Secondary: Graphical themes @@ -106,3 +107,4 @@ TODO ? Add only one file type if multiple are present ? Create emblem per filetype In application notifications + Notification in case the filter is filtering out all files with no option in place diff --git a/lector/parsers/comicbooks.py b/lector/parsers/comicbooks.py index 3e91629..9c4e2ec 100644 --- a/lector/parsers/comicbooks.py +++ b/lector/parsers/comicbooks.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - # This file is a part of Lector, a Qt based ebook reader # Copyright (C) 2017-18 BasioMeusPuga diff --git a/lector/parsers/epub.py b/lector/parsers/epub.py index e37834c..39da0d9 100644 --- a/lector/parsers/epub.py +++ b/lector/parsers/epub.py @@ -1,7 +1,5 @@ -#!/usr/bin/env python3 - # This file is a part of Lector, a Qt based ebook reader -# Copyright (C) 2017 BasioMeusPuga +# Copyright (C) 2017-2018 BasioMeusPuga # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,7 +17,7 @@ import os import zipfile -from lector.ePub.read_epub import EPUB +from lector.readers.read_epub import EPUB class ParseEPUB: diff --git a/lector/parsers/fb2.py b/lector/parsers/fb2.py new file mode 100644 index 0000000..f777a6b --- /dev/null +++ b/lector/parsers/fb2.py @@ -0,0 +1,67 @@ +# This file is a part of Lector, a Qt based ebook reader +# Copyright (C) 2017-2018 BasioMeusPuga + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
class ParseFB2:
    """Parser front-end that exposes an FB2 book through the common
    ``get_*`` parser interface.

    The actual reading is delegated to ``FB2``; this class only keeps the
    resulting metadata dictionary and hands its entries out on request.
    """

    def __init__(self, filename, temp_dir, file_md5):
        """Remember where the book lives and where it may be extracted.

        Args:
            filename: Path of the book file.
            temp_dir: Base directory for temporary data.
            file_md5: Name of the per-book directory below ``temp_dir``
                (presumably the file's MD5 digest - confirm with caller).
        """
        # TODO
        # Maybe also include book description
        self.filename = filename
        self.extract_path = os.path.join(temp_dir, file_md5)

        # Both are filled in by read_book()
        self.book_ref = None
        self.book = None

    def read_book(self):
        """Read the file with ``FB2`` and keep its metadata in ``self.book``.

        When the reader reports failure a message is printed and
        ``self.book`` is left untouched.
        """
        self.book_ref = FB2(self.filename)
        if self.book_ref.read_fb2():
            self.book = self.book_ref.book
        else:
            print('Cannot process: ' + self.filename)

    def get_title(self):
        """Return the ``'title'`` entry of the book metadata."""
        return self.book['title']

    def get_author(self):
        """Return the ``'author'`` entry of the book metadata."""
        return self.book['author']

    def get_year(self):
        """Return the ``'year'`` entry of the book metadata."""
        return self.book['year']

    def get_cover_image(self):
        """Return the ``'cover'`` entry of the book metadata."""
        return self.book['cover']

    def get_isbn(self):
        """Return the ``'isbn'`` entry of the book metadata."""
        return self.book['isbn']

    def get_tags(self):
        """Return the ``'tags'`` entry of the book metadata."""
        return self.book['tags']

    def get_contents(self):
        """Return ``(book_list, file_settings)`` for the book.

        ``file_settings`` currently only carries ``'images_only': False``.
        """
        # TODO
        # Make this save images to the temp path (self.extract_path)
        # Relative file paths should then point there
        return self.book['book_list'], {'images_only': False}
Public License as published by @@ -24,7 +22,7 @@ import sys import shutil import zipfile -from lector.ePub.read_epub import EPUB +from lector.readers.read_epub import EPUB import lector.KindleUnpack.kindleunpack as KindleUnpack diff --git a/lector/parsers/pdf.py b/lector/parsers/pdf.py index 4ecbe21..ca1f8f5 100644 --- a/lector/parsers/pdf.py +++ b/lector/parsers/pdf.py @@ -1,7 +1,5 @@ -#!/usr/bin/env python3 - # This file is a part of Lector, a Qt based ebook reader -# Copyright (C) 2018 BasioMeusPuga +# Copyright (C) 2017-2018 BasioMeusPuga # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,6 +14,10 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +# TODO +# Error handling +# TOC parsing + import io import os diff --git a/lector/ePub/__init__.py b/lector/readers/__init__.py similarity index 100% rename from lector/ePub/__init__.py rename to lector/readers/__init__.py diff --git a/lector/ePub/read_epub.py b/lector/readers/read_epub.py similarity index 96% rename from lector/ePub/read_epub.py rename to lector/readers/read_epub.py index a88dd96..537d6a1 100644 --- a/lector/ePub/read_epub.py +++ b/lector/readers/read_epub.py @@ -1,7 +1,5 @@ -#!/usr/bin/env python3 - # This file is a part of Lector, a Qt based ebook reader -# Copyright (C) 2017 BasioMeusPuga +# Copyright (C) 2017-2018 BasioMeusPuga # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -33,15 +31,18 @@ class EPUB: def read_epub(self): # This is the function that should error out in # case the module cannot process the file - self.load_zip() - contents_path = self.get_file_path( - None, True) + try: + self.load_zip() + contents_path = self.get_file_path( + None, True) - if not contents_path: - return False # No (valid) opf was found so processing cannot 
continue + if not contents_path: + return False # No (valid) opf was found so processing cannot continue - self.generate_book_metadata(contents_path) - self.parse_toc() + self.generate_book_metadata(contents_path) + self.parse_toc() + except: # Not specifying an exception type here may be justified + return False return True diff --git a/lector/readers/read_fb2.py b/lector/readers/read_fb2.py new file mode 100644 index 0000000..e54a7ea --- /dev/null +++ b/lector/readers/read_fb2.py @@ -0,0 +1,82 @@ +# This file is a part of Lector, a Qt based ebook reader +# Copyright (C) 2017-2018 BasioMeusPuga + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
class FB2:
    """Reader for FictionBook files (``.fb2`` and zipped ``.fb2.zip``).

    After a successful ``read_fb2()`` call, ``self.book`` holds the keys
    ``title``, ``author``, ``isbn``, ``tags``, ``cover``, ``year`` and
    ``book_list`` (a list of ``(section title, section markup)`` tuples).
    """

    def __init__(self, filename):
        """Store the path of the book; nothing is read yet."""
        self.filename = filename
        self.zip_file = None
        self.book = {}
        self.xml = None

    def read_fb2(self):
        """Load and parse the book, filling ``self.xml`` and ``self.book``.

        Returns:
            bool: True when the book was parsed, False when the file
            cannot be processed (unreadable file, archive without an
            ``.fb2`` member, or any error while parsing).
        """
        try:
            book_text = self._read_book_text()
            if book_text is None:
                return False  # Archive without any .fb2 member

            self.xml = BeautifulSoup(book_text, 'lxml')
            self.generate_book_metadata()
        except Exception:
            # Any failure here means "this file cannot be processed".
            # Exception (rather than a bare except) keeps KeyboardInterrupt
            # and SystemExit propagating.
            return False

        return True

    def _read_book_text(self):
        """Return the raw bytes of the book, or None if a zip has no .fb2."""
        if self.filename.endswith('.fb2.zip'):
            with zipfile.ZipFile(
                    self.filename, mode='r', allowZip64=True) as this_book:
                for member in this_book.filelist:
                    if os.path.splitext(member.filename)[1] == '.fb2':
                        return this_book.read(member.filename)
            return None

        # Read bytes, not text: the encoding is declared inside the XML
        # prolog, so decoding is left to the parser instead of the locale.
        with open(self.filename, 'rb') as book_file:
            return book_file.read()

    def _title_from_filename(self):
        """Return the file name without its ``.fb2`` / ``.fb2.zip`` suffix."""
        base_name = os.path.basename(self.filename)
        for suffix in ('.fb2.zip', '.fb2'):
            if base_name.endswith(suffix) and len(base_name) > len(suffix):
                return base_name[:-len(suffix)]
        return os.path.splitext(base_name)[0]

    def generate_book_metadata(self):
        """Populate ``self.book`` from the parsed document in ``self.xml``.

        Metadata other than the cover is filled with placeholder values;
        the title defaults to the file name.
        """
        self.book['title'] = self._title_from_filename()
        self.book['author'] = 'Unknown'
        self.book['isbn'] = None
        self.book['tags'] = None
        self.book['cover'] = None
        self.book['year'] = 9999
        self.book['book_list'] = []

        # TODO
        # Look for other components of book metadata here
        for section in self.xml.find_all('section'):
            # Only titles that are direct children name this section
            for title in section.find_all('title', recursive=False):
                self.book['book_list'].append((title.text, str(section)))

        # Cover Image
        # A book without a <coverpage> is still a valid book
        cover_image_name = None
        cover_image_xml = self.xml.find('coverpage')
        if cover_image_xml is not None:
            # find_all(True) yields tags only, skipping whitespace text nodes
            for image in cover_image_xml.find_all(True, recursive=False):
                href = image.get('l:href') or image.get('xlink:href')
                if href:
                    cover_image_name = href

        if not cover_image_name:
            return

        for binary in self.xml.find_all('binary'):
            # TODO
            # Account for other images as well
            binary_id = binary.get('id')
            if binary_id and cover_image_name.endswith(binary_id):
                self.book['cover'] = base64.decodebytes(binary.text.encode())
e4fd72d..b016798 100644 --- a/lector/sorter.py +++ b/lector/sorter.py @@ -50,6 +50,7 @@ from lector import database from lector.parsers.epub import ParseEPUB from lector.parsers.mobi import ParseMOBI +from lector.parsers.fb2 import ParseFB2 from lector.parsers.comicbooks import ParseCOMIC sorter = { @@ -59,6 +60,8 @@ sorter = { 'azw3': ParseMOBI, 'azw4': ParseMOBI, 'prc': ParseMOBI, + 'fb2': ParseFB2, + 'fb2.zip': ParseFB2, 'cbz': ParseCOMIC, 'cbr': ParseCOMIC} @@ -172,7 +175,8 @@ class BookSorter: print(f'{os.path.basename(filename)} is already in database') return - file_extension = os.path.splitext(filename)[1][1:] + # Using os.extsep like so allows for file extensions with multiple dots + file_extension = os.path.basename(filename).split(os.extsep, 1)[1] try: # Get the requisite parser from the sorter dict book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)