# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017-2018 BasioMeusPuga

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see https://www.gnu.org/licenses/.

import os
import base64
import zipfile

from bs4 import BeautifulSoup


class FB2:
    """Reader for FictionBook 2 files, either plain .fb2 or zipped .fb2.zip.

    After a successful read_fb2() the parsed document is available as
    self.xml and the extracted metadata as the self.book dictionary.
    """

    def __init__(self, filename):
        """Remember the path of the book; nothing is read until read_fb2()."""
        self.filename = filename
        self.zip_file = None
        self.book = {}
        self.xml = None

    def read_fb2(self):
        """Load and parse the book, then extract its metadata.

        Returns:
            True when the file was read and its metadata generated,
            False when the file is unreadable, is an archive without a
            .fb2 member, or cannot be parsed.
        """
        try:
            book_text = self._read_raw_book()
            if book_text is None:
                return False

            self.xml = BeautifulSoup(book_text, 'lxml')
            self.generate_book_metadata()
        except Exception:
            # Deliberately broad: any failure here means "not a usable
            # book". Exception (rather than a bare except) still lets
            # KeyboardInterrupt and SystemExit propagate.
            return False

        return True

    def _read_raw_book(self):
        """Return the raw bytes of the FB2 document, or None if absent.

        Bytes (not text) are returned in both branches so that the parser
        can honour the encoding declared in the XML prolog instead of
        relying on the locale's default encoding.
        """
        if self.filename.lower().endswith('.fb2.zip'):
            # The context manager guarantees the archive handle is closed.
            with zipfile.ZipFile(
                    self.filename, mode='r', allowZip64=True) as archive:
                for member in archive.infolist():
                    extension = os.path.splitext(member.filename)[1]
                    if extension.lower() == '.fb2':
                        return archive.read(member.filename)
            return None

        with open(self.filename, 'rb') as book_file:
            return book_file.read()

    def generate_book_metadata(self):
        """Fill self.book with title, author, year and cover.

        The keys isbn and tags are always set to None, and book_list is
        reset to an empty list. Missing title, author, date or cover
        elements fall back to the file name, 'Unknown', 9999 and None
        respectively.

        Raises:
            AttributeError: if the document has no description element.
        """
        self.book['isbn'] = None
        self.book['tags'] = None
        self.book['cover'] = None
        self.book['book_list'] = []

        # All metadata can be parsed in one pass
        description = self.xml.find('description')

        title_tag = description.find('book-title')
        title = title_tag.text.strip() if title_tag is not None else ''
        if not title:
            title = os.path.splitext(os.path.basename(self.filename))[0]
        self.book['title'] = title

        author_tag = description.find('author')
        author = ''
        if author_tag is not None:
            # Collapse newlines and indentation between the name parts
            author = ' '.join(author_tag.getText(separator=' ').split())
        self.book['author'] = author or 'Unknown'

        # TODO
        # Account for other date formats
        try:
            self.book['year'] = int(description.find('date').text)
        except (AttributeError, ValueError):
            # AttributeError: no date element; ValueError: not a bare year
            self.book['year'] = 9999

        self.book['cover'] = self._find_cover()

    def _find_cover(self):
        """Return the decoded cover image bytes, or None if unavailable."""
        coverpage = self.xml.find('coverpage')
        if coverpage is None:
            return None

        # find_all(True) yields tags only, skipping the whitespace text
        # nodes that surround the image element in pretty-printed files.
        # As before, the last reference found wins.
        cover_href = None
        for tag in coverpage.find_all(True):
            href = tag.get('l:href') or tag.get('xlink:href')
            if href:
                cover_href = href
        if not cover_href:
            return None

        # The reference is a local link of the form "#binary-id"
        cover_id = cover_href.lstrip('#')
        for binary in self.xml.find_all('binary'):
            if binary.get('id') == cover_id:
                try:
                    return base64.decodebytes(binary.text.encode())
                except ValueError:
                    # binascii.Error (a ValueError): corrupt base64 data.
                    # A broken cover should not make the book unreadable.
                    return None
        return None

    def parse_chapters(self, temp_dir):
        """Collect chapters and extract embedded images.

        Every section element with a direct title child is appended to
        self.book['book_list'] as a (title, section markup) tuple, so
        generate_book_metadata() must have run first. Each binary element
        is decoded and written to temp_dir under its id.

        Args:
            temp_dir: existing directory that receives the image files.
        """
        # There's no need to parse the TOC separately because
        # everything is linear
        for section in self.xml.find_all('section'):
            for title_tag in section.find_all('title', recursive=False):
                this_title = title_tag.getText(separator=' ')
                self.book['book_list'].append(
                    (this_title, str(section)))

        # Extract all images to the temp_dir
        for binary in self.xml.find_all('binary'):
            image_name = binary.get('id')
            if not image_name or image_name in (os.curdir, os.pardir):
                continue
            # The id comes from the book file itself; refuse anything with
            # a path component so nothing is written outside temp_dir.
            if os.path.basename(image_name) != image_name:
                continue

            try:
                image_data = base64.decodebytes(binary.text.encode())
            except (AttributeError, ValueError):
                # Skip undecodable images instead of aborting the book
                continue

            image_path = os.path.join(temp_dir, image_name)
            with open(image_path, 'wb') as outimage:
                outimage.write(image_data)