Add exception handling for improperly formatted FB2 books

This commit is contained in:
BasioMeusPuga
2018-06-17 10:40:37 -04:00
parent aa093b8cc2
commit 8cb8904e58

View File

@@ -42,7 +42,7 @@ class FB2:
self.xml = BeautifulSoup(book_text, 'lxml') self.xml = BeautifulSoup(book_text, 'lxml')
self.generate_book_metadata() self.generate_book_metadata()
except: # Not specifying an exception type here may be justified except ValueError: # Not specifying an exception type here may be justified
return False return False
return True return True
@@ -50,7 +50,6 @@ class FB2:
def generate_book_metadata(self): def generate_book_metadata(self):
self.book['isbn'] = None self.book['isbn'] = None
self.book['tags'] = None self.book['tags'] = None
self.book['cover'] = None
self.book['book_list'] = [] self.book['book_list'] = []
# All metadata can be parsed in one pass # All metadata can be parsed in one pass
@@ -73,6 +72,7 @@ class FB2:
self.book['year'] = 9999 self.book['year'] = 9999
# Cover Image # Cover Image
try:
cover_image_xml = self.xml.find('coverpage') cover_image_xml = self.xml.find('coverpage')
for i in cover_image_xml: for i in cover_image_xml:
cover_image_name = i.get('l:href') cover_image_name = i.get('l:href')
@@ -81,11 +81,14 @@ class FB2:
for i in cover_image_data: for i in cover_image_data:
if cover_image_name.endswith(i.get('id')): if cover_image_name.endswith(i.get('id')):
self.book['cover'] = base64.decodebytes(i.text.encode()) self.book['cover'] = base64.decodebytes(i.text.encode())
except AttributeError:
self.book['cover'] = None
def parse_chapters(self, temp_dir): def parse_chapters(self, temp_dir):
# There's no need to parse the TOC separately because # There's no need to parse the TOC separately because
# everything is linear # everything is linear
for i in self.xml.find_all('section'): for i in self.xml.find_all('section'):
this_title = '<No title>'
for j in i: for j in i:
if j.name == 'title': if j.name == 'title':
this_title = j.getText(separator=' ') this_title = j.getText(separator=' ')