Exception handling for improperly formatted fb2 books
This commit is contained in:
@@ -42,7 +42,7 @@ class FB2:
|
|||||||
|
|
||||||
self.xml = BeautifulSoup(book_text, 'lxml')
|
self.xml = BeautifulSoup(book_text, 'lxml')
|
||||||
self.generate_book_metadata()
|
self.generate_book_metadata()
|
||||||
except: # Not specifying an exception type here may be justified
|
except ValueError: # Not specifying an exception type here may be justified
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
@@ -50,7 +50,6 @@ class FB2:
|
|||||||
def generate_book_metadata(self):
|
def generate_book_metadata(self):
|
||||||
self.book['isbn'] = None
|
self.book['isbn'] = None
|
||||||
self.book['tags'] = None
|
self.book['tags'] = None
|
||||||
self.book['cover'] = None
|
|
||||||
self.book['book_list'] = []
|
self.book['book_list'] = []
|
||||||
|
|
||||||
# All metadata can be parsed in one pass
|
# All metadata can be parsed in one pass
|
||||||
@@ -73,6 +72,7 @@ class FB2:
|
|||||||
self.book['year'] = 9999
|
self.book['year'] = 9999
|
||||||
|
|
||||||
# Cover Image
|
# Cover Image
|
||||||
|
try:
|
||||||
cover_image_xml = self.xml.find('coverpage')
|
cover_image_xml = self.xml.find('coverpage')
|
||||||
for i in cover_image_xml:
|
for i in cover_image_xml:
|
||||||
cover_image_name = i.get('l:href')
|
cover_image_name = i.get('l:href')
|
||||||
@@ -81,11 +81,14 @@ class FB2:
|
|||||||
for i in cover_image_data:
|
for i in cover_image_data:
|
||||||
if cover_image_name.endswith(i.get('id')):
|
if cover_image_name.endswith(i.get('id')):
|
||||||
self.book['cover'] = base64.decodebytes(i.text.encode())
|
self.book['cover'] = base64.decodebytes(i.text.encode())
|
||||||
|
except AttributeError:
|
||||||
|
self.book['cover'] = None
|
||||||
|
|
||||||
def parse_chapters(self, temp_dir):
|
def parse_chapters(self, temp_dir):
|
||||||
# There's no need to parse the TOC separately because
|
# There's no need to parse the TOC separately because
|
||||||
# everything is linear
|
# everything is linear
|
||||||
for i in self.xml.find_all('section'):
|
for i in self.xml.find_all('section'):
|
||||||
|
this_title = '<No title>'
|
||||||
for j in i:
|
for j in i:
|
||||||
if j.name == 'title':
|
if j.name == 'title':
|
||||||
this_title = j.getText(separator=' ')
|
this_title = j.getText(separator=' ')
|
||||||
|
Reference in New Issue
Block a user