diff --git a/ePub/read_epub.py b/ePub/read_epub.py index 710d82a..be8bc34 100644 --- a/ePub/read_epub.py +++ b/ePub/read_epub.py @@ -28,6 +28,7 @@ class EPUB: self.filename = filename self.zip_file = None self.book = {} + self.book['split_chapters'] = {} def read_epub(self): # This is the function that should error out in @@ -215,26 +216,32 @@ class EPUB: for i in navpoints: chapter_title = i.find('text').text chapter_source = i.find('content').get('src') - chapter_source = unquote(chapter_source.split('#')[0]) - self.book['navpoint_dict'][chapter_source] = chapter_title + chapter_source_file = unquote(chapter_source.split('#')[0]) + + if '#' in chapter_source: + try: + self.book['split_chapters'][chapter_source_file].append( + (chapter_source.split('#')[1], chapter_title)) + except KeyError: + self.book['split_chapters'][chapter_source_file] = [] + self.book['split_chapters'][chapter_source_file].append( + (chapter_source.split('#')[1], chapter_title)) + + self.book['navpoint_dict'][chapter_source_file] = chapter_title def parse_chapters(self, temp_dir=None, split_large_xml=False): no_title_chapter = 0 self.book['book_list'] = [] + for i in self.book['chapters_in_order']: chapter_data = self.read_from_zip(i).decode() - if not split_large_xml: - try: - self.book['book_list'].append( - (self.book['navpoint_dict'][i], chapter_data)) - except KeyError: - fallback_title = str(no_title_chapter) - self.book['book_list'].append( - (fallback_title, chapter_data)) - no_title_chapter += 1 + if i in self.book['split_chapters']: + split_chapters = get_split_content( + chapter_data, self.book['split_chapters'][i]) + self.book['book_list'].extend(split_chapters) - else: + elif split_large_xml: # https://stackoverflow.com/questions/14444732/how-to-split-a-html-page-to-multiple-pages-using-python-and-beautiful-soup markup = BeautifulSoup(chapter_data, 'xml') chapters = [] @@ -259,6 +266,15 @@ class EPUB: self.book['book_list'].append( (fallback_title, this_chapter)) no_title_chapter += 1 + else: + try: + self.book['book_list'].append( + (self.book['navpoint_dict'][i], chapter_data)) + except KeyError: + fallback_title = str(no_title_chapter) + self.book['book_list'].append( + (fallback_title, chapter_data)) + no_title_chapter += 1 cover_path = os.path.join(temp_dir, os.path.basename(self.filename)) + '- cover' if self.book['cover']: @@ -267,3 +283,35 @@ class EPUB: self.book['book_list'][0] = ( 'Cover', f'
Cover
') + + +def get_split_content(chapter_data, split_by): + split_anchors = [i[0] for i in split_by] + chapter_titles = [i[1] for i in split_by] + return_list = [] + + xml = BeautifulSoup(chapter_data, 'lxml') + xml_string = xml.body.prettify() + + for count, i in enumerate(split_anchors): + this_split = xml_string.split(i) + current_chapter = this_split[0] + + bs_obj = BeautifulSoup(current_chapter, 'lxml') + # Since tags correspond to data following them, the first + # chunk will be ignored + # As will all empty chapters + if bs_obj.text == '\n' or bs_obj.text == '' or count == 0: + continue + bs_obj_string = str(bs_obj).replace('">', '', 1) + + return_list.append( + (chapter_titles[count - 1], bs_obj_string)) + xml_string = this_split[1] + + bs_obj = BeautifulSoup(xml_string, 'lxml') + bs_obj_string = str(bs_obj).replace('">', '', 1) + return_list.append( + (chapter_titles[-1], bs_obj_string)) + + return return_list diff --git a/lector/__main__.py b/lector/__main__.py index af58682..497c478 100755 --- a/lector/__main__.py +++ b/lector/__main__.py @@ -189,7 +189,6 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow): # The library refresh button on the Library tab self.reloadLibrary.setIcon(self.QImageFactory.get_image('reload')) - self.reloadLibrary.setIconSize(QtCore.QSize(22, 22)) self.reloadLibrary.setObjectName('reloadLibrary') self.reloadLibrary.setToolTip('Scan library') self.reloadLibrary.setAutoRaise(True)