Speed up file addition
Improve fb2 parser; fix extension checking
This commit is contained in:
		
							
								
								
									
										5
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								TODO
									
									
									
									
									
								
							| @@ -77,6 +77,8 @@ TODO | ||||
|         ✓ mobi, azw support | ||||
|             Limit the extra files produced by KindleUnpack | ||||
|                 Have them save to memory | ||||
|         ✓ fb2 support | ||||
|             Images need to show up in their placeholders | ||||
|     Other: | ||||
|         ✓ Define every widget in code | ||||
|     Bugs: | ||||
| @@ -84,7 +86,6 @@ TODO | ||||
|         Clean up 'switch' page layout | ||||
|         Colors aren't loaded properly for annotation previews | ||||
|         Cover page shouldn't be scrolled midway | ||||
|         It's possible the addition function is also parsing the whole book. | ||||
|  | ||||
|     Secondary: | ||||
|         Graphical themes | ||||
| @@ -98,7 +99,7 @@ TODO | ||||
|         Use embedded fonts + CSS | ||||
|         Scrolling: Smooth / By Line | ||||
|         Shift to logging instead of print statements | ||||
|         txt, doc, chm, djvu, fb2 support | ||||
|         txt, doc, chm, djvu support | ||||
|         Include icons for filetype emblems | ||||
|         Comic view modes | ||||
|             Continuous paging | ||||
|   | ||||
| @@ -58,6 +58,7 @@ class ParseEPUB: | ||||
|     def get_contents(self): | ||||
|         zipfile.ZipFile(self.filename).extractall(self.extract_path) | ||||
|  | ||||
|         self.book_ref.parse_toc() | ||||
|         self.book_ref.parse_chapters(temp_dir=self.extract_path) | ||||
|         file_settings = { | ||||
|             'images_only': False} | ||||
|   | ||||
| @@ -15,7 +15,6 @@ | ||||
| # along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
|  | ||||
| import os | ||||
| import zipfile | ||||
|  | ||||
| from lector.readers.read_fb2 import FB2 | ||||
|  | ||||
| @@ -56,12 +55,8 @@ class ParseFB2: | ||||
|         return self.book['tags'] | ||||
|  | ||||
|     def get_contents(self): | ||||
|         # TODO | ||||
|         # Make this save images to the temp path | ||||
|         # Relative file paths should then point there | ||||
|         # zipfile.ZipFile(self.filename).extractall(self.extract_path) | ||||
|  | ||||
|         # self.book_ref.parse_chapters(temp_dir=self.extract_path) | ||||
|         os.makedirs(self.extract_path, exist_ok=True)  # Manual creation is required here | ||||
|         self.book_ref.parse_chapters(temp_dir=self.extract_path) | ||||
|         file_settings = { | ||||
|             'images_only': False} | ||||
|         return self.book['book_list'], file_settings | ||||
|   | ||||
| @@ -40,7 +40,6 @@ class EPUB: | ||||
|                 return False  # No (valid) opf was found so processing cannot continue | ||||
|  | ||||
|             self.generate_book_metadata(contents_path) | ||||
|             self.parse_toc() | ||||
|         except:  # Not specifying an exception type here may be justified | ||||
|             return False | ||||
|  | ||||
|   | ||||
| @@ -48,25 +48,29 @@ class FB2: | ||||
|         return True | ||||
|  | ||||
|     def generate_book_metadata(self): | ||||
|         self.book['title'] = os.path.splitext( | ||||
|             os.path.basename(self.filename))[0] | ||||
|         self.book['author'] = 'Unknown' | ||||
|         self.book['isbn'] = None | ||||
|         self.book['tags'] = None | ||||
|         self.book['cover'] = None | ||||
|         self.book['year'] = 9999 | ||||
|         self.book['book_list'] = [] | ||||
|  | ||||
|         # TODO | ||||
|         # Look for other components of book metadata here | ||||
|         for i in self.xml.find_all(): | ||||
|         # All metadata can be parsed in one pass | ||||
|         all_tags = self.xml.find('description') | ||||
|  | ||||
|             if i.name == 'section': | ||||
|                 for j in i: | ||||
|                     if j.name == 'title': | ||||
|                         this_title = j.text | ||||
|                 self.book['book_list'].append( | ||||
|                     (this_title, str(i))) | ||||
|         self.book['title'] = all_tags.find('book-title').text | ||||
|         if self.book['title'] == '' or self.book['title'] is None: | ||||
|             self.book['title'] = os.path.splitext( | ||||
|                 os.path.basename(self.filename))[0] | ||||
|  | ||||
|         self.book['author'] = all_tags.find('author').getText(separator=' ').replace('\n', ' ') | ||||
|         if self.book['author'] == '' or self.book['author'] is None: | ||||
|             self.book['author'] = 'Unknown' | ||||
|  | ||||
|         # TODO | ||||
|         # Account for other date formats | ||||
|         try: | ||||
|             self.book['year'] = int(all_tags.find('date').text) | ||||
|         except ValueError: | ||||
|             self.book['year'] = 9999 | ||||
|  | ||||
|         # Cover Image | ||||
|         cover_image_xml = self.xml.find('coverpage') | ||||
| @@ -75,8 +79,26 @@ class FB2: | ||||
|  | ||||
|         cover_image_data = self.xml.find_all('binary') | ||||
|         for i in cover_image_data: | ||||
|  | ||||
|             # TODO | ||||
|             # Account for other images as well | ||||
|             if cover_image_name.endswith(i.get('id')): | ||||
|                 self.book['cover'] = base64.decodebytes(i.text.encode()) | ||||
|  | ||||
|     def parse_chapters(self, temp_dir): | ||||
|         # There's no need to parse the TOC separately because | ||||
|         # everything is linear | ||||
|         for i in self.xml.find_all('section'): | ||||
|             for j in i: | ||||
|                 if j.name == 'title': | ||||
|                     this_title = j.getText(separator=' ') | ||||
|             self.book['book_list'].append( | ||||
|                 (this_title, str(i))) | ||||
|  | ||||
|         # Extract all images to the temp_dir | ||||
|         for i in self.xml.find_all('binary'): | ||||
|             this_image_name = i.get('id') | ||||
|             this_image_path = os.path.join(temp_dir, this_image_name) | ||||
|             try: | ||||
|                 this_image_data = base64.decodebytes(i.text.encode()) | ||||
|                 with open(this_image_path, 'wb') as outimage: | ||||
|                     outimage.write(this_image_data) | ||||
|             except AttributeError: | ||||
|                 pass | ||||
|   | ||||
| @@ -175,15 +175,21 @@ class BookSorter: | ||||
|                     print(f'{os.path.basename(filename)} is already in database') | ||||
|                 return | ||||
|  | ||||
|         # Using os.extsep like so allows for file extensions with multiple dots | ||||
|         file_extension = os.path.basename(filename).split(os.extsep, 1)[1] | ||||
|         try: | ||||
|             # Get the requisite parser from the sorter dict | ||||
|             book_ref = sorter[file_extension](filename, self.temp_dir, file_md5) | ||||
|         except KeyError: | ||||
|         # This allows for eliminating issues with filenames that have | ||||
|         # a dot in them. All hail the roundabout fix. | ||||
|         valid_extension = False | ||||
|         for i in sorter: | ||||
|             if os.path.basename(filename).endswith(i): | ||||
|                 file_extension = i | ||||
|                 valid_extension = True | ||||
|                 break | ||||
|  | ||||
|         if not valid_extension: | ||||
|             print(filename + ' has an unsupported extension') | ||||
|             return | ||||
|  | ||||
|         book_ref = sorter[file_extension](filename, self.temp_dir, file_md5) | ||||
|  | ||||
|         # Everything following this is standard | ||||
|         # None values are accounted for here | ||||
|         book_ref.read_book() | ||||
|   | ||||
		Reference in New Issue
	
	Block a user