cbz parsing
This commit is contained in:
		
							
								
								
									
										13
									
								
								__main__.py
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								__main__.py
									
									
									
									
									
								
							| @@ -52,6 +52,7 @@ | |||||||
|  |  | ||||||
| import os | import os | ||||||
| import sys | import sys | ||||||
|  | import shutil | ||||||
|  |  | ||||||
| from PyQt5 import QtWidgets, QtGui, QtCore | from PyQt5 import QtWidgets, QtGui, QtCore | ||||||
|  |  | ||||||
| @@ -164,7 +165,7 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow): | |||||||
|         self.statusMessage.setText('Adding books...') |         self.statusMessage.setText('Adding books...') | ||||||
|         my_file = QtWidgets.QFileDialog.getOpenFileNames( |         my_file = QtWidgets.QFileDialog.getOpenFileNames( | ||||||
|             self, 'Open file', self.last_open_path, |             self, 'Open file', self.last_open_path, | ||||||
|             "eBooks (*.epub *.mobi *.aws *.txt *.pdf *.fb2 *.djvu)") |             "eBooks (*.epub *.mobi *.aws *.txt *.pdf *.fb2 *.djvu *.cbz)") | ||||||
|         if my_file[0]: |         if my_file[0]: | ||||||
|             self.listView.setEnabled(False) |             self.listView.setEnabled(False) | ||||||
|             self.last_open_path = os.path.dirname(my_file[0][0]) |             self.last_open_path = os.path.dirname(my_file[0][0]) | ||||||
| @@ -295,10 +296,18 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow): | |||||||
|         # print(tab_ref.book_metadata)  # Metadata upon tab creation |         # print(tab_ref.book_metadata)  # Metadata upon tab creation | ||||||
|  |  | ||||||
|     def close_tab(self, tab_index): |     def close_tab(self, tab_index): | ||||||
|         # print(self.tabWidget.widget(tab_index).metadata)  # Metadata upon tab deletion |         temp_dir = self.tabWidget.widget(tab_index).metadata['temp_dir'] | ||||||
|  |         if temp_dir: | ||||||
|  |             shutil.rmtree(temp_dir) | ||||||
|         self.tabWidget.removeTab(tab_index) |         self.tabWidget.removeTab(tab_index) | ||||||
|  |  | ||||||
|     def closeEvent(self, event=None): |     def closeEvent(self, event=None): | ||||||
|  |         # All tabs must be iterated over here | ||||||
|  |         for i in range(1, self.tabWidget.count()): | ||||||
|  |             tab_metadata = self.tabWidget.widget(i).metadata | ||||||
|  |             if tab_metadata['temp_dir']: | ||||||
|  |                 shutil.rmtree(tab_metadata['temp_dir']) | ||||||
|  |  | ||||||
|         Settings(self).save_settings() |         Settings(self).save_settings() | ||||||
|         QtWidgets.qApp.exit() |         QtWidgets.qApp.exit() | ||||||
|  |  | ||||||
|   | |||||||
| @@ -45,8 +45,6 @@ class DatabaseFunctions: | |||||||
|             title = i[1]['title'] |             title = i[1]['title'] | ||||||
|             author = i[1]['author'] |             author = i[1]['author'] | ||||||
|             year = i[1]['year'] |             year = i[1]['year'] | ||||||
|             if not year: |  | ||||||
|                 year = 9999 |  | ||||||
|             path = i[1]['path'] |             path = i[1]['path'] | ||||||
|             cover = i[1]['cover_image'] |             cover = i[1]['cover_image'] | ||||||
|             isbn = i[1]['isbn'] |             isbn = i[1]['isbn'] | ||||||
|   | |||||||
							
								
								
									
										62
									
								
								parsers/cbz.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										62
									
								
								parsers/cbz.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,62 @@ | |||||||
|  | #!/usr/bin/env python3 | ||||||
|  |  | ||||||
|  | import os | ||||||
|  | import time | ||||||
|  | import zipfile | ||||||
|  | import tempfile | ||||||
|  | import collections | ||||||
|  |  | ||||||
|  |  | ||||||
|  | class ParseCBZ: | ||||||
|  |     def __init__(self, filename): | ||||||
|  |         # TODO | ||||||
|  |         # Maybe also include book description | ||||||
|  |         self.filename = filename | ||||||
|  |         self.book = None | ||||||
|  |  | ||||||
|  |     def read_book(self): | ||||||
|  |         try: | ||||||
|  |             self.book = zipfile.ZipFile(self.filename, mode='r', allowZip64=True) | ||||||
|  |         except (KeyError, AttributeError, FileNotFoundError, zipfile.BadZipFile): | ||||||
|  |             print('Cannot parse ' + self.filename) | ||||||
|  |             return | ||||||
|  |  | ||||||
|  |     def get_title(self): | ||||||
|  |         filename = os.path.basename(self.book.filename) | ||||||
|  |         filename_proper = os.path.splitext(filename)[0] | ||||||
|  |         return filename_proper | ||||||
|  |  | ||||||
|  |     def get_author(self): | ||||||
|  |         return None | ||||||
|  |  | ||||||
|  |     def get_year(self): | ||||||
|  |         creation_time = time.ctime(os.path.getctime(self.filename)) | ||||||
|  |         creation_year = creation_time.split()[-1] | ||||||
|  |         return creation_year | ||||||
|  |  | ||||||
|  |     def get_cover_image(self): | ||||||
|  |         cover_image_info = self.book.infolist()[0] | ||||||
|  |         cover_image = self.book.read(cover_image_info) | ||||||
|  |         return cover_image | ||||||
|  |  | ||||||
|  |     def get_isbn(self): | ||||||
|  |         return None | ||||||
|  |  | ||||||
|  |     def get_contents(self): | ||||||
|  |         contents = collections.OrderedDict() | ||||||
|  |         # This is a brute force approach | ||||||
|  |         # Maybe try reading from the file as everything | ||||||
|  |         # matures a little bit more | ||||||
|  |         tmp_dir = tempfile.mkdtemp() | ||||||
|  |  | ||||||
|  |         contents = collections.OrderedDict() | ||||||
|  |         for count, i in enumerate(self.book.infolist()): | ||||||
|  |             self.book.extract(i, path=tmp_dir) | ||||||
|  |             page_name = 'Page ' + str(count + 1) | ||||||
|  |             image_path = os.path.join(tmp_dir, i.filename) | ||||||
|  |             # Each page is returned as an HTML img tag pointing at the extracted image file. | ||||||
|  |             # TODO | ||||||
|  |             # Image resizing, formatting | ||||||
|  |             # Cleanup after exit | ||||||
|  |             contents[page_name] = "<img src='%s'/>" % image_path | ||||||
|  |         return contents, tmp_dir | ||||||
| @@ -5,8 +5,8 @@ | |||||||
| # get_author() | # get_author() | ||||||
| # get_year() | # get_year() | ||||||
| # get_cover_image() | # get_cover_image() | ||||||
| # get_isbn | # get_isbn() | ||||||
| # TODO More for get contents, get TOC | # get_contents() - Should return a tuple of (0: TOC, 1: deletable temp directory, or None) | ||||||
|  |  | ||||||
| import os | import os | ||||||
| import re | import re | ||||||
| @@ -89,7 +89,7 @@ class ParseEPUB: | |||||||
|             return image_content |             return image_content | ||||||
|  |  | ||||||
|         except KeyError: |         except KeyError: | ||||||
|             return |             return None | ||||||
|  |  | ||||||
|     def get_isbn(self): |     def get_isbn(self): | ||||||
|         try: |         try: | ||||||
| @@ -100,7 +100,7 @@ class ParseEPUB: | |||||||
|                     isbn = i[0] |                     isbn = i[0] | ||||||
|                     return isbn |                     return isbn | ||||||
|         except KeyError: |         except KeyError: | ||||||
|             return |             return None | ||||||
|  |  | ||||||
|     def get_contents(self): |     def get_contents(self): | ||||||
|         contents = collections.OrderedDict() |         contents = collections.OrderedDict() | ||||||
| @@ -137,4 +137,6 @@ class ParseEPUB: | |||||||
|                         raise AttributeError |                         raise AttributeError | ||||||
|                 except AttributeError: |                 except AttributeError: | ||||||
|                     contents[title] = '' |                     contents[title] = '' | ||||||
|         return contents |  | ||||||
|  |         # Index 1 of the returned tuple is a directory that has to be cleaned up if needed (None here) | ||||||
|  |         return contents, None | ||||||
|   | |||||||
							
								
								
									
										39
									
								
								sorter.py
									
									
									
									
									
								
							
							
						
						
									
										39
									
								
								sorter.py
									
									
									
									
									
								
							| @@ -1,7 +1,6 @@ | |||||||
| #!/usr/bin/env python3 | #!/usr/bin/env python3 | ||||||
|  |  | ||||||
| # TODO | # TODO | ||||||
| # Methods that return None must be quantified within the parsing module |  | ||||||
| # See if tags can be generated from book content | # See if tags can be generated from book content | ||||||
| # See if you want to include a hash of the book's name and author | # See if you want to include a hash of the book's name and author | ||||||
|  |  | ||||||
| @@ -10,7 +9,17 @@ import hashlib | |||||||
| from multiprocessing.dummy import Pool | from multiprocessing.dummy import Pool | ||||||
|  |  | ||||||
| import database | import database | ||||||
|  |  | ||||||
|  | # Every parser is supposed to have the following methods, even if they return None: | ||||||
|  | # read_book() | ||||||
|  | # get_title() | ||||||
|  | # get_author() | ||||||
|  | # get_year() | ||||||
|  | # get_cover_image() | ||||||
|  | # get_isbn() | ||||||
|  | # get_contents() - Should return a tuple of (0: TOC, 1: deletable temp directory, or None) | ||||||
| from parsers.epub import ParseEPUB | from parsers.epub import ParseEPUB | ||||||
|  | from parsers.cbz import ParseCBZ | ||||||
|  |  | ||||||
|  |  | ||||||
| class BookSorter: | class BookSorter: | ||||||
| @@ -65,27 +74,36 @@ class BookSorter: | |||||||
|                 and (file_md5 in self.all_books.items() or file_md5 in self.hashes)): |                 and (file_md5 in self.all_books.items() or file_md5 in self.hashes)): | ||||||
|             return |             return | ||||||
|  |  | ||||||
|         # Select sorter by file extension |         # SORTING TAKES PLACE HERE | ||||||
|         try: |         try: | ||||||
|             file_extension = os.path.splitext(filename)[1][1:] |             file_extension = os.path.splitext(filename)[1][1:] | ||||||
|             if file_extension == 'epub': |             if file_extension == 'epub': | ||||||
|                 book_ref = ParseEPUB(filename) |                 book_ref = ParseEPUB(filename) | ||||||
|  |             if file_extension == 'cbz': | ||||||
|  |                 book_ref = ParseCBZ(filename) | ||||||
|         except IndexError: |         except IndexError: | ||||||
|             return |             return | ||||||
|  |  | ||||||
|         # Everything following this is standard |         # Everything following this is standard | ||||||
|         # Some of the None returns will have to have |         # None values are accounted for here | ||||||
|         # values associated with them, though |  | ||||||
|         book_ref.read_book() |         book_ref.read_book() | ||||||
|         if book_ref.book: |         if book_ref.book: | ||||||
|             title = book_ref.get_title() |             title = book_ref.get_title().title() | ||||||
|             author = book_ref.get_author() |             author = book_ref.get_author() | ||||||
|  |             if not author: | ||||||
|  |                 author = 'Unknown' | ||||||
|             year = book_ref.get_year() |             year = book_ref.get_year() | ||||||
|  |             if not year: | ||||||
|  |                 year = 9999 | ||||||
|             isbn = book_ref.get_isbn() |             isbn = book_ref.get_isbn() | ||||||
|  |  | ||||||
|             # Different modes require different values |             # Different modes require different values | ||||||
|             if self.mode == 'addition': |             if self.mode == 'addition': | ||||||
|                 cover_image = book_ref.get_cover_image() |                 cover_image = book_ref.get_cover_image() | ||||||
|  |                 # TODO | ||||||
|  |                 if not cover_image: | ||||||
|  |                     pass | ||||||
|  |  | ||||||
|                 self.all_books[file_md5] = { |                 self.all_books[file_md5] = { | ||||||
|                     'title': title, |                     'title': title, | ||||||
|                     'author': author, |                     'author': author, | ||||||
| @@ -95,7 +113,13 @@ class BookSorter: | |||||||
|                     'cover_image': cover_image} |                     'cover_image': cover_image} | ||||||
|  |  | ||||||
|             if self.mode == 'reading': |             if self.mode == 'reading': | ||||||
|                 content = book_ref.get_contents() |                 all_content = book_ref.get_contents() | ||||||
|  |                 content = all_content[0] | ||||||
|  |                 temp_dir = all_content[1] | ||||||
|  |  | ||||||
|  |                 if not content.keys(): | ||||||
|  |                     content['Invalid'] = 'Possible Parse Error' | ||||||
|  |  | ||||||
|                 position = self.database_position(file_md5) |                 position = self.database_position(file_md5) | ||||||
|                 self.all_books = { |                 self.all_books = { | ||||||
|                     'title': title, |                     'title': title, | ||||||
| @@ -105,7 +129,8 @@ class BookSorter: | |||||||
|                     'hash': file_md5, |                     'hash': file_md5, | ||||||
|                     'path': filename, |                     'path': filename, | ||||||
|                     'position': position, |                     'position': position, | ||||||
|                     'content': content} |                     'content': content, | ||||||
|  |                     'temp_dir': temp_dir} | ||||||
|  |  | ||||||
|  |  | ||||||
|     def initiate_threads(self): |     def initiate_threads(self): | ||||||
|   | |||||||
| @@ -12,8 +12,10 @@ class BookToolBar(QtWidgets.QToolBar): | |||||||
|             QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding) |             QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding) | ||||||
|  |  | ||||||
|         # Size policy |         # Size policy | ||||||
|  |         # TODO | ||||||
|  |         # Prevent resizing | ||||||
|         sizePolicy = QtWidgets.QSizePolicy( |         sizePolicy = QtWidgets.QSizePolicy( | ||||||
|             QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed) |             QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed) | ||||||
|  |  | ||||||
|         self.setMovable(False) |         self.setMovable(False) | ||||||
|         self.setIconSize(QtCore.QSize(22, 22)) |         self.setIconSize(QtCore.QSize(22, 22)) | ||||||
| @@ -218,6 +220,8 @@ class Tab(QtWidgets.QWidget): | |||||||
|         # TODO |         # TODO | ||||||
|         # A horizontal slider to control flow |         # A horizontal slider to control flow | ||||||
|         # Keyboard shortcuts |         # Keyboard shortcuts | ||||||
|  |         # Take hint from a position function argument to open the book | ||||||
|  |         # at a specific page | ||||||
|  |  | ||||||
|         # The content display widget is currently a QTextBrowser |         # The content display widget is currently a QTextBrowser | ||||||
|         super(Tab, self).__init__(parent) |         super(Tab, self).__init__(parent) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user