cbz parsing

This commit is contained in:
BasioMeusPuga
2017-11-12 09:37:39 +05:30
parent 7fbea194c0
commit 405ea3547c
6 changed files with 117 additions and 17 deletions

View File

@@ -52,6 +52,7 @@
import os import os
import sys import sys
import shutil
from PyQt5 import QtWidgets, QtGui, QtCore from PyQt5 import QtWidgets, QtGui, QtCore
@@ -164,7 +165,7 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
self.statusMessage.setText('Adding books...') self.statusMessage.setText('Adding books...')
my_file = QtWidgets.QFileDialog.getOpenFileNames( my_file = QtWidgets.QFileDialog.getOpenFileNames(
self, 'Open file', self.last_open_path, self, 'Open file', self.last_open_path,
"eBooks (*.epub *.mobi *.aws *.txt *.pdf *.fb2 *.djvu)") "eBooks (*.epub *.mobi *.aws *.txt *.pdf *.fb2 *.djvu *.cbz)")
if my_file[0]: if my_file[0]:
self.listView.setEnabled(False) self.listView.setEnabled(False)
self.last_open_path = os.path.dirname(my_file[0][0]) self.last_open_path = os.path.dirname(my_file[0][0])
@@ -295,10 +296,18 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
# print(tab_ref.book_metadata) # Metadata upon tab creation # print(tab_ref.book_metadata) # Metadata upon tab creation
def close_tab(self, tab_index): def close_tab(self, tab_index):
# print(self.tabWidget.widget(tab_index).metadata) # Metadata upon tab deletion temp_dir = self.tabWidget.widget(tab_index).metadata['temp_dir']
if temp_dir:
shutil.rmtree(temp_dir)
self.tabWidget.removeTab(tab_index) self.tabWidget.removeTab(tab_index)
def closeEvent(self, event=None): def closeEvent(self, event=None):
# All tabs must be iterated upon here
for i in range(1, self.tabWidget.count()):
tab_metadata = self.tabWidget.widget(i).metadata
if tab_metadata['temp_dir']:
shutil.rmtree(tab_metadata['temp_dir'])
Settings(self).save_settings() Settings(self).save_settings()
QtWidgets.qApp.exit() QtWidgets.qApp.exit()

View File

@@ -45,8 +45,6 @@ class DatabaseFunctions:
title = i[1]['title'] title = i[1]['title']
author = i[1]['author'] author = i[1]['author']
year = i[1]['year'] year = i[1]['year']
if not year:
year = 9999
path = i[1]['path'] path = i[1]['path']
cover = i[1]['cover_image'] cover = i[1]['cover_image']
isbn = i[1]['isbn'] isbn = i[1]['isbn']

62
parsers/cbz.py Normal file
View File

@@ -0,0 +1,62 @@
#!/usr/bin/env python3
import os
import time
import zipfile
import tempfile
import collections
class ParseCBZ:
    """Parser for comic book archives (.cbz): zip files whose members are page images.

    Provides read_book(), get_title(), get_author(), get_year(),
    get_cover_image(), get_isbn() and get_contents(). Call read_book()
    first; the other methods that touch the archive need self.book set.
    """

    def __init__(self, filename):
        # TODO
        # Maybe also include book description
        self.filename = filename
        # Set to an open zipfile.ZipFile by read_book(); stays None on failure
        self.book = None

    def read_book(self):
        """Open the archive for reading.

        On failure a message is printed and self.book is left as None,
        which callers use to detect an unparseable file.
        """
        try:
            self.book = zipfile.ZipFile(
                self.filename, mode='r', allowZip64=True)
        except (KeyError, AttributeError, OSError, zipfile.BadZipFile):
            # OSError covers FileNotFoundError as well as unreadable files
            print('Cannot parse ' + self.filename)
            return

    def _page_entries(self):
        """Return the archive members that are files, in archive order.

        Directory entries are dropped: they are not pages and cannot be
        used as a cover image.
        """
        return [i for i in self.book.infolist() if not i.is_dir()]

    def get_title(self):
        """Return the archive's file name without directory or extension."""
        filename = os.path.basename(self.book.filename)
        return os.path.splitext(filename)[0]

    def get_author(self):
        """No author information is read from the archive; always None."""
        return None

    def get_year(self):
        """Return the year of the file's ctime as a string, e.g. '2017'.

        os.path.getctime() is the creation time on Windows and the time
        of the last metadata change on Unix.
        """
        changed = time.localtime(os.path.getctime(self.filename))
        return str(changed.tm_year)

    def get_cover_image(self):
        """Return the bytes of the first file in the archive.

        Returns None when the archive holds no files.
        """
        pages = self._page_entries()
        if not pages:
            return None
        return self.book.read(pages[0])

    def get_isbn(self):
        """No ISBN is read from the archive; always None."""
        return None

    def get_contents(self):
        """Extract every page into a fresh temporary directory.

        Returns a tuple of:
            0: OrderedDict mapping 'Page N' to an <img> tag that points
               at the extracted file
            1: path of the temporary directory, which the caller is
               expected to delete once it is done with the pages
        """
        # This is a brute force approach
        # Maybe try reading from the file as everything
        # matures a little bit more
        tmp_dir = tempfile.mkdtemp()
        contents = collections.OrderedDict()

        for count, i in enumerate(self._page_entries()):
            # extract() returns the path it actually wrote to, which can
            # differ from tmp_dir + member name once zipfile has sanitised
            # the member name
            image_path = self.book.extract(i, path=tmp_dir)
            page_name = 'Page ' + str(count + 1)

            # TODO
            # Image resizing, formatting
            # Cleanup after exit
            contents[page_name] = "<img src='%s'/>" % image_path

        return contents, tmp_dir

View File

@@ -5,8 +5,8 @@
# get_author() # get_author()
# get_year() # get_year()
# get_cover_image() # get_cover_image()
# get_isbn # get_isbn()
# TODO More for get contents, get TOC # get_contents() - Should return a tuple with 0: TOC 1: Deletable temp_directory
import os import os
import re import re
@@ -89,7 +89,7 @@ class ParseEPUB:
return image_content return image_content
except KeyError: except KeyError:
return return None
def get_isbn(self): def get_isbn(self):
try: try:
@@ -100,7 +100,7 @@ class ParseEPUB:
isbn = i[0] isbn = i[0]
return isbn return isbn
except KeyError: except KeyError:
return return None
def get_contents(self): def get_contents(self):
contents = collections.OrderedDict() contents = collections.OrderedDict()
@@ -137,4 +137,6 @@ class ParseEPUB:
raise AttributeError raise AttributeError
except AttributeError: except AttributeError:
contents[title] = '' contents[title] = ''
return contents
# Index 1 of the returned tuple is a directory that has to be cleaned up, or None if there is none
return contents, None

View File

@@ -1,7 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# TODO # TODO
# Methods that return None must be quantified within the parsing module
# See if tags can be generated from book content # See if tags can be generated from book content
# See if you want to include a hash of the book's name and author # See if you want to include a hash of the book's name and author
@@ -10,7 +9,17 @@ import hashlib
from multiprocessing.dummy import Pool from multiprocessing.dummy import Pool
import database import database
# Every parser is supposed to have the following methods, even if they return None:
# read_book()
# get_title()
# get_author()
# get_year()
# get_cover_image()
# get_isbn()
# get_contents() - Should return a tuple with 0: TOC 1: Deletable temp_directory
from parsers.epub import ParseEPUB from parsers.epub import ParseEPUB
from parsers.cbz import ParseCBZ
class BookSorter: class BookSorter:
@@ -65,27 +74,36 @@ class BookSorter:
and (file_md5 in self.all_books.items() or file_md5 in self.hashes)): and (file_md5 in self.all_books.items() or file_md5 in self.hashes)):
return return
# Select sorter by file extension # SORTING TAKES PLACE HERE
try: try:
file_extension = os.path.splitext(filename)[1][1:] file_extension = os.path.splitext(filename)[1][1:]
if file_extension == 'epub': if file_extension == 'epub':
book_ref = ParseEPUB(filename) book_ref = ParseEPUB(filename)
if file_extension == 'cbz':
book_ref = ParseCBZ(filename)
except IndexError: except IndexError:
return return
# Everything following this is standard # Everything following this is standard
# Some of the None returns will have to have # None values are accounted for here
# values associated with them, though
book_ref.read_book() book_ref.read_book()
if book_ref.book: if book_ref.book:
title = book_ref.get_title() title = book_ref.get_title().title()
author = book_ref.get_author() author = book_ref.get_author()
if not author:
author = 'Unknown'
year = book_ref.get_year() year = book_ref.get_year()
if not year:
year = 9999
isbn = book_ref.get_isbn() isbn = book_ref.get_isbn()
# Different modes require different values # Different modes require different values
if self.mode == 'addition': if self.mode == 'addition':
cover_image = book_ref.get_cover_image() cover_image = book_ref.get_cover_image()
# TODO
if not cover_image:
pass
self.all_books[file_md5] = { self.all_books[file_md5] = {
'title': title, 'title': title,
'author': author, 'author': author,
@@ -95,7 +113,13 @@ class BookSorter:
'cover_image': cover_image} 'cover_image': cover_image}
if self.mode == 'reading': if self.mode == 'reading':
content = book_ref.get_contents() all_content = book_ref.get_contents()
content = all_content[0]
temp_dir = all_content[1]
if not content.keys():
content['Invalid'] = 'Possible Parse Error'
position = self.database_position(file_md5) position = self.database_position(file_md5)
self.all_books = { self.all_books = {
'title': title, 'title': title,
@@ -105,7 +129,8 @@ class BookSorter:
'hash': file_md5, 'hash': file_md5,
'path': filename, 'path': filename,
'position': position, 'position': position,
'content': content} 'content': content,
'temp_dir': temp_dir}
def initiate_threads(self): def initiate_threads(self):

View File

@@ -12,8 +12,10 @@ class BookToolBar(QtWidgets.QToolBar):
QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding) QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding)
# Size policy # Size policy
# TODO
# Prevent resizing
sizePolicy = QtWidgets.QSizePolicy( sizePolicy = QtWidgets.QSizePolicy(
QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed) QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
self.setMovable(False) self.setMovable(False)
self.setIconSize(QtCore.QSize(22, 22)) self.setIconSize(QtCore.QSize(22, 22))
@@ -218,6 +220,8 @@ class Tab(QtWidgets.QWidget):
# TODO # TODO
# A horizontal slider to control flow # A horizontal slider to control flow
# Keyboard shortcuts # Keyboard shortcuts
# Take hint from a position function argument to open the book
# at a specific page
# The content display widget is currently a QTextBrowser # The content display widget is currently a QTextBrowser
super(Tab, self).__init__(parent) super(Tab, self).__init__(parent)