cbz parsing
This commit is contained in:
13
__main__.py
13
__main__.py
@@ -52,6 +52,7 @@
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import shutil
|
||||||
|
|
||||||
from PyQt5 import QtWidgets, QtGui, QtCore
|
from PyQt5 import QtWidgets, QtGui, QtCore
|
||||||
|
|
||||||
@@ -164,7 +165,7 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
|
|||||||
self.statusMessage.setText('Adding books...')
|
self.statusMessage.setText('Adding books...')
|
||||||
my_file = QtWidgets.QFileDialog.getOpenFileNames(
|
my_file = QtWidgets.QFileDialog.getOpenFileNames(
|
||||||
self, 'Open file', self.last_open_path,
|
self, 'Open file', self.last_open_path,
|
||||||
"eBooks (*.epub *.mobi *.aws *.txt *.pdf *.fb2 *.djvu)")
|
"eBooks (*.epub *.mobi *.aws *.txt *.pdf *.fb2 *.djvu *.cbz)")
|
||||||
if my_file[0]:
|
if my_file[0]:
|
||||||
self.listView.setEnabled(False)
|
self.listView.setEnabled(False)
|
||||||
self.last_open_path = os.path.dirname(my_file[0][0])
|
self.last_open_path = os.path.dirname(my_file[0][0])
|
||||||
@@ -295,10 +296,18 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
|
|||||||
# print(tab_ref.book_metadata) # Metadata upon tab creation
|
# print(tab_ref.book_metadata) # Metadata upon tab creation
|
||||||
|
|
||||||
def close_tab(self, tab_index):
    """Close the tab at ``tab_index``, deleting its temporary directory if any.

    The tab widget's ``metadata['temp_dir']`` is read; when it is set, the
    directory is removed before the tab itself is removed.
    """
    temp_dir = self.tabWidget.widget(tab_index).metadata['temp_dir']
    if temp_dir:
        # Cleanup is best-effort: a directory that is already gone (or a
        # file that cannot be deleted) must not prevent the tab from closing.
        shutil.rmtree(temp_dir, ignore_errors=True)
    self.tabWidget.removeTab(tab_index)
|
||||||
|
|
||||||
def closeEvent(self, event=None):
|
def closeEvent(self, event=None):
|
||||||
|
# All tabs must be iterated upon here
|
||||||
|
for i in range(1, self.tabWidget.count()):
|
||||||
|
tab_metadata = self.tabWidget.widget(i).metadata
|
||||||
|
if tab_metadata['temp_dir']:
|
||||||
|
shutil.rmtree(tab_metadata['temp_dir'])
|
||||||
|
|
||||||
Settings(self).save_settings()
|
Settings(self).save_settings()
|
||||||
QtWidgets.qApp.exit()
|
QtWidgets.qApp.exit()
|
||||||
|
|
||||||
|
@@ -45,8 +45,6 @@ class DatabaseFunctions:
|
|||||||
title = i[1]['title']
|
title = i[1]['title']
|
||||||
author = i[1]['author']
|
author = i[1]['author']
|
||||||
year = i[1]['year']
|
year = i[1]['year']
|
||||||
if not year:
|
|
||||||
year = 9999
|
|
||||||
path = i[1]['path']
|
path = i[1]['path']
|
||||||
cover = i[1]['cover_image']
|
cover = i[1]['cover_image']
|
||||||
isbn = i[1]['isbn']
|
isbn = i[1]['isbn']
|
||||||
|
62
parsers/cbz.py
Normal file
62
parsers/cbz.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import zipfile
|
||||||
|
import tempfile
|
||||||
|
import collections
|
||||||
|
|
||||||
|
|
||||||
|
class ParseCBZ:
    """Parser for comic book archives (.cbz): zip files whose entries are pages.

    Exposes read_book(), get_title(), get_author(), get_year(),
    get_cover_image(), get_isbn() and get_contents(). ``self.book`` stays
    None until read_book() succeeds, so callers can test it before using
    the other methods.
    """

    def __init__(self, filename):
        # TODO
        # Maybe also include book description
        self.filename = filename
        self.book = None

    def read_book(self):
        """Open the archive; on failure print a message and leave book as None."""
        try:
            self.book = zipfile.ZipFile(
                self.filename, mode='r', allowZip64=True)
        except (KeyError, AttributeError, FileNotFoundError,
                zipfile.BadZipFile):
            print('Cannot parse ' + self.filename)
            return

    def _page_entries(self):
        """Return the archive's file entries in archive order.

        Directory entries (names ending in '/') are dropped: they carry no
        image data and must not be treated as pages or covers.
        """
        return [
            entry for entry in self.book.infolist()
            if not entry.filename.endswith('/')]

    def get_title(self):
        """Return the archive's file name without directory or extension."""
        filename = os.path.basename(self.book.filename)
        return os.path.splitext(filename)[0]

    def get_author(self):
        """Return None: no author is read from the archive."""
        return None

    def get_year(self):
        """Return the year of the file's ctime (local time) as a string."""
        creation_time = time.localtime(os.path.getctime(self.filename))
        return str(creation_time.tm_year)

    def get_cover_image(self):
        """Return the bytes of the first file in the archive, or None if empty."""
        pages = self._page_entries()
        if not pages:
            return None
        return self.book.read(pages[0])

    def get_isbn(self):
        """Return None: no ISBN is read from the archive."""
        return None

    def get_contents(self):
        """Extract every page to a new temporary directory.

        Returns a tuple ``(contents, tmp_dir)``. ``contents`` is an
        OrderedDict mapping 'Page N' to an ``<img>`` tag pointing at the
        extracted file; ``tmp_dir`` is the directory holding the extracted
        files, which the caller is responsible for deleting.
        """
        # This is a brute force approach
        # Maybe try reading from the file as everything
        # matures a little bit more
        tmp_dir = tempfile.mkdtemp()

        contents = collections.OrderedDict()
        for count, entry in enumerate(self._page_entries()):
            # extract() returns the path it actually wrote to, which can
            # differ from join(tmp_dir, entry.filename) once the member
            # name has been sanitised.
            image_path = self.book.extract(entry, path=tmp_dir)
            page_name = 'Page ' + str(count + 1)
            # This does image returns.
            # TODO
            # Image resizing, formatting
            contents[page_name] = "<img src='%s'/>" % image_path

        return contents, tmp_dir
|
@@ -5,8 +5,8 @@
|
|||||||
# get_author()
|
# get_author()
|
||||||
# get_year()
|
# get_year()
|
||||||
# get_cover_image()
|
# get_cover_image()
|
||||||
# get_isbn
|
# get_isbn()
|
||||||
# TODO More for get contents, get TOC
|
# get_contents() - Should return a tuple with 0: TOC 1: Deletable temp_directory
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@@ -89,7 +89,7 @@ class ParseEPUB:
|
|||||||
return image_content
|
return image_content
|
||||||
|
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return
|
return None
|
||||||
|
|
||||||
def get_isbn(self):
|
def get_isbn(self):
|
||||||
try:
|
try:
|
||||||
@@ -100,7 +100,7 @@ class ParseEPUB:
|
|||||||
isbn = i[0]
|
isbn = i[0]
|
||||||
return isbn
|
return isbn
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return
|
return None
|
||||||
|
|
||||||
def get_contents(self):
|
def get_contents(self):
|
||||||
contents = collections.OrderedDict()
|
contents = collections.OrderedDict()
|
||||||
@@ -137,4 +137,6 @@ class ParseEPUB:
|
|||||||
raise AttributeError
|
raise AttributeError
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
contents[title] = ''
|
contents[title] = ''
|
||||||
return contents
|
|
||||||
|
# Index 1 of the returned tuple is a temporary directory to clean up (None if there is none)
|
||||||
|
return contents, None
|
||||||
|
39
sorter.py
39
sorter.py
@@ -1,7 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
# TODO
|
# TODO
|
||||||
# Methods that return None must be quantified within the parsing module
|
|
||||||
# See if tags can be generated from book content
|
# See if tags can be generated from book content
|
||||||
# See if you want to include a hash of the book's name and author
|
# See if you want to include a hash of the book's name and author
|
||||||
|
|
||||||
@@ -10,7 +9,17 @@ import hashlib
|
|||||||
from multiprocessing.dummy import Pool
|
from multiprocessing.dummy import Pool
|
||||||
|
|
||||||
import database
|
import database
|
||||||
|
|
||||||
|
# Every parser is supposed to have the following methods, even if they return None:
|
||||||
|
# read_book()
|
||||||
|
# get_title()
|
||||||
|
# get_author()
|
||||||
|
# get_year()
|
||||||
|
# get_cover_image()
|
||||||
|
# get_isbn()
|
||||||
|
# get_contents() - Should return a tuple with 0: TOC 1: Deletable temp_directory
|
||||||
from parsers.epub import ParseEPUB
|
from parsers.epub import ParseEPUB
|
||||||
|
from parsers.cbz import ParseCBZ
|
||||||
|
|
||||||
|
|
||||||
class BookSorter:
|
class BookSorter:
|
||||||
@@ -65,27 +74,36 @@ class BookSorter:
|
|||||||
and (file_md5 in self.all_books.items() or file_md5 in self.hashes)):
|
and (file_md5 in self.all_books.items() or file_md5 in self.hashes)):
|
||||||
return
|
return
|
||||||
|
|
||||||
# Select sorter by file extension
|
# SORTING TAKES PLACE HERE
|
||||||
try:
|
try:
|
||||||
file_extension = os.path.splitext(filename)[1][1:]
|
file_extension = os.path.splitext(filename)[1][1:]
|
||||||
if file_extension == 'epub':
|
if file_extension == 'epub':
|
||||||
book_ref = ParseEPUB(filename)
|
book_ref = ParseEPUB(filename)
|
||||||
|
if file_extension == 'cbz':
|
||||||
|
book_ref = ParseCBZ(filename)
|
||||||
except IndexError:
|
except IndexError:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Everything following this is standard
|
# Everything following this is standard
|
||||||
# Some of the None returns will have to have
|
# None values are accounted for here
|
||||||
# values associated with them, though
|
|
||||||
book_ref.read_book()
|
book_ref.read_book()
|
||||||
if book_ref.book:
|
if book_ref.book:
|
||||||
title = book_ref.get_title()
|
title = book_ref.get_title().title()
|
||||||
author = book_ref.get_author()
|
author = book_ref.get_author()
|
||||||
|
if not author:
|
||||||
|
author = 'Unknown'
|
||||||
year = book_ref.get_year()
|
year = book_ref.get_year()
|
||||||
|
if not year:
|
||||||
|
year = 9999
|
||||||
isbn = book_ref.get_isbn()
|
isbn = book_ref.get_isbn()
|
||||||
|
|
||||||
# Different modes require different values
|
# Different modes require different values
|
||||||
if self.mode == 'addition':
|
if self.mode == 'addition':
|
||||||
cover_image = book_ref.get_cover_image()
|
cover_image = book_ref.get_cover_image()
|
||||||
|
# TODO
|
||||||
|
if not cover_image:
|
||||||
|
pass
|
||||||
|
|
||||||
self.all_books[file_md5] = {
|
self.all_books[file_md5] = {
|
||||||
'title': title,
|
'title': title,
|
||||||
'author': author,
|
'author': author,
|
||||||
@@ -95,7 +113,13 @@ class BookSorter:
|
|||||||
'cover_image': cover_image}
|
'cover_image': cover_image}
|
||||||
|
|
||||||
if self.mode == 'reading':
|
if self.mode == 'reading':
|
||||||
content = book_ref.get_contents()
|
all_content = book_ref.get_contents()
|
||||||
|
content = all_content[0]
|
||||||
|
temp_dir = all_content[1]
|
||||||
|
|
||||||
|
if not content.keys():
|
||||||
|
content['Invalid'] = 'Possible Parse Error'
|
||||||
|
|
||||||
position = self.database_position(file_md5)
|
position = self.database_position(file_md5)
|
||||||
self.all_books = {
|
self.all_books = {
|
||||||
'title': title,
|
'title': title,
|
||||||
@@ -105,7 +129,8 @@ class BookSorter:
|
|||||||
'hash': file_md5,
|
'hash': file_md5,
|
||||||
'path': filename,
|
'path': filename,
|
||||||
'position': position,
|
'position': position,
|
||||||
'content': content}
|
'content': content,
|
||||||
|
'temp_dir': temp_dir}
|
||||||
|
|
||||||
|
|
||||||
def initiate_threads(self):
|
def initiate_threads(self):
|
||||||
|
@@ -12,8 +12,10 @@ class BookToolBar(QtWidgets.QToolBar):
|
|||||||
QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding)
|
QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding)
|
||||||
|
|
||||||
# Size policy
|
# Size policy
|
||||||
|
# TODO
|
||||||
|
# Prevent resizing
|
||||||
sizePolicy = QtWidgets.QSizePolicy(
|
sizePolicy = QtWidgets.QSizePolicy(
|
||||||
QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed)
|
QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
|
||||||
|
|
||||||
self.setMovable(False)
|
self.setMovable(False)
|
||||||
self.setIconSize(QtCore.QSize(22, 22))
|
self.setIconSize(QtCore.QSize(22, 22))
|
||||||
@@ -218,6 +220,8 @@ class Tab(QtWidgets.QWidget):
|
|||||||
# TODO
|
# TODO
|
||||||
# A horizontal slider to control flow
|
# A horizontal slider to control flow
|
||||||
# Keyboard shortcuts
|
# Keyboard shortcuts
|
||||||
|
# Take hint from a position function argument to open the book
|
||||||
|
# at a specific page
|
||||||
|
|
||||||
# The content display widget is currently a QTextBrowser
|
# The content display widget is currently a QTextBrowser
|
||||||
super(Tab, self).__init__(parent)
|
super(Tab, self).__init__(parent)
|
||||||
|
Reference in New Issue
Block a user