cbz parsing
This commit is contained in:
13
__main__.py
13
__main__.py
@@ -52,6 +52,7 @@
|
||||
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
|
||||
from PyQt5 import QtWidgets, QtGui, QtCore
|
||||
|
||||
@@ -164,7 +165,7 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
|
||||
self.statusMessage.setText('Adding books...')
|
||||
my_file = QtWidgets.QFileDialog.getOpenFileNames(
|
||||
self, 'Open file', self.last_open_path,
|
||||
"eBooks (*.epub *.mobi *.aws *.txt *.pdf *.fb2 *.djvu)")
|
||||
"eBooks (*.epub *.mobi *.aws *.txt *.pdf *.fb2 *.djvu *.cbz)")
|
||||
if my_file[0]:
|
||||
self.listView.setEnabled(False)
|
||||
self.last_open_path = os.path.dirname(my_file[0][0])
|
||||
@@ -295,10 +296,18 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
|
||||
# print(tab_ref.book_metadata) # Metadata upon tab creation
|
||||
|
||||
def close_tab(self, tab_index):
    """Close the tab at ``tab_index`` and delete its temporary directory.

    The tab widget's ``metadata['temp_dir']`` names a directory created
    while the book was parsed; it is falsy when the parser created none.

    Args:
        tab_index: Index of the tab within ``self.tabWidget``.
    """
    # print(self.tabWidget.widget(tab_index).metadata)  # Metadata upon tab deletion
    temp_dir = self.tabWidget.widget(tab_index).metadata['temp_dir']
    if temp_dir:
        # Best-effort cleanup: a directory that is already gone (or cannot
        # be fully removed) must not prevent the tab from being closed.
        shutil.rmtree(temp_dir, ignore_errors=True)
    self.tabWidget.removeTab(tab_index)
|
||||
|
||||
def closeEvent(self, event=None):
    """Clean up every open book tab, save settings, and quit.

    Args:
        event: The close event; it is not inspected here.
    """
    # All tabs must be iterated upon here
    # NOTE(review): index 0 is skipped - presumably the library tab, which
    # carries no book metadata; confirm.
    for i in range(1, self.tabWidget.count()):
        tab_metadata = self.tabWidget.widget(i).metadata
        if tab_metadata['temp_dir']:
            # Best-effort cleanup: a failure to delete one temp directory
            # must not stop the settings from being saved or the app from
            # exiting.
            shutil.rmtree(tab_metadata['temp_dir'], ignore_errors=True)

    Settings(self).save_settings()
    QtWidgets.qApp.exit()
|
||||
|
||||
|
@@ -45,8 +45,6 @@ class DatabaseFunctions:
|
||||
title = i[1]['title']
|
||||
author = i[1]['author']
|
||||
year = i[1]['year']
|
||||
if not year:
|
||||
year = 9999
|
||||
path = i[1]['path']
|
||||
cover = i[1]['cover_image']
|
||||
isbn = i[1]['isbn']
|
||||
|
62
parsers/cbz.py
Normal file
62
parsers/cbz.py
Normal file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
import time
|
||||
import zipfile
|
||||
import tempfile
|
||||
import collections
|
||||
|
||||
|
||||
class ParseCBZ:
    """Parser for comic book archives (``.cbz``): zip files of page images.

    Usage: construct with a path, call ``read_book()``, then check that
    ``self.book`` is not ``None`` before calling any ``get_*`` method.
    """

    # Archive members with one of these (case-insensitive) extensions are
    # treated as pages. Everything else - directory entries, ComicInfo.xml,
    # Thumbs.db and the like - is ignored.
    # NOTE(review): which formats actually render depends on the image
    # plugins available to the display widget; confirm.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')

    def __init__(self, filename):
        """Remember the archive path; nothing is opened until read_book()."""
        # TODO
        # Maybe also include book description
        self.filename = filename
        self.book = None

    def read_book(self):
        """Open the archive and store it in ``self.book``.

        On failure a message is printed and ``self.book`` stays ``None``.
        """
        try:
            self.book = zipfile.ZipFile(self.filename, mode='r', allowZip64=True)
        # OSError covers FileNotFoundError as well as permission problems
        # and paths that turn out to be directories.
        except (KeyError, AttributeError, OSError, zipfile.BadZipFile):
            print('Cannot parse ' + self.filename)
            return

    def _image_entries(self):
        """Return the archive's image members, in archive order."""
        return [
            entry for entry in self.book.infolist()
            if entry.filename.lower().endswith(self.IMAGE_EXTENSIONS)]

    def get_title(self):
        """Return the archive's file name without directory or extension."""
        filename = os.path.basename(self.book.filename)
        filename_proper = os.path.splitext(filename)[0]
        return filename_proper

    def get_author(self):
        """Return None: no author is read from the archive."""
        return None

    def get_year(self):
        """Return the year of the file's ctime as a string (e.g. '2017').

        ``os.path.getctime`` is the creation time on Windows but the time
        of the last metadata change on Unix, so this is only an
        approximation of when the book was made.
        """
        creation_time = time.localtime(os.path.getctime(self.filename))
        return str(creation_time.tm_year)

    def get_cover_image(self):
        """Return the raw bytes of the first image in the archive.

        Returns None when the archive contains no image.
        """
        image_entries = self._image_entries()
        if not image_entries:
            return None
        return self.book.read(image_entries[0])

    def get_isbn(self):
        """Return None: no ISBN is read from the archive."""
        return None

    def get_contents(self):
        """Extract every page image and build the page listing.

        Returns:
            A tuple ``(contents, tmp_dir)``. ``contents`` is an OrderedDict
            mapping ``'Page N'`` to an ``<img>`` tag that points at the
            extracted file. ``tmp_dir`` is the directory holding the
            extracted files; the caller is responsible for deleting it.
        """
        # Imported locally so this class does not depend on a module-level
        # import that the rest of the file may not have.
        import html

        # This is a brute force approach
        # Maybe try reading from the file as everything
        # matures a little bit more
        tmp_dir = tempfile.mkdtemp()

        contents = collections.OrderedDict()
        for count, entry in enumerate(self._image_entries(), start=1):
            # extract() sanitises the member name (drops '..', leading
            # slashes, ...) and returns the path it really wrote to, which
            # can differ from a naive join of tmp_dir and entry.filename.
            image_path = self.book.extract(entry, path=tmp_dir)
            page_name = 'Page ' + str(count)

            # This returns the page as an <img> tag.
            # TODO
            # Image resizing, formatting
            # The member name comes from the archive, so escape it before
            # embedding it in a quoted attribute.
            contents[page_name] = (
                "<img src='%s'/>" % html.escape(image_path, quote=True))
        return contents, tmp_dir
|
@@ -5,8 +5,8 @@
|
||||
# get_author()
|
||||
# get_year()
|
||||
# get_cover_image()
|
||||
# get_isbn
|
||||
# TODO More for get contents, get TOC
|
||||
# get_isbn()
|
||||
# get_contents() - Should return a tuple with 0: TOC 1: Deletable temp_directory
|
||||
|
||||
import os
|
||||
import re
|
||||
@@ -89,7 +89,7 @@ class ParseEPUB:
|
||||
return image_content
|
||||
|
||||
except KeyError:
|
||||
return
|
||||
return None
|
||||
|
||||
def get_isbn(self):
|
||||
try:
|
||||
@@ -100,7 +100,7 @@ class ParseEPUB:
|
||||
isbn = i[0]
|
||||
return isbn
|
||||
except KeyError:
|
||||
return
|
||||
return None
|
||||
|
||||
def get_contents(self):
|
||||
contents = collections.OrderedDict()
|
||||
@@ -137,4 +137,6 @@ class ParseEPUB:
|
||||
raise AttributeError
|
||||
except AttributeError:
|
||||
contents[title] = ''
|
||||
return contents
|
||||
|
||||
# Index 1 of the returned tuple is a directory that has to be cleaned up, if any
|
||||
return contents, None
|
||||
|
39
sorter.py
39
sorter.py
@@ -1,7 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# TODO
|
||||
# Methods that return None must be quantified within the parsing module
|
||||
# See if tags can be generated from book content
|
||||
# See if you want to include a hash of the book's name and author
|
||||
|
||||
@@ -10,7 +9,17 @@ import hashlib
|
||||
from multiprocessing.dummy import Pool
|
||||
|
||||
import database
|
||||
|
||||
# Every parser is supposed to have the following methods, even if they return None:
|
||||
# read_book()
|
||||
# get_title()
|
||||
# get_author()
|
||||
# get_year()
|
||||
# get_cover_image()
|
||||
# get_isbn()
|
||||
# get_contents() - Should return a tuple with 0: TOC 1: Deletable temp_directory
|
||||
from parsers.epub import ParseEPUB
|
||||
from parsers.cbz import ParseCBZ
|
||||
|
||||
|
||||
class BookSorter:
|
||||
@@ -65,27 +74,36 @@ class BookSorter:
|
||||
and (file_md5 in self.all_books.items() or file_md5 in self.hashes)):
|
||||
return
|
||||
|
||||
# Select sorter by file extension
|
||||
# SORTING TAKES PLACE HERE
|
||||
try:
|
||||
file_extension = os.path.splitext(filename)[1][1:]
|
||||
if file_extension == 'epub':
|
||||
book_ref = ParseEPUB(filename)
|
||||
if file_extension == 'cbz':
|
||||
book_ref = ParseCBZ(filename)
|
||||
except IndexError:
|
||||
return
|
||||
|
||||
# Everything following this is standard
|
||||
# Some of the None returns will have to have
|
||||
# values associated with them, though
|
||||
# None values are accounted for here
|
||||
book_ref.read_book()
|
||||
if book_ref.book:
|
||||
title = book_ref.get_title()
|
||||
title = book_ref.get_title().title()
|
||||
author = book_ref.get_author()
|
||||
if not author:
|
||||
author = 'Unknown'
|
||||
year = book_ref.get_year()
|
||||
if not year:
|
||||
year = 9999
|
||||
isbn = book_ref.get_isbn()
|
||||
|
||||
# Different modes require different values
|
||||
if self.mode == 'addition':
|
||||
cover_image = book_ref.get_cover_image()
|
||||
# TODO
|
||||
if not cover_image:
|
||||
pass
|
||||
|
||||
self.all_books[file_md5] = {
|
||||
'title': title,
|
||||
'author': author,
|
||||
@@ -95,7 +113,13 @@ class BookSorter:
|
||||
'cover_image': cover_image}
|
||||
|
||||
if self.mode == 'reading':
|
||||
content = book_ref.get_contents()
|
||||
all_content = book_ref.get_contents()
|
||||
content = all_content[0]
|
||||
temp_dir = all_content[1]
|
||||
|
||||
if not content.keys():
|
||||
content['Invalid'] = 'Possible Parse Error'
|
||||
|
||||
position = self.database_position(file_md5)
|
||||
self.all_books = {
|
||||
'title': title,
|
||||
@@ -105,7 +129,8 @@ class BookSorter:
|
||||
'hash': file_md5,
|
||||
'path': filename,
|
||||
'position': position,
|
||||
'content': content}
|
||||
'content': content,
|
||||
'temp_dir': temp_dir}
|
||||
|
||||
|
||||
def initiate_threads(self):
|
||||
|
@@ -12,8 +12,10 @@ class BookToolBar(QtWidgets.QToolBar):
|
||||
QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding)
|
||||
|
||||
# Size policy
|
||||
# TODO
|
||||
# Prevent resizing
|
||||
sizePolicy = QtWidgets.QSizePolicy(
|
||||
QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed)
|
||||
QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Fixed)
|
||||
|
||||
self.setMovable(False)
|
||||
self.setIconSize(QtCore.QSize(22, 22))
|
||||
@@ -218,6 +220,8 @@ class Tab(QtWidgets.QWidget):
|
||||
# TODO
|
||||
# A horizontal slider to control flow
|
||||
# Keyboard shortcuts
|
||||
# Take hint from a position function argument to open the book
|
||||
# at a specific page
|
||||
|
||||
# The content display widget is currently a QTextBrowser
|
||||
super(Tab, self).__init__(parent)
|
||||
|
Reference in New Issue
Block a user