Initial file loading

epub content parsing is horribly borked
This commit is contained in:
BasioMeusPuga
2017-11-11 23:21:49 +05:30
parent 5d495cfde3
commit 7fbea194c0
6 changed files with 236 additions and 120 deletions

View File

@@ -1,16 +1,20 @@
#!/usr/bin/env python3
# TODO
# Methods that return None must be quantified here if needed
# Methods that return None must be quantified within the parsing module
# See if tags can be generated from book content
# See if you want to include a hash of the book's name and author
import os
import hashlib
from multiprocessing.dummy import Pool
import database
from parsers.epub import ParseEPUB
class BookSorter:
def __init__(self, file_list, database_path):
def __init__(self, file_list, mode, database_path):
# Have the GUI pass a list of files straight to here
# Then, on the basis of what is needed, pass the
# filenames to the requisite functions
@@ -21,7 +25,9 @@ class BookSorter:
self.all_books = {}
self.database_path = database_path
self.hashes = []
self.database_hashes()
self.mode = mode
if database_path:
self.database_hashes()
def database_hashes(self):
all_hashes = database.DatabaseFunctions(
@@ -34,6 +40,16 @@ class BookSorter:
if all_hashes:
self.hashes = [i[0] for i in all_hashes]
def database_position(self, file_hash):
# Ask the database at self.database_path for the 'Position' column of
# the 'books' table, filtered on 'Hash' against file_hash with the
# 'EQUALS' selector, and return the result unchanged.
# NOTE(review): the meaning of the trailing True argument and the shape
# of the returned value are defined by DatabaseFunctions.fetch_data,
# which is not visible here - confirm before relying on either.
position = database.DatabaseFunctions(
self.database_path).fetch_data(
('Position',),
'books',
{'Hash': file_hash},
'EQUALS',
True)
return position
def read_book(self, filename):
# filename is expected as a string containing the
# full path of the ebook file
@@ -41,17 +57,21 @@ class BookSorter:
with open(filename, 'rb') as current_book:
file_md5 = hashlib.md5(current_book.read()).hexdigest()
# IF the file is NOT being loaded into the reader,
# Do not allow addition in case the file is duplicated in the directory
# OR is already in the database
# TODO
# See if you want to include a hash of the book's name and author
if file_md5 in self.all_books.items() or file_md5 in self.hashes:
# This should not get triggered in reading mode
if (self.mode == 'addition'
and (file_md5 in self.all_books.items() or file_md5 in self.hashes)):
return
# TODO
# See if tags can be generated from book content
# Sort according to file extension here
book_ref = ParseEPUB(filename)
# Select sorter by file extension
try:
file_extension = os.path.splitext(filename)[1][1:]
if file_extension == 'epub':
book_ref = ParseEPUB(filename)
except IndexError:
return
# Everything following this is standard
# Some of the None returns will have to have
@@ -61,16 +81,32 @@ class BookSorter:
title = book_ref.get_title()
author = book_ref.get_author()
year = book_ref.get_year()
cover_image = book_ref.get_cover_image()
isbn = book_ref.get_isbn()
self.all_books[file_md5] = {
'title': title,
'author': author,
'year': year,
'isbn': isbn,
'path': filename,
'cover_image': cover_image}
# Different modes require different values
if self.mode == 'addition':
cover_image = book_ref.get_cover_image()
self.all_books[file_md5] = {
'title': title,
'author': author,
'year': year,
'isbn': isbn,
'path': filename,
'cover_image': cover_image}
if self.mode == 'reading':
content = book_ref.get_contents()
position = self.database_position(file_md5)
self.all_books = {
'title': title,
'author': author,
'year': year,
'isbn': isbn,
'hash': file_md5,
'path': filename,
'position': position,
'content': content}
def initiate_threads(self):
_pool = Pool(5)