82 lines
2.5 KiB
Python
82 lines
2.5 KiB
Python
#!/usr/bin/env python3
|
|
|
|
# TODO
|
|
# Methods that return None must be quantified here if needed
|
|
|
|
import hashlib
|
|
from multiprocessing.dummy import Pool
|
|
|
|
import database
|
|
from parsers.epub import ParseEPUB
|
|
|
|
class BookSorter:
    """Gather metadata for a list of ebook files, skipping known ones.

    Every file is identified by the MD5 digest of its contents. A file is
    skipped when its digest was already recorded during this run
    (``self.all_books``) or was fetched from the database at construction
    time (``self.hashes``).
    """

    def __init__(self, file_list, database_path):
        """Store the inputs and load the hashes already in the database.

        Args:
            file_list: Iterable of ebook paths; each item is handed to
                ``read_book`` by ``initiate_threads``.
            database_path: Passed unchanged to
                ``database.DatabaseFunctions``.
        """
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = file_list
        self.all_books = {}
        self.database_path = database_path
        self.hashes = []
        self.database_hashes()

    def database_hashes(self):
        """Populate ``self.hashes`` from the 'Hash' column of 'books'.

        ``self.hashes`` is left untouched when the query returns a falsy
        result (for example ``None`` or an empty sequence).
        """
        # NOTE(review): the empty 'LIKE' pattern presumably makes
        # fetch_data match every row - confirm against
        # database.DatabaseFunctions.fetch_data.
        all_hashes = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Hash',),
                'books',
                {'Hash': ''},
                'LIKE')

        if all_hashes:
            # Each row is indexed positionally; 'Hash' is the only
            # column requested, so it sits at index 0.
            self.hashes = [i[0] for i in all_hashes]

    @staticmethod
    def _file_md5(filename, chunk_size=65536):
        """Return the hex MD5 digest of *filename*, read in chunks."""
        # Chunked reads keep memory use flat for large ebooks while
        # producing the same digest as hashing the whole file at once.
        digest = hashlib.md5()
        with open(filename, 'rb') as current_book:
            for chunk in iter(lambda: current_book.read(chunk_size), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def read_book(self, filename):
        """Parse one ebook and record its metadata in ``self.all_books``.

        Args:
            filename: String containing the full path of the ebook file.

        Returns:
            None. On success an entry keyed by the file's MD5 digest is
            added to ``self.all_books``. Nothing is added when the file is
            a duplicate or when the parser produced no book.
        """
        file_md5 = self._file_md5(filename)

        # Do not allow addition in case the file is duplicated in the
        # directory OR is already in the database.
        # Membership is tested against the dict's keys; testing against
        # .items() compares the hash with (key, value) tuples and can
        # never match.
        # TODO
        # See if you want to include a hash of the book's name and author
        if file_md5 in self.all_books or file_md5 in self.hashes:
            return

        # TODO
        # See if tags can be generated from book content
        # Sort according to file extension here
        book_ref = ParseEPUB(filename)

        # Everything following this is standard
        # Some of the None returns will have to have
        # values associated with them, though
        book_ref.read_book()
        if not book_ref.book:
            # The parser did not produce a book; nothing to record.
            return

        title = book_ref.get_title()
        author = book_ref.get_author()
        year = book_ref.get_year()
        cover_image = book_ref.get_cover_image()
        isbn = book_ref.get_isbn()

        self.all_books[file_md5] = {
            'title': title,
            'author': author,
            'year': year,
            'isbn': isbn,
            'path': filename,
            'cover_image': cover_image}

    def initiate_threads(self):
        """Run ``read_book`` over ``self.file_list`` on a 5-thread pool.

        Returns:
            ``self.all_books``: dict mapping MD5 digest to the metadata
            dict built by ``read_book``.
        """
        _pool = Pool(5)
        try:
            _pool.map(self.read_book, self.file_list)
        finally:
            # Always release the worker threads, even if a worker raised.
            _pool.close()
            _pool.join()

        return self.all_books
|