Refactor parsers. Create a new sorter module.

This commit is contained in:
BasioMeusPuga
2017-11-11 05:25:47 +05:30
parent 4172cff8db
commit e3684d4cd8
4 changed files with 81 additions and 60 deletions

View File

@@ -11,22 +11,23 @@
✓ Search bar in toolbar
✓ Drop down for TOC (book view)
✓ Image reflow
✓ Implement book view settings with a(nother) toolbar
✓ Use format* icons for toolbar buttons
✓ Image delegates
Implement book view settings with a(nother) toolbar
Options:
Ignore a and the for sorting purposes
Check files (hashes) upon restart
Recursive file addition
Show what on startup
mobi support
txt, doc support
mobi, azw support
txt, doc, djvu support
pdf support?
Goodreads API: Ratings, Read, Recommendations
Get ISBN using python-isbnlib
All ebooks should first be added to the database and then returned as HTML
Theming
Pagination
Use format* icons for toolbar buttons
Information dialog widget
Library context menu: Cache, Read, Edit database, delete
Set context menu for definitions and the like
@@ -39,7 +40,7 @@ from PyQt5 import QtWidgets, QtGui, QtCore
import mainwindow
import database
import book_parser
import sorter
from widgets import LibraryToolBar, BookToolBar, Tab, LibraryDelegate
from subclasses import Settings, Library
@@ -144,7 +145,7 @@ class MainUI(QtWidgets.QMainWindow, mainwindow.Ui_MainWindow):
if my_file[0]:
self.listView.setEnabled(False)
self.last_open_path = os.path.dirname(my_file[0][0])
books = book_parser.BookSorter(my_file[0])
books = sorter.BookSorter(my_file[0])
parsed_books = books.initiate_threads()
database.DatabaseFunctions(self.database_path).add_to_database(parsed_books)
self.listView.setEnabled(True)

0
parsers/__init__.py Normal file
View File

View File

@@ -1,9 +1,15 @@
#!/usr/bin/env python3
# Every parser is supposed to have the following methods, even if they return None:
# read_book()
# get_title()
# get_author()
# get_year()
# get_cover_image()
# get_isbn()
# TODO Add more required methods, e.g. for getting the contents and the TOC
import os
import re
import hashlib
from multiprocessing.dummy import Pool
import ebooklib.epub
@@ -15,7 +21,7 @@ class ParseEPUB:
self.filename = filename
self.book = None
def read_epub(self):
def read_book(self):
try:
self.book = ebooklib.epub.read_epub(self.filename)
except (KeyError, AttributeError):
@@ -94,54 +100,3 @@ class ParseEPUB:
return isbn
except KeyError:
return
class BookSorter:
    """Gather database metadata for a list of ebook files.

    The GUI passes a list of file paths straight to this class. Each file
    is hashed and parsed, and the extracted details are collected in
    ``all_books``, keyed by the MD5 hex digest of the file's contents.

    Planned responsibilities noted by the original author: getting file
    info for the database, parsing for the reader proper, and caching
    upon closing.
    """

    def __init__(self, file_list):
        """Store the files to process and start with an empty result map.

        Args:
            file_list: Iterable of full paths to ebook files.
        """
        self.file_list = file_list
        self.all_books = {}

    def read_book(self, filename):
        """Parse one ebook and record its metadata in ``all_books``.

        Args:
            filename: String containing the full path of the ebook file.

        Returns:
            None. The result is stored in ``self.all_books``; nothing is
            stored when the file's hash is already present or when the
            parser did not end up with a loaded book.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # TODO
        # See if you want to include a hash of the book's name and author

        # Hash in fixed-size chunks so that a large file is never held
        # in memory in its entirety.
        file_hash = hashlib.md5()
        with open(filename, 'rb') as current_book:
            for chunk in iter(lambda: current_book.read(65536), b''):
                file_hash.update(chunk)
        file_md5 = file_hash.hexdigest()

        # Membership must be tested against the dictionary's keys.
        # Testing against .items() compares the digest with
        # (key, value) tuples and can never match.
        if file_md5 in self.all_books:
            return

        # TODO
        # See if tags can be generated from book content
        book_ref = ParseEPUB(filename)
        book_ref.read_epub()
        if book_ref.book:
            title = book_ref.get_title()
            author = book_ref.get_author()
            year = book_ref.get_year()
            cover_image = book_ref.get_cover_image()
            isbn = book_ref.get_isbn()

            self.all_books[file_md5] = {
                'title': title,
                'author': author,
                'year': year,
                'isbn': isbn,
                'path': filename,
                'cover_image': cover_image}

    def initiate_threads(self):
        """Run ``read_book`` over every file using a pool of 5 threads.

        Returns:
            The ``all_books`` dictionary mapping MD5 hex digests to
            metadata dictionaries.
        """
        _pool = Pool(5)
        try:
            _pool.map(self.read_book, self.file_list)
        finally:
            # Release the worker threads even when a worker raised
            _pool.close()
            _pool.join()

        return self.all_books

65
sorter.py Normal file
View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python3
# TODO
# Parser methods that return None must be given fallback values here if needed
import hashlib
from multiprocessing.dummy import Pool
from parsers.epub import ParseEPUB
class BookSorter:
    """Gather database metadata for a list of ebook files.

    The GUI passes a list of file paths straight to this class, which
    hands each filename to the requisite parser. The extracted details
    are collected in ``all_books``, keyed by the MD5 hex digest of the
    file's contents.

    Planned responsibilities noted by the original author: getting file
    info for the database, parsing for the reader proper, and caching
    upon closing.
    """

    def __init__(self, file_list):
        """Store the files to process and start with an empty result map.

        Args:
            file_list: Iterable of full paths to ebook files.
        """
        self.file_list = file_list
        self.all_books = {}

    def read_book(self, filename):
        """Parse one ebook and record its metadata in ``all_books``.

        Args:
            filename: String containing the full path of the ebook file.

        Returns:
            None. The result is stored in ``self.all_books``; nothing is
            stored when the file's hash is already present or when the
            parser did not end up with a loaded book.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        # TODO
        # See if you want to include a hash of the book's name and author

        # Hash in fixed-size chunks so that a large file is never held
        # in memory in its entirety.
        file_hash = hashlib.md5()
        with open(filename, 'rb') as current_book:
            for chunk in iter(lambda: current_book.read(65536), b''):
                file_hash.update(chunk)
        file_md5 = file_hash.hexdigest()

        # Membership must be tested against the dictionary's keys.
        # Testing against .items() compares the digest with
        # (key, value) tuples and can never match.
        if file_md5 in self.all_books:
            return

        # TODO
        # See if tags can be generated from book content

        # Sort according to file extension here
        book_ref = ParseEPUB(filename)

        # Everything following this is standard
        # Some of the None returns will have to have
        # values associated with them, though
        book_ref.read_book()
        if book_ref.book:
            title = book_ref.get_title()
            author = book_ref.get_author()
            year = book_ref.get_year()
            cover_image = book_ref.get_cover_image()
            isbn = book_ref.get_isbn()

            self.all_books[file_md5] = {
                'title': title,
                'author': author,
                'year': year,
                'isbn': isbn,
                'path': filename,
                'cover_image': cover_image}

    def initiate_threads(self):
        """Run ``read_book`` over every file using a pool of 5 threads.

        Returns:
            The ``all_books`` dictionary mapping MD5 hex digests to
            metadata dictionaries.
        """
        _pool = Pool(5)
        try:
            _pool.map(self.read_book, self.file_list)
        finally:
            # Release the worker threads even when a worker raised
            _pool.close()
            _pool.join()

        return self.all_books