feature: enable python packaging

This commit is contained in:
Pau Ruiz i Safont
2018-03-10 19:00:56 +00:00
parent ede9ecdf14
commit 79180885b5
68 changed files with 190 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

View File

@@ -0,0 +1,278 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of
# conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list
# of conditions and the following disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import unicode_literals, division, absolute_import, print_function
import sys
import codecs
# Interpreter-major-version and platform flags used throughout this module.
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3
iswindows = sys.platform.startswith('win')

# unquote comes from urllib.parse when that import succeeds, else from urllib.
try:
    from urllib.parse import unquote
except ImportError:
    from urllib import unquote

# _h is the object whose unescape() is called by unescapeit() below.
if PY2:
    from HTMLParser import HTMLParser
    _h = HTMLParser()
elif sys.version_info[1] < 4:
    # not Python 2 and minor version below 4: use an HTMLParser instance
    import html.parser
    _h = html.parser.HTMLParser()
else:
    import html as _h

# text_type / binary_type name the text and byte string types of the
# running interpreter.
if PY3:
    text_type = str
    binary_type = bytes
    # if you will be printing arbitrary binary data to stdout on python 3
    # sys.stdin = sys.stdin.detach()
    # sys.stdout = sys.stdout.detach()
    # sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
else:
    range = xrange
    text_type = unicode
    binary_type = str
    # if you will be printing unicode under python 2 you need to protect
    # against sys.stdout.encoding being None, which forces ascii encoding of unicode
    # sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
    # alternatively set environment variable as follows **before** launching python: export PYTHONIOENCODING=UTF-8
# NOTE: Python 3 is completely broken when accessing single bytes in bytes strings
# (and they amazingly claim by design and no bug!)
# To illustrate: this works for unicode in Python 3 and for all Python 2.X for both bytestrings and unicode
# >>> o = '123456789'
# >>> o[-3]
# '7'
# >>> type(o[-3])
# <class 'str'>
# >>> type(o)
# <class 'str'>
# Unfortunately, this is what Python 3 does for no sane reason and only for bytestrings
# >>> o = b'123456789'
# >>> o[-3]
# 55
# >>> type(o[-3])
# <class 'int'>
# >>> type(o)
# <class 'bytes'>
# This mind boggling behaviour also happens when indexing a bytestring and/or
# iterating over a bytestring. In other words it will return an int but not
# the byte itself!
# The only way to access a single byte as a byte in bytestring and get the byte in both
# Python 2 and Python 3 is to use a slice
# This problem is so common there are horrible hacks floating around the net to **try**
# to work around it, so that code that works on both Python 2 and Python 3 is possible.
# So in order to write code that works on both Python 2 and Python 3
# if you index or access a single byte and want its ord() then use the bord() function.
# If instead you want it as a single character byte use the bchar() function
# both of which are defined below.
if PY3:
    # Also note: decoding a bytestring with 'latin-1' (or any other encoding
    # covering the full 0-255 range) in place of ascii gives a one-to-one
    # mapping between byte values and integer values in the 0 - 255 range.
    def bchr(s):
        """Return the single-byte bytestring for the integer s."""
        return bytes((s,))

    def bstr(s):
        """Return s as a bytestring; text is encoded with latin-1."""
        if not isinstance(s, str):
            return bytes(s)
        return bytes(s, 'latin-1')

    def bord(s):
        """Return s unchanged (it is passed through as-is on Python 3)."""
        return s

    def bchar(s):
        """Return the single-byte bytestring for the integer s."""
        return bytes((s,))
else:
    def bchr(s):
        """Return chr(s)."""
        return chr(s)

    def bstr(s):
        """Return str(s)."""
        return str(s)

    def bord(s):
        """Return ord(s)."""
        return ord(s)

    def bchar(s):
        """Return s unchanged."""
        return s
if PY3:
    # list-producing versions of the major Python iterating functions
    def lrange(*args, **kwargs):
        """range(...) materialised as a list."""
        values = range(*args, **kwargs)
        return list(values)

    def lzip(*args, **kwargs):
        """zip(...) materialised as a list."""
        pairs = zip(*args, **kwargs)
        return list(pairs)

    def lmap(*args, **kwargs):
        """map(...) materialised as a list."""
        mapped = map(*args, **kwargs)
        return list(mapped)

    def lfilter(*args, **kwargs):
        """filter(...) materialised as a list."""
        kept = filter(*args, **kwargs)
        return list(kept)
else:
    import __builtin__
    # Python 2-builtin ranges produce lists
    lrange = __builtin__.range
    lzip = __builtin__.zip
    lmap = __builtin__.map
    lfilter = __builtin__.filter
# In Python 3 you can no longer use .encode('hex') on a bytestring
# instead use the following on both platforms
import binascii
def hexlify(bdata):
    """Return the hexadecimal representation of bdata, decoded as ascii."""
    hex_bytes = binascii.hexlify(bdata)
    return hex_bytes.decode('ascii')
# If you: import struct
# Note: struct pack, unpack, unpack_from all *require* bytestring format
# data all the way up to at least Python 2.7.5, Python 3 is okay with either
# If you: import re
# note: Python 3 "re" requires the pattern to be the exact same type as the data to be
# searched ... but u"" is not allowed for the pattern itself only b""
# Python 2.X allows the pattern to be any type and converts it to match the data
# and returns the same type as the data
# convert string to be utf-8 encoded
def utf8_str(p, enc='utf-8'):
    """Return p as a utf-8 encoded bytestring.

    None is passed through.  Text is encoded as utf-8.  A bytestring is
    returned unchanged when enc is 'utf-8'; otherwise it is decoded with
    enc and re-encoded as utf-8.
    """
    if p is None:
        return None
    if isinstance(p, text_type):
        return p.encode('utf-8')
    if enc == 'utf-8':
        return p
    decoded = p.decode(enc)
    return decoded.encode('utf-8')
# convert string to be unicode encoded
def unicode_str(p, enc='utf-8'):
    """Return p as text.

    None and text values are returned unchanged; anything else is decoded
    with enc.
    """
    if p is None or isinstance(p, text_type):
        return p
    return p.decode(enc)
# Every character with a code point in the range 0-127.
ASCII_CHARS = set(map(chr, range(128)))
# Characters that quoteurl() leaves untouched.
URL_SAFE = set(
    'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789#_.-/~')
# ASCII characters that quoteurl() percent-encodes.
IRI_UNSAFE = ASCII_CHARS.difference(URL_SAFE)
# returns a quoted IRI (not a URI)
def quoteurl(href):
    """Return href with every character in IRI_UNSAFE percent-encoded.

    A bytestring href is first decoded as utf-8.  Characters outside
    IRI_UNSAFE are copied through unchanged.
    """
    if isinstance(href, binary_type):
        href = href.decode('utf-8')
    return ''.join(
        ("%%%02x" % ord(char)) if char in IRI_UNSAFE else char
        for char in href)
# unquotes url/iri
def unquoteurl(href):
    """Return href with percent-escapes undone via unquote().

    A bytestring href is first decoded as utf-8.
    """
    text = href.decode('utf-8') if isinstance(href, binary_type) else href
    return unquote(text)
# unescape html
def unescapeit(sval):
    """Return the result of unescape(sval) on the module-level _h object."""
    return _h.unescape(sval)
# Python 2.X commandline parsing under Windows has been horribly broken for years!
# Use the following code to emulate full unicode commandline parsing on Python 2
# ie. To get sys.argv arguments and properly encode them as unicode
def unicode_argv():
    """Return the command-line arguments as a list of text strings.

    On Python 3 this is sys.argv itself.  On Python 2 under Windows the
    arguments are re-read through the Win32 API; elsewhere on Python 2 each
    byte-string argument is decoded.  Returns None if the Windows API
    reports no arguments.
    """
    if PY3:
        return sys.argv

    if not iswindows:
        # Pick a decoding: stdin's, else the filesystem's, else utf-8.
        argvencoding = sys.stdin.encoding
        if argvencoding is None:
            argvencoding = sys.getfilesystemencoding()
        if argvencoding is None:
            argvencoding = 'utf-8'
        decoded = []
        for arg in sys.argv:
            decoded.append(
                arg if isinstance(arg, text_type) else arg.decode(argvencoding))
        return decoded

    # Versions 2.x of Python don't support Unicode in sys.argv on
    # Windows, with the underlying Windows API instead replacing multi-byte
    # characters with '?'.  So use shell32.GetCommandLineArgvW to get sys.argv
    # as a list of Unicode strings
    from ctypes import POINTER, byref, cdll, c_int, windll
    from ctypes.wintypes import LPCWSTR, LPWSTR

    GetCommandLineW = cdll.kernel32.GetCommandLineW
    GetCommandLineW.argtypes = []
    GetCommandLineW.restype = LPCWSTR

    CommandLineToArgvW = windll.shell32.CommandLineToArgvW
    CommandLineToArgvW.argtypes = [LPCWSTR, POINTER(c_int)]
    CommandLineToArgvW.restype = POINTER(LPWSTR)

    cmd = GetCommandLineW()
    argc = c_int(0)
    argv = CommandLineToArgvW(cmd, byref(argc))
    if argc.value <= 0:
        # this should never happen
        return None
    # Remove Python executable and commands if present
    start = argc.value - len(sys.argv)
    return [argv[i] for i in range(start, argc.value)]
# Python 2.X is broken in that it does not recognize CP65001 as UTF-8
def add_cp65001_codec():
    """On Python 2, register 'cp65001' as an alias of utf-8 if it is unknown.

    Does nothing when PY2 is false or when the codec lookup already succeeds.
    """
    if not PY2:
        return
    try:
        codecs.lookup('cp65001')
    except LookupError:
        def _find_cp65001(name):
            # Codec search function: answer only for 'cp65001'.
            if name == 'cp65001':
                return codecs.lookup('utf-8')
            return None
        codecs.register(_find_cp65001)
    return

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,238 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import unicode_str
from .unipath import pathof
import os
import imghdr
import struct
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
# Set to True to use svg wrapper for default.
USE_SVG_WRAPPER = True

# Set to True to force to use the default title.
FORCE_DEFAULT_TITLE = False

# The name for the cover page.
COVER_PAGE_FINENAME = 'cover_page.xhtml'

# The default title for the cover page.
DEFAULT_TITLE = 'Cover'

# The max width for the svg cover page.
MAX_WIDTH = 4096

# The max height for the svg cover page.
MAX_HEIGHT = 4096
def get_image_type(imgname, imgdata=None):
    """Return the image type reported by imghdr, with a JPEG fallback.

    When imghdr.what() does not recognise the image, the data (read from
    imgname if imgdata is None) is checked for the JPEG start bytes FF D8
    and, ignoring trailing null bytes, the end bytes FF D9; if both match
    the type is "jpeg".  Otherwise None is returned.
    """
    detected = unicode_str(imghdr.what(pathof(imgname), imgdata))
    if detected is not None:
        return detected

    # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are
    # some with only the magic JPEG bytes out there...
    # ImageMagick handles those, so, do it too.
    if imgdata is None:
        with open(pathof(imgname), 'rb') as f:
            imgdata = f.read()
    if imgdata[0:2] != b'\xFF\xD8':
        return detected

    # Find the end of the data once trailing null bytes are ignored.
    end = len(imgdata)
    while imgdata[end - 1:end] == b'\x00':
        end -= 1
    # Be extra safe, check the trailing bytes, too.
    if imgdata[end - 2:end] == b'\xFF\xD9':
        detected = "jpeg"
    return detected
# JPEG start-of-frame markers.  0xC4 (DHT), 0xC8 (JPG) and 0xCC (DAC) fall in
# the 0xC0-0xCF range but are not frame headers and carry no dimensions.
_JPEG_SOF_MARKERS = frozenset(
    marker for marker in range(0xc0, 0xd0)
    if marker not in (0xc4, 0xc8, 0xcc))


def _jpeg_size_from_file(fhandle):
    """Scan an open JPEG file for its first SOFn segment; return (width, height)."""
    fhandle.seek(0)  # Read 0xff next
    size = 2
    ftype = 0
    while ftype not in _JPEG_SOF_MARKERS:
        fhandle.seek(size, 1)
        byte = fhandle.read(1)
        while ord(byte) == 0xff:
            byte = fhandle.read(1)
        ftype = ord(byte)
        size = struct.unpack(b'>H', fhandle.read(2))[0] - 2
    # We are at a SOFn block
    fhandle.seek(1, 1)  # Skip `precision' byte.
    height, width = struct.unpack(b'>HH', fhandle.read(4))
    return width, height


def _jpeg_size_from_bytes(imgdata):
    """Scan in-memory JPEG data for its first SOFn segment; return (width, height)."""
    pos = 0
    size = 2
    ftype = 0
    while ftype not in _JPEG_SOF_MARKERS:
        pos += size
        byte = imgdata[pos:pos+1]
        pos += 1
        while ord(byte) == 0xff:
            byte = imgdata[pos:pos+1]
            pos += 1
        ftype = ord(byte)
        size = struct.unpack(b'>H', imgdata[pos:pos+2])[0] - 2
        pos += 2
    # We are at a SOFn block
    pos += 1  # Skip `precision' byte.
    height, width = struct.unpack(b'>HH', imgdata[pos:pos+4])
    return width, height


def get_image_size(imgname, imgdata=None):
    '''Determine the image type of imgname (or imgdata) and return its size.

    Originally,
    Determine the image type of fhandle and return its size.
    from draco

    imgname -- path of the image; read only when imgdata is None.
    imgdata -- optional image bytes; when given, no file is opened here.

    Returns a (width, height) tuple for png, gif and jpeg images.  Returns
    None when fewer than 24 bytes are available, the type is not one of
    those three, the PNG signature check fails, or JPEG parsing raises.
    '''
    fhandle = None
    try:
        if imgdata is None:
            fhandle = open(pathof(imgname), 'rb')
            head = fhandle.read(24)
        else:
            head = imgdata[0:24]
        if len(head) != 24:
            return None

        imgtype = get_image_type(imgname, imgdata)
        if imgtype == 'png':
            check = struct.unpack(b'>i', head[4:8])[0]
            if check != 0x0d0a1a0a:
                return None
            width, height = struct.unpack(b'>ii', head[16:24])
        elif imgtype == 'gif':
            width, height = struct.unpack(b'<HH', head[6:10])
        elif imgtype == 'jpeg':
            try:
                if imgdata is None:
                    width, height = _jpeg_size_from_file(fhandle)
                else:
                    width, height = _jpeg_size_from_bytes(imgdata)
            except Exception:  # IGNORE:W0703
                # Truncated or malformed data: report "size unknown".
                return None
        else:
            return None
        return width, height
    finally:
        # The original never closed the handle; always release it here.
        if fhandle is not None:
            fhandle.close()
# XXX experimental
class CoverProcessor(object):
    """Create a cover page.

    files    -- object providing the imgdir, k8images, k8text and k8oebps paths.
    metadata -- mapping of metadata names to lists of values; 'Language',
                'Title' and 'CoverOffset' are read here.
    rscnames -- sequence of resource names indexed by metadata['CoverOffset'].
    imgname  -- optional cover image name; overrides the CoverOffset lookup.
    imgdata  -- optional cover image bytes used to determine its size.
    """

    def __init__(self, files, metadata, rscnames, imgname=None, imgdata=None):
        self.files = files
        self.metadata = metadata
        self.rscnames = rscnames
        self.cover_page = COVER_PAGE_FINENAME
        self.use_svg = USE_SVG_WRAPPER  # Use svg wrapper.
        self.lang = metadata.get('Language', ['en'])[0]
        # This should ensure that if the methods to find the cover image's
        # dimensions should fail for any reason, the SVG routine will not be used.
        self.width, self.height = -1, -1
        if FORCE_DEFAULT_TITLE:
            self.title = DEFAULT_TITLE
        else:
            self.title = metadata.get('Title', [DEFAULT_TITLE])[0]

        self.cover_image = None
        if imgname is not None:
            self.cover_image = imgname
        elif 'CoverOffset' in metadata:
            imageNumber = int(metadata['CoverOffset'][0])
            cover_image = self.rscnames[imageNumber]
            if cover_image is not None:
                self.cover_image = cover_image
            else:
                print('Warning: Cannot identify the cover image.')

        if self.use_svg:
            size = None
            try:
                if imgdata is None:
                    fname = os.path.join(files.imgdir, self.cover_image)
                    size = get_image_size(fname)
                else:
                    size = get_image_size(None, imgdata)
            except Exception:
                # Deliberately best-effort: any failure to measure the image
                # just disables the SVG wrapper.  Unlike a bare except this
                # lets KeyboardInterrupt and SystemExit propagate.
                size = None
            if size is None:
                self.use_svg = False
            else:
                self.width, self.height = size
            width = self.width
            height = self.height
            if width < 0 or height < 0 or width > MAX_WIDTH or height > MAX_HEIGHT:
                self.use_svg = False
        return

    def getImageName(self):
        """Return the cover image name (None if none was identified)."""
        return self.cover_image

    def getXHTMLName(self):
        """Return the file name of the cover page."""
        return self.cover_page

    def buildXHTML(self):
        """Return the cover page markup as a text string.

        Title, language and image path are XML-escaped before insertion so
        that characters such as '&', '<' or '"' cannot break the document.
        """
        from xml.sax.saxutils import escape

        print('Building a cover page.')
        files = self.files
        cover_image = self.cover_image
        quote_entity = {'"': '&quot;'}
        title = escape(self.title)
        lang = escape(self.lang, quote_entity)

        image_dir = os.path.normpath(os.path.relpath(files.k8images, files.k8text))
        image_path = os.path.join(image_dir, cover_image).replace('\\', '/')
        image_path = escape(image_path, quote_entity)

        if not self.use_svg:
            parts = [
                '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html>',
                '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"',
                ' xml:lang="{:s}">\n'.format(lang),
                '<head>\n<title>{:s}</title>\n'.format(title),
                '<style type="text/css">\n',
                'body {\n margin: 0;\n padding: 0;\n text-align: center;\n}\n',
                'div {\n height: 100%;\n width: 100%;\n text-align: center;\n page-break-inside: avoid;\n}\n',
                'img {\n display: inline-block;\n height: 100%;\n margin: 0 auto;\n}\n',
                '</style>\n</head>\n',
                '<body><div>\n',
                ' <img src="{:s}" alt=""/>\n'.format(image_path),
                '</div></body>\n</html>',
            ]
        else:
            width = self.width
            height = self.height
            viewBox = "0 0 {0:d} {1:d}".format(width, height)
            parts = [
                '<?xml version="1.0" encoding="utf-8"?><!DOCTYPE html>',
                '<html xmlns="http://www.w3.org/1999/xhtml"',
                ' xml:lang="{:s}">\n'.format(lang),
                '<head>\n <title>{:s}</title>\n'.format(title),
                '<style type="text/css">\n',
                'svg {padding: 0pt; margin:0pt}\n',
                'body { text-align: center; padding:0pt; margin: 0pt; }\n',
                '</style>\n</head>\n',
                '<body>\n <div>\n',
                ' <svg xmlns="http://www.w3.org/2000/svg" height="100%" preserveAspectRatio="xMidYMid meet"',
                ' version="1.1" viewBox="{0:s}" width="100%" xmlns:xlink="http://www.w3.org/1999/xlink">\n'.format(viewBox),
                ' <image height="{0}" width="{1}" xlink:href="{2}"/>\n'.format(height, width, image_path),
                ' </svg>\n',
                ' </div>\n</body>\n</html>',
            ]
        return ''.join(parts)

    def writeXHTML(self):
        """Write the cover page, utf-8 encoded, into files.k8text.

        An existing file of the same name is removed first, with a warning.
        """
        files = self.files
        cover_page = self.cover_page

        data = self.buildXHTML()

        outfile = os.path.join(files.k8text, cover_page)
        if os.path.exists(pathof(outfile)):
            print('Warning: {:s} already exists.'.format(cover_page))
            os.remove(pathof(outfile))
        with open(pathof(outfile), 'wb') as f:
            f.write(data.encode('utf-8'))
        return

    def guide_toxml(self):
        """Return the <reference type="cover"> element pointing at the cover page."""
        files = self.files
        text_dir = os.path.relpath(files.k8text, files.k8oebps)
        data = '<reference type="cover" title="Cover" href="{:s}/{:s}" />\n'.format(
            text_dir, self.cover_page)
        return data

View File

@@ -0,0 +1,377 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, PY3, utf8_str, bstr, bchr
# Python 2: alias range to xrange and use a bytestring typecode for
# array.array (array_format is passed to array.array in applyInflectionRule).
if PY2:
    range = xrange
    array_format = b'B'
# Python 3: provide unichr as an alias of chr and use a text typecode.
if PY3:
    unichr = chr
    array_format = "B"
import array
import struct
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
from .mobi_index import getVariableWidthValue, readTagSection, getTagMap
from .mobi_utils import toHex
# When True, the dictionary parsing code in this module prints extra
# diagnostic output.
DEBUG_DICT = False
class InflectionData(object):
    """Index over one or more inflection data sections.

    For each section, a 32-bit big-endian start value is read at offset
    0x14 and a 32-bit big-endian count at offset 0x18.
    """

    def __init__(self, infldatas):
        self.infldatas = infldatas
        self.starts = []
        self.counts = []
        for section in self.infldatas:
            # The two big-endian longs sit back to back at 0x14 and 0x18.
            start, count = struct.unpack_from(b'>LL', section, 0x14)
            self.starts.append(start)
            self.counts.append(count)

    def lookup(self, lookupvalue):
        """Map a global entry number onto (local number, start, count, data).

        Sections are walked in order, subtracting each section's count
        until the value fits.  If it fits in none, an error is printed and
        the first section is returned with the original value.
        """
        remaining = lookupvalue
        index = 0
        section_total = len(self.counts)
        while remaining >= self.counts[index]:
            remaining -= self.counts[index]
            index += 1
            if index == section_total:
                print("Error: Problem with multiple inflections data sections")
                return lookupvalue, self.starts[0], self.counts[0], self.infldatas[0]
        return remaining, self.starts[index], self.counts[index], self.infldatas[index]

    def offsets(self, value):
        """Return (offset, nextOffset, data) for entry number value.

        nextOffset is None when the entry is the last one of its section.
        """
        rvalue, start, count, data = self.lookup(value)
        table = start + 4
        offset, = struct.unpack_from(b'>H', data, table + (2 * rvalue))
        nextOffset = None
        if rvalue + 1 < count:
            nextOffset, = struct.unpack_from(b'>H', data, table + (2 * (rvalue + 1)))
        return offset, nextOffset, data
class dictSupport(object):
    """Build dictionary index markup from a document's orth/infl index sections.

    mh   -- header object providing header, metaOrthIndex and metaInflIndex.
    sect -- section container providing loadSection(index).
    """

    def __init__(self, mh, sect):
        self.mh = mh
        self.header = mh.header
        self.sect = sect
        self.metaOrthIndex = mh.metaOrthIndex
        self.metaInflIndex = mh.metaInflIndex

    def parseHeader(self, data):
        """Read an INDX header.

        Returns (header, ordt1, ordt2): header is a dict of the header
        fields plus 'otype' and 'oentries'; ordt1/ordt2 are tuples read from
        the two ORDT tables, or None when the section has none.

        NOTE: when data does not start with b'INDX' a warning is printed and
        False is returned instead of a tuple; callers that unpack three
        values will then fail.
        """
        if not data[:4] == b'INDX':
            print("Warning: index section is not INDX")
            return False
        words = (
            'len', 'nul1', 'type', 'gen', 'start', 'count', 'code',
            'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc'
        )
        num = len(words)
        values = struct.unpack(bstr('>%dL' % num), data[4:4*(num+1)])
        header = dict(zip(words, values))

        ordt1 = None
        ordt2 = None

        otype, oentries, op1, op2, otagx = struct.unpack_from(b'>LLLLL', data, 0xa4)
        header['otype'] = otype
        header['oentries'] = oentries

        if DEBUG_DICT:
            print("otype %d, oentries %d, op1 %d, op2 %d, otagx %d" % (otype, oentries, op1, op2, otagx))

        if header['code'] == 0xfdea or oentries > 0:
            # some dictionaries seem to be codepage 65002 (0xFDEA) which seems
            # to be some sort of strange EBCDIC utf-8 or 16 encoded strings
            # So we need to look for them and store them away to process leading text
            # ORDT1 has 1 byte long entries, ORDT2 has 2 byte long entries
            # we only ever seem to use the second but ...
            #
            # if otype = 0, ORDT table uses 16 bit values as offsets into the table
            # if otype = 1, ORDT table uses 8 bit values as offsets into the table
            assert(data[op1:op1+4] == b'ORDT')
            assert(data[op2:op2+4] == b'ORDT')
            ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1+4)
            ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2+4)

        if DEBUG_DICT:
            print("parsed INDX header:")
            for key in header:
                print(key, "%x" % header[key],)
            print("\n")
        return header, ordt1, ordt2

    def getPositionMap(self):
        """Return a dict mapping text positions to the index markup (bytes)
        to be inserted there.

        The dict is empty when metaOrthIndex is 0xFFFFFFFF.
        """
        sect = self.sect

        positionMap = {}

        metaOrthIndex = self.metaOrthIndex
        metaInflIndex = self.metaInflIndex

        decodeInflection = True
        if metaOrthIndex != 0xFFFFFFFF:
            print("Info: Document contains orthographic index, handle as dictionary")
            if metaInflIndex == 0xFFFFFFFF:
                decodeInflection = False
            else:
                metaInflIndexData = sect.loadSection(metaInflIndex)

                print("\nParsing metaInflIndexData")
                midxhdr, mhordt1, mhordt2 = self.parseHeader(metaInflIndexData)

                metaIndexCount = midxhdr['count']
                idatas = []
                for j in range(metaIndexCount):
                    idatas.append(sect.loadSection(metaInflIndex + 1 + j))
                dinfl = InflectionData(idatas)

                inflNameData = sect.loadSection(metaInflIndex + 1 + metaIndexCount)
                tagSectionStart = midxhdr['len']
                inflectionControlByteCount, inflectionTagTable = readTagSection(tagSectionStart, metaInflIndexData)
                if DEBUG_DICT:
                    print("inflectionTagTable: %s" % inflectionTagTable)
                if self.hasTag(inflectionTagTable, 0x07):
                    print("Error: Dictionary uses obsolete inflection rule scheme which is not yet supported")
                    decodeInflection = False

            data = sect.loadSection(metaOrthIndex)

            print("\nParsing metaOrthIndex")
            idxhdr, hordt1, hordt2 = self.parseHeader(data)

            tagSectionStart = idxhdr['len']
            controlByteCount, tagTable = readTagSection(tagSectionStart, data)
            orthIndexCount = idxhdr['count']
            print("orthIndexCount is", orthIndexCount)
            if DEBUG_DICT:
                print("orthTagTable: %s" % tagTable)
            if hordt2 is not None:
                print("orth entry uses ordt2 lookup table of type ", idxhdr['otype'])
            hasEntryLength = self.hasTag(tagTable, 0x02)
            if not hasEntryLength:
                print("Info: Index doesn't contain entry length tags")

            print("Read dictionary index data")
            for i in range(metaOrthIndex + 1, metaOrthIndex + 1 + orthIndexCount):
                data = sect.loadSection(i)
                hdrinfo, ordt1, ordt2 = self.parseHeader(data)
                idxtPos = hdrinfo['start']
                entryCount = hdrinfo['count']
                idxPositions = []
                for j in range(entryCount):
                    pos, = struct.unpack_from(b'>H', data, idxtPos + 4 + (2 * j))
                    idxPositions.append(pos)
                # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!)
                idxPositions.append(idxtPos)
                for j in range(entryCount):
                    startPos = idxPositions[j]
                    endPos = idxPositions[j+1]
                    textLength = ord(data[startPos:startPos+1])
                    text = data[startPos+1:startPos+1+textLength]
                    if hordt2 is not None:
                        # Translate the entry text through the ORDT2 table.
                        utext = u""
                        if idxhdr['otype'] == 0:
                            pattern = b'>H'
                            inc = 2
                        else:
                            pattern = b'>B'
                            inc = 1
                        pos = 0
                        while pos < textLength:
                            off, = struct.unpack_from(pattern, text, pos)
                            if off < len(hordt2):
                                utext += unichr(hordt2[off])
                            else:
                                utext += unichr(off)
                            pos += inc
                        text = utext.encode('utf-8')

                    tagMap = getTagMap(controlByteCount, tagTable, data, startPos+1+textLength, endPos)
                    if 0x01 in tagMap:
                        if decodeInflection and 0x2a in tagMap:
                            inflectionGroups = self.getInflectionGroups(text, inflectionControlByteCount, inflectionTagTable,
                                                                        dinfl, inflNameData, tagMap[0x2a])
                        else:
                            inflectionGroups = b''
                        assert len(tagMap[0x01]) == 1
                        entryStartPosition = tagMap[0x01][0]
                        if hasEntryLength:
                            # The idx:entry attribute "scriptable" must be present to create entry length tags.
                            ml = b'<idx:entry scriptable="yes"><idx:orth value="' + text + b'">' + inflectionGroups + b'</idx:orth>'
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[entryStartPosition] + ml
                            else:
                                positionMap[entryStartPosition] = ml
                            assert len(tagMap[0x02]) == 1
                            entryEndPosition = entryStartPosition + tagMap[0x02][0]
                            if entryEndPosition in positionMap:
                                positionMap[entryEndPosition] = b"</idx:entry>" + positionMap[entryEndPosition]
                            else:
                                positionMap[entryEndPosition] = b"</idx:entry>"
                        else:
                            indexTags = b'<idx:entry>\n<idx:orth value="' + text + b'">\n' + inflectionGroups + b'</idx:entry>\n'
                            if entryStartPosition in positionMap:
                                positionMap[entryStartPosition] = positionMap[entryStartPosition] + indexTags
                            else:
                                positionMap[entryStartPosition] = indexTags
        return positionMap

    def hasTag(self, tagTable, tag):
        '''
        Test if tag table contains given tag.

        @param tagTable: The tag table.
        @param tag: The tag to search.
        @return: True if tag table contains given tag; False otherwise.
        '''
        for currentTag, _, _, _ in tagTable:
            if currentTag == tag:
                return True
        return False

    def getInflectionGroups(self, mainEntry, controlByteCount, tagTable, dinfl, inflectionNames, groupList):
        '''
        Create string which contains the inflection groups with inflection rules as mobipocket tags.

        @param mainEntry: The word to inflect.
        @param controlByteCount: The number of control bytes.
        @param tagTable: The tag table.
        @param dinfl: The InflectionData object used to select the right inflection data section.
        @param inflectionNames: The inflection rule name data.
        @param groupList: The list of inflection groups to process.
        @return: Bytestring with inflection groups and rules, or an empty
                 bytestring if required tags are not available.
        '''
        result = b""
        for groupValue in groupList:
            offset, nextOffset, data = dinfl.offsets(groupValue)

            # First byte seems to be always 0x00 and must be skipped.
            assert ord(data[offset:offset+1]) == 0x00
            tagMap = getTagMap(controlByteCount, tagTable, data, offset + 1, nextOffset)

            # Make sure that the required tags are available.
            if 0x05 not in tagMap:
                print("Error: Required tag 0x05 not found in tagMap")
                # Must be bytes: the caller concatenates the result with
                # bytestrings, and a text "" would raise on Python 3.
                return b""
            if 0x1a not in tagMap:
                print("Error: Required tag 0x1a not found in tagMap")
                return b''

            result += b'<idx:infl>'

            for i, nameOffset in enumerate(tagMap[0x05]):
                # Get name of inflection rule.
                consumed, textLength = getVariableWidthValue(inflectionNames, nameOffset)
                inflectionName = inflectionNames[nameOffset+consumed:nameOffset+consumed+textLength]

                # Get and apply inflection rule across possibly multiple inflection data sections
                ruleValue = tagMap[0x1a][i]
                rvalue, start, count, data = dinfl.lookup(ruleValue)
                offset, = struct.unpack_from(b'>H', data, start + 4 + (2 * rvalue))
                textLength = ord(data[offset:offset+1])
                inflection = self.applyInflectionRule(mainEntry, data, offset+1, offset+1+textLength)
                if inflection is not None:
                    result += b' <idx:iform name="' + inflectionName + b'" value="' + inflection + b'"/>'

            result += b'</idx:infl>'
        return result

    def applyInflectionRule(self, mainEntry, inflectionRuleData, start, end):
        '''
        Apply inflection rule.

        @param mainEntry: The word to inflect.
        @param inflectionRuleData: The inflection rules.
        @param start: The start position of the inflection rule to use.
        @param end: The end position of the inflection rule to use.
        @return: The string with the inflected word or None if an error occurs.
        '''
        mode = -1
        byteArray = array.array(array_format, mainEntry)
        position = len(byteArray)
        for charOffset in range(start, end):
            char = inflectionRuleData[charOffset:charOffset+1]
            abyte = ord(char)
            if abyte >= 0x0a and abyte <= 0x13:
                # Move cursor backwards
                offset = abyte - 0x0a
                if mode not in [0x02, 0x03]:
                    mode = 0x02
                    position = len(byteArray)
                position -= offset
            elif abyte > 0x13:
                if mode == -1:
                    print("Error: Unexpected first byte %i of inflection rule" % abyte)
                    return None
                elif position == -1:
                    print("Error: Unexpected first byte %i of inflection rule" % abyte)
                    return None
                else:
                    if mode == 0x01:
                        # Insert at word start
                        byteArray.insert(position, abyte)
                        position += 1
                    elif mode == 0x02:
                        # Insert at word end
                        byteArray.insert(position, abyte)
                    elif mode == 0x03:
                        # Delete at word end
                        position -= 1
                        deleted = byteArray.pop(position)
                        if bchr(deleted) != char:
                            if DEBUG_DICT:
                                print("0x03: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, bchr(deleted)))
                            print("Error: Delete operation of inflection rule failed")
                            return None
                    elif mode == 0x04:
                        # Delete at word start
                        deleted = byteArray.pop(position)
                        if bchr(deleted) != char:
                            if DEBUG_DICT:
                                print("0x04: %s %s %s %s" % (mainEntry, toHex(inflectionRuleData[start:end]), char, bchr(deleted)))
                            print("Error: Delete operation of inflection rule failed")
                            return None
                    else:
                        print("Error: Inflection rule mode %x is not implemented" % mode)
                        return None
            elif abyte == 0x01:
                # Insert at word start
                if mode not in [0x01, 0x04]:
                    position = 0
                mode = abyte
            elif abyte == 0x02:
                # Insert at word end
                if mode not in [0x02, 0x03]:
                    position = len(byteArray)
                mode = abyte
            elif abyte == 0x03:
                # Delete at word end
                if mode not in [0x02, 0x03]:
                    position = len(byteArray)
                mode = abyte
            elif abyte == 0x04:
                # Delete at word start
                if mode not in [0x01, 0x04]:
                    position = 0
                mode = abyte
            else:
                print("Error: Inflection rule mode %x is not implemented" % abyte)
                return None
        # array.tostring() was removed in Python 3.9; tobytes() is its
        # replacement.  Fall back to tostring() where tobytes() is missing.
        if hasattr(byteArray, 'tobytes'):
            raw = byteArray.tobytes()
        else:
            raw = byteArray.tostring()
        return utf8_str(raw)

View File

@@ -0,0 +1,934 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
DEBUG_USE_ORDERED_DICTIONARY = False  # OrderedDict is supported >= python 2.7.
""" set to True to use OrderedDict for MobiHeader.metadata."""
# dict_ is the mapping type selected by the flag above: OrderedDict when the
# flag is True, the builtin dict otherwise.
if DEBUG_USE_ORDERED_DICTIONARY:
    from collections import OrderedDict as dict_
else:
    dict_ = dict
from .compatibility_utils import PY2, unicode_str, hexlify, bord
# On Python 2, alias range to xrange for this module.
if PY2:
    range = xrange
import struct
import uuid
# import the mobiunpack support libraries
from .mobi_utils import getLanguage
from .mobi_uncompress import HuffcdicReader, PalmdocReader, UncompressedReader
class unpackException(Exception):
    """Module-specific exception type; adds nothing to Exception."""
    pass
def sortedHeaderKeys(mheader):
    """Return the keys of mheader ordered by the first item of each value."""
    def first_item(akey):
        return mheader[akey][0]

    return sorted(mheader.keys(), key=first_item)
# HD Containers have their own headers and their own EXTH
# this is just guesswork so far, making big assumption that
# metavalue key numbers remain the same in the CONT EXTH
# Note: The layout of the CONT Header is still unknown
# so just deal with their EXTH sections for now
def dump_contexth(cpage, extheader):
    """Print every record of a container EXTH block.

    cpage     -- codepage number; 65001 selects utf-8, anything else (or
                 1252) selects windows-1252 for decoding string records.
    extheader -- raw EXTH bytes; nothing is printed when it is empty.

    Each record is printed as text, as a hex-formatted integer, or as a
    hex string, depending on which table its id appears in.  Unknown ids
    are reported with a warning and dumped as hex.
    """
    # determine text encoding
    codec_map = {
        1252: 'windows-1252',
        65001: 'utf-8',
    }
    codec = codec_map.get(cpage, 'windows-1252')
    if extheader == b'':
        return

    # Records whose content is text.
    id_map_strings = {
        1: 'Drm Server Id',
        2: 'Drm Commerce Id',
        3: 'Drm Ebookbase Book Id',
        4: 'Drm Ebookbase Dep Id',
        100: 'Creator',
        101: 'Publisher',
        102: 'Imprint',
        103: 'Description',
        104: 'ISBN',
        105: 'Subject',
        106: 'Published',
        107: 'Review',
        108: 'Contributor',
        109: 'Rights',
        110: 'SubjectCode',
        111: 'Type',
        112: 'Source',
        113: 'ASIN',
        114: 'versionNumber',
        117: 'Adult',
        118: 'Retail-Price',
        119: 'Retail-Currency',
        120: 'TSC',
        122: 'fixed-layout',
        123: 'book-type',
        124: 'orientation-lock',
        126: 'original-resolution',
        127: 'zero-gutter',
        128: 'zero-margin',
        129: 'MetadataResourceURI',
        132: 'RegionMagnification',
        150: 'LendingEnabled',
        200: 'DictShortName',
        501: 'cdeType',
        502: 'last_update_time',
        503: 'Updated_Title',
        504: 'CDEContentKey',
        505: 'AmazonContentReference',
        506: 'Title-Language',
        507: 'Title-Display-Direction',
        508: 'Title-Pronunciation',
        509: 'Title-Collation',
        510: 'Secondary-Title',
        511: 'Secondary-Title-Language',
        512: 'Secondary-Title-Direction',
        513: 'Secondary-Title-Pronunciation',
        514: 'Secondary-Title-Collation',
        515: 'Author-Language',
        516: 'Author-Display-Direction',
        517: 'Author-Pronunciation',
        518: 'Author-Collation',
        519: 'Author-Type',
        520: 'Publisher-Language',
        521: 'Publisher-Display-Direction',
        522: 'Publisher-Pronunciation',
        523: 'Publisher-Collation',
        524: 'Content-Language-Tag',
        525: 'primary-writing-mode',
        526: 'NCX-Ingested-By-Software',
        527: 'page-progression-direction',
        528: 'override-kindle-fonts',
        529: 'Compression-Upgraded',
        530: 'Soft-Hyphens-In-Content',
        531: 'Dictionary_In_Langague',
        532: 'Dictionary_Out_Language',
        533: 'Font_Converted',
        534: 'Amazon_Creator_Info',
        535: 'Creator-Build-Tag',
        536: 'HD-Media-Containers-Info',  # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?)
        538: 'Resource-Container-Fidelity',
        539: 'HD-Container-Mimetype',
        540: 'Sample-For_Special-Purpose',
        541: 'Kindletool-Operation-Information',
        542: 'Container_Id',
        543: 'Asset-Type',  # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER
        544: 'Unknown_544',
    }
    # Records whose content is a 1-, 2- or 4-byte integer.
    id_map_values = {
        115: 'sample',
        116: 'StartOffset',
        121: 'Mobi8-Boundary-Section',
        125: 'Embedded-Record-Count',
        130: 'Offline-Sample',
        131: 'Metadata-Record-Offset',
        201: 'CoverOffset',
        202: 'ThumbOffset',
        203: 'HasFakeCover',
        204: 'Creator-Software',
        205: 'Creator-Major-Version',
        206: 'Creator-Minor-Version',
        207: 'Creator-Build-Number',
        401: 'Clipping-Limit',
        402: 'Publisher-Limit',
        404: 'Text-to-Speech-Disabled',
        406: 'Rental-Expiration-Time',
    }
    # Records whose content is dumped as a hex string.
    id_map_hexstrings = {
        208: 'Watermark_(hex)',
        209: 'Tamper-Proof-Keys_(hex)',
        300: 'Font-Signature_(hex)',
        403: 'Unknown_(403)_(hex)',
        405: 'Ownership-Type_(hex)',
        407: 'Unknown_(407)_(hex)',
        420: 'Multimedia-Content-Reference_(hex)',
        450: 'Locations_Match_(hex)',
        451: 'Full-Story-Length_(hex)',
        452: 'Sample-Start_Location_(hex)',
        453: 'Sample-End-Location_(hex)',
    }
    # Total record size (8-byte record header included) -> (struct format,
    # output template) for integer-valued records.
    value_layouts = {
        9: (b'B', '\n Key: "%s"\n Value: 0x%01x'),
        10: (b'>H', '\n Key: "%s"\n Value: 0x%02x'),
        12: (b'>L', '\n Key: "%s"\n Value: 0x%04x'),
    }

    _length, num_items = struct.unpack(b'>LL', extheader[4:12])
    extheader = extheader[12:]
    pos = 0
    for _ in range(num_items):
        key_id, size = struct.unpack(b'>LL', extheader[pos:pos+8])
        content = extheader[pos + 8: pos + size]
        if key_id in id_map_strings:
            name = id_map_strings[key_id]
            print('\n Key: "%s"\n Value: "%s"' % (name, content.decode(codec, errors='replace')))
        elif key_id in id_map_values:
            name = id_map_values[key_id]
            if size in value_layouts:
                fmt, template = value_layouts[size]
                value, = struct.unpack(fmt, content)
                print(template % (name, value))
            else:
                print("\nError: Value for %s has unexpected size of %s" % (name, size))
        elif key_id in id_map_hexstrings:
            name = id_map_hexstrings[key_id]
            print('\n Key: "%s"\n Value: 0x%s' % (name, hexlify(content)))
        else:
            print("\nWarning: Unknown metadata with id %s found" % key_id)
            name = str(key_id) + ' (hex)'
            print(' Key: "%s"\n Value: 0x%s' % (name, hexlify(content)))
        pos += size
    return
class MobiHeader:
# Field layout table used when dumping a version 0 (PalmDOC) header.
# Each entry maps a field name to (byte offset, struct format, size in bytes).
# all values are packed in big endian format
palmdoc_header = {
    'compression_type' : (0x00, b'>H', 2),
    'fill0' : (0x02, b'>H', 2),
    'text_length' : (0x04, b'>L', 4),
    'text_records' : (0x08, b'>H', 2),
    'max_section_size' : (0x0a, b'>H', 2),
    'read_pos ' : (0x0c, b'>L', 4),
}
# Field layout table used when dumping headers with 0 < version < 8.
# Each entry maps a field name to (byte offset, struct format, size in bytes).
# The field name is also the description printed by the header dump.
#
# Keys in a dict literal must be unique: a repeated key silently replaces the
# earlier entry, which drops that field from the dump.  Every not-yet-understood
# field is therefore named after its own offset.
mobi6_header = {
    'compression_type' : (0x00, b'>H', 2),
    'fill0' : (0x02, b'>H', 2),
    'text_length' : (0x04, b'>L', 4),
    'text_records' : (0x08, b'>H', 2),
    'max_section_size' : (0x0a, b'>H', 2),
    'crypto_type' : (0x0c, b'>H', 2),
    'fill1' : (0x0e, b'>H', 2),
    'magic' : (0x10, b'4s', 4),
    'header_length (from MOBI)' : (0x14, b'>L', 4),
    'type' : (0x18, b'>L', 4),
    'codepage' : (0x1c, b'>L', 4),
    'unique_id' : (0x20, b'>L', 4),
    'version' : (0x24, b'>L', 4),
    'metaorthindex' : (0x28, b'>L', 4),
    'metainflindex' : (0x2c, b'>L', 4),
    'index_names' : (0x30, b'>L', 4),
    'index_keys' : (0x34, b'>L', 4),
    'extra_index0' : (0x38, b'>L', 4),
    'extra_index1' : (0x3c, b'>L', 4),
    'extra_index2' : (0x40, b'>L', 4),
    'extra_index3' : (0x44, b'>L', 4),
    'extra_index4' : (0x48, b'>L', 4),
    'extra_index5' : (0x4c, b'>L', 4),
    'first_nontext' : (0x50, b'>L', 4),
    'title_offset' : (0x54, b'>L', 4),
    'title_length' : (0x58, b'>L', 4),
    'language_code' : (0x5c, b'>L', 4),
    'dict_in_lang' : (0x60, b'>L', 4),
    'dict_out_lang' : (0x64, b'>L', 4),
    'min_version' : (0x68, b'>L', 4),
    'first_resc_offset' : (0x6c, b'>L', 4),
    'huff_offset' : (0x70, b'>L', 4),
    'huff_num' : (0x74, b'>L', 4),
    'huff_tbl_offset' : (0x78, b'>L', 4),
    'huff_tbl_len' : (0x7c, b'>L', 4),
    'exth_flags' : (0x80, b'>L', 4),
    'fill3_a' : (0x84, b'>L', 4),
    'fill3_b' : (0x88, b'>L', 4),
    'fill3_c' : (0x8c, b'>L', 4),
    'fill3_d' : (0x90, b'>L', 4),
    'fill3_e' : (0x94, b'>L', 4),
    'fill3_f' : (0x98, b'>L', 4),
    'fill3_g' : (0x9c, b'>L', 4),
    'fill3_h' : (0xa0, b'>L', 4),
    'unknown0' : (0xa4, b'>L', 4),
    'drm_offset' : (0xa8, b'>L', 4),
    'drm_count' : (0xac, b'>L', 4),
    'drm_size' : (0xb0, b'>L', 4),
    'drm_flags' : (0xb4, b'>L', 4),
    'fill4_a' : (0xb8, b'>L', 4),
    'fill4_b' : (0xbc, b'>L', 4),
    'first_content' : (0xc0, b'>H', 2),
    'last_content' : (0xc2, b'>H', 2),
    # was a second 'unknown0', which replaced the 0xa4 entry above
    'unknown_0xc4' : (0xc4, b'>L', 4),
    'fcis_offset' : (0xc8, b'>L', 4),
    'fcis_count' : (0xcc, b'>L', 4),
    'flis_offset' : (0xd0, b'>L', 4),
    'flis_count' : (0xd4, b'>L', 4),
    'unknown1' : (0xd8, b'>L', 4),
    'unknown2' : (0xdc, b'>L', 4),
    'srcs_offset' : (0xe0, b'>L', 4),
    'srcs_count' : (0xe4, b'>L', 4),
    'unknown3' : (0xe8, b'>L', 4),
    'unknown4' : (0xec, b'>L', 4),
    'fill5' : (0xf0, b'>H', 2),
    'traildata_flags' : (0xf2, b'>H', 2),
    'ncx_index' : (0xf4, b'>L', 4),
    'unknown5' : (0xf8, b'>L', 4),
    'unknown6' : (0xfc, b'>L', 4),
    'datp_offset' : (0x100, b'>L', 4),
    'unknown7' : (0x104, b'>L', 4),
    'unknown_0x108' : (0x108, b'>L', 4),
    'unknown_0x10c' : (0x10C, b'>L', 4),
    'unknown_0x110' : (0x110, b'>L', 4),
    'unknown_0x114' : (0x114, b'>L', 4),
    'unknown_0x118' : (0x118, b'>L', 4),
    'unknown_0x11c' : (0x11C, b'>L', 4),
    'unknown_0x120' : (0x120, b'>L', 4),
    'unknown_0x124' : (0x124, b'>L', 4),
    'unknown_0x128' : (0x128, b'>L', 4),
    'unknown_0x12c' : (0x12C, b'>L', 4),
    'unknown_0x130' : (0x130, b'>L', 4),
    'unknown_0x134' : (0x134, b'>L', 4),
    'unknown_0x138' : (0x138, b'>L', 4),
    # was a second 0x11C entry; the run of 4-byte fields continues at 0x13C
    'unknown_0x13c' : (0x13C, b'>L', 4),
}
# Field layout table used when dumping headers with version >= 8.
# Each entry maps a field name to (byte offset, struct format, size in bytes).
# The field name is also the description printed by the header dump.
#
# Keys in a dict literal must be unique: a repeated key silently replaces the
# earlier entry, which drops that field from the dump.  Every not-yet-understood
# trailing field is therefore named after its own offset.
mobi8_header = {
    'compression_type' : (0x00, b'>H', 2),
    'fill0' : (0x02, b'>H', 2),
    'text_length' : (0x04, b'>L', 4),
    'text_records' : (0x08, b'>H', 2),
    'max_section_size' : (0x0a, b'>H', 2),
    'crypto_type' : (0x0c, b'>H', 2),
    'fill1' : (0x0e, b'>H', 2),
    'magic' : (0x10, b'4s', 4),
    'header_length (from MOBI)' : (0x14, b'>L', 4),
    'type' : (0x18, b'>L', 4),
    'codepage' : (0x1c, b'>L', 4),
    'unique_id' : (0x20, b'>L', 4),
    'version' : (0x24, b'>L', 4),
    'metaorthindex' : (0x28, b'>L', 4),
    'metainflindex' : (0x2c, b'>L', 4),
    'index_names' : (0x30, b'>L', 4),
    'index_keys' : (0x34, b'>L', 4),
    'extra_index0' : (0x38, b'>L', 4),
    'extra_index1' : (0x3c, b'>L', 4),
    'extra_index2' : (0x40, b'>L', 4),
    'extra_index3' : (0x44, b'>L', 4),
    'extra_index4' : (0x48, b'>L', 4),
    'extra_index5' : (0x4c, b'>L', 4),
    'first_nontext' : (0x50, b'>L', 4),
    'title_offset' : (0x54, b'>L', 4),
    'title_length' : (0x58, b'>L', 4),
    'language_code' : (0x5c, b'>L', 4),
    'dict_in_lang' : (0x60, b'>L', 4),
    'dict_out_lang' : (0x64, b'>L', 4),
    'min_version' : (0x68, b'>L', 4),
    'first_resc_offset' : (0x6c, b'>L', 4),
    'huff_offset' : (0x70, b'>L', 4),
    'huff_num' : (0x74, b'>L', 4),
    'huff_tbl_offset' : (0x78, b'>L', 4),
    'huff_tbl_len' : (0x7c, b'>L', 4),
    'exth_flags' : (0x80, b'>L', 4),
    'fill3_a' : (0x84, b'>L', 4),
    'fill3_b' : (0x88, b'>L', 4),
    'fill3_c' : (0x8c, b'>L', 4),
    'fill3_d' : (0x90, b'>L', 4),
    'fill3_e' : (0x94, b'>L', 4),
    'fill3_f' : (0x98, b'>L', 4),
    'fill3_g' : (0x9c, b'>L', 4),
    'fill3_h' : (0xa0, b'>L', 4),
    'unknown0' : (0xa4, b'>L', 4),
    'drm_offset' : (0xa8, b'>L', 4),
    'drm_count' : (0xac, b'>L', 4),
    'drm_size' : (0xb0, b'>L', 4),
    'drm_flags' : (0xb4, b'>L', 4),
    'fill4_a' : (0xb8, b'>L', 4),
    'fill4_b' : (0xbc, b'>L', 4),
    'fdst_offset' : (0xc0, b'>L', 4),
    'fdst_flow_count' : (0xc4, b'>L', 4),
    'fcis_offset' : (0xc8, b'>L', 4),
    'fcis_count' : (0xcc, b'>L', 4),
    'flis_offset' : (0xd0, b'>L', 4),
    'flis_count' : (0xd4, b'>L', 4),
    'unknown1' : (0xd8, b'>L', 4),
    'unknown2' : (0xdc, b'>L', 4),
    'srcs_offset' : (0xe0, b'>L', 4),
    'srcs_count' : (0xe4, b'>L', 4),
    'unknown3' : (0xe8, b'>L', 4),
    'unknown4' : (0xec, b'>L', 4),
    'fill5' : (0xf0, b'>H', 2),
    'traildata_flags' : (0xf2, b'>H', 2),
    'ncx_index' : (0xf4, b'>L', 4),
    'fragment_index' : (0xf8, b'>L', 4),
    'skeleton_index' : (0xfc, b'>L', 4),
    'datp_offset' : (0x100, b'>L', 4),
    'guide_index' : (0x104, b'>L', 4),
    'unknown_0x108' : (0x108, b'>L', 4),
    'unknown_0x10c' : (0x10C, b'>L', 4),
    'unknown_0x110' : (0x110, b'>L', 4),
    'unknown_0x114' : (0x114, b'>L', 4),
    'unknown_0x118' : (0x118, b'>L', 4),
    'unknown_0x11c' : (0x11C, b'>L', 4),
    'unknown_0x120' : (0x120, b'>L', 4),
    'unknown_0x124' : (0x124, b'>L', 4),
    'unknown_0x128' : (0x128, b'>L', 4),
    'unknown_0x12c' : (0x12C, b'>L', 4),
    'unknown_0x130' : (0x130, b'>L', 4),
    'unknown_0x134' : (0x134, b'>L', 4),
    'unknown_0x138' : (0x138, b'>L', 4),
    # was a second 0x11C entry; the run of 4-byte fields continues at 0x13C
    'unknown_0x13c' : (0x13C, b'>L', 4),
}
# Key orderings for the three layout tables, computed once when the class is
# created; dumpheader() iterates over these rather than over the dicts.
# NOTE(review): sortedHeaderKeys is defined outside this view - presumably it
# orders the field names by their byte offset; confirm against its definition.
palmdoc_header_sorted_keys = sortedHeaderKeys(palmdoc_header)
mobi6_header_sorted_keys = sortedHeaderKeys(mobi6_header)
mobi8_header_sorted_keys = sortedHeaderKeys(mobi8_header)
# EXTH record ids whose payload is text: parseMetaData() and dump_exth()
# decode the payload with the book's codec and use the mapped name as the
# metadata key / printed description.
id_map_strings = {
    1 : 'Drm Server Id',
    2 : 'Drm Commerce Id',
    3 : 'Drm Ebookbase Book Id',
    4 : 'Drm Ebookbase Dep Id',
    100 : 'Creator',
    101 : 'Publisher',
    102 : 'Imprint',
    103 : 'Description',
    104 : 'ISBN',
    105 : 'Subject',
    106 : 'Published',
    107 : 'Review',
    108 : 'Contributor',
    109 : 'Rights',
    110 : 'SubjectCode',
    111 : 'Type',
    112 : 'Source',
    113 : 'ASIN',
    114 : 'versionNumber',
    117 : 'Adult',
    118 : 'Retail-Price',
    119 : 'Retail-Currency',
    120 : 'TSC',
    122 : 'fixed-layout',
    123 : 'book-type',
    124 : 'orientation-lock',
    126 : 'original-resolution',
    127 : 'zero-gutter',
    128 : 'zero-margin',
    129 : 'MetadataResourceURI',
    132 : 'RegionMagnification',
    150 : 'LendingEnabled',
    200 : 'DictShortName',
    501 : 'cdeType',
    502 : 'last_update_time',
    503 : 'Updated_Title',
    504 : 'CDEContentKey',
    505 : 'AmazonContentReference',
    506 : 'Title-Language',
    507 : 'Title-Display-Direction',
    508 : 'Title-Pronunciation',
    509 : 'Title-Collation',
    510 : 'Secondary-Title',
    511 : 'Secondary-Title-Language',
    512 : 'Secondary-Title-Direction',
    513 : 'Secondary-Title-Pronunciation',
    514 : 'Secondary-Title-Collation',
    515 : 'Author-Language',
    516 : 'Author-Display-Direction',
    517 : 'Author-Pronunciation',
    518 : 'Author-Collation',
    519 : 'Author-Type',
    520 : 'Publisher-Language',
    521 : 'Publisher-Display-Direction',
    522 : 'Publisher-Pronunciation',
    523 : 'Publisher-Collation',
    524 : 'Content-Language-Tag',
    525 : 'primary-writing-mode',
    526 : 'NCX-Ingested-By-Software',
    527 : 'page-progression-direction',
    528 : 'override-kindle-fonts',
    529 : 'Compression-Upgraded',
    530 : 'Soft-Hyphens-In-Content',
    # NOTE(review): 'Langague' is misspelled, but the string is a runtime
    # metadata key - check for consumers before correcting it.
    531 : 'Dictionary_In_Langague',
    532 : 'Dictionary_Out_Language',
    533 : 'Font_Converted',
    534 : 'Amazon_Creator_Info',
    535 : 'Creator-Build-Tag',
    536 : 'HD-Media-Containers-Info', # CONT_Header is 0, Ends with CONTAINER_BOUNDARY (or Asset_Type?)
    538 : 'Resource-Container-Fidelity',
    539 : 'HD-Container-Mimetype',
    540 : 'Sample-For_Special-Purpose',
    541 : 'Kindletool-Operation-Information',
    542 : 'Container_Id',
    543 : 'Asset-Type', # FONT_CONTAINER, BW_CONTAINER, HD_CONTAINER
    544 : 'Unknown_544',
}
# EXTH record ids whose payload is a big-endian unsigned integer.  The
# readers pick the width from the record size (9 -> 1 byte, 10 -> 2 bytes,
# 12 -> 4 bytes, i.e. the 8-byte record header plus the value).
id_map_values = {
    115 : 'sample',
    116 : 'StartOffset',
    121 : 'Mobi8-Boundary-Section',
    125 : 'Embedded-Record-Count',
    130 : 'Offline-Sample',
    131 : 'Metadata-Record-Offset',
    201 : 'CoverOffset',
    202 : 'ThumbOffset',
    203 : 'HasFakeCover',
    204 : 'Creator-Software',
    205 : 'Creator-Major-Version',
    206 : 'Creator-Minor-Version',
    207 : 'Creator-Build-Number',
    401 : 'Clipping-Limit',
    402 : 'Publisher-Limit',
    404 : 'Text-to-Speech-Disabled',
    406 : 'Rental-Expiration-Time',
}
# EXTH record ids whose payload is opaque binary data; the readers store and
# print it as a hex string (via hexlify).
id_map_hexstrings = {
    208 : 'Watermark_(hex)',
    209 : 'Tamper-Proof-Keys_(hex)',
    300 : 'Font-Signature_(hex)',
    403 : 'Unknown_(403)_(hex)',
    405 : 'Ownership-Type_(hex)',
    407 : 'Unknown_(407)_(hex)',
    420 : 'Multimedia-Content-Reference_(hex)',
    450 : 'Locations_Match_(hex)',
    451 : 'Full-Story-Length_(hex)',
    452 : 'Sample-Start_Location_(hex)',
    453 : 'Sample-End-Location_(hex)',
}
def __init__(self, sect, sectNumber):
    """Parse the header stored in section ``sectNumber`` of ``sect``.

    ``sect`` must provide ``loadSection``, ``setsectiondescription``,
    ``ident`` and ``palmname`` (all used below).  Section numbers read
    from the header are stored relative to it, so ``self.start`` is added
    to every valid one; 0xffffffff is kept as the "not present" marker.

    Raises unpackException when the section is neither a MOBI header nor
    a PalmDOC file, or when the compression type is not recognised.
    """
    self.sect = sect
    self.start = sectNumber
    self.header = self.sect.loadSection(self.start)
    # a MOBI header carries the b'MOBI' magic right after the 16 byte
    # PalmDOC header; otherwise only plain PalmDOC files are accepted
    if len(self.header)>20 and self.header[16:20] == b'MOBI':
        self.sect.setsectiondescription(0,"Mobipocket Header")
        self.palm = False
    elif self.sect.ident == b'TEXtREAd':
        self.sect.setsectiondescription(0, "PalmDOC Header")
        self.palm = True
    else:
        raise unpackException('Unknown File Format')
    # number of text records
    self.records, = struct.unpack_from(b'>H', self.header, 0x8)
    # set defaults in case this is a PalmDOC
    self.title = self.sect.palmname.decode('latin-1', errors='replace')
    self.length = len(self.header)-16
    self.type = 3
    self.codepage = 1252
    self.codec = 'windows-1252'
    self.unique_id = 0
    self.version = 0
    self.hasExth = False
    self.exth = b''
    self.exth_offset = self.length + 16
    self.exth_length = 0
    self.crypto_type = 0
    self.firstnontext = self.start+self.records + 1
    self.firstresource = self.start+self.records + 1
    self.ncxidx = 0xffffffff
    self.metaOrthIndex = 0xffffffff
    self.metaInflIndex = 0xffffffff
    self.skelidx = 0xffffffff
    self.fragidx = 0xffffffff
    self.guideidx = 0xffffffff
    self.fdst = 0xffffffff
    # first four bytes of the section after the header; isPrintReplica()
    # compares them against b"%MOP"
    self.mlstart = self.sect.loadSection(self.start+1)[:4]
    self.rawSize = 0
    self.metadata = dict_()
    # set up for decompression/unpacking
    self.compression, = struct.unpack_from(b'>H', self.header, 0x0)
    if self.compression == 0x4448:
        reader = HuffcdicReader()
        huffoff, huffnum = struct.unpack_from(b'>LL', self.header, 0x70)
        huffoff = huffoff + self.start
        self.sect.setsectiondescription(huffoff,"Huffman Compression Seed")
        reader.loadHuff(self.sect.loadSection(huffoff))
        # the remaining huffnum-1 sections are loaded as CDIC records
        for i in range(1, huffnum):
            self.sect.setsectiondescription(huffoff+i,"Huffman CDIC Compression Seed %d" % i)
            reader.loadCdic(self.sect.loadSection(huffoff+i))
        self.unpack = reader.unpack
    elif self.compression == 2:
        self.unpack = PalmdocReader().unpack
    elif self.compression == 1:
        self.unpack = UncompressedReader().unpack
    else:
        raise unpackException('invalid compression type: 0x%4x' % self.compression)
    # a PalmDOC file has no MOBI header to parse: keep the defaults
    if self.palm:
        return
    self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack(b'>LLLLL', self.header[20:40])
    codec_map = {
        1252 : 'windows-1252',
        65001: 'utf-8',
    }
    # any other codepage keeps the windows-1252 default
    if self.codepage in codec_map:
        self.codec = codec_map[self.codepage]
    # title
    toff, tlen = struct.unpack(b'>II', self.header[0x54:0x5c])
    tend = toff + tlen
    self.title=self.header[toff:tend].decode(self.codec, errors='replace')
    exth_flag, = struct.unpack(b'>L', self.header[0x80:0x84])
    # note: stored as the masked integer (0 or 0x40), not as a bool
    self.hasExth = exth_flag & 0x40
    self.exth_offset = self.length + 16
    self.exth_length = 0
    if self.hasExth:
        self.exth_length, = struct.unpack_from(b'>L', self.header, self.exth_offset+4)
        self.exth_length = ((self.exth_length + 3)>>2)<<2 # round to next 4 byte boundary
        self.exth = self.header[self.exth_offset:self.exth_offset+self.exth_length]
    # parse the exth / metadata
    self.parseMetaData()
    # self.mlstart = self.sect.loadSection(self.start+1)
    # self.mlstart = self.mlstart[0:4]
    self.crypto_type, = struct.unpack_from(b'>H', self.header, 0xC)
    # Start sector for additional files such as images, fonts, resources, etc
    # Can be missing so fall back to default set previously
    ofst, = struct.unpack_from(b'>L', self.header, 0x6C)
    if ofst != 0xffffffff:
        self.firstresource = ofst + self.start
    ofst, = struct.unpack_from(b'>L', self.header, 0x50)
    if ofst != 0xffffffff:
        self.firstnontext = ofst + self.start
    # print replica files keep every index at its "not present" default
    if self.isPrintReplica():
        return
    if self.version < 8:
        # Dictionary metaOrthIndex
        self.metaOrthIndex, = struct.unpack_from(b'>L', self.header, 0x28)
        if self.metaOrthIndex != 0xffffffff:
            self.metaOrthIndex += self.start
        # Dictionary metaInflIndex
        self.metaInflIndex, = struct.unpack_from(b'>L', self.header, 0x2C)
        if self.metaInflIndex != 0xffffffff:
            self.metaInflIndex += self.start
    # handle older headers without any ncxindex info and later
    # specifically 0xe4 headers
    if self.length + 16 < 0xf8:
        return
    # NCX Index
    self.ncxidx, = struct.unpack(b'>L', self.header[0xf4:0xf8])
    if self.ncxidx != 0xffffffff:
        self.ncxidx += self.start
    # K8 specific Indexes
    if self.start != 0 or self.version == 8:
        # Index into <xml> file skeletons in RawML
        self.skelidx, = struct.unpack_from(b'>L', self.header, 0xfc)
        if self.skelidx != 0xffffffff:
            self.skelidx += self.start
        # Index into <div> sections in RawML
        self.fragidx, = struct.unpack_from(b'>L', self.header, 0xf8)
        if self.fragidx != 0xffffffff:
            self.fragidx += self.start
        # Index into Other files
        self.guideidx, = struct.unpack_from(b'>L', self.header, 0x104)
        if self.guideidx != 0xffffffff:
            self.guideidx += self.start
        # dictionaries do not seem to use the same approach in K8's
        # so disable them
        self.metaOrthIndex = 0xffffffff
        self.metaInflIndex = 0xffffffff
        # need to use the FDST record to find out how to properly unpack
        # the rawML into pieces
        # it is simply a table of start and end locations for each flow piece
        self.fdst, = struct.unpack_from(b'>L', self.header, 0xc0)
        # NOTE(review): fdstcnt is only assigned on this K8 path, so the
        # attribute does not exist on other instances
        self.fdstcnt, = struct.unpack_from(b'>L', self.header, 0xc4)
        # if cnt is 1 or less, fdst section number can be garbage
        if self.fdstcnt <= 1:
            self.fdst = 0xffffffff
        if self.fdst != 0xffffffff:
            self.fdst += self.start
            # setting of fdst section description properly handled in mobi_kf8proc
def dump_exth(self):
    """Print a table with one line per record of the EXTH block.

    Does nothing when the header has no EXTH data.  Text records are
    decoded with the book's codec, integer records are shown according to
    their width, everything else is shown as a hex string.
    """
    exth_present = self.hasExth and self.exth_length != 0 and self.exth != b''
    if not exth_present:
        return
    # determine text encoding
    text_codec = self.codec
    exth = self.exth
    # record size (8 byte record header + value) -> (struct format, line layout)
    integer_layouts = {
        9: (b'B', '{0:3d} byte {1:<30s} {2:d}'),
        10: (b'>H', '{0:3d} word {1:<30s} 0x{2:0>4X} ({2:d})'),
        12: (b'>L', '{0:3d} long {1:<30s} 0x{2:0>8X} ({2:d})'),
    }
    record_count = struct.unpack(b'>L', exth[8:12])[0]
    print("Key Size Decription Value")
    cursor = 12
    for _ in range(record_count):
        rec_id, rec_size = struct.unpack(b'>LL', exth[cursor:cursor+8])
        payload_size = rec_size - 8
        payload = exth[cursor + 8: cursor + rec_size]
        if rec_id in MobiHeader.id_map_strings:
            label = MobiHeader.id_map_strings[rec_id]
            text = payload.decode(text_codec, errors='replace')
            print('{0: >3d} {1: >4d} {2: <30s} {3:s}'.format(rec_id, payload_size, label, text))
        elif rec_id in MobiHeader.id_map_values:
            label = MobiHeader.id_map_values[rec_id]
            if rec_size in integer_layouts:
                int_format, line_layout = integer_layouts[rec_size]
                number = struct.unpack(int_format, payload)[0]
                print(line_layout.format(rec_id, label, number))
            else:
                print('{0: >3d} {1: >4d} {2: <30s} (0x{3:s})'.format(rec_id, payload_size, "Bad size for "+label, hexlify(payload)))
        elif rec_id in MobiHeader.id_map_hexstrings:
            label = MobiHeader.id_map_hexstrings[rec_id]
            print('{0:3d} {1:4d} {2:<30s} 0x{3:s}'.format(rec_id, payload_size, label, hexlify(payload)))
        else:
            label = "Unknown EXTH ID {0:d}".format(rec_id)
            print("{0: >3d} {1: >4d} {2: <30s} 0x{3:s}".format(rec_id, payload_size, label, hexlify(payload)))
        cursor += rec_size
    return
def dumpheader(self):
    """Print every known header field, the EXTH records and leftover bytes.

    Side effects: sets self.hdr, self.mobi_header,
    self.mobi_header_sorted_keys, self.title, self.extra1 and self.extra2.
    """
    # first 16 bytes are not part of the official mobiheader
    # but we will treat it as such
    # so section 0 is 16 (decimal) + self.length in total == at least 0x108 bytes for Mobi 8 headers
    header_end = self.length + 16
    print("Dumping section %d, Mobipocket Header version: %d, total length %d" % (self.start,self.version, header_end))
    self.hdr = {}
    # set it up for the proper header version
    if self.version == 0:
        layout = MobiHeader.palmdoc_header
        ordered_fields = MobiHeader.palmdoc_header_sorted_keys
    elif self.version < 8:
        layout = MobiHeader.mobi6_header
        ordered_fields = MobiHeader.mobi6_header_sorted_keys
    else:
        layout = MobiHeader.mobi8_header
        ordered_fields = MobiHeader.mobi8_header_sorted_keys
    self.mobi_header = layout
    self.mobi_header_sorted_keys = ordered_fields
    # parse the header information; fields starting past the end are skipped
    for field in ordered_fields:
        offset, fmt, _width = layout[field]
        if offset >= header_end:
            continue
        self.hdr[field] = struct.unpack_from(fmt, self.header, offset)[0]
    if 'title_offset' not in self.hdr:
        title_offset = 0
        title_length = 0
    else:
        title_offset = self.hdr['title_offset']
        title_length = self.hdr['title_length']
    if title_offset != 0:
        raw_title = self.header[title_offset:title_offset+title_length]
        self.title = raw_title.decode(self.codec, errors='replace')
        # title record always padded with two nul bytes and then padded with nuls to next 4 byte boundary
        title_length = (title_length + 2 + 3) // 4 * 4
    else:
        # no title stored in the header: fall back to the palm database name
        title_offset = len(self.header)
        title_length = 0
        self.title = self.sect.palmname.decode('latin-1', errors='replace')
    exth_end = self.exth_offset + self.exth_length
    self.extra1 = self.header[exth_end:title_offset]
    self.extra2 = self.header[title_offset+title_length:]
    print("Mobipocket header from section %d" % self.start)
    print(" Offset Value Hex Dec Description")
    for field in ordered_fields:
        offset, fmt, width = layout[field]
        if offset >= header_end:
            continue
        if field == 'magic':
            # the magic is the only non-numeric field
            self.hdr[field] = unicode_str(self.hdr[field])
            row_layout = "0x{0:0>3X} ({0:3d}){2:>11s} {3:s}"
        else:
            row_layout = "0x{0:0>3X} ({0:3d}){1: >" + str(9-2*width) +"s}0x{2:0>" + str(2*width) + "X} {2:10d} {3:s}"
        print(row_layout.format(offset, " ",self.hdr[field], field))
    print("")
    if self.exth_length > 0:
        print("EXTH metadata, offset %d, padded length %d" % (self.exth_offset,self.exth_length))
        self.dump_exth()
        print("")
    if len(self.extra1) > 0:
        print("Extra data between EXTH and Title, length %d" % len(self.extra1))
        print(hexlify(self.extra1))
        print("")
    if title_length > 0:
        print("Title in header at offset %d, padded length %d: '%s'" %(title_offset,title_length,self.title))
        print("")
    if len(self.extra2) > 0:
        print("Extra data between Title and end of header, length %d" % len(self.extra2))
        print(hexlify(self.extra2))
        print("")
def isPrintReplica(self):
    """Return True when self.mlstart begins with the bytes ``%MOP``."""
    marker = self.mlstart[0:4]
    return marker == b"%MOP"
def isK8(self):
    """Return True when the header is not in section 0 or its version is 8."""
    if self.start != 0:
        return True
    return self.version == 8
def isEncrypted(self):
    """Return True when the header's crypto type field is non-zero."""
    crypto = self.crypto_type
    return crypto != 0
def hasNCX(self):
    """Return True unless the NCX index holds the 0xffffffff "absent" marker."""
    absent = 0xffffffff
    return self.ncxidx != absent
def isDictionary(self):
    """Return True unless metaOrthIndex holds the 0xffffffff "absent" marker."""
    absent = 0xffffffff
    return self.metaOrthIndex != absent
def getncxIndex(self):
    """Return the stored NCX index section number (0xffffffff when absent)."""
    ncx_section = self.ncxidx
    return ncx_section
def decompress(self, data):
    """Run ``data`` through the unpack callable chosen in __init__."""
    unpacked = self.unpack(data)
    return unpacked
def Language(self):
    """Return getLanguage() for the language code stored at header offset 0x5c."""
    (langcode,) = struct.unpack(b'!L', self.header[0x5c:0x60])
    primary = langcode & 0xFF
    # NOTE(review): the sub-language is taken with a shift of 8 here while
    # DictInLanguage/DictOutLanguage use a shift of 10 - confirm which one
    # matches the ids getLanguage expects.
    secondary = (langcode >> 8) & 0xFF
    return getLanguage(primary, secondary)
def DictInLanguage(self):
    """Return the dictionary input language, or False when there is none.

    False is returned when this is not a dictionary or when the low byte
    of the code stored at header offset 0x60 is zero.
    """
    if not self.isDictionary():
        return False
    (langcode,) = struct.unpack(b'!L', self.header[0x60:0x64])
    primary = langcode & 0xFF
    if primary == 0:
        return False
    secondary = (langcode >> 10) & 0xFF
    return getLanguage(primary, secondary)
def DictOutLanguage(self):
    """Return the dictionary output language, or False when there is none.

    False is returned when this is not a dictionary or when the low byte
    of the code stored at header offset 0x64 is zero.
    """
    if not self.isDictionary():
        return False
    (langcode,) = struct.unpack(b'!L', self.header[0x64:0x68])
    primary = langcode & 0xFF
    if primary == 0:
        return False
    secondary = (langcode >> 10) & 0xFF
    return getLanguage(primary, secondary)
def getRawML(self):
    """Return the unpacked text of all text records joined into one bytes object.

    Each of the self.records sections after the header is stripped of its
    trailing data entries, passed through self.unpack and labelled via
    setsectiondescription.  The total size is stored in self.rawSize.
    """
    def getSizeOfTrailingDataEntry(data):
        # Decode the size from (at most) the last four bytes: 7 bits per
        # byte, restarting whenever a byte has its high bit set.
        num = 0
        for v in data[-4:]:
            if bord(v) & 0x80:
                num = 0
            num = (num << 7) | (bord(v) & 0x7f)
        return num

    def trimTrailingDataEntries(data):
        for _ in range(trailers):
            num = getSizeOfTrailingDataEntry(data)
            # data[:-0] is data[:0], i.e. empty: a zero size must leave
            # the record untouched instead of discarding all of it
            if num:
                data = data[:-num]
        # an empty record has no final byte to read the count from
        if multibyte and data:
            num = (ord(data[-1:]) & 3) + 1
            data = data[:-num]
        return data

    multibyte = 0
    trailers = 0
    if self.sect.ident == b'BOOKMOBI':
        mobi_length, = struct.unpack_from(b'>L', self.header, 0x14)
        mobi_version, = struct.unpack_from(b'>L', self.header, 0x68)
        if (mobi_length >= 0xE4) and (mobi_version >= 5):
            flags, = struct.unpack_from(b'>H', self.header, 0xF2)
            # bit 0 flags the multibyte entry; every other set bit adds
            # one trailing data entry per record
            multibyte = flags & 1
            while flags > 1:
                if flags & 2:
                    trailers += 1
                flags = flags >> 1
    # get raw mobi markup language
    print("Unpacking raw markup language")
    if self.isK8():
        description = "KF8 Text Section {0:d}"
    elif self.version == 0:
        description = "PalmDOC Text Section {0:d}"
    else:
        description = "Mobipocket Text Section {0:d}"
    dataList = []
    for i in range(1, self.records+1):
        data = trimTrailingDataEntries(self.sect.loadSection(self.start + i))
        dataList.append(self.unpack(data))
        self.sect.setsectiondescription(self.start + i, description.format(i))
    rawML = b''.join(dataList)
    self.rawSize = len(rawML)
    return rawML
# all metadata is stored in a dictionary with key and returns a *list* of values
# a list is used to allow for multiple creators, multiple contributors, etc
def parseMetaData(self):
    """Fill self.metadata from the EXTH records and the basic header fields.

    Every value is stored inside a list under its name.  Text records are
    decoded with the book's codec, integer records are stored as decimal
    strings and everything else as a hex string.
    """
    def addValue(name, value):
        self.metadata.setdefault(name, []).append(value)

    text_codec = self.codec
    # record size (8 byte record header + value) -> struct format of the value
    integer_formats = {9: b'B', 10: b'>H', 12: b'>L'}
    if self.hasExth:
        _length, record_count = struct.unpack(b'>LL', self.exth[4:12])
        records = self.exth[12:]
        cursor = 0
        for _ in range(record_count):
            rec_id, rec_size = struct.unpack(b'>LL', records[cursor:cursor+8])
            payload = records[cursor + 8: cursor + rec_size]
            if rec_id in MobiHeader.id_map_strings:
                addValue(MobiHeader.id_map_strings[rec_id], payload.decode(text_codec, errors='replace'))
            elif rec_id in MobiHeader.id_map_values:
                name = MobiHeader.id_map_values[rec_id]
                if rec_size in integer_formats:
                    number = struct.unpack(integer_formats[rec_size], payload)[0]
                    # handle special case of missing CoverOffset or missing ThumbOffset
                    offset_missing = rec_size == 12 and rec_id in (201, 202) and number == 0xffffffff
                    if not offset_missing:
                        addValue(name, unicode_str(str(number)))
                else:
                    print("Warning: Bad key, size, value combination detected in EXTH ", rec_id, rec_size, hexlify(payload))
                    addValue(name, hexlify(payload))
            elif rec_id in MobiHeader.id_map_hexstrings:
                addValue(MobiHeader.id_map_hexstrings[rec_id], hexlify(payload))
            else:
                addValue(unicode_str(str(rec_id)) + ' (hex)', hexlify(payload))
            cursor += rec_size
    # add the basics to the metadata each as a list element
    self.metadata['Language'] = [self.Language()]
    self.metadata['Title'] = [unicode_str(self.title,self.codec)]
    self.metadata['Codec'] = [self.codec]
    self.metadata['UniqueID'] = [unicode_str(str(self.unique_id))]
    # if no asin create one using a uuid
    if 'ASIN' not in self.metadata:
        self.metadata['ASIN'] = [unicode_str(str(uuid.uuid4()))]
    # if no cdeType set it to "EBOK"
    if 'cdeType' not in self.metadata:
        self.metadata['cdeType'] = ['EBOK']
def getMetaData(self):
    """Return the metadata dictionary itself (not a copy)."""
    collected = self.metadata
    return collected
def describeHeader(self, DUMP):
    """Print a short summary of the header; also dump it when DUMP is true."""
    summary = (
        ("Mobi Version:", self.version),
        ("Codec:", self.codec),
        ("Title:", self.title),
    )
    for caption, value in summary:
        print(caption, value)
    if 'Updated_Title' in self.metadata:
        print("EXTH Title:", self.metadata['Updated_Title'][0])
    compression_names = {
        0x4448: "Huffdic compression",
        2: "Palmdoc compression",
        1: "No compression",
    }
    # any other compression value prints nothing
    if self.compression in compression_names:
        print(compression_names[self.compression])
    if DUMP:
        self.dumpheader()

View File

@@ -0,0 +1,439 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, utf8_str
if PY2:
range = xrange
import re
# note: re requires the pattern to be the exact same type as the data to be searched in python3
# but u"" is not allowed for the pattern itself only b""
from .mobi_utils import fromBase32
class HTMLProcessor:
    """Turn the raw markup of an original-style mobi into linked html.

    Call findAnchors() first (it stores its result in self.srctext), then
    insertHREFS().
    """

    def __init__(self, files, metadata, rscnames):
        """Store the inputs and mark every name in ``rscnames`` as used."""
        self.files = files
        self.metadata = metadata
        self.rscnames = rscnames
        # for original style mobis, default to including all image files in the opf manifest
        self.used = {}
        for name in rscnames:
            self.used[name] = 'used'

    def findAnchors(self, rawtext, indx_data, positionMap):
        """Insert the entries of ``positionMap`` into ``rawtext``.

        An ``<a id="fileposN" />`` anchor is first added to ``positionMap``
        (which is modified in place) for every position referenced by a
        ``filepos=`` attribute in ``rawtext`` or by a positive ``'pos'``
        entry of ``indx_data``.  Map entries at position 0 or beyond the
        end of the text are ignored.  Returns the resulting bytes, also
        kept in self.srctext.
        """
        # process the raw text
        # find anchors...
        print("Find link anchors")
        link_pattern = re.compile(br'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''', re.IGNORECASE)
        # A set keeps one anchor per position: several links to the same
        # filepos must not produce several elements with the same id.
        targets = set(int(m.group(1)) for m in link_pattern.finditer(rawtext))
        # TEST NCX: merge in filepos from indx
        if indx_data:
            targets.update(e['pos'] for e in indx_data if e['pos']>0)
        for position in targets:
            anchor = utf8_str('<a id="filepos%d" />' % position)
            if position in positionMap:
                positionMap[position] = positionMap[position] + anchor
            else:
                positionMap[position] = anchor
        # apply dictionary metadata and anchors
        print("Insert data into html")
        pos = 0
        lastPos = len(rawtext)
        dataList = []
        for end in sorted(positionMap.keys()):
            if end == 0 or end > lastPos:
                continue # something's up - can't put a tag in outside <html>...</html>
            dataList.append(rawtext[pos:end])
            dataList.append(positionMap[end])
            pos = end
        dataList.append(rawtext[pos:])
        srctext = b"".join(dataList)
        self.srctext = srctext
        self.indx_data = indx_data
        return srctext

    def insertHREFS(self):
        """Rewrite filepos links and recindex image references in self.srctext.

        Returns a ``(srctext, used)`` tuple; self.srctext is reset to None.
        An image reference that does not name a valid entry of
        self.rscnames is reported and left unchanged.
        """
        srctext = self.srctext
        rscnames = self.rscnames
        metadata = self.metadata
        # put in the hrefs
        print("Insert hrefs into html")
        # There doesn't seem to be a standard, so search as best as we can
        link_pattern = re.compile(br'''<a([^>]*?)filepos=['"]{0,1}0*(\d+)['"]{0,1}([^>]*?)>''', re.IGNORECASE)
        srctext = link_pattern.sub(br'''<a\1href="#filepos\2"\3>''', srctext)
        # remove empty anchors
        print("Remove empty anchors from html")
        srctext = re.sub(br"<a\s*/>",br"", srctext)
        srctext = re.sub(br"<a\s*>\s*</a>",br"", srctext)
        # convert image references
        print("Insert image references into html")
        # split string into image tag pieces and other pieces
        image_pattern = re.compile(br'''(<img.*?>)''', re.IGNORECASE)
        image_index_pattern = re.compile(br'''recindex=['"]{0,1}([0-9]+)['"]{0,1}''', re.IGNORECASE)
        srcpieces = image_pattern.split(srctext)
        srctext = self.srctext = None

        def image_source(match):
            # recindex counts from 1.  0 or a value past the end of the
            # list is not a valid reference (plain indexing would pick the
            # last image for 0 and raise IndexError for a too large value).
            imageNumber = int(match.group(1))
            imageName = None
            if 1 <= imageNumber <= len(rscnames):
                imageName = rscnames[imageNumber-1]
            if imageName is None:
                print("Error: Referenced image %s was not recognized as a valid image" % imageNumber)
                return match.group(0)
            return b'src="Images/' + utf8_str(imageName) + b'"'

        # all odd pieces are image tags (nulls string on even pieces if no space between them in srctext)
        # A callable replacement rewrites exactly the attribute that was
        # matched, even when an earlier one in the same tag was skipped.
        for i in range(1, len(srcpieces), 2):
            srcpieces[i] = image_index_pattern.sub(image_source, srcpieces[i])
        srctext = b"".join(srcpieces)
        # add in character set meta into the html header if needed
        if 'Codec' in metadata:
            # NOTE(review): the fixed offset 12 presumably assumes the text
            # starts with b'<html><head>' - confirm against the callers
            srctext = srctext[0:12]+b'<meta http-equiv="content-type" content="text/html; charset='+utf8_str(metadata.get('Codec')[0])+b'" />'+srctext[12:]
        return srctext, self.used
class XHTMLK8Processor:
    """Rewrite the KF8 parts and flows held by a K8 processor into XHTML/CSS.

    ``rscnames`` is a sequence of resource file names indexed by resource
    number minus one; an entry of None marks an unrecognized resource.
    ``k8proc`` supplies the parts, the flows and the link lookups.
    ``used`` maps every referenced resource or flow file name to ``'used'``.
    """

    def __init__(self, rscnames, k8proc):
        self.rscnames = rscnames
        self.k8proc = k8proc
        self.used = {}

    def _resource_name(self, number):
        """Return the name of 1-based resource *number*, or None if unknown.

        Numbers outside ``1..len(rscnames)`` yield None instead of raising
        IndexError (or, for 0, silently selecting the last resource).
        """
        if 1 <= number <= len(self.rscnames):
            return self.rscnames[number - 1]
        return None

    def buildXHTML(self):
        """Resolve all ``kindle:`` references and clean up the markup.

        Works on copies of the parts and flows obtained from ``self.k8proc``
        and stores the results back with ``setFlows`` / ``setParts``.

        @return: ``self.used``, the dictionary of referenced file names.
        """
        k8proc = self.k8proc

        # first need to update all links that are internal which
        # are based on positions within the xhtml files **BEFORE**
        # cutting and pasting any pieces into the xhtml text files
        # kindle:pos:fid:XXXX:off:YYYYYYYYYY (used for internal link within xhtml)
        # XXXX is the offset in records into divtbl
        # YYYYYYYYYYYY is a base32 number you add to the divtbl insertpos to get final position
        posfid_pattern = re.compile(br'''(<a.*?href=.*?>)''', re.IGNORECASE)
        posfid_index_pattern = re.compile(br'''['"]kindle:pos:fid:([0-9|A-V]+):off:([0-9|A-V]+).*?["']''')
        parts = []
        print("Building proper xhtml for each file")
        for i in range(k8proc.getNumberOfParts()):
            part = k8proc.getPart(i)
            # internal links
            srcpieces = posfid_pattern.split(part)
            for j in range(1, len(srcpieces), 2):
                tag = srcpieces[j]
                if tag.startswith(b'<'):
                    for m in posfid_index_pattern.finditer(tag):
                        posfid = m.group(1)
                        offset = m.group(2)
                        filename, idtag = k8proc.getIDTagByPosFid(posfid, offset)
                        if idtag == b'':
                            replacement = b'"' + utf8_str(filename) + b'"'
                        else:
                            replacement = b'"' + utf8_str(filename) + b'#' + idtag + b'"'
                        tag = posfid_index_pattern.sub(replacement, tag, 1)
                    srcpieces[j] = tag
            parts.append(b"".join(srcpieces))

        # we are free to cut and paste as we see fit
        # we can safely remove all of the Kindlegen generated aid tags
        # change aid ids that are in k8proc.linked_aids to xhtml ids
        find_tag_with_aid_pattern = re.compile(br'''(<[^>]*\said\s*=[^>]*>)''', re.IGNORECASE)
        within_tag_aid_position_pattern = re.compile(br'''\said\s*=['"]([^'"]*)['"]''')
        for i, part in enumerate(parts):
            srcpieces = find_tag_with_aid_pattern.split(part)
            for j in range(len(srcpieces)):
                tag = srcpieces[j]
                if tag.startswith(b'<'):
                    for m in within_tag_aid_position_pattern.finditer(tag):
                        aid = m.group(1)
                        replacement = b''
                        if aid in k8proc.linked_aids:
                            replacement = b' id="aid-' + aid + b'"'
                        tag = within_tag_aid_position_pattern.sub(replacement, tag, 1)
                    srcpieces[j] = tag
            parts[i] = b"".join(srcpieces)

        # we can safely replace all of the Kindlegen generated data-AmznPageBreak tags
        # with page-break-after style patterns
        find_tag_with_AmznPageBreak_pattern = re.compile(br'''(<[^>]*\sdata-AmznPageBreak=[^>]*>)''', re.IGNORECASE)
        within_tag_AmznPageBreak_position_pattern = re.compile(br'''\sdata-AmznPageBreak=['"]([^'"]*)['"]''')
        for i, part in enumerate(parts):
            srcpieces = find_tag_with_AmznPageBreak_pattern.split(part)
            for j in range(len(srcpieces)):
                tag = srcpieces[j]
                if tag.startswith(b'<'):
                    srcpieces[j] = within_tag_AmznPageBreak_position_pattern.sub(
                        lambda m: b' style="page-break-after:' + m.group(1) + b'"', tag)
            parts[i] = b"".join(srcpieces)

        # we have to handle substitutions for the flows pieces first as they may
        # be inlined into the xhtml text
        # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
        # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
        # kindle:embed:XXXX (used for fonts)
        flows = [None]

        # regular expression search patterns
        # NOTE: the tag name must be an alternation; written as a character
        # class ("[img\s|image\s]") it matches any tag starting with one of
        # those characters and forces a split on almost every tag.
        img_pattern = re.compile(br'''(<(?:img|image)\s[^>]*>)''', re.IGNORECASE)
        img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)[^'"]*['")]''', re.IGNORECASE)
        tag_pattern = re.compile(br'''(<[^>]*>)''')
        flow_pattern = re.compile(br'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE)
        url_pattern = re.compile(br'''(url\(.*?\))''', re.IGNORECASE)
        url_img_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)\?mime=image/[^\)]*["')]''', re.IGNORECASE)
        font_index_pattern = re.compile(br'''[('"]kindle:embed:([0-9|A-V]+)["')]''', re.IGNORECASE)
        url_css_index_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=text/css[^\)]*''', re.IGNORECASE)
        url_svg_image_pattern = re.compile(br'''kindle:flow:([0-9|A-V]+)\?mime=image/svg\+xml[^\)]*''', re.IGNORECASE)

        for i in range(1, k8proc.getNumberOfFlows()):
            flowpart = k8proc.getFlow(i)

            # links to raster image files from image tags
            srcpieces = img_pattern.split(flowpart)
            for j in range(1, len(srcpieces), 2):
                tag = srcpieces[j]
                if tag.startswith(b'<im'):
                    for m in img_index_pattern.finditer(tag):
                        imageNumber = fromBase32(m.group(1))
                        imageName = self._resource_name(imageNumber)
                        if imageName is not None:
                            replacement = b'"../Images/' + utf8_str(imageName) + b'"'
                            self.used[imageName] = 'used'
                            tag = img_index_pattern.sub(replacement, tag, 1)
                        else:
                            print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))
                    srcpieces[j] = tag
            flowpart = b"".join(srcpieces)

            # replacements inside css url():
            srcpieces = url_pattern.split(flowpart)
            for j in range(1, len(srcpieces), 2):
                tag = srcpieces[j]

                # process links to raster image files
                for m in url_img_index_pattern.finditer(tag):
                    imageNumber = fromBase32(m.group(1))
                    imageName = self._resource_name(imageNumber)
                    # keep whatever delimiters (quote or parenthesis) enclosed the link
                    osep = m.group()[0:1]
                    csep = m.group()[-1:]
                    if imageName is not None:
                        replacement = osep + b'../Images/' + utf8_str(imageName) + csep
                        self.used[imageName] = 'used'
                        tag = url_img_index_pattern.sub(replacement, tag, 1)
                    else:
                        print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))

                # process links to fonts
                for m in font_index_pattern.finditer(tag):
                    fontNumber = fromBase32(m.group(1))
                    fontName = self._resource_name(fontNumber)
                    osep = m.group()[0:1]
                    csep = m.group()[-1:]
                    if fontName is None:
                        print("Error: Referenced font %s was not recognized as a valid font in %s" % (fontNumber, tag))
                    else:
                        replacement = osep + b'../Fonts/' + utf8_str(fontName) + csep
                        tag = font_index_pattern.sub(replacement, tag, 1)
                        self.used[fontName] = 'used'

                # process links to other css pieces
                for m in url_css_index_pattern.finditer(tag):
                    num = fromBase32(m.group(1))
                    info = k8proc.getFlowInfo(num)
                    if info is None:
                        # unpacking None would raise TypeError; skip the bad link
                        print("warning: ignoring non-existent flow link", tag, " value 0x%x" % num)
                        continue
                    [typ, fmt, pdir, fnm] = info
                    replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
                    tag = url_css_index_pattern.sub(replacement, tag, 1)
                    self.used[fnm] = 'used'

                # process links to svg images
                for m in url_svg_image_pattern.finditer(tag):
                    num = fromBase32(m.group(1))
                    info = k8proc.getFlowInfo(num)
                    if info is None:
                        print("warning: ignoring non-existent flow link", tag, " value 0x%x" % num)
                        continue
                    [typ, fmt, pdir, fnm] = info
                    replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
                    tag = url_svg_image_pattern.sub(replacement, tag, 1)
                    self.used[fnm] = 'used'

                srcpieces[j] = tag
            flowpart = b"".join(srcpieces)

            # store away in our own copy
            flows.append(flowpart)

            # I do not think this case exists and even if it does exist, it needs to be done in a separate
            # pass to prevent inlining a flow piece into another flow piece before the inserted one or the
            # target one has been fully processed
            # but keep it around if it ends up we do need it
            # flow pattern not inside url()
            # srcpieces = tag_pattern.split(flowpart)
            # for j in range(1, len(srcpieces),2):
            #     tag = srcpieces[j]
            #     if tag.startswith(b'<'):
            #         for m in flow_pattern.finditer(tag):
            #             num = fromBase32(m.group(1))
            #             [typ, fmt, pdir, fnm] = self.k8proc.getFlowInfo(num)
            #             flowtext = self.k8proc.getFlow(num)
            #             if fmt == b'inline':
            #                 tag = flowtext
            #             else:
            #                 replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
            #                 tag = flow_pattern.sub(replacement, tag, 1)
            #                 self.used[fnm] = 'used'
            #         srcpieces[j] = tag
            # flowpart = b"".join(srcpieces)

        # now handle the main text xhtml parts
        # Handle the flow items in the XHTML text pieces
        # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
        for i, part in enumerate(parts):
            srcpieces = tag_pattern.split(part)
            for j in range(1, len(srcpieces), 2):
                tag = srcpieces[j]
                if tag.startswith(b'<'):
                    for m in flow_pattern.finditer(tag):
                        num = fromBase32(m.group(1))
                        if num > 0 and num < len(k8proc.flowinfo):
                            [typ, fmt, pdir, fnm] = k8proc.getFlowInfo(num)
                            flowpart = flows[num]
                            if fmt == b'inline':
                                # the whole referencing tag is replaced by the flow contents
                                tag = flowpart
                            else:
                                replacement = b'"../' + utf8_str(pdir) + b'/' + utf8_str(fnm) + b'"'
                                tag = flow_pattern.sub(replacement, tag, 1)
                                self.used[fnm] = 'used'
                        else:
                            print("warning: ignoring non-existent flow link", tag, " value 0x%x" % num)
                    srcpieces[j] = tag
            # store away modified version
            parts[i] = b''.join(srcpieces)

        # Handle any embedded raster images links in style= attributes urls
        style_pattern = re.compile(br'''(<[a-zA-Z0-9]+\s[^>]*style\s*=\s*[^>]*>)''', re.IGNORECASE)
        for i, part in enumerate(parts):
            # replace urls in style attributes
            srcpieces = style_pattern.split(part)
            for j in range(1, len(srcpieces), 2):
                tag = srcpieces[j]
                if b'kindle:embed' in tag:
                    for m in img_index_pattern.finditer(tag):
                        imageNumber = fromBase32(m.group(1))
                        imageName = self._resource_name(imageNumber)
                        osep = m.group()[0:1]
                        csep = m.group()[-1:]
                        if imageName is not None:
                            replacement = osep + b'../Images/' + utf8_str(imageName) + csep
                            self.used[imageName] = 'used'
                            tag = img_index_pattern.sub(replacement, tag, 1)
                        else:
                            print("Error: Referenced image %s in style url was not recognized in %s" % (imageNumber, tag))
                    srcpieces[j] = tag
            # store away modified version
            parts[i] = b"".join(srcpieces)

        # Handle any embedded raster images links in the xhtml text
        # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
        # (this stage only accepts quote delimiters and is case sensitive)
        text_img_index_pattern = re.compile(br'''['"]kindle:embed:([0-9|A-V]+)[^'"]*['"]''')
        for i, part in enumerate(parts):
            # links to raster image files
            srcpieces = img_pattern.split(part)
            for j in range(1, len(srcpieces), 2):
                tag = srcpieces[j]
                if tag.startswith(b'<im'):
                    for m in text_img_index_pattern.finditer(tag):
                        imageNumber = fromBase32(m.group(1))
                        imageName = self._resource_name(imageNumber)
                        if imageName is not None:
                            replacement = b'"../Images/' + utf8_str(imageName) + b'"'
                            self.used[imageName] = 'used'
                            tag = text_img_index_pattern.sub(replacement, tag, 1)
                        else:
                            print("Error: Referenced image %s was not recognized as a valid image in %s" % (imageNumber, tag))
                    srcpieces[j] = tag
            # store away modified version
            parts[i] = b"".join(srcpieces)

        # finally perform any general cleanups needed to make valid XHTML
        # these include:
        # in svg tags replace "preserveaspectratio" attributes with "preserveAspectRatio"
        # in svg tags replace "viewbox" attributes with "viewBox"
        # in <li> remove value="XX" attributes since these are illegal
        li_value_pattern = re.compile(br'''\svalue\s*=\s*['"][^'"]*['"]''', re.IGNORECASE)
        for i, part in enumerate(parts):
            srcpieces = tag_pattern.split(part)
            for j in range(1, len(srcpieces), 2):
                tag = srcpieces[j]
                if tag.startswith(b'<svg') or tag.startswith(b'<SVG'):
                    tag = tag.replace(b'preserveaspectratio', b'preserveAspectRatio')
                    tag = tag.replace(b'viewbox', b'viewBox')
                elif tag.startswith(b'<li ') or tag.startswith(b'<LI '):
                    tag = b"".join(li_value_pattern.split(tag))
                srcpieces[j] = tag
            # store away modified version
            parts[i] = b"".join(srcpieces)

        k8proc.setFlows(flows)
        k8proc.setParts(parts)
        return self.used

View File

@@ -0,0 +1,276 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, bchr, bstr, bord
if PY2:
range = xrange
import struct
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
from .mobi_utils import toHex
class MobiIndex:
    """Reader for the INDX (index) sections of a Mobipocket / KF8 file.

    ``sect`` must provide ``loadSection(n)`` returning the raw bytes of
    section *n* and ``setsectiondescription(n, text)``.
    """

    def __init__(self, sect, DEBUG=False):
        self.sect = sect
        self.DEBUG = DEBUG

    def getIndexData(self, idx, label="Unknown"):
        """Read the index whose main INDX section is section number *idx*.

        @param idx: Section number of the main INDX record; 0xffffffff means
            "no such index" and yields empty results.
        @param label: Text used to describe the sections that are read.
        @return: Tuple ``(outtbl, ctoc_text)``.  ``outtbl`` is a list of
            ``[text, tagMap]`` entries; ``ctoc_text`` maps CTOC offsets to
            their byte strings.  Both are empty when the main section does
            not carry an INDX header.
        """
        sect = self.sect
        outtbl = []
        ctoc_text = {}
        if idx == 0xffffffff:
            return outtbl, ctoc_text

        sect.setsectiondescription(idx, "{0} Main INDX section".format(label))
        data = sect.loadSection(idx)
        parsed = self.parseINDXHeader(data)
        if not parsed:
            # parseINDXHeader returns False (after printing a warning) for a
            # section without an INDX header; unpacking that would raise
            # TypeError, so report an empty index instead.
            return outtbl, ctoc_text
        idxhdr, hordt1, hordt2 = parsed
        IndexCount = idxhdr['count']

        # handle the case of multiple sections used for CTOC
        rec_off = 0
        off = idx + IndexCount + 1
        for j in range(idxhdr['nctoc']):
            cdata = sect.loadSection(off + j)
            sect.setsectiondescription(off + j, label + ' CTOC Data ' + str(j))
            ctocdict = self.readCTOC(cdata)
            for k in ctocdict:
                ctoc_text[k + rec_off] = ctocdict[k]
            # each further CTOC section is addressed 0x10000 higher
            rec_off += 0x10000

        tagSectionStart = idxhdr['len']
        controlByteCount, tagTable = readTagSection(tagSectionStart, data)
        if self.DEBUG:
            print("ControlByteCount is", controlByteCount)
            print("IndexCount is", IndexCount)
            print("TagTable: %s" % tagTable)

        for i in range(idx + 1, idx + 1 + IndexCount):
            sect.setsectiondescription(i, "{0} Extra {1:d} INDX section".format(label, i - idx))
            data = sect.loadSection(i)
            parsed = self.parseINDXHeader(data)
            if not parsed:
                # not an INDX record (warning already printed): skip it
                continue
            hdrinfo, ordt1, ordt2 = parsed
            idxtPos = hdrinfo['start']
            entryCount = hdrinfo['count']
            if self.DEBUG:
                print(idxtPos, entryCount)

            # loop through to build up the IDXT position starts
            idxPositions = []
            for j in range(entryCount):
                pos, = struct.unpack_from(b'>H', data, idxtPos + 4 + (2 * j))
                idxPositions.append(pos)
            # The last entry ends before the IDXT tag (but there might be zero fill bytes we need to ignore!)
            idxPositions.append(idxtPos)

            # for each entry in the IDXT build up the tagMap and any associated text
            for j in range(entryCount):
                startPos = idxPositions[j]
                endPos = idxPositions[j + 1]
                textLength = ord(data[startPos:startPos + 1])
                text = data[startPos + 1:startPos + 1 + textLength]
                if hordt2 is not None:
                    # translate the entry text through the main header's ORDT2 table
                    text = b''.join(bchr(hordt2[bord(x)]) for x in text)
                tagMap = getTagMap(controlByteCount, tagTable, data, startPos + 1 + textLength, endPos)
                outtbl.append([text, tagMap])
                if self.DEBUG:
                    print(tagMap)
                    print(text)
        return outtbl, ctoc_text

    def parseINDXHeader(self, data):
        """Parse the header of one INDX section.

        @param data: Raw bytes of the section.
        @return: Tuple ``(header, ordt1, ordt2)`` where ``header`` maps the
            field names in ``words`` to their 32-bit values and the ORDT
            tables are tuples of ints or None; ``False`` if *data* does not
            start with ``INDX``.
        """
        if not data[:4] == b'INDX':
            print("Warning: index section is not INDX")
            return False
        words = (
            'len', 'nul1', 'type', 'gen', 'start', 'count', 'code',
            'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc'
        )
        num = len(words)
        values = struct.unpack(bstr('>%dL' % num), data[4:4 * (num + 1)])
        header = dict(zip(words, values))

        ordt1 = None
        ordt2 = None
        ocnt, oentries, op1, op2, otagx = struct.unpack_from(b'>LLLLL', data, 0xa4)
        if header['code'] == 0xfdea or ocnt != 0 or oentries > 0:
            # horribly hacked up ESP (sample) mobi books use two ORDT sections but never specify
            # them in the proper place in the header. They seem to be codepage 65002 which seems
            # to be some sort of strange EBCDIC utf-8 or 16 encoded strings
            # so we need to look for them and store them away to process leading text
            # ORDT1 has 1 byte long entries, ORDT2 has 2 byte long entries
            # we only ever seem to use the second but ...
            assert(ocnt == 1)
            assert(data[op1:op1 + 4] == b'ORDT')
            assert(data[op2:op2 + 4] == b'ORDT')
            ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1 + 4)
            ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2 + 4)

        if self.DEBUG:
            print("parsed INDX header:")
            for n in words:
                print(n, "%X" % header[n])
            print("")
        return header, ordt1, ordt2

    def readCTOC(self, txtdata):
        """Read all entries of one CTOC section.

        Each entry is a variable width length followed by that many bytes of
        name; reading stops at the end of the data or at a zero byte.

        @param txtdata: Raw bytes of the CTOC section.
        @return: Dictionary mapping each entry's start offset to its name.
        """
        ctoc_data = {}
        offset = 0
        while offset < len(txtdata):
            # compare a one byte slice so the test is the same on Python 2 and 3
            if txtdata[offset:offset + 1] == b'\0':
                break
            idx_offs = offset
            # first n bytes: name len as vwi
            pos, ilen = getVariableWidthValue(txtdata, offset)
            offset += pos
            # <len> next bytes: name
            name = txtdata[offset:offset + ilen]
            offset += ilen
            if self.DEBUG:
                print("name length is ", ilen)
                print(idx_offs, name)
            ctoc_data[idx_offs] = name
        return ctoc_data
def getVariableWidthValue(data, offset):
    '''
    Decode a variable width value: 7 payload bits per byte, most significant
    group first, with the high bit set on the final byte.
    @param data: The bytes to decode.
    @param offset: The start offset into data.
    @return: Tuple of consumed bytes count and decoded value.
    '''
    value = 0
    consumed = 0
    while True:
        position = offset + consumed
        byte = ord(data[position:position + 1])
        consumed += 1
        value = (value << 7) | (byte & 0x7f)
        if byte & 0x80:
            # high bit marks the last byte of the value
            break
    return consumed, value
def readTagSection(start, data):
    '''
    Read the TAGX tag section found at the given position.
    @param start: The start position in the data.
    @param data: The data to process.
    @return: Tuple of control byte count and list of 4-int tag tuples;
             (0, []) when no TAGX marker is found at start.
    '''
    if data[start:start + 4] != b"TAGX":
        return 0, []
    firstEntryOffset, = struct.unpack_from(b'>L', data, start + 0x04)
    controlByteCount, = struct.unpack_from(b'>L', data, start + 0x08)
    tags = []
    # The 12 byte header was read above; every entry after it is 4 bytes.
    for relative in range(12, firstEntryOffset, 4):
        base = start + relative
        tags.append(tuple(ord(data[p:p + 1]) for p in range(base, base + 4)))
    return controlByteCount, tags
def countSetBits(value, bits=8):
    '''
    Count the set bits among the lowest "bits" bits of the given value.
    @param value: Integer value.
    @param bits: The number of bits of the input value (defaults to 8).
    @return: Number of set bits.
    '''
    return sum(1 for shift in range(bits) if (value >> shift) & 0x01)
def getTagMap(controlByteCount, tagTable, entryData, startPos, endPos):
    '''
    Create a map of tags and values from the given byte section.
    @param controlByteCount: The number of control bytes.
    @param tagTable: The tag table.
    @param entryData: The data to process.
    @param startPos: The starting position in entryData.
    @param endPos: The end position in entryData or None if it is unknown.
    @return: Hashmap of tag and list of values.
    '''
    # First pass: use the control bytes to decide, per tag, how many values
    # (or how many bytes of values) follow.
    pending = []    # tuples of (tag, valueCount, valueBytes, valuesPerEntry)
    controlByteIndex = 0
    cursor = startPos + controlByteCount
    for tag, valuesPerEntry, mask, endFlag in tagTable:
        if endFlag == 0x01:
            # end-of-control-byte marker: later tags use the next control byte
            controlByteIndex += 1
            continue
        controlPos = startPos + controlByteIndex
        value = ord(entryData[controlPos:controlPos + 1]) & mask
        if value == 0:
            continue
        if value != mask:
            # Shift bits to get the masked value.
            while mask & 0x01 == 0:
                mask = mask >> 1
                value = value >> 1
            pending.append((tag, value, None, valuesPerEntry))
        elif countSetBits(mask) > 1:
            # If all bits of masked value are set and the mask has more than one bit, a variable width value
            # will follow after the control bytes which defines the length of bytes (NOT the value count!)
            # which will contain the corresponding variable width values.
            consumed, value = getVariableWidthValue(entryData, cursor)
            cursor += consumed
            pending.append((tag, None, value, valuesPerEntry))
        else:
            pending.append((tag, 1, None, valuesPerEntry))

    # Second pass: read the values themselves.
    tagHashMap = {}
    for tag, valueCount, valueBytes, valuesPerEntry in pending:
        values = []
        if valueCount is None:
            # Convert valueBytes to variable width values.
            totalConsumed = 0
            while totalConsumed < valueBytes:
                # Does this work for valuesPerEntry != 1?
                consumed, data = getVariableWidthValue(entryData, cursor)
                cursor += consumed
                totalConsumed += consumed
                values.append(data)
            if totalConsumed != valueBytes:
                print("Error: Should consume %s bytes, but consumed %s" % (valueBytes, totalConsumed))
        else:
            # Read valueCount * valuesPerEntry variable width values.
            for _ in range(valueCount * valuesPerEntry):
                consumed, data = getVariableWidthValue(entryData, cursor)
                cursor += consumed
                values.append(data)
        tagHashMap[tag] = values

    # Test that all bytes have been processed if endPos is given.
    if endPos is not None and cursor != endPos:
        # The last entry might have some zero padding bytes, so complain only if non zero bytes are left.
        leftover = entryData[cursor:endPos]
        if any(bord(char) != 0 for char in leftover):
            print("Warning: There are unprocessed index bytes left: %s" % toHex(entryData[cursor:endPos]))
    return tagHashMap

View File

@@ -0,0 +1,494 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, bstr, utf8_str
if PY2:
range = xrange
import os
import struct
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
import re
# note: re requires the pattern to be the exact same type as the data to be searched in python3
# but u"" is not allowed for the pattern itself only b""
from .mobi_index import MobiIndex
from .mobi_utils import fromBase32
from .unipath import pathof
# Guide reference type names that are treated as standard.
# The OPF 2.0 specification spells the type "foreword"; the historical
# misspelling b'foreward' is kept so existing behavior is unchanged.
_guide_types = [b'cover', b'title-page', b'toc', b'index', b'glossary', b'acknowledgements',
                b'bibliography', b'colophon', b'copyright-page', b'dedication',
                b'epigraph', b'foreward', b'foreword', b'loi', b'lot', b'notes', b'preface', b'text']
# locate beginning and ending positions of tag with specific aid attribute
def locate_beg_end_of_tag(ml, aid):
    """Locate the first tag in *ml* that carries the given ``aid`` attribute.

    The pattern is assembled from bytes pieces: formatting a bytes *aid* into
    a text pattern with ``%s`` yields ``b'...'`` on Python 3 and never
    matches.  The aid is escaped so it is always matched literally.

    @param ml: The markup (bytes) to search.
    @param aid: The aid value as bytes (text is encoded as UTF-8).
    @return: Tuple of the positions of the tag's ``<`` and of the first ``>``
        after it, or ``(0, 0)`` when no such tag exists.
    """
    if not isinstance(aid, bytes):
        aid = aid.encode('utf-8')
    pattern = br'''<[^>]*\said\s*=\s*['"]''' + re.escape(aid) + br'''['"][^>]*>'''
    m = re.search(pattern, ml, re.IGNORECASE)
    if m is None:
        return 0, 0
    plt = m.start()
    pgt = ml.find(b'>', plt + 1)
    return plt, pgt
# iterate over all tags in block in reverse order, i.e. last tag to first tag
def reverse_tag_iter(block):
    """Yield each ``<...>`` span of *block* (bytes), last one first.

    Scanning stops as soon as no ``>`` is left, or no ``<`` precedes it.
    """
    pgt = block.rfind(b'>', 0, len(block))
    while pgt != -1:
        plt = block.rfind(b'<', 0, pgt)
        if plt == -1:
            return
        yield block[plt:pgt + 1]
        # continue the search strictly before the tag just produced
        pgt = block.rfind(b'>', 0, plt)
class K8Processor:
def __init__(self, mh, sect, files, debug=False):
    """Read the KF8 tables needed to rebuild the files of the book.

    Builds four tables and stores them on the instance:
      * ``fdsttbl``  - start offsets of the flows inside the rawML, followed
        by ``mh.rawSize``;
      * ``skeltbl``  - ``[file number, skeleton name, fragment count,
        start position, length]`` per skeleton;
      * ``fragtbl``  - ``[insert position, ctoc text, file number,
        sequence number, start position, length]`` per fragment;
      * ``guidetbl`` - ``[ref_type, ref_title, fileno]`` per guide entry.

    @param mh: Header object providing ``skelidx``, ``fragidx``,
        ``guideidx``, ``fdst`` and ``rawSize``.
    @param sect: Section reader (``loadSection``, ``setsectiondescription``).
    @param files: Stored as ``self.files``.
    @param debug: When true, the tables are printed as they are built.
    """
    self.sect = sect
    self.files = files
    self.mi = MobiIndex(sect)
    self.mh = mh
    self.skelidx = mh.skelidx
    self.fragidx = mh.fragidx
    self.guideidx = mh.guideidx
    self.fdst = mh.fdst
    self.flowmap = {}
    self.flows = None
    self.flowinfo = []
    self.parts = None
    self.partinfo = []
    self.linked_aids = set()
    self.fdsttbl = [0, 0xffffffff]
    self.DEBUG = debug

    # read in and parse the FDST info which is very similar in format to the Palm DB section
    # parsing except it provides offsets into rawML file and not the Palm DB file
    # this is needed to split up the final css, svg, etc flow section
    # that can exist at the end of the rawML file
    if self.fdst != 0xffffffff:
        header = self.sect.loadSection(self.fdst)
        if header[0:4] == b"FDST":
            num_sections, = struct.unpack_from(b'>L', header, 0x08)
            # keep every other value (the start offsets) and close the table with the raw size
            self.fdsttbl = struct.unpack_from(bstr('>%dL' % (num_sections*2)), header, 12)[::2] + (mh.rawSize, )
            sect.setsectiondescription(self.fdst, "KF8 FDST INDX")
            if self.DEBUG:
                print("\nFDST Section Map: %d sections" % num_sections)
                for j in range(num_sections):
                    print("Section %d: 0x%08X - 0x%08X" % (j, self.fdsttbl[j], self.fdsttbl[j+1]))
        else:
            print("\nError: K8 Mobi with Missing FDST info")

    # read/process skeleton index info to create the skeleton table
    skeltbl = []
    if self.skelidx != 0xffffffff:
        outtbl, ctoc_text = self.mi.getIndexData(self.skelidx, "KF8 Skeleton")
        for fileptr, (text, tagMap) in enumerate(outtbl):
            # file number, skeleton name, fragtbl record count, start position, length
            skeltbl.append([fileptr, text, tagMap[1][0], tagMap[6][0], tagMap[6][1]])
    self.skeltbl = skeltbl
    if self.DEBUG:
        print("\nSkel Table: %d entries" % len(self.skeltbl))
        print("table: filenum, skeleton name, frag tbl record count, start position, length")
        for entry in self.skeltbl:
            print(entry)

    # read/process the fragment index to create the fragment table
    fragtbl = []
    if self.fragidx != 0xffffffff:
        outtbl, ctoc_text = self.mi.getIndexData(self.fragidx, "KF8 Fragment")
        for text, tagMap in outtbl:
            # insert position, ctoc offset (aidtext), file number, sequence number, start position, length
            ctocoffset = tagMap[2][0]
            ctocdata = ctoc_text[ctocoffset]
            fragtbl.append([int(text), ctocdata, tagMap[3][0], tagMap[4][0], tagMap[6][0], tagMap[6][1]])
    self.fragtbl = fragtbl
    if self.DEBUG:
        print("\nFragment Table: %d entries" % len(self.fragtbl))
        print("table: file position, link id text, file num, sequence number, start position, length")
        for entry in self.fragtbl:
            print(entry)

    # read / process guide index for guide elements of opf
    guidetbl = []
    if self.guideidx != 0xffffffff:
        # (label previously carried a stray closing parenthesis)
        outtbl, ctoc_text = self.mi.getIndexData(self.guideidx, "KF8 Guide elements")
        for text, tagMap in outtbl:
            # ref_type, ref_title, frag number
            ctocoffset = tagMap[1][0]
            ref_title = ctoc_text[ctocoffset]
            ref_type = text
            fileno = None
            if 3 in tagMap:
                fileno = tagMap[3][0]
            if 6 in tagMap:
                # tag 6 wins when both tags are present
                fileno = tagMap[6][0]
            guidetbl.append([ref_type, ref_title, fileno])
    self.guidetbl = guidetbl
    if self.DEBUG:
        print("\nGuide Table: %d entries" % len(self.guidetbl))
        print("table: ref_type, ref_title, fragtbl entry number")
        for entry in self.guidetbl:
            print(entry)
def buildParts(self, rawML):
    """Split *rawML* into flows and rebuild the xhtml parts from flow 0.

    Fills ``self.flows``, ``self.parts`` and ``self.partinfo`` and appends
    one ``[type, format, dir, filename]`` entry per flow to
    ``self.flowinfo``.  Returns None.
    """
    # now split the rawML into its flow pieces
    self.flows = [rawML[start:end] for start, end in zip(self.fdsttbl, self.fdsttbl[1:])]

    # the first piece represents the xhtml text
    text = self.flows[0]
    self.flows[0] = b''

    # walk the <skeleton> and fragment tables to build original source xhtml files
    # *without* destroying any file position information needed for later href processing
    # and create final list of file separation start: stop points and etc in partinfo
    if self.DEBUG:
        print("\nRebuilding flow piece 0: the main body of the ebook")
    self.parts = []
    self.partinfo = []
    fragptr = 0
    for skelnum, skelname, fragcnt, skelpos, skellen in self.skeltbl:
        baseptr = skelpos + skellen
        skeleton = text[skelpos:baseptr]
        for i in range(fragcnt):
            insertpos, idtext, filenum, seqnum, startpos, length = self.fragtbl[fragptr]
            aidtext = idtext[12:-2]
            if i == 0:
                filename = 'part%04d.xhtml' % filenum
            fragment = text[baseptr:baseptr + length]
            # make the insert position relative to this skeleton
            insertpos -= skelpos
            head = skeleton[:insertpos]
            tail = skeleton[insertpos:]
            actual_inspos = insertpos
            if tail.find(b'>') < tail.find(b'<') or head.rfind(b'>') < head.rfind(b'<'):
                # There is an incomplete tag in either the head or tail.
                # This can happen for some badly formed KF8 files
                print('The fragment table for %s has incorrect insert position. Calculating manually.' % skelname)
                bp, ep = locate_beg_end_of_tag(skeleton, aidtext)
                if bp != ep:
                    actual_inspos = ep + 1 + startpos
            if insertpos != actual_inspos:
                print("fixed corrupt fragment table insert position", insertpos+skelpos, actual_inspos+skelpos)
                insertpos = actual_inspos
                self.fragtbl[fragptr][0] = actual_inspos + skelpos
            skeleton = skeleton[0:insertpos] + fragment + skeleton[insertpos:]
            baseptr += length
            fragptr += 1
        self.parts.append(skeleton)
        self.partinfo.append([skelnum, 'Text', filename, skelpos, baseptr, aidtext])

    assembled_text = b''.join(self.parts)
    if self.DEBUG:
        outassembled = os.path.join(self.files.k8dir, 'assembled_text.dat')
        with open(pathof(outassembled), 'wb') as f:
            f.write(assembled_text)

    # The primary css style sheet is typically stored next followed by any
    # snippets of code that were previously inlined in the
    # original xhtml but have been stripped out and placed here.
    # This can include local CDATA snippets and svg sections.
    # The problem is that for most browsers and ereaders, you can not
    # use <img src="imageXXXX.svg" /> to import any svg image that itself
    # properly uses an <image/> tag to import some raster image - it
    # should work according to the spec but does not for almost all browsers
    # and ereaders and causes epub validation issues because those raster
    # images are in manifest but not in xhtml text - since they are only
    # referenced from an svg image
    # So we need to check the remaining flow pieces to see if they are css
    # or svg images. if svg images, we must check if they have an <image />
    # and if so inline them into the xhtml text pieces.
    # there may be other sorts of pieces stored here but until we see one
    # in the wild to reverse engineer we won't be able to tell
    self.flowinfo.append([None, None, None, None])
    svg_tag_pattern = re.compile(br'''(<svg[^>]*>)''', re.IGNORECASE)
    image_tag_pattern = re.compile(br'''(<image[^>]*>)''', re.IGNORECASE)
    for j in range(1, len(self.flows)):
        flowpart = self.flows[j]
        nstr = '%04d' % j
        svg_match = svg_tag_pattern.search(flowpart)
        if svg_match is not None:
            # svg
            ptype = b'svg'
            if image_tag_pattern.search(flowpart) is not None:
                pformat, pdir, fname = b'inline', None, None
                # strip off anything before <svg if inlining
                flowpart = flowpart[svg_match.start():]
            else:
                pformat, pdir, fname = b'file', "Images", 'svgimg' + nstr + '.svg'
        elif flowpart.find(b'[CDATA[') >= 0:
            # CDATA exists, so inline it
            ptype = b'css'
            flowpart = b'<style type="text/css">\n' + flowpart + b'\n</style>\n'
            pformat, pdir, fname = b'inline', None, None
        else:
            # css - assume as standalone css file
            ptype = b'css'
            pformat, pdir, fname = b'file', "Styles", 'style' + nstr + '.css'
        self.flows[j] = flowpart
        self.flowinfo.append([ptype, pformat, pdir, fname])

    if self.DEBUG:
        print("\nFlow Map: %d entries" % len(self.flowinfo))
        for fi in self.flowinfo:
            print(fi)
        print("\n")
        print("\nXHTML File Part Position Information: %d entries" % len(self.partinfo))
        for pi in self.partinfo:
            print(pi)
    return
# get information fragment table entry by pos
def getFragTblInfo(self, pos):
    """Return ``(seqnum, text)`` for the first fragment at or after *pos*.

    The text is the fragment's id text prefixed with ``b'in: '`` when *pos*
    lies inside the fragment, or ``b'before: '`` when the fragment starts
    after *pos*.  Returns ``(None, None)`` when no fragment qualifies.
    """
    for insertpos, idtext, filenum, seqnum, startpos, length in self.fragtbl:
        if insertpos <= pos < insertpos + length:
            # why are these "in: and before: added here
            return seqnum, b'in: ' + idtext
        if pos < insertpos:
            return seqnum, b'before: ' + idtext
    return None, None
# get information about the part (file) that exists at pos in original rawML
def getFileInfo(self, pos):
    """Return ``(filename, partnum, start, end)`` of the part containing *pos*.

    A part contains *pos* when ``start <= pos < end``; four Nones are
    returned when no part does.
    """
    for partnum, pdir, filename, start, end, aidtext in self.partinfo:
        if start <= pos < end:
            return filename, partnum, start, end
    return None, None, None, None
# accessor functions to properly protect the internal structure
def getNumberOfParts(self):
    """Return how many xhtml parts are currently held."""
    parts = self.parts
    return len(parts)
def getPart(self, i):
    """Return part number *i*, or None when *i* is out of range."""
    if 0 <= i < len(self.parts):
        return self.parts[i]
    return None
def getPartInfo(self, i):
    """Return the partinfo entry for part *i*, or None when out of range."""
    if 0 <= i < len(self.partinfo):
        return self.partinfo[i]
    return None
def getNumberOfFlows(self):
    """Return how many flows are held (slot 0 included)."""
    flows = self.flows
    return len(flows)
def getFlow(self, i):
    """Return flow number *i*, or None for index 0 and out-of-range indexes."""
    # note flows[0] is empty - it was all of the original text
    if 0 < i < len(self.flows):
        return self.flows[i]
    return None
def getFlowInfo(self,i):
    """Return ``self.flowinfo[i]`` for ``0 < i < len(self.flowinfo)``, else ``None``.

    Index 0 is deliberately excluded: flowinfo[0] is empty because it was
    all of the original text.
    """
    return self.flowinfo[i] if 0 < i < len(self.flowinfo) else None
def getIDTagByPosFid(self, posfid, offset):
    """Resolve a ``kindle:pos:fid`` / offset pair to ``(filename, idtext)``.

    ``posfid`` selects a row of ``self.fragtbl`` and ``offset`` is added to
    that row's insert position; both are decoded with fromBase32 (which can
    handle both string types on input).  When no part covers the resulting
    position, a message is printed and the position stored at index 3 of
    ``self.skeltbl[filenum]`` is used instead.
    """
    fragment_row = fromBase32(posfid)
    fragment_off = fromBase32(offset)
    insertpos, _idtext, filenum, _seqnm, _startpos, _length = self.fragtbl[fragment_row]
    target = insertpos + fragment_off
    fname = self.getFileInfo(target)[0]
    if fname is None:
        # target does not exist: default to the skeleton position instead
        print("Link To Position", target, "does not exist, retargeting to top of target")
        target = self.skeltbl[filenum][3]
        fname = self.getFileInfo(target)[0]
    # An existing "id=" or "name=" attribute must exist in the original xhtml,
    # otherwise it would not have worked for linking.  Amazon seems to have
    # added its own "aid=" attributes whose contents seem to encode position
    # information in Base32, so the closest anchor before the position is
    # found by actually searching in that file.
    return fname, self.getIDTag(target)
def getIDTag(self, pos):
    """Return the anchor of the nearest tag before ``pos``.

    The part containing ``pos`` is searched backwards (via reverse_tag_iter)
    for the first tag carrying an ``id`` or ``name`` attribute; failing
    that, an ``aid`` attribute is accepted, recorded in ``self.linked_aids``
    and returned with an ``aid-`` prefix.

    Returns ``b''`` (meaning "top of file") when a ``<body `` tag is reached
    first, when nothing matches, or when no part contains ``pos``.
    """
    # find the first tag with a named anchor (name or id attribute) before pos
    _fname, pn, skelpos, _skelend = self.getFileInfo(pos)
    if pn is None and skelpos is None:
        print("Error: getIDTag - no file contains ", pos)
        # Without a part there is nothing to search; indexing self.parts
        # with None would only raise a TypeError, so fall back to top of file.
        return b''
    textblock = self.parts[pn]
    npos = pos - skelpos
    # if npos inside a tag then search all text before its end of tag marker
    pgt = textblock.find(b'>',npos)
    plt = textblock.find(b'<',npos)
    if plt == npos or pgt < plt:
        npos = pgt + 1
    # find id and name attributes only inside of tags
    # use a reverse tag search since that is faster
    # inside any < > pair find "id=" and "name=" attributes return it
    # [^>]* means match any amount of chars except for  '>' char
    # [^'"] match any amount of chars except for the quote character
    # \s* means match any amount of whitespace
    textblock = textblock[0:npos]
    id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE)
    name_pattern = re.compile(br'''<[^>]*\sname\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE)
    aid_pattern = re.compile(br'''<[^>]+\s(?:aid|AID)\s*=\s*['"]([^'"]+)['"]''')
    for tag in reverse_tag_iter(textblock):
        # any ids in the body should default to top of file
        if tag[0:6] == b'<body ':
            return b''
        if tag[0:6] != b'<meta ':
            m = id_pattern.match(tag) or name_pattern.match(tag)
            if m is not None:
                return m.group(1)
            m = aid_pattern.match(tag)
            if m is not None:
                self.linked_aids.add(m.group(1))
                return b'aid-' + m.group(1)
    return b''
# do we need to do deep copying
def setParts(self, parts):
    """Overwrite every element of ``self.parts`` in place with ``parts``.

    The two sequences must have the same length (checked with ``assert``).
    Elements are assigned by reference, not copied.
    """
    assert(len(parts) == len(self.parts))
    for index, part in enumerate(parts):
        self.parts[index] = part
# do we need to do deep copying
def setFlows(self, flows):
    """Overwrite every element of ``self.flows`` in place with ``flows``.

    The two sequences must have the same length (checked with ``assert``).
    Elements are assigned by reference, not copied.
    """
    assert(len(flows) == len(self.flows))
    for index, flow in enumerate(flows):
        self.flows[index] = flow
# get information about the part (file) that exists at pos in original rawML
def getSkelInfo(self, pos):
    """Return the full ``self.partinfo`` record covering ``pos`` as a new list.

    The result is ``[partnum, pdir, filename, start, end, aidtext]`` for the
    first record with ``start <= pos < end``, or a list of six ``None``
    values when no record covers ``pos``.
    """
    for record in self.partinfo:
        partnum, pdir, filename, start, end, aidtext = record
        if start <= pos < end:
            return [partnum, pdir, filename, start, end, aidtext]
    return [None] * 6
# fileno is actually a reference into fragtbl (a fragment)
def getGuideText(self):
    """Build the ``<reference .../>`` lines of the guide as utf-8 bytes.

    One line is produced per ``self.guidetbl`` entry, except entries of type
    ``thumbimagestandard``, which are skipped.  Types that are neither in
    ``_guide_types`` nor already prefixed with ``other.`` are rewritten:
    ``start`` becomes ``text`` and anything else gets an ``other.`` prefix.
    The third element of each entry is an index into ``self.fragtbl``.
    """
    references = []
    for ref_type, ref_title, fileno in self.guidetbl:
        if ref_type == b'thumbimagestandard':
            continue
        if ref_type not in _guide_types and not ref_type.startswith(b'other.'):
            ref_type = b'text' if ref_type == b'start' else b'other.' + ref_type
        pos, _idtext, _filenum, _seqnm, _startpos, _length = self.fragtbl[fileno]
        _pn, pdir, filename, _skelpos, _skelend, _aidtext = self.getSkelInfo(pos)
        anchor = self.getIDTag(pos)
        linktgt = filename.encode('utf-8')
        if anchor != b'':
            linktgt += b'#' + anchor
        references.append(b'<reference type="'+ref_type+b'" title="'+ref_title+b'" href="'+utf8_str(pdir)+b'/'+linktgt+b'" />\n')
    # opf is encoded utf-8 so must convert any titles properly
    return b''.join(references).decode(self.mh.codec).encode("utf-8")
def getPageIDTag(self, pos):
    """Return the ``id``/``name`` anchor of the nearest tag before ``pos``.

    Page map offsets need a little more leeway than ordinary links: when
    ``pos`` points into a tag, the backward search starts from the next
    ending tag marker (``/>`` or ``</``) rather than from the end of the
    current tag.

    Returns ``b''`` (meaning "top of file") when a ``<body `` tag is reached
    first, when nothing matches, or when no part contains ``pos``.
    """
    _fname, pn, skelpos, _skelend = self.getFileInfo(pos)
    if pn is None and skelpos is None:
        print("Error: getPageIDTag - no file contains ", pos)
        # Without a part there is nothing to search; indexing self.parts
        # with None would only raise a TypeError, so fall back to top of file.
        return b''
    textblock = self.parts[pn]
    npos = pos - skelpos
    # if npos inside a tag then search all text before next ending tag
    pgt = textblock.find(b'>',npos)
    plt = textblock.find(b'<',npos)
    if plt == npos or pgt < plt:
        # we are in a tag
        # so find first ending tag
        pend1 = textblock.find(b'/>', npos)
        pend2 = textblock.find(b'</', npos)
        if pend1 != -1 and pend2 != -1:
            pend = min(pend1, pend2)
        else:
            # at most one marker was found; max() picks it (or -1 if none)
            pend = max(pend1, pend2)
        if pend != -1:
            npos = pend
        else:
            npos = pgt + 1
    # find id and name attributes only inside of tags
    # use a reverse tag search since that is faster
    # inside any < > pair find "id=" and "name=" attributes return it
    # [^>]* means match any amount of chars except for  '>' char
    # [^'"] match any amount of chars except for the quote character
    # \s* means match any amount of whitespace
    textblock = textblock[0:npos]
    id_pattern = re.compile(br'''<[^>]*\sid\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE)
    name_pattern = re.compile(br'''<[^>]*\sname\s*=\s*['"]([^'"]*)['"]''',re.IGNORECASE)
    for tag in reverse_tag_iter(textblock):
        # any ids in the body should default to top of file
        if tag[0:6] == b'<body ':
            return b''
        if tag[0:6] != b'<meta ':
            m = id_pattern.match(tag) or name_pattern.match(tag)
            if m is not None:
                return m.group(1)
    return b''

View File

@@ -0,0 +1,268 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
DEBUG_USE_ORDERED_DICTIONARY = False # OrderedDict is supported >= python 2.7.
""" set to True to use OrderedDict for K8RESCProcessor.parsetag.tattr."""
# dict_ is the mapping type K8RESCProcessor.parsetag instantiates for the
# attributes of each parsed tag.
if DEBUG_USE_ORDERED_DICTIONARY:
    from collections import OrderedDict as dict_
else:
    dict_ = dict
from .compatibility_utils import unicode_str
from .mobi_utils import fromBase32
# Tag names that K8RESCProcessor.resc_tag_iter reports as soon as their opening
# tag is seen, and reports again with an '-end' suffix at their closing tag.
_OPF_PARENT_TAGS = ['xml', 'package', 'metadata', 'dc-metadata',
    'x-metadata', 'manifest', 'spine', 'tours', 'guide']
class K8RESCProcessor(object):
    """Parse a RESC section into spine and extra-metadata information.

    The constructor splits ``data`` into a header (everything before the
    first ``<``) and an XML-like payload, converts the payload with
    unicode_str and runs parseData() over it.  Results are exposed as plain
    attributes: ``spine_order``, ``spine_idrefs``, ``spine_pageattributes``,
    ``spine_ppd``, ``cover_name``, ``extrameta``, ``package_ver`` and
    ``need3``.
    """

    def __init__(self, data, debug=False):
        """Split ``data`` into header and payload, then parse the payload.

        @param data: raw RESC section (bytes).
        @param debug: when true, parseData prints every item it processes.
        """
        self._debug = debug
        self.resc = None
        self.opos = 0
        self.extrameta = []
        self.cover_name = None
        self.spine_idrefs = {}
        self.spine_order = []
        self.spine_pageattributes = {}
        self.spine_ppd = None
        # need3 indicates the book has fields which require epub3.
        # but the estimation of the source epub version from the fields is difficult.
        self.need3 = False
        self.package_ver = None
        self.extra_metadata = []
        self.refines_metadata = []
        self.extra_attributes = []
        # get header
        start_pos = data.find(b'<')
        self.resc_header = data[:start_pos]
        # get resc data length
        start = self.resc_header.find(b'=') + 1
        end = self.resc_header.find(b'&', start)
        resc_size = 0
        if end > 0:
            resc_size = fromBase32(self.resc_header[start:end])
        resc_rawbytes = len(data) - start_pos
        if resc_rawbytes == resc_size:
            self.resc_length = resc_size
        else:
            # Most RESC has a nul string at its tail but some do not.
            end_pos = data.find(b'\x00', start_pos)
            if end_pos < 0:
                self.resc_length = resc_rawbytes
            else:
                self.resc_length = end_pos - start_pos
        if self.resc_length != resc_size:
            print("Warning: RESC section length({:d}bytes) does not match its size({:d}bytes).".format(self.resc_length, resc_size))
        # now parse RESC after converting it to unicode from utf-8
        self.resc = unicode_str(data[start_pos:start_pos+self.resc_length])
        self.parseData()

    def prepend_to_spine(self, key, idref, linear, properties):
        """Insert ``key`` at the front of the spine with the given attributes.

        ``linear`` and ``properties`` are stored only when they are not None.
        """
        self.spine_order = [key] + self.spine_order
        self.spine_idrefs[key] = idref
        attributes = {}
        if linear is not None:
            attributes['linear'] = linear
        if properties is not None:
            attributes['properties'] = properties
        self.spine_pageattributes[key] = attributes

    # RESC tag iterator
    def resc_tag_iter(self):
        """Yield ``(prefix, tname, tattr, tcontent)`` for the items of the RESC.

        Tags listed in _OPF_PARENT_TAGS are yielded at their opening tag and
        again, with ``-end`` appended to the name, at their closing tag.
        Other tags are yielded once, at their closing (or self-closing) tag,
        carrying the attributes of the most recent opening tag and the text
        that preceded the closing tag.
        """
        tcontent = last_tattr = None
        prefix = ['']
        while True:
            text, tag = self.parseresc()
            if text is None and tag is None:
                break
            if text is not None:
                tcontent = text.rstrip(' \r\n')
            else:  # we have a tag
                ttype, tname, tattr = self.parsetag(tag)
                if ttype == 'begin':
                    tcontent = None
                    prefix.append(tname + '.')
                    if tname in _OPF_PARENT_TAGS:
                        yield ''.join(prefix), tname, tattr, tcontent
                    else:
                        last_tattr = tattr
                else:  # single or end
                    if ttype == 'end':
                        prefix.pop()
                        tattr = last_tattr
                        last_tattr = None
                        if tname in _OPF_PARENT_TAGS:
                            tname += '-end'
                    yield ''.join(prefix), tname, tattr, tcontent
                    tcontent = None

    # now parse the RESC to extract spine and extra metadata info
    def parseData(self):
        """Walk the RESC and fill the spine, cover and metadata attributes."""
        for prefix, tname, tattr, tcontent in self.resc_tag_iter():
            if self._debug:
                print(" Parsing RESC: ", prefix, tname, tattr, tcontent)
            if tname == 'package':
                self.package_ver = tattr.get('version', '2.0')
                package_prefix = tattr.get('prefix','')
                if self.package_ver.startswith('3') or package_prefix.startswith('rendition'):
                    self.need3 = True
            if tname == 'spine':
                # The attribute is spelled "page-progression-direction"; the
                # misspelled key this code used to read is still accepted as
                # a fallback so nothing that relied on it changes.
                ppd = tattr.get('page-progression-direction', None)
                if ppd is None:
                    ppd = tattr.get('page-progession-direction', None)
                self.spine_ppd = ppd
                if ppd == 'rtl':
                    self.need3 = True
            if tname == 'itemref':
                skelid = tattr.pop('skelid', None)
                if skelid is None and len(self.spine_order) == 0:
                    # assume it was removed initial coverpage
                    skelid = 'coverpage'
                    tattr['linear'] = 'no'
                self.spine_order.append(skelid)
                idref = tattr.pop('idref', None)
                if idref is not None:
                    idref = 'x_' + idref
                self.spine_idrefs[skelid] = idref
                if 'id' in tattr:
                    del tattr['id']
                # tattr["id"] = 'x_' + tattr["id"]
                if 'properties' in tattr:
                    self.need3 = True
                self.spine_pageattributes[skelid] = tattr
            if tname == 'meta' or tname.startswith('dc:'):
                if 'refines' in tattr or 'property' in tattr:
                    self.need3 = True
                if tattr.get('name','') == 'cover':
                    cover_name = tattr.get('content',None)
                    if cover_name is not None:
                        cover_name = 'x_' + cover_name
                    self.cover_name = cover_name
                else:
                    self.extrameta.append([tname, tattr, tcontent])

    # parse and return either leading text or the next tag
    def parseresc(self):
        """Return the next ``(text, tag)`` pair and advance ``self.opos``.

        Exactly one element of the pair is not None, except at the end of
        the data where ``(None, None)`` is returned.  An unterminated tag or
        comment at the tail is returned as text so that the scan always
        advances and parsetag never receives an empty or unclosed tag.
        """
        p = self.opos
        if p >= len(self.resc):
            return None, None
        if self.resc[p] != '<':
            res = self.resc.find('<',p)
            if res == -1 :
                res = len(self.resc)
            self.opos = res
            return self.resc[p:res], None
        # handle comment as a special case
        if self.resc[p:p+4] == '<!--':
            te = self.resc.find('-->',p+1)
            if te != -1:
                te = te+2
        else:
            te = self.resc.find('>',p+1)
            ntb = self.resc.find('<',p+1)
            if ntb != -1 and ntb < te:
                self.opos = ntb
                return self.resc[p:ntb], None
        if te == -1:
            # No terminator anywhere after p: consume the remainder as text.
            # (Falling through would reset opos to 0 and hand an empty tag
            # to parsetag, which then never terminates.)
            self.opos = len(self.resc)
            return self.resc[p:], None
        self.opos = te + 1
        return None, self.resc[p:te+1]

    # parses tag to identify:  [tname, ttype, tattr]
    #    tname: tag name
    #    ttype: tag type ('begin', 'end' or 'single');
    #    tattr: dictionary of tag attributes
    def parsetag(self, s):
        """Parse one tag string into ``(ttype, tname, tattr)``.

        ``ttype`` is 'begin', 'end' or 'single'; ``tname`` is the lower-cased
        tag name ('?xml' is reported as 'xml'); ``tattr`` maps lower-cased
        attribute names to their raw values.  A comment is reported as a
        'single' tag named '!--' with its text under the 'comment' key.
        """
        p = 1
        tname = None
        ttype = None
        tattr = dict_()
        slen = len(s)
        while s[p:p+1] == ' ' :
            p += 1
        if s[p:p+1] == '/':
            ttype = 'end'
            p += 1
            while s[p:p+1] == ' ' :
                p += 1
        b = p
        # the length guard keeps the scan finite on a tag without terminator
        while p < slen and s[p:p+1] not in ('>', '/', ' ', '"', "'",'\r','\n') :
            p += 1
        tname=s[b:p].lower()
        # some special cases
        if tname == '?xml':
            tname = 'xml'
        if tname == '!--':
            ttype = 'single'
            comment = s[p:-3].strip()
            tattr['comment'] = comment
        if ttype is None:
            # parse any attributes of begin or single tags
            while s.find('=',p) != -1 :
                while s[p:p+1] == ' ' :
                    p += 1
                b = p
                while s[p:p+1] != '=' :
                    p += 1
                aname = s[b:p].lower()
                aname = aname.rstrip(' ')
                p += 1
                while s[p:p+1] == ' ' :
                    p += 1
                quote = s[p:p+1]
                if quote in ('"', "'") :
                    p = p + 1
                    b = p
                    # The value ends at the matching quote only, so the other
                    # quote character may appear inside it (title="Bob's").
                    endq = s.find(quote, p)
                    if endq == -1:
                        endq = slen
                    val = s[b:endq]
                    p = endq + 1
                else :
                    b = p
                    while p < slen and s[p:p+1] not in ('>', '/', ' ') :
                        p += 1
                    val = s[b:p]
                tattr[aname] = val
        if ttype is None:
            ttype = 'begin'
            if s.find('/',p) >= 0:
                ttype = 'single'
        return ttype, tname, tattr

    def taginfo_toxml(self, taginfo):
        """Serialize a ``[tname, tattr, tcontent]`` triple back to one tag line.

        Attribute values and content are written as stored (no escaping is
        applied).  A triple without content becomes a self-closing tag.
        """
        res = []
        tname, tattr, tcontent = taginfo
        res.append('<' + tname)
        if tattr is not None:
            for key in tattr:
                res.append(' ' + key + '="'+tattr[key]+'"')
        if tcontent is not None:
            res.append('>' + tcontent + '</' + tname + '>\n')
        else:
            res.append('/>\n')
        return "".join(res)

    def hasSpine(self):
        """Return True when at least one spine entry was collected."""
        return len(self.spine_order) > 0

    def needEPUB3(self):
        """Return True when parsing found fields which require epub3."""
        return self.need3

    def hasRefines(self):
        """Return True when any collected extra metadata has a 'refines' attribute."""
        for [tname, tattr, tcontent] in self.extrameta:
            if 'refines' in tattr:
                return True
        return False

    def createMetadata(self, epubver):
        """Sort ``self.extrameta`` into the three output lists.

        Entries with a 'refines' attribute go to ``refines_metadata`` as tag
        text, unless ``epubver`` is 'F' and they also have a 'property'
        attribute, in which case they are rewritten as an ``opf:`` attribute
        line in ``extra_attributes``.  All other entries go to
        ``extra_metadata`` as tag text.
        """
        for taginfo in self.extrameta:
            tname, tattr, tcontent = taginfo
            if 'refines' in tattr:
                if epubver == 'F' and 'property' in tattr:
                    attr = ' id="%s" opf:%s="%s"\n' % (tattr['refines'], tattr['property'], tcontent)
                    self.extra_attributes.append(attr)
                else:
                    tag = self.taginfo_toxml(taginfo)
                    self.refines_metadata.append(tag)
            else:
                tag = self.taginfo_toxml(taginfo)
                self.extra_metadata.append(tag)

View File

@@ -0,0 +1,186 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import unicode_str
import os
from .unipath import pathof
import re
# note: re requires the pattern to be the exact same type as the data to be searched in python3
# but u"" is not allowed for the pattern itself only b""
# When true, NAVProcessor.buildTOC prints the level and index range of each
# recursion step.
DEBUG_NAV = False
FORCE_DEFAULT_TITLE = False
""" Set to True to force to use the default title. """
# NOTE(review): "FINENAME" looks like a misspelling of "FILENAME"; the name is
# left unchanged because other modules may import it - confirm before renaming.
NAVIGATION_FINENAME = 'nav.xhtml'
""" The name for the navigation document. """
DEFAULT_TITLE = 'Navigation'
""" The default title for the navigation document. """
class NAVProcessor(object):
    """Build and write the navigation document (nav.xhtml).

    The document consists of an optional landmarks ``<nav>`` derived from
    the guide text and a toc ``<nav>`` derived from the index entries.
    """

    def __init__(self, files):
        """@param files: object providing the ``k8text`` and ``k8oebps`` paths."""
        self.files = files
        self.navname = NAVIGATION_FINENAME

    def buildLandmarks(self, guidetext):
        """Return the landmarks ``<nav>`` built from ``<reference>`` tags.

        Only references whose type maps to a non-None value in ``type_map``
        and which carry both a title and an href are emitted.  An empty
        string is returned when nothing qualifies.
        """
        header = ''
        header += ' <nav epub:type="landmarks" id="landmarks" hidden="">\n'
        header += ' <h2>Guide</h2>\n'
        header += ' <ol>\n'
        element = ' <li><a epub:type="{:s}" href="{:s}">{:s}</a></li>\n'
        footer = ''
        footer += ' </ol>\n'
        footer += ' </nav>\n'

        # guide reference type -> epub:type value; None means "do not emit"
        type_map = {
            'cover' : 'cover',
            'title-page' : 'title-page',
            # ?: 'frontmatter',
            'text' : 'bodymatter',
            # ?: 'backmatter',
            'toc' : 'toc',
            'loi' : 'loi',
            'lot' : 'lot',
            'preface' : 'preface',
            'bibliography' : 'bibliography',
            'index' : 'index',
            'glossary' : 'glossary',
            'acknowledgements' : 'acknowledgements',
            'colophon' : None,
            'copyright-page' : None,
            'dedication' : None,
            'epigraph' : None,
            'foreword' : None,
            'notes' : None
            }

        re_type = re.compile(r'\s+type\s*=\s*"(.*?)"', re.I)
        re_title = re.compile(r'\s+title\s*=\s*"(.*?)"', re.I)
        re_link = re.compile(r'\s+href\s*=\s*"(.*?)"', re.I)
        dir_ = os.path.relpath(self.files.k8text, self.files.k8oebps).replace('\\', '/')

        data = ''
        references = re.findall(r'<reference\s+.*?>', unicode_str(guidetext), re.I)
        for reference in references:
            mo_type = re_type.search(reference)
            mo_title = re_title.search(reference)
            mo_link = re_link.search(reference)
            if mo_type is not None:
                type_ = type_map.get(mo_type.group(1), None)
            else:
                type_ = None
            if mo_title is not None:
                title = mo_title.group(1)
            else:
                title = None
            if mo_link is not None:
                link = mo_link.group(1)
            else:
                link = None

            if type_ is not None and title is not None and link is not None:
                # hrefs in the guide are relative to the OPF; make them
                # relative to the directory of the navigation document
                link = os.path.relpath(link, dir_).replace('\\', '/')
                data += element.format(type_, link, title)
        if len(data) > 0:
            return header + data + footer
        else:
            return ''

    def buildTOC(self, indx_data):
        """Return the toc ``<nav>`` as nested ``<ol>`` lists.

        Each entry of ``indx_data`` is a dict read through the keys
        'filename', 'idtag', 'text', 'hlvl', 'child1' and 'childn'.
        """
        header = ''
        header += ' <nav epub:type="toc" id="toc">\n'
        header += ' <h1>Table of contents</h1>\n'
        footer = ' </nav>\n'

        # recursive part
        def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
            if start>len(indx_data) or end>len(indx_data):
                print("Warning (in buildTOC): missing INDX child entries", start, end, len(indx_data))
                # Every caller unpacks three values; hand the counters back
                # unchanged instead of a bare string, which raised ValueError.
                return '', max_lvl, num
            if DEBUG_NAV:
                print("recursINDX (in buildTOC) lvl %d from %d to %d" % (lvl, start, end))
            xhtml = ''
            if start <= 0:
                start = 0
            if end <= 0:
                end = len(indx_data)
            if lvl > max_lvl:
                max_lvl = lvl

            indent1 = ' ' * (2 + lvl * 2)
            indent2 = ' ' * (3 + lvl * 2)
            xhtml += indent1 + '<ol>\n'
            for i in range(start, end):
                e = indx_data[i]
                htmlfile = e['filename']
                desttag = e['idtag']
                text = e['text']
                if not e['hlvl'] == lvl:
                    continue
                num += 1
                if desttag == '':
                    link = htmlfile
                else:
                    link = '{:s}#{:s}'.format(htmlfile, desttag)
                xhtml += indent2 + '<li>'
                entry = '<a href="{:}">{:s}</a>'.format(link, text)
                xhtml += entry
                # recurs
                if e['child1'] >= 0:
                    xhtml += '\n'
                    xhtmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1,
                            e['child1'], e['childn'] + 1)
                    xhtml += xhtmlrec
                    xhtml += indent2
                # close entry
                xhtml += '</li>\n'
            xhtml += indent1 + '</ol>\n'
            return xhtml, max_lvl, num

        data, max_lvl, num = recursINDX()
        if not len(indx_data) == num:
            print("Warning (in buildTOC): different number of entries in NCX", len(indx_data), num)
        return header + data + footer

    def buildNAV(self, ncx_data, guidetext, title, lang):
        """Return the complete navigation document as text."""
        print("Building Navigation Document.")
        if FORCE_DEFAULT_TITLE:
            title = DEFAULT_TITLE
        nav_header = ''
        nav_header += '<?xml version="1.0" encoding="utf-8"?>\n<!DOCTYPE html>'
        nav_header += '<html xmlns="http://www.w3.org/1999/xhtml"'
        nav_header += ' xmlns:epub="http://www.idpf.org/2007/ops"'
        nav_header += ' lang="{0:s}" xml:lang="{0:s}">\n'.format(lang)
        nav_header += '<head>\n<title>{:s}</title>\n'.format(title)
        nav_header += '<meta charset="UTF-8" />\n'
        nav_header += '<style type="text/css">\n'
        nav_header += 'nav#landmarks { display:none; }\n'
        nav_header += '</style>\n</head>\n<body>\n'
        nav_footer = '</body>\n</html>\n'

        landmarks = self.buildLandmarks(guidetext)
        toc = self.buildTOC(ncx_data)

        data = nav_header
        data += landmarks
        data += toc
        data += nav_footer
        return data

    def getNAVName(self):
        """Return the file name of the navigation document."""
        return self.navname

    def writeNAV(self, ncx_data, guidetext, metadata):
        """Build the navigation document and write it, utf-8 encoded, into ``files.k8text``.

        The title and language are the first values stored under the
        'Title' and 'Language' keys of ``metadata``.
        """
        # build the xhtml
        # print("Write Navigation Document.")
        xhtml = self.buildNAV(ncx_data, guidetext, metadata.get('Title')[0], metadata.get('Language')[0])
        fname = os.path.join(self.files.k8text, self.navname)
        with open(pathof(fname), 'wb') as f:
            f.write(xhtml.encode('utf-8'))

View File

@@ -0,0 +1,272 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
import os
from .unipath import pathof
import re
# note: re requires the pattern to be the exact same type as the data to be searched in python3
# but u"" is not allowed for the pattern itself only b""
from .mobi_utils import toBase32
from .mobi_index import MobiIndex
# When true, ncxExtract prints the raw index tables, every parsed record and
# each recursion step of the NCX builders.
DEBUG_NCX = False
class ncxExtract:
    """Read the NCX index of a book and write it out as a ``toc.ncx`` file."""

    # Shared templates for buildNCX and buildK8NCX.
    _NCX_HEADER = \
'''<?xml version='1.0' encoding='utf-8'?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%s">
<head>
<meta content="%s" name="dtb:uid"/>
<meta content="%d" name="dtb:depth"/>
<meta content="mobiunpack.py" name="dtb:generator"/>
<meta content="0" name="dtb:totalPageCount"/>
<meta content="0" name="dtb:maxPageNumber"/>
</head>
<docTitle>
<text>%s</text>
</docTitle>
<navMap>
'''

    _NCX_FOOTER = \
''' </navMap>
</ncx>
'''

    _NCX_ENTRY = \
'''<navPoint id="%s" playOrder="%d">
<navLabel>
<text>%s</text>
</navLabel>
<content src="%s"/>'''

    def __init__(self, mh, files):
        """@param mh: header object providing ``sect``, ``ncxidx`` and ``codec``.
        @param files: object providing the output directory paths.
        """
        self.mh = mh
        self.sect = self.mh.sect
        self.files = files
        self.isNCX = False
        self.mi = MobiIndex(self.sect)
        self.ncxidx = self.mh.ncxidx
        self.indx_data = None

    def parseNCX(self):
        """Read the NCX index into a list of dicts, store and return it.

        The list is empty when ``self.ncxidx`` is 0xffffffff.  Each record
        starts from default values and is updated from the tags present in
        its tag map.
        """
        indx_data = []
        # index tag number -> [record field name, position inside the tag's value list]
        tag_fieldname_map = {
            1: ['pos',0],
            2: ['len',0],
            3: ['noffs',0],
            4: ['hlvl',0],
            5: ['koffs',0],
            6: ['pos_fid',0],
            21: ['parent',0],
            22: ['child1',0],
            23: ['childn',0]
        }
        if self.ncxidx != 0xffffffff:
            outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx, "NCX")
            if DEBUG_NCX:
                print(ctoc_text)
                print(outtbl)
            num = 0
            for [text, tagMap] in outtbl:
                tmp = {
                    'name': text.decode('utf-8'),
                    'pos': -1,
                    'len': 0,
                    'noffs': -1,
                    'text' : "Unknown Text",
                    'hlvl' : -1,
                    'kind' : "Unknown Kind",
                    'pos_fid' : None,
                    'parent' : -1,
                    'child1' : -1,
                    'childn' : -1,
                    'num' : num
                    }
                for tag in tag_fieldname_map:
                    [fieldname, i] = tag_fieldname_map[tag]
                    if tag in tagMap:
                        fieldvalue = tagMap[tag][i]
                        if tag == 6:
                            pos_fid = toBase32(fieldvalue,4).decode('utf-8')
                            fieldvalue2 = tagMap[tag][i+1]
                            pos_off = toBase32(fieldvalue2,10).decode('utf-8')
                            fieldvalue = 'kindle:pos:fid:%s:off:%s' % (pos_fid, pos_off)
                        tmp[fieldname] = fieldvalue
                        # A missing ctoc entry keeps the "Unknown ..." default
                        # set above; decoding a text-string fallback fails on
                        # python 3 because str has no decode().
                        if tag == 3:
                            toctext = ctoc_text.get(fieldvalue)
                            if toctext is not None:
                                tmp['text'] = toctext.decode(self.mh.codec)
                        if tag == 5:
                            kindtext = ctoc_text.get(fieldvalue)
                            if kindtext is not None:
                                tmp['kind'] = kindtext.decode(self.mh.codec)
                indx_data.append(tmp)
                if DEBUG_NCX:
                    print("record number: ", num)
                    print("name: ", tmp['name'],)
                    print("position", tmp['pos']," length: ", tmp['len'])
                    print("text: ", tmp['text'])
                    print("kind: ", tmp['kind'])
                    print("heading level: ", tmp['hlvl'])
                    print("parent:", tmp['parent'])
                    print("first child: ",tmp['child1']," last child: ", tmp['childn'])
                    print("pos_fid is ", tmp['pos_fid'])
                    print("\n\n")
                num += 1
        self.indx_data = indx_data
        return indx_data

    def _buildNavMap(self, indx_data, make_link, title, ident, lang):
        """Render ``indx_data`` as a complete NCX document.

        ``make_link`` receives one entry dict and returns the value of its
        ``<content src="...">`` attribute; it is called only for entries
        that are emitted.
        """
        line_start = re.compile('^', re.M)

        # recursive part
        def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
            if start>len(indx_data) or end>len(indx_data):
                print("Warning: missing INDX child entries", start, end, len(indx_data))
                # Every caller unpacks three values; hand the counters back
                # unchanged instead of a bare string, which raised ValueError.
                return '', max_lvl, num
            if DEBUG_NCX:
                print("recursINDX lvl %d from %d to %d" % (lvl, start, end))
            xml = ''
            if start <= 0:
                start = 0
            if end <= 0:
                end = len(indx_data)
            if lvl > max_lvl:
                max_lvl = lvl
            indent = ' ' * (2 + lvl)

            for i in range(start, end):
                e = indx_data[i]
                if not e['hlvl'] == lvl:
                    continue
                # open entry
                num += 1
                tagid = 'np_%d' % num
                entry = self._NCX_ENTRY % (tagid, num, e['text'], make_link(e))
                # prefix every line of the entry with the current indent
                entry = line_start.sub(indent, entry)
                xml += entry + '\n'
                # recurs
                if e['child1']>=0:
                    xmlrec, max_lvl, num = recursINDX(max_lvl, num, lvl + 1,
                            e['child1'], e['childn'] + 1)
                    xml += xmlrec
                # close entry
                xml += indent + '</navPoint>\n'
            return xml, max_lvl, num

        body, max_lvl, num = recursINDX()
        header = self._NCX_HEADER % (lang, ident, max_lvl + 1, title)
        ncx = header + body + self._NCX_FOOTER
        if not len(indx_data) == num:
            print("Warning: different number of entries in NCX", len(indx_data), num)
        return ncx

    def buildNCX(self, htmlfile, title, ident, lang):
        """Return the NCX text for ``self.indx_data``.

        Every entry links to ``<htmlfile>#filepos<pos>``.
        """
        def make_link(e):
            return '%s#filepos%d' % (htmlfile, e['pos'])
        return self._buildNavMap(self.indx_data, make_link, title, ident, lang)

    def writeNCX(self, metadata):
        """Build the NCX for ``book.html`` and write it to ``toc.ncx`` in ``files.mobi7dir``."""
        # build the xml
        self.isNCX = True
        print("Write ncx")
        # htmlname = os.path.basename(self.files.outbase)
        # htmlname += '.html'
        htmlname = 'book.html'
        xml = self.buildNCX(htmlname, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0])
        # write the ncx file
        # ncxname = os.path.join(self.files.mobi7dir, self.files.getInputFileBasename() + '.ncx')
        ncxname = os.path.join(self.files.mobi7dir, 'toc.ncx')
        with open(pathof(ncxname), 'wb') as f:
            f.write(xml.encode('utf-8'))

    def buildK8NCX(self, indx_data, title, ident, lang):
        """Return the NCX text for ``indx_data``.

        Every entry links to ``Text/<filename>``, followed by ``#<idtag>``
        when the entry's 'idtag' is not empty.
        """
        def make_link(e):
            htmlfile = e['filename']
            desttag = e['idtag']
            if desttag == '':
                return 'Text/%s' % htmlfile
            return 'Text/%s#%s' % (htmlfile, desttag)
        return self._buildNavMap(indx_data, make_link, title, ident, lang)

    def writeK8NCX(self, ncx_data, metadata):
        """Build the NCX for ``ncx_data`` and write it to ``toc.ncx`` in ``files.k8oebps``."""
        # build the xml
        self.isNCX = True
        print("Write K8 ncx")
        xml = self.buildK8NCX(ncx_data, metadata['Title'][0], metadata['UniqueID'][0], metadata.get('Language')[0])
        bname = 'toc.ncx'
        ncxname = os.path.join(self.files.k8oebps,bname)
        with open(pathof(ncxname), 'wb') as f:
            f.write(xml.encode('utf-8'))

View File

@@ -0,0 +1,681 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import unicode_str, unescapeit
from .compatibility_utils import lzip
from .unipath import pathof
from xml.sax.saxutils import escape as xmlescape
import os
import uuid
from datetime import datetime
# In EPUB3, NCX and <guide> MAY exist in OPF, although the NCX is superseded
# by the Navigation Document and the <guide> is deprecated. Currently, EPUB3_WITH_NCX
# and EPUB3_WITH_GUIDE are set to True for compatibility with epub2 reading systems.
# They might be changed to False in the future.
EPUB3_WITH_NCX = True # Do not set to False except for debug.
""" Set to True to create a toc.ncx when converting to epub3. """
EPUB3_WITH_GUIDE = True # Do not set to False except for debug.
""" Set to True to create a guide element in an opf when converting to epub3. """
EPUB_OPF = 'content.opf'
""" The name for the OPF of EPUB. """
TOC_NCX = 'toc.ncx'
""" The name for the TOC of EPUB2. """
NAVIGATION_DOCUMENT = 'nav.xhtml'
""" The name for the navigation document of EPUB3. """
BEGIN_INFO_ONLY = '<!-- BEGIN INFORMATION ONLY '
""" The comment to indicate the beginning of metadata which will be ignored by kindlegen. """
END_INFO_ONLY = 'END INFORMATION ONLY -->'
""" The comment to indicate the end of metadata which will be ignored by kindlegen. """
EXTH_TITLE_FURIGANA = 'Title-Pronunciation'
""" The name for Title Furigana(similar to file-as) set by KDP. """
EXTH_CREATOR_FURIGANA = 'Author-Pronunciation'
""" The name for Creator Furigana(similar to file-as) set by KDP. """
EXTH_PUBLISHER_FURIGANA = 'Publisher-Pronunciation'
""" The name for Publisher Furigana(similar to file-as) set by KDP. """
# Additional character -> entity replacements passed to OPFProcessor.escapeit
# when a value is written inside a double-quoted attribute.
EXTRA_ENTITIES = {'"': '&quot;', "'": "&apos;"}
class OPFProcessor(object):
def __init__(self, files, metadata, fileinfo, rscnames, hasNCX, mh, usedmap, pagemapxml='', guidetext='', k8resc=None, epubver='2'):
    # Store the inputs, derive the target epub version and prepare the
    # refines/fixed-layout metadata lists.
    #   mh       - header object; only codec, isK8() and isPrintReplica() are read here
    #   k8resc   - optional parsed RESC object; supplies the cover id and extra metadata
    #   epubver  - '2', '3', 'A' (autodetect) or 'F' (handled as epub2);
    #              any other value falls back to '2'
    # Note: 'page-progression-direction' is removed from the metadata dict passed in.
    self.files = files
    self.metadata = metadata
    self.fileinfo = fileinfo
    self.rscnames = rscnames
    self.has_ncx = hasNCX
    self.codec = mh.codec
    self.isK8 = mh.isK8()
    self.printReplica = mh.isPrintReplica()
    self.guidetext = unicode_str(guidetext)
    self.used = usedmap
    self.k8resc = k8resc
    self.covername = None
    self.cover_id = 'cover_img'
    if self.k8resc is not None and self.k8resc.cover_name is not None:
        # update cover id info from RESC if available
        self.cover_id = self.k8resc.cover_name
    # Create a unique urn uuid
    self.BookId = unicode_str(str(uuid.uuid4()))
    self.pagemap = pagemapxml
    self.ncxname = None
    self.navname = None
    # page-progression-direction is only set in spine
    self.page_progression_direction = metadata.pop('page-progression-direction', [None])[0]
    # a primary-writing-mode containing 'rl' forces right-to-left page progression
    if 'rl' in metadata.get('primary-writing-mode', [''])[0]:
        self.page_progression_direction = 'rtl'
    self.epubver = epubver # the epub version set by user
    self.target_epubver = epubver # the epub version set by user or detected automatically
    if self.epubver == 'A':
        self.target_epubver = self.autodetectEPUBVersion()
    elif self.epubver == 'F':
        self.target_epubver = '2'
    elif self.epubver != '2' and self.epubver != '3':
        self.target_epubver = '2'
    # id for refines attributes
    self.title_id = {}
    self.creator_id = {}
    self.publisher_id = {}
    # extra attributes
    self.title_attrib = {}
    self.creator_attrib = {}
    self.publisher_attrib = {}
    self.extra_attributes = [] # for force epub2 option
    # Create epub3 metadata from EXTH.
    self.exth_solved_refines_metadata = []
    self.exth_refines_metadata = []
    self.exth_fixedlayout_metadata = []
    self.defineRefinesID()
    self.processRefinesMetadata()
    if self.k8resc is not None:
        # Create metadata in RESC section.
        self.k8resc.createMetadata(epubver)
    if self.target_epubver == "3":
        self.createMetadataForFixedlayout()
def escapeit(self, sval, EXTRAS=None):
    """Return ``sval`` converted to text, unescaped once and XML-escaped.

    @param sval: the value to escape; converted with unicode_str first,
        because xmlescape and unescape do not work with utf-8 bytestrings.
    @param EXTRAS: optional mapping of additional replacements handed to
        xmlescape; ignored when empty or None.
    """
    text = unescapeit(unicode_str(sval))
    if EXTRAS:
        return xmlescape(text, EXTRAS)
    return xmlescape(text)
def createMetaTag(self, data, property, content, refid=''):
    """Append one ``<meta property="...">`` line to the list ``data``.

    A ``refines="#<refid>"`` attribute is included only when ``refid`` is
    non-empty.  ``content`` is inserted as given.
    """
    refines = ' refines="#%s"' % refid if refid else ''
    line = '<meta property="%s"%s>%s</meta>\n' % (property, refines, content)
    data.append(line)
def buildOPFMetadata(self, start_tag, has_obfuscated_fonts=False):
    """Build the <metadata> section of the OPF from the EXTH metadata.

    Converts from the EXTH metadata format to the target epub version.
    epub 3 ignores <meta name="xxxx" content="yyyy" /> style metatags but
    allows them for backwards compatibility; its own format is
        <meta property="xxxx" id="iiii" ... > property_value</meta>
    plus DCMES elements such as
        <dc:blah id="iiii">value</dc:blah>

    Keys are deleted from self.metadata as they are written out.  The
    exceptions are 'ISBN' and 'Published' when targeting epub 3: they are
    not deleted and therefore show up again in the info-only block.
    Everything still present at the end is emitted as meta name/content
    pairs between BEGIN_INFO_ONLY and END_INFO_ONLY.

    @param start_tag: The opening <metadata ...> tag; written first.
    @param has_obfuscated_fonts: When true (and the book is K8) an extra
           urn:uuid dc:identifier built from self.BookId is written.
    @return: List of strings forming the metadata section.
    """
    metadata = self.metadata
    k8resc = self.k8resc

    META_TAGS = ['Drm Server Id', 'Drm Commerce Id', 'Drm Ebookbase Book Id', 'ASIN', 'ThumbOffset', 'Fake Cover',
                 'Creator Software', 'Creator Major Version', 'Creator Minor Version', 'Creator Build Number',
                 'Watermark', 'Clipping Limit', 'Publisher Limit', 'Text to Speech Disabled', 'CDE Type',
                 'Updated Title', 'Font Signature (hex)', 'Tamper Proof Keys (hex)',]

    def handleTag(data, metadata, key, tag, attrib=None):
        '''Format metadata values.

        @param data: List of formatted metadata entries.
        @param metadata: The metadata dictionary.
        @param key: The key of the metadata value to handle.
        @param tag: The opf tag corresponds to the metadata value.
        @param attrib: The extra attribute for refines or opf prefixes,
                       indexed by the position of the value.
        '''
        if key not in metadata:
            return
        # None instead of a shared mutable {} default
        if attrib is None:
            attrib = {}
        # the closing tag is the tag name without any attributes
        closingTag = tag.split(" ")[0]
        for i, value in enumerate(metadata[key]):
            res = '<%s%s>%s</%s>\n' % (tag, attrib.get(i, ''), self.escapeit(value), closingTag)
            data.append(res)
        del metadata[key]

    # these are allowed but ignored by epub3
    def handleMetaPairs(data, metadata, key, name):
        if key not in metadata:
            return
        for value in metadata[key]:
            res = '<meta name="%s" content="%s" />\n' % (name, self.escapeit(value, EXTRA_ENTITIES))
            data.append(res)
        del metadata[key]

    data = []
    data.append(start_tag + '\n')

    # Handle standard metadata
    if 'Title' in metadata:
        handleTag(data, metadata, 'Title', 'dc:title', self.title_attrib)
    else:
        data.append('<dc:title>Untitled</dc:title>\n')
    handleTag(data, metadata, 'Language', 'dc:language')
    if 'UniqueID' in metadata:
        handleTag(data, metadata, 'UniqueID', 'dc:identifier id="uid"')
    else:
        # No unique ID in original, give it a generic one.
        data.append('<dc:identifier id="uid">0</dc:identifier>\n')

    if self.target_epubver == '3':
        # epub version 3 minimal metadata requires a dcterms:modified date tag
        self.createMetaTag(data, 'dcterms:modified', datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))

    if self.isK8 and has_obfuscated_fonts:
        # Use the random generated urn:uuid so obfuscated fonts work.
        # It doesn't need to be _THE_ unique identifier to work as a key
        # for obfuscated fonts in Sigil, ADE and calibre. It just has
        # to use the opf:scheme="UUID" and have the urn:uuid: prefix.
        if self.target_epubver == '3':
            data.append('<dc:identifier>urn:uuid:'+self.BookId+'</dc:identifier>\n')
        else:
            data.append('<dc:identifier opf:scheme="UUID">urn:uuid:'+self.BookId+'</dc:identifier>\n')

    handleTag(data, metadata, 'Creator', 'dc:creator', self.creator_attrib)
    handleTag(data, metadata, 'Contributor', 'dc:contributor')
    handleTag(data, metadata, 'Publisher', 'dc:publisher', self.publisher_attrib)
    handleTag(data, metadata, 'Source', 'dc:source')
    handleTag(data, metadata, 'Type', 'dc:type')
    if self.target_epubver == '3':
        if 'ISBN' in metadata:
            for value in metadata['ISBN']:
                res = '<dc:identifier>urn:isbn:%s</dc:identifier>\n' % self.escapeit(value)
                data.append(res)
    else:
        handleTag(data, metadata, 'ISBN', 'dc:identifier opf:scheme="ISBN"')
    if 'Subject' in metadata:
        if 'SubjectCode' in metadata:
            codeList = metadata['SubjectCode']
            del metadata['SubjectCode']
        else:
            codeList = None
        for i, subject in enumerate(metadata['Subject']):
            # a subject only gets a BASICCode when a code exists at the same index
            if codeList and i < len(codeList):
                data.append('<dc:subject BASICCode="'+codeList[i]+'">')
            else:
                data.append('<dc:subject>')
            data.append(self.escapeit(subject)+'</dc:subject>\n')
        del metadata['Subject']
    handleTag(data, metadata, 'Description', 'dc:description')
    if self.target_epubver == '3':
        if 'Published' in metadata:
            for value in metadata['Published']:
                res = '<dc:date>%s</dc:date>\n' % self.escapeit(value)
                data.append(res)
    else:
        handleTag(data, metadata, 'Published', 'dc:date opf:event="publication"')
    handleTag(data, metadata, 'Rights', 'dc:rights')

    if self.epubver == 'F':
        if self.extra_attributes or k8resc is not None and k8resc.extra_attributes:
            data.append('<!-- THE FOLLOWINGS ARE REQUIRED TO INSERT INTO <dc:xxx> MANUALLY\n')
            if self.extra_attributes:
                data += self.extra_attributes
            if k8resc is not None and k8resc.extra_attributes:
                data += k8resc.extra_attributes
            data.append('-->\n')
    else:
        # Append refines metadata.
        if self.exth_solved_refines_metadata:
            data.append('<!-- Refines MetaData from EXTH -->\n')
            data += self.exth_solved_refines_metadata
        if self.exth_refines_metadata or k8resc is not None and k8resc.refines_metadata:
            data.append('<!-- THE FOLLOWINGS ARE REQUIRED TO EDIT IDS MANUALLY\n')
            if self.exth_refines_metadata:
                data += self.exth_refines_metadata
            if k8resc is not None and k8resc.refines_metadata:
                data += k8resc.refines_metadata
            data.append('-->\n')
        # Append metadata in RESC section.
        if k8resc is not None and k8resc.extra_metadata:
            data.append('<!-- Extra MetaData from RESC\n')
            data += k8resc.extra_metadata
            data.append('-->\n')

    if 'CoverOffset' in metadata:
        imageNumber = int(metadata['CoverOffset'][0])
        self.covername = self.rscnames[imageNumber]
        if self.covername is None:
            print("Error: Cover image %s was not recognized as a valid image" % imageNumber)
        else:
            # <meta name="cover"> is obsoleted in EPUB3, but kindlegen v2.9 requires it.
            data.append('<meta name="cover" content="' + self.cover_id + '" />\n')
            self.used[self.covername] = 'used'
        del metadata['CoverOffset']

    handleMetaPairs(data, metadata, 'Codec', 'output encoding')
    # handle kindlegen specific tags
    handleTag(data, metadata, 'DictInLanguage', 'DictionaryInLanguage')
    handleTag(data, metadata, 'DictOutLanguage', 'DictionaryOutLanguage')
    handleMetaPairs(data, metadata, 'RegionMagnification', 'RegionMagnification')
    handleMetaPairs(data, metadata, 'book-type', 'book-type')
    handleMetaPairs(data, metadata, 'zero-gutter', 'zero-gutter')
    handleMetaPairs(data, metadata, 'zero-margin', 'zero-margin')
    handleMetaPairs(data, metadata, 'primary-writing-mode', 'primary-writing-mode')
    handleMetaPairs(data, metadata, 'fixed-layout', 'fixed-layout')
    handleMetaPairs(data, metadata, 'orientation-lock', 'orientation-lock')
    handleMetaPairs(data, metadata, 'original-resolution', 'original-resolution')

    # these are not allowed in epub2 or 3 so convert them to meta name content pairs
    # perhaps these could better be mapped into the dcterms namespace instead
    handleMetaPairs(data, metadata, 'Review', 'review')
    handleMetaPairs(data, metadata, 'Imprint', 'imprint')
    handleMetaPairs(data, metadata, 'Adult', 'adult')
    handleMetaPairs(data, metadata, 'DictShortName', 'DictionaryVeryShortName')

    # these are needed by kobo books upon submission but not sure if legal metadata in epub2 or epub3
    if 'Price' in metadata and 'Currency' in metadata:
        priceList = metadata['Price']
        currencyList = metadata['Currency']
        if len(priceList) != len(currencyList):
            # the counts are now actually interpolated into the message
            print("Error: found %s price entries, but %s currency entries." % (len(priceList), len(currencyList)))
        else:
            for currency, price in zip(currencyList, priceList):
                data.append('<SRP Currency="'+currency+'">'+price+'</SRP>\n')
        del metadata['Price']
        del metadata['Currency']

    if self.target_epubver == '3':
        # Append metadata for EPUB3.
        if self.exth_fixedlayout_metadata:
            data.append('<!-- EPUB3 MedaData converted from EXTH -->\n')
            data += self.exth_fixedlayout_metadata

    # all that remains is extra EXTH info we will store inside a comment inside meta name/content pairs
    # so it can not impact anything and will be automatically stripped out if found again in a RESC section
    data.append(BEGIN_INFO_ONLY + '\n')
    if 'ThumbOffset' in metadata:
        imageNumber = int(metadata['ThumbOffset'][0])
        imageName = self.rscnames[imageNumber]
        if imageName is None:
            print("Error: Cover Thumbnail image %s was not recognized as a valid image" % imageNumber)
        else:
            data.append('<meta name="Cover ThumbNail Image" content="'+ 'Images/'+imageName+'" />\n')
            # thumbnail image is always generated by Kindlegen, so don't include in manifest
            self.used[imageName] = 'not used'
        del metadata['ThumbOffset']
    for metaName in META_TAGS:
        if metaName in metadata:
            for value in metadata[metaName]:
                data.append('<meta name="'+metaName+'" content="'+self.escapeit(value, EXTRA_ENTITIES)+'" />\n')
            del metadata[metaName]
    # iterate over a snapshot of the keys because entries are deleted in the loop
    for key in list(metadata.keys()):
        for value in metadata[key]:
            data.append('<meta name="'+key+'" content="'+self.escapeit(value, EXTRA_ENTITIES)+'" />\n')
        del metadata[key]
    data.append(END_INFO_ONLY + '\n')
    data.append('</metadata>\n')
    return data
def buildOPFManifest(self, ncxname, navname=None):
    """Build the <manifest> section for mobi7, azw4, epub2 and epub3.

    Stores ``ncxname`` and ``navname`` on the instance as a side effect.

    @param ncxname: File name of the ncx, or None.
    @param navname: File name of the epub3 navigation document, or None.
    @return: ``[data, spinerefs]`` where data is the list of manifest
             strings and spinerefs the item ids of the (x)html files in
             self.fileinfo order.
    """
    k8resc = self.k8resc
    cover_id = self.cover_id
    hasK8RescSpine = k8resc is not None and k8resc.hasSpine()
    self.ncxname = ncxname
    self.navname = navname

    item_template = '<item id="{0:}" media-type="{1:}" href="{2:}" {3:}/>\n'
    media_map = {
        '.jpg'  : 'image/jpeg',
        '.jpeg' : 'image/jpeg',
        '.png'  : 'image/png',
        '.gif'  : 'image/gif',
        '.svg'  : 'image/svg+xml',
        '.xhtml': 'application/xhtml+xml',
        '.html' : 'text/html',                    # for mobi7
        '.pdf'  : 'application/pdf',              # for azw4(print replica textbook)
        '.ttf'  : 'application/x-font-ttf',
        '.otf'  : 'application/x-font-opentype',  # replaced?
        '.css'  : 'text/css',
        # '.html' : 'text/x-oeb1-document',       # for mobi7
        # '.otf'  : 'application/vnd.ms-opentype',  # [OpenType] OpenType fonts
        # '.woff' : 'application/font-woff',      # [WOFF] WOFF fonts
        # '.smil' : 'application/smil+xml',       # [MediaOverlays301] EPUB Media Overlay documents
        # '.pls'  : 'application/pls+xml',        # [PLS] Text-to-Speech (TTS) Pronunciation lexicons
        # '.mp3'  : 'audio/mpeg',
        # '.mp4'  : 'video/mp4',
        # '.js'   : 'text/javascript',            # not supported in K8
    }

    data = ['<manifest>\n']
    spinerefs = []
    idcnt = 0

    # text, style and other generated files
    for key, folder, fname in self.fileinfo:
        ext = os.path.splitext(fname)[1].lower()
        media = media_map.get(ext)
        if hasK8RescSpine and key is not None and key in k8resc.spine_idrefs:
            # reuse the id the RESC spine already assigned to this file
            ref = k8resc.spine_idrefs[key]
        else:
            ref = "item%d" % idcnt
        fpath = fname if folder == '' else folder + '/' + fname
        data.append(item_template.format(ref, media, fpath, ''))
        if ext in ('.xhtml', '.html'):
            spinerefs.append(ref)
        idcnt += 1

    # resources (images and fonts) that were marked as used
    for fname in self.rscnames:
        if fname is None:
            continue
        if self.used.get(fname, 'not used') == 'not used':
            continue
        ext = os.path.splitext(fname)[1].lower()
        # unknown extensions fall back to the bare extension as media type
        media = media_map.get(ext, ext[1:])
        properties = ''
        if fname == self.covername:
            ref = cover_id
            if self.target_epubver == '3':
                properties = 'properties="cover-image"'
        else:
            ref = "item%d" % idcnt
        if ext in ('.ttf', '.otf'):
            if self.isK8:  # fonts are only used in Mobi 8
                data.append(item_template.format(ref, media, 'Fonts/' + fname, properties))
        else:
            data.append(item_template.format(ref, media, 'Images/' + fname, properties))
        idcnt += 1

    if self.target_epubver == '3' and navname is not None:
        data.append('<item id="nav" media-type="application/xhtml+xml" href="Text/' + navname + '" properties="nav"/>\n')
    if self.has_ncx and ncxname is not None:
        data.append('<item id="ncx" media-type="application/x-dtbncx+xml" href="' + ncxname +'" />\n')
    if self.pagemap != '':
        data.append('<item id="map" media-type="application/oebs-page-map+xml" href="page-map.xml" />\n')
    data.append('</manifest>\n')
    return [data, spinerefs]
def buildOPFSpine(self, spinerefs, isNCX):
    """Build the <spine> section of the OPF.

    When the RESC section provides a spine it is used as is; otherwise
    the spine is generated from ``spinerefs``.

    @param spinerefs: Item ids of the text files, in reading order.
    @param isNCX: When true the spine tag gets a toc="ncx" attribute.
    @return: List of strings forming the spine section.
    """
    k8resc = self.k8resc
    hasK8RescSpine = k8resc is not None and k8resc.hasSpine()
    # when epubver is 'F', epub3-only attributes are written as comments
    comment_out = self.epubver == 'F'

    ppd = ''
    if self.isK8 and self.page_progression_direction is not None:
        ppd = ' page-progression-direction="{:s}"'.format(self.page_progression_direction)
    ncx = ' toc="ncx"' if isNCX else ''
    pagemap_attr = ' page-map="map"' if self.pagemap != '' else ''

    if comment_out:
        if ppd:
            ppd = '<!--' + ppd + ' -->'
        spine_start_tag = '<spine{1:s}{2:s}>{0:s}\n'.format(ppd, pagemap_attr, ncx)
    else:
        spine_start_tag = '<spine{0:s}{1:s}{2:s}>\n'.format(ppd, pagemap_attr, ncx)
    data = [spine_start_tag]

    if hasK8RescSpine:
        for key in k8resc.spine_order:
            attribs = k8resc.spine_pageattributes[key]
            pieces = ['<itemref idref="%s"' % k8resc.spine_idrefs[key]]
            for aname, val in list(attribs.items()):
                if val is None or (comment_out and aname == 'properties'):
                    continue
                pieces.append(' %s="%s"' % (aname, val))
            pieces.append('/>')
            if comment_out and 'properties' in attribs:
                val = attribs['properties']
                if val is not None:
                    pieces.append('<!-- properties="%s" -->' % val)
            pieces.append('\n')
            data.append(''.join(pieces))
    else:
        start = 0
        # special case the created coverpage if need be
        key, _folder, _fname = self.fileinfo[0]
        if key is not None and key == "coverpage":
            data.append('<itemref idref="%s" linear="no"/>\n' % spinerefs[start])
            start += 1
        data.extend('<itemref idref="' + entry + '"/>\n' for entry in spinerefs[start:])
    data.append('</spine>\n')
    return data
def buildMobi7OPF(self):
    """Build the OPF for mobi7 and azw4 and return it as one string.

    The package is assembled from the metadata, manifest and spine
    sections, an empty <tours> element and, unless the book is a print
    replica, the guide.
    """
    print("Building an opf for mobi7/azw4.")
    metadata_tag = '<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">'
    parts = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid">\n',
    ]
    parts.extend(self.buildOPFMetadata(metadata_tag))
    # ncxname = self.files.getInputFileBasename() + '.ncx'
    ncxname = 'toc.ncx' if self.has_ncx else None
    opf_manifest, spinerefs = self.buildOPFManifest(ncxname)
    parts.extend(opf_manifest)
    parts.extend(self.buildOPFSpine(spinerefs, self.has_ncx))
    parts.append('<tours>\n</tours>\n')
    if not self.printReplica:
        parts.append('<guide>\n' + self.guidetext + '</guide>\n')
    parts.append('</package>\n')
    return ''.join(parts)
def buildEPUBOPF(self, has_obfuscated_fonts=False):
    """Build the OPF for mobi8 in the target epub version; return a string.

    @param has_obfuscated_fonts: Forwarded to buildOPFMetadata.
    """
    print("Building an opf for mobi8 using epub version: ", self.target_epubver)
    if self.target_epubver == '2':
        has_ncx = self.has_ncx
        has_guide = True
        ncxname = TOC_NCX
        navname = None
        package = '<package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid">\n'
        tours = '<tours>\n</tours>\n'
        metadata_tag = '<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">'
    else:
        has_ncx = EPUB3_WITH_NCX
        has_guide = EPUB3_WITH_GUIDE
        # the ncx is optional for epub3
        ncxname = TOC_NCX if has_ncx else None
        navname = NAVIGATION_DOCUMENT
        package = '<package version="3.0" xmlns="http://www.idpf.org/2007/opf" prefix="rendition: http://www.idpf.org/vocab/rendition/#" unique-identifier="uid">\n'
        tours = ''
        metadata_tag = '<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">'

    parts = ['<?xml version="1.0" encoding="utf-8"?>\n', package]
    parts.extend(self.buildOPFMetadata(metadata_tag, has_obfuscated_fonts))
    opf_manifest, spinerefs = self.buildOPFManifest(ncxname, navname)
    parts.extend(opf_manifest)
    parts.extend(self.buildOPFSpine(spinerefs, has_ncx))
    parts.append(tours)
    if has_guide:
        parts.append('<guide>\n' + self.guidetext + '</guide>\n')
    parts.append('</package>\n')
    return ''.join(parts)
def writeOPF(self, has_obfuscated_fonts=False):
    """Build the OPF and write it, utf-8 encoded, to the output folder.

    @param has_obfuscated_fonts: Forwarded to buildEPUBOPF for K8 books.
    @return: self.BookId for K8 books, 0 otherwise.
    """
    is_k8 = self.isK8
    if is_k8:
        data = self.buildEPUBOPF(has_obfuscated_fonts)
        outopf = os.path.join(self.files.k8oebps, EPUB_OPF)
    else:
        data = self.buildMobi7OPF()
        outopf = os.path.join(self.files.mobi7dir, 'content.opf')
    with open(pathof(outopf), 'wb') as f:
        f.write(data.encode('utf-8'))
    return self.BookId if is_k8 else 0
def getBookId(self):
    """Return the value stored in self.BookId."""
    book_id = self.BookId
    return book_id
def getNCXName(self):
    """Return the ncx file name stored in self.ncxname."""
    ncx_name = self.ncxname
    return ncx_name
def getNAVName(self):
    """Return the navigation document name stored in self.navname."""
    nav_name = self.navname
    return nav_name
def getEPUBVersion(self):
    """Return the target epub version stored in self.target_epubver."""
    version = self.target_epubver
    return version
def hasNCX(self):
    """Tell whether an ncx is in use.

    Returns False when no ncx name is set, otherwise the value of
    self.has_ncx.
    """
    if self.ncxname is None:
        return False
    return self.has_ncx
def hasNAV(self):
    """Return True when a navigation document name is set."""
    return not (self.navname is None)
def autodetectEPUBVersion(self):
    """Determine the EPUB version from the metadata and the RESC section.

    @return: '3' when a fixed layout, an orientation lock, rtl page
             progression, any furigana EXTH entry or the RESC section
             calls for it; '2' otherwise.
    """
    metadata = self.metadata
    k8resc = self.k8resc
    if metadata.get('fixed-layout', [''])[0].lower() == 'true':
        return '3'
    if metadata.get('orientation-lock', [''])[0].lower() in ['portrait', 'landscape']:
        return '3'
    if self.page_progression_direction == 'rtl':
        return '3'
    for furigana_key in (EXTH_TITLE_FURIGANA, EXTH_CREATOR_FURIGANA, EXTH_PUBLISHER_FURIGANA):
        if furigana_key in metadata:
            return '3'
    if k8resc is not None and k8resc.needEPUB3():
        return '3'
    return '2'
def defineRefinesID(self):
    """Assign ids to the Title, Creator and Publisher entries when needed.

    The following EXTH are set by KDP:
      'Title_Furigana_(508)'
      'Creator_Furigana_(517)'
      'Publisher_Furigana_(522)'
    It is difficult to find the correspondence between Title, Creator,
    Publisher and EXTH 508, 512, 522 if they have more than two values
    since KDP seems not to preserve the order of EXTH 508, 512 and 522.
    It is also difficult to find the correspondence between them and the
    tags which have refines attributes in RESC, so manual editing is
    required.
    """
    metadata = self.metadata
    needRefinesId = False
    if self.k8resc is not None:
        needRefinesId = self.k8resc.hasRefines()

    # Create ids for the refines attributes; numbering starts at 01
    if 'Title' in metadata and (needRefinesId or EXTH_TITLE_FURIGANA in metadata):
        for i, _ in enumerate(metadata['Title']):
            self.title_id[i] = 'title%02d' % (i+1)
    if 'Creator' in metadata and (needRefinesId or EXTH_CREATOR_FURIGANA in metadata):
        for i, _ in enumerate(metadata['Creator']):
            self.creator_id[i] = 'creator%02d' % (i+1)
    if 'Publisher' in metadata and (needRefinesId or EXTH_PUBLISHER_FURIGANA in metadata):
        for i, _ in enumerate(metadata['Publisher']):
            self.publisher_id[i] = 'publisher%02d' % (i+1)
def processRefinesMetadata(self):
    """Turn the furigana EXTH entries into file-as information.

    For epub3 this creates refines metadata; for epub2 the refines
    property is converted to opf: attributes.  Nothing is done unless at
    least one of the furigana EXTH keys is present in the metadata.
    """
    metadata = self.metadata
    refines_list = [
        [EXTH_TITLE_FURIGANA, self.title_id, self.title_attrib, 'title00'],
        [EXTH_CREATOR_FURIGANA, self.creator_id, self.creator_attrib, 'creator00'],
        [EXTH_PUBLISHER_FURIGANA, self.publisher_id, self.publisher_attrib, 'publisher00']
    ]
    if not any(entry[0] in metadata for entry in refines_list):
        return

    for exth_key, ids, attrib, defaultid in refines_list:
        is_epub3 = self.target_epubver == '3'
        if is_epub3:
            # every entry gets its id attribute, furigana or not
            for i, value in list(ids.items()):
                attrib[i] = ' id="%s"' % value
        if exth_key not in metadata:
            continue
        values = metadata[exth_key]
        # a single value with a single id can be matched up automatically
        unambiguous = len(values) == 1 and len(ids) == 1
        if is_epub3:
            if unambiguous:
                self.createMetaTag(self.exth_solved_refines_metadata, 'file-as', values[0], ids[0])
            else:
                for i, value in enumerate(values):
                    self.createMetaTag(self.exth_refines_metadata, 'file-as', value, ids.get(i, defaultid))
        elif unambiguous:
            attrib[0] = ' opf:file-as="%s"' % values[0]
        else:
            for i, value in enumerate(values):
                self.extra_attributes.append(' id="#%s" opf:file-as="%s"\n' % (ids.get(i, defaultid), value))
def createMetadataForFixedlayout(self):
    """Convert the fixed layout EXTH entries to epub3 rendition metadata.

    Appends rendition:layout and rendition:orientation meta tags to
    self.exth_fixedlayout_metadata when the corresponding entries exist.
    """
    metadata = self.metadata
    target = self.exth_fixedlayout_metadata
    if 'fixed-layout' in metadata:
        flag = metadata['fixed-layout'][0].lower()
        layout = 'pre-paginated' if flag == 'true' else 'reflowable'
        self.createMetaTag(target, 'rendition:layout', layout)
    if 'orientation-lock' in metadata:
        orientation = metadata['orientation-lock'][0].lower()
        # any other value is ignored
        if orientation in ('portrait', 'landscape'):
            self.createMetaTag(target, 'rendition:orientation', orientation)
# according to epub3 spec about correspondence with Amazon
# if 'original-resolution' is provided it needs to be converted to
# meta viewport property tag stored in the <head></head> of **each**
# xhtml page - so this tag would need to be handled by editing each part
# before reaching this routine
# we need to add support for this to the k8html routine
# if 'original-resolution' in metadata.keys():
# resolution = metadata['original-resolution'][0].lower()
# width, height = resolution.split('x')
# if width.isdigit() and int(width) > 0 and height.isdigit() and int(height) > 0:
# viewport = 'width=%s, height=%s' % (width, height)
# self.createMetaTag(self.exth_fixedlayout_metadata, 'rendition:viewport', viewport)

View File

@@ -0,0 +1,158 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, unicode_str
if PY2:
range = xrange
import struct
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
import re
# note: re requires the pattern to be the exact same type as the data to be searched in python3
# but u"" is not allowed for the pattern itself only b""
# roman numeral letters and their values, largest first; the subtractive
# pairs (cm, cd, xc, ...) are listed so a greedy scan handles them
_TABLE = [('m', 1000), ('cm', 900), ('d', 500), ('cd', 400), ('c', 100), ('xc', 90), ('l', 50), ('xl', 40), ('x', 10), ('ix', 9), ('v', 5), ('iv', 4), ('i', 1)]


def int_to_roman(i):
    """Return the lowercase roman numeral for the integer ``i``.

    Values below 1 give an empty string.
    """
    remaining = i
    pieces = []
    for letter, value in _TABLE:
        if remaining >= value:
            repeat, remaining = divmod(remaining, value)
            pieces.append(letter * repeat)
    return ''.join(pieces)
def roman_to_int(s):
    """Return the integer value of the lowercase roman numeral ``s``.

    The string is consumed greedily from the left using _TABLE; whatever
    cannot be matched is ignored.
    """
    total = 0
    rest = s
    for letter, value in _TABLE:
        width = len(letter)
        while rest.startswith(letter):
            total += value
            rest = rest[width:]
    return total
_pattern = r'''\(([^\)]*)\)'''
_tup_pattern = re.compile(_pattern,re.IGNORECASE)


def _parseNames(numpages, data):
    """Expand the page map string into one page name per page.

    ``data`` holds tuples of the form ``(start,type,value)``.  ``start``
    is the 1-based page the rule starts at and ``type`` is one of
      'a' - arabic numbers counting up from ``value``
      'r' - roman numerals counting up from ``value``
      'c' - custom names taken from ``value``, separated by '|'
    Each rule names every page from its start to the last page, so a
    later rule overrides an earlier one from its own start onwards.

    @param numpages: Number of pages.
    @param data: The page map string (converted with unicode_str).
    @return: Tuple of (list with one name or None per page, the tuples
             found in ``data`` re-joined with commas).
    """
    data = unicode_str(data)
    pagenames = [None] * numpages
    map_parts = []
    for m in _tup_pattern.finditer(data):
        tup = m.group(1)
        map_parts.append('(' + tup + ')')
        # maxsplit keeps commas inside a custom name list from breaking the unpacking
        spos, nametype, svalue = tup.split(",", 2)
        # print(spos, nametype, svalue)
        if nametype not in ('a', 'r', 'c'):
            # skip the rule: no page name can be derived from an unknown type
            print("Error: unknown page numbering type", nametype)
            continue
        if nametype != 'c':
            svalue = int(svalue)
        spos = int(spos)
        for i in range(spos - 1, numpages):
            if nametype == 'r':
                pname = int_to_roman(svalue)
                svalue += 1
            elif nametype == 'a':
                pname = "%s" % svalue
                svalue += 1
            else:
                # custom names: use the next '|' separated entry; the last
                # entry is repeated for all remaining pages
                sp = svalue.find('|')
                if sp == -1:
                    pname = svalue
                else:
                    pname = svalue[0:sp]
                    svalue = svalue[sp+1:]
            pagenames[i] = pname
    return pagenames, ','.join(map_parts)
class PageMapProcessor:
    """Parse a page map record and convert it to page-map.xml or APNX."""

    def __init__(self, mh, data):
        """Unpack the page names and page offsets from ``data``.

        @param mh: Stored as self.mh; not used by this class itself.
        @param data: The raw page map record as a bytestring.
        """
        self.mh = mh
        self.data = data
        self.pagenames = []
        self.pageoffsets = []
        self.pageMap = ''
        self.pm_len = 0
        self.pm_nn = 0
        # NOTE(review): pn_bits is never read; the offset width that is
        # unpacked below is stored as pm_bits
        self.pn_bits = 0
        self.pmoff = None
        self.pmstr = ''
        print("Extracting Page Map Information")
        (rev_len,) = struct.unpack_from(b'>L', self.data, 0x10)
        # skip over header, revision string length data, and revision string
        base = 0x14 + rev_len
        pm_1, self.pm_len, self.pm_nn, self.pm_bits = struct.unpack_from(b'>4H', self.data, base)
        # print(pm_1, self.pm_len, self.pm_nn, self.pm_bits)
        names_start = base + 8
        names_end = names_start + self.pm_len
        self.pmstr = self.data[names_start:names_end]
        self.pmoff = self.data[names_end:]
        # offsets are 16 bit when pm_bits says so, otherwise 32 bit
        if self.pm_bits == 16:
            offsize, offwidth = b">H", 2
        else:
            offsize, offwidth = b">L", 4
        self.pageoffsets.extend(
            struct.unpack_from(offsize, self.pmoff, k * offwidth)[0]
            for k in range(self.pm_nn)
        )
        self.pagenames, self.pageMap = _parseNames(self.pm_nn, self.pmstr)

    def getPageMap(self):
        """Return the page map string produced by _parseNames."""
        return self.pageMap

    def getNames(self):
        """Return the list of page names (None for unnamed pages)."""
        return self.pagenames

    def getOffsets(self):
        """Return the list of page offsets."""
        return self.pageoffsets

    # page-map.xml will be unicode but encoded to utf-8 immediately before being written to a file
    def generateKF8PageMapXML(self, k8proc):
        """Return the content of page-map.xml as a unicode string.

        @param k8proc: Object providing getSkelInfo(pos) and
                       getPageIDTag(pos) for a page offset.
        """
        lines = ['<page-map xmlns="http://www.idpf.org/2007/opf">\n']
        for i, name in enumerate(self.pagenames):
            pos = self.pageoffsets[i]
            if name is None or name == "":
                continue
            [pn, dir, filename, skelpos, skelend, aidtext] = k8proc.getSkelInfo(pos)
            idtext = unicode_str(k8proc.getPageIDTag(pos))
            linktgt = unicode_str(filename)
            if idtext != '':
                linktgt += '#' + idtext
            lines.append('<page name="%s" href="%s/%s" />\n' % (name, dir, linktgt))
        lines.append("</page-map>\n")
        return ''.join(lines)

    def generateAPNX(self, apnx_meta):
        """Return the page offsets packed as an APNX bytestring.

        @param apnx_meta: Dict with the keys contentGuid, asin, cdeType,
                          format and pageMap (plus acr for MOBI_8).
        """
        if apnx_meta['format'] == 'MOBI_8':
            content_header = '{"contentGuid":"%(contentGuid)s","asin":"%(asin)s","cdeType":"%(cdeType)s","format":"%(format)s","fileRevisionId":"1","acr":"%(acr)s"}' %apnx_meta
        else:
            content_header = '{"contentGuid":"%(contentGuid)s","asin":"%(asin)s","cdeType":"%(cdeType)s","fileRevisionId":"1"}' % apnx_meta
        content_header = content_header.encode('utf-8')
        page_header = '{"asin":"%(asin)s","pageMap":"%(pageMap)s"}' % apnx_meta
        page_header = page_header.encode('utf-8')
        chunks = [
            struct.pack(b'>H', 1),
            struct.pack(b'>H', 1),
            struct.pack(b'>I', 12 + len(content_header)),
            struct.pack(b'>I', len(content_header)),
            content_header,
            struct.pack(b'>H', 1),
            struct.pack(b'>H', len(page_header)),
            struct.pack(b'>H', self.pm_nn),
            struct.pack(b'>H', 32),
            page_header,
        ]
        chunks.extend(struct.pack(b'>L', page) for page in self.pageoffsets)
        return b''.join(chunks)

View File

@@ -0,0 +1,120 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, hexlify, bstr, bord, bchar
import datetime
if PY2:
range = xrange
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
import struct
from .unipath import pathof
DUMP = False
""" Set to True to dump all possible information. """
class unpackException(Exception):
    """Exception type of this module; it adds nothing to Exception."""
def describe(data):
    """Return ``data`` as quoted text followed by its hex dump.

    Bytes below 32 or above 127 are shown as '?' in the text part.
    """
    hexans = hexlify(data)
    chars = []
    for item in data:
        code = bord(item)
        chars.append(bchar(item).decode('latin-1') if 32 <= code <= 127 else '?')
    return '"' + ''.join(chars) + '"' + ' 0x'+ hexans
def datetimefrompalmtime(palmtime):
    """Convert a Palm timestamp in seconds to a naive datetime.

    Values above 0x7FFFFFFF are counted from 1904-01-01, all other values
    from 1970-01-01.
    """
    epoch_year = 1904 if palmtime > 0x7FFFFFFF else 1970
    epoch = datetime.datetime(year=epoch_year, month=1, day=1)
    return epoch + datetime.timedelta(seconds=palmtime)
class Sectionizer:
    """Load a Palm database file and give access to its sections."""

    # (bit mask, label) pairs of the header attribute bits that
    # dumppalmheader reports, in output order
    _ATTRIBUTE_FLAGS = (
        (2, "Read-only; "),
        (4, "Dirty AppInfoArea; "),
        (8, "Needs to be backed up; "),
        (16, "OK to install over newer; "),
        (32, "Reset after installation; "),
        (64, "No copying by PalmPilot beaming; "),
    )

    def __init__(self, filename):
        """Read ``filename`` completely and unpack the section table.

        @param filename: Path of the file; passed through pathof.
        """
        self.data = b''
        with open(pathof(filename), 'rb') as f:
            self.data = f.read()
        self.palmheader = self.data[:78]
        self.palmname = self.data[:32]
        self.ident = self.palmheader[0x3C:0x3C+8]
        self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76)
        self.filelength = len(self.data)
        # the table holds an (offset, attributes) pair per section; the file
        # length is appended as an extra offset so that the end of the last
        # section can be looked up like any other
        sectionsdata = struct.unpack_from(bstr('>%dL' % (self.num_sections*2)), self.data, 78) + (self.filelength, 0)
        self.sectionoffsets = sectionsdata[::2]
        self.sectionattributes = sectionsdata[1::2]
        self.sectiondescriptions = ["" for x in range(self.num_sections+1)]
        self.sectiondescriptions[-1] = "File Length Only"

    def dumpsectionsinfo(self):
        """Print offset, length, uid, attributes and description per section."""
        print("Section Offset Length UID Attribs Description")
        for i in range(self.num_sections):
            print("%3d %3X 0x%07X 0x%05X % 8d % 7d %s" % (i,i, self.sectionoffsets[i], self.sectionoffsets[
                i+1] - self.sectionoffsets[i], self.sectionattributes[i]&0xFFFFFF, (self.sectionattributes[i]>>24)&0xFF, self.sectiondescriptions[i]))
        print("%3d %3X 0x%07X %s" %
              (self.num_sections,self.num_sections, self.sectionoffsets[self.num_sections], self.sectiondescriptions[self.num_sections]))

    def setsectiondescription(self, section, description):
        """Store ``description`` for ``section``; report an unknown section."""
        if section < len(self.sectiondescriptions):
            self.sectiondescriptions[section] = description
        else:
            print("Section out of range: %d, description %s" % (section,description))

    def dumppalmheader(self):
        """Print the fields of the Palm database header."""
        print("Palm Database Header")
        print("Database name: " + repr(self.palmheader[:32]))
        dbattributes, = struct.unpack_from(b'>H', self.palmheader, 32)
        # The value and its decoded flags belong on one line.  The old code
        # used the Python 2 trailing-comma form, which under print_function
        # is an ordinary call and put every part on a line of its own.
        attribute_line = "Bitfield attributes: 0x%0X" % dbattributes
        if dbattributes != 0:
            flags = ''.join(label for mask, label in self._ATTRIBUTE_FLAGS if dbattributes & mask)
            attribute_line += " (" + flags + ")"
        print(attribute_line)
        print("File version: %d" % struct.unpack_from(b'>H', self.palmheader, 34)[0])
        dbcreation, = struct.unpack_from(b'>L', self.palmheader, 36)
        print("Creation Date: " + str(datetimefrompalmtime(dbcreation))+ (" (0x%0X)" % dbcreation))
        dbmodification, = struct.unpack_from(b'>L', self.palmheader, 40)
        print("Modification Date: " + str(datetimefrompalmtime(dbmodification))+ (" (0x%0X)" % dbmodification))
        dbbackup, = struct.unpack_from(b'>L', self.palmheader, 44)
        if dbbackup != 0:
            print("Backup Date: " + str(datetimefrompalmtime(dbbackup))+ (" (0x%0X)" % dbbackup))
        print("Modification No.: %d" % struct.unpack_from(b'>L', self.palmheader, 48)[0])
        print("App Info offset: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 52)[0])
        print("Sort Info offset: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 56)[0])
        print("Type/Creator: %s/%s" % (repr(self.palmheader[60:64]), repr(self.palmheader[64:68])))
        print("Unique seed: 0x%0X" % struct.unpack_from(b'>L', self.palmheader, 68)[0])
        expectedzero, = struct.unpack_from(b'>L', self.palmheader, 72)
        if expectedzero != 0:
            print("Should be zero but isn't: %d" % expectedzero)
        print("Number of sections: %d" % struct.unpack_from(b'>H', self.palmheader, 76)[0])

    def loadSection(self, section):
        """Return the raw bytes of section number ``section``."""
        before, after = self.sectionoffsets[section:section+2]
        return self.data[before:after]

438
lector/KindleUnpack/mobi_split.py Executable file
View File

@@ -0,0 +1,438 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
import struct
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
from .unipath import pathof
# important pdb header offsets
unique_id_seed = 68
number_of_pdb_records = 76
# important palmdoc header offsets
book_length = 4
book_record_count = 8
first_pdb_record = 78
# important rec0 offsets
length_of_book = 4
mobi_header_base = 16
mobi_header_length = 20
mobi_type = 24
mobi_version = 36
first_non_text = 80
title_offset = 84
first_resc_record = 108
first_content_index = 192
last_content_index = 194
kf8_fdst_index = 192 # for KF8 mobi headers
fcis_index = 200
flis_index = 208
srcs_index = 224
srcs_count = 228
primary_index = 244
datp_index = 256
huffoff = 112
hufftbloff = 120
def getint(datain,ofs,sz=b'L'):
    """Return the big-endian integer stored in ``datain`` at ``ofs``.

    @param sz: struct format code of the integer as a bytestring,
               b'L' (4 bytes) by default.
    """
    fmt = b'>' + sz
    (value,) = struct.unpack_from(fmt, datain, ofs)
    return value
def writeint(datain,ofs,n,len=b'L'):
    """Return a copy of ``datain`` with the integer ``n`` stored at ``ofs``.

    @param len: b'L' writes 4 bytes; any other value writes 2 bytes.
                Both are big-endian.
    """
    if len == b'L':
        fmt, width = b'>L', 4
    else:
        fmt, width = b'>H', 2
    return datain[:ofs] + struct.pack(fmt, n) + datain[ofs+width:]
def getsecaddr(datain,secno):
    """Return the (start, end) byte offsets of section ``secno``.

    The start is read from the section's entry in the pdb record table.
    The end is the start of the next section, or the end of ``datain``
    for the last section.

    @raise AssertionError: when ``secno`` is not in range(nsec).
    """
    nsec = getint(datain,number_of_pdb_records,b'H')
    # was `secno>=0 & secno<nsec`: `&` binds tighter than the comparisons,
    # so that only tested `secno >= 0 < nsec` and never the upper bound
    assert 0 <= secno < nsec, 'secno %d out of range (nsec=%d)' % (secno, nsec)
    secstart = getint(datain,first_pdb_record+secno*8)
    if secno == nsec-1:
        secend = len(datain)
    else:
        secend = getint(datain,first_pdb_record+(secno+1)*8)
    return secstart,secend
def readsection(datain,secno):
    """Return the bytes of section ``secno`` of ``datain``."""
    start, end = getsecaddr(datain,secno)
    section = datain[start:end]
    return section
def writesection(datain,secno,secdata):  # overwrite, accounting for different length
    """Return ``datain`` with the data of section ``secno`` replaced.

    The offsets of all later sections are shifted by the difference in
    length.  The unique id seed is rewritten as 2*nsec+1 and the flag
    value of the replaced section as 2*secno.
    """
    # dataout = deletesectionrange(datain,secno, secno)
    # return insertsection(dataout, secno, secdata)
    nsec = getint(datain,number_of_pdb_records,b'H')
    zerosecstart, _zerosecend = getsecaddr(datain,0)
    secstart, secend = getsecaddr(datain,secno)
    shift = len(secdata) - (secend - secstart)

    # pdb header with refreshed unique id seed and record count
    parts = [
        datain[:unique_id_seed],
        struct.pack(b'>L', 2*nsec+1),
        datain[unique_id_seed+4:number_of_pdb_records],
        struct.pack(b'>H', nsec),
    ]
    # record table: earlier entries unchanged, later entries shifted
    for i in range(0, secno):
        ofs, flgval = struct.unpack_from(b'>2L', datain, first_pdb_record+i*8)
        parts.append(struct.pack(b'>2L', ofs, flgval))
    parts.append(struct.pack(b'>2L', secstart, 2*secno))
    for i in range(secno+1, nsec):
        ofs, flgval = struct.unpack_from(b'>2L', datain, first_pdb_record+i*8)
        parts.append(struct.pack(b'>2L', ofs + shift, flgval))
    # keep the gap between the record table and the first section
    lpad = zerosecstart - (first_pdb_record + 8*nsec)
    if lpad > 0:
        parts.append(b'\0' * lpad)
    parts.append(datain[zerosecstart:secstart])
    parts.append(secdata)
    parts.append(datain[secend:])
    return b''.join(parts)
def nullsection(datain,secno):  # make it zero-length without deleting it
    """Return ``datain`` with section ``secno`` emptied but still listed.

    The section keeps its record table entry; the offsets of all later
    sections are moved down by the length that was removed.
    """
    nsec = getint(datain,number_of_pdb_records,b'H')
    secstart, secend = getsecaddr(datain,secno)
    zerosecstart, _zerosecend = getsecaddr(datain, 0)
    removed = secend - secstart

    parts = [datain[:first_pdb_record]]
    # entries up to and including secno keep their offsets
    for i in range(0, secno+1):
        ofs, flgval = struct.unpack_from(b'>2L', datain, first_pdb_record+i*8)
        parts.append(struct.pack(b'>2L', ofs, flgval))
    for i in range(secno+1, nsec):
        ofs, flgval = struct.unpack_from(b'>2L', datain, first_pdb_record+i*8)
        parts.append(struct.pack(b'>2L', ofs - removed, flgval))
    # keep the gap between the record table and the first section
    lpad = zerosecstart - (first_pdb_record + 8*nsec)
    if lpad > 0:
        parts.append(b'\0' * lpad)
    parts.append(datain[zerosecstart:secstart])
    parts.append(datain[secend:])
    return b''.join(parts)
def deletesectionrange(datain,firstsec,lastsec):  # delete a range of sections
    """Return a copy of the PDB image without sections ``firstsec..lastsec``.

    Both the record-table entries and the payload bytes of the range are
    removed.  The header fields at ``unique_id_seed`` and
    ``number_of_pdb_records`` are rewritten for the new record count, the
    surviving offsets are adjusted, and the second word of every entry
    after the range is renumbered to ``2 * new_index``.
    """
    first_begin, _first_end = getsecaddr(datain, firstsec)
    _last_begin, last_end = getsecaddr(datain, lastsec)
    data_begin, _zero_end = getsecaddr(datain, 0)
    ndel = lastsec - firstsec + 1
    table_shrink = 8 * ndel                    # bytes removed from the record table
    tail_shift = last_end - first_begin + table_shrink
    record_count = getint(datain, number_of_pdb_records, b'H')
    remaining = record_count - ndel

    pieces = [
        datain[:unique_id_seed],
        struct.pack(b'>L', 2 * remaining + 1),
        datain[unique_id_seed + 4:number_of_pdb_records],
        struct.pack(b'>H', remaining),
    ]
    # records before the range only move because the table got shorter
    for idx in range(0, firstsec):
        offset, flags = struct.unpack_from(b'>2L', datain, first_pdb_record + idx * 8)
        pieces.append(struct.pack(b'>2L', offset - table_shrink, flags))
    # records after the range also lose the deleted payload bytes
    for idx in range(lastsec + 1, record_count):
        offset, _old_flags = struct.unpack_from(b'>2L', datain, first_pdb_record + idx * 8)
        pieces.append(struct.pack(b'>2L', offset - tail_shift, 2 * (idx - ndel)))

    new_data_begin = data_begin - table_shrink
    gap = new_data_begin - (first_pdb_record + 8 * remaining)
    if gap > 0:
        pieces.append(b'\0' * gap)
    pieces.append(datain[data_begin:first_begin])
    pieces.append(datain[last_end:])
    return b''.join(pieces)
def insertsection(datain,secno,secdata):  # insert a new section
    """Return a copy of the PDB image with ``secdata`` inserted as section ``secno``.

    The record table grows by one 8-byte entry, so every stored offset moves
    up by 8; records from the old ``secno`` onwards additionally move up by
    ``len(secdata)`` and have their second word renumbered to
    ``2 * new_index``.
    """
    record_count = getint(datain, number_of_pdb_records, b'H')
    insert_at, _sec_end = getsecaddr(datain, secno)
    data_begin, _zero_end = getsecaddr(datain, 0)
    payload_len = len(secdata)

    pieces = [
        datain[:unique_id_seed],
        struct.pack(b'>L', 2 * (record_count + 1) + 1),
        datain[unique_id_seed + 4:number_of_pdb_records],
        struct.pack(b'>H', record_count + 1),
    ]
    for idx in range(0, secno):
        offset, flags = struct.unpack_from(b'>2L', datain, first_pdb_record + idx * 8)
        pieces.append(struct.pack(b'>2L', offset + 8, flags))
    # table entry for the section being inserted
    pieces.append(struct.pack(b'>2L', insert_at + 8, 2 * secno))
    for idx in range(secno, record_count):
        offset, _old_flags = struct.unpack_from(b'>2L', datain, first_pdb_record + idx * 8)
        pieces.append(struct.pack(b'>2L', offset + payload_len + 8, 2 * (idx + 1)))

    new_data_begin = data_begin + 8
    gap = new_data_begin - (first_pdb_record + 8 * (record_count + 1))
    if gap > 0:
        pieces.append(b'\0' * gap)
    pieces.append(datain[data_begin:insert_at])
    pieces.append(secdata)
    pieces.append(datain[insert_at:])
    return b''.join(pieces)
def insertsectionrange(sectionsource,firstsec,lastsec,sectiontarget,targetsec):  # insert a range of sections
    """Copy sections ``firstsec..lastsec`` of ``sectionsource`` into ``sectiontarget``.

    The copied sections are placed in front of the target's section
    ``targetsec``.  The target's header fields, record-table offsets and the
    second word of every inserted or following entry are rewritten to match
    the new layout; the new image is returned.
    """
    record_count = getint(sectiontarget, number_of_pdb_records, b'H')
    data_begin, _ignored = getsecaddr(sectiontarget, 0)
    insert_at, _ignored = getsecaddr(sectiontarget, targetsec)
    nins = lastsec - firstsec + 1
    src_begin, _ignored = getsecaddr(sectionsource, firstsec)
    _ignored, src_end = getsecaddr(sectionsource, lastsec)
    table_growth = 8 * nins                    # bytes added to the record table
    payload_len = src_end - src_begin

    pieces = [
        sectiontarget[:unique_id_seed],
        struct.pack(b'>L', 2 * (record_count + nins) + 1),
        sectiontarget[unique_id_seed + 4:number_of_pdb_records],
        struct.pack(b'>H', record_count + nins),
    ]
    # target records in front of the insertion point: only the table grew
    for idx in range(0, targetsec):
        offset, flags = struct.unpack_from(b'>2L', sectiontarget, first_pdb_record + idx * 8)
        pieces.append(struct.pack(b'>2L', offset + table_growth, flags))
    # entries for the copied records, laid out relative to the insertion point
    for k in range(nins):
        rec_begin, _ignored = getsecaddr(sectionsource, firstsec + k)
        new_offset = insert_at + (rec_begin - src_begin) + table_growth
        pieces.append(struct.pack(b'>2L', new_offset, 2 * (targetsec + k)))
    # target records behind the insertion point: table growth plus new payload
    for idx in range(targetsec, record_count):
        offset, _old_flags = struct.unpack_from(b'>2L', sectiontarget, first_pdb_record + idx * 8)
        pieces.append(struct.pack(b'>2L', offset + payload_len + table_growth, 2 * (idx + nins)))

    new_data_begin = data_begin + table_growth
    gap = new_data_begin - (first_pdb_record + 8 * (record_count + nins))
    if gap > 0:
        pieces.append(b'\0' * gap)
    pieces.append(sectiontarget[data_begin:insert_at])
    pieces.append(sectionsource[src_begin:src_end])
    pieces.append(sectiontarget[insert_at:])
    return b''.join(pieces)
def get_exth_params(rec0):
    """Return ``(ebase, elen, enum)`` for the EXTH block of record 0.

    ``ebase`` is ``mobi_header_base`` plus the value stored at
    ``mobi_header_length``; ``elen`` and ``enum`` are the integers read at
    ``ebase + 4`` and ``ebase + 8``.
    """
    exth_base = mobi_header_base + getint(rec0, mobi_header_length)
    exth_length = getint(rec0, exth_base + 4)
    exth_count = getint(rec0, exth_base + 8)
    return exth_base, exth_length, exth_count
def add_exth(rec0,exth_num,exth_bytes):
    """Return record 0 with a new EXTH entry ``exth_num`` placed first.

    The EXTH length and count fields are increased and the value stored at
    ``title_offset`` is moved up by the size of the new entry.
    """
    ebase, elen, enum = get_exth_params(rec0)
    entry_size = 8 + len(exth_bytes)           # 4-byte id + 4-byte size + payload
    exth_header = struct.pack(b'>L', elen + entry_size) + struct.pack(b'>L', enum + 1)
    new_entry = struct.pack(b'>L', exth_num) + struct.pack(b'>L', entry_size) + exth_bytes
    grown = rec0[0:ebase + 4] + exth_header + new_entry + rec0[ebase + 12:]
    return writeint(grown, title_offset, getint(grown, title_offset) + entry_size)
def read_exth(rec0,exth_num):
    """Return the payloads of every EXTH entry whose id equals ``exth_num``.

    The result is a list because an id may occur more than once; it is
    empty when the id is not present.
    """
    found = []
    ebase, _elen, remaining = get_exth_params(rec0)
    cursor = ebase + 12                        # first entry follows the 12-byte EXTH header
    while remaining > 0:
        entry_id = getint(rec0, cursor)
        if entry_id == exth_num:
            # payload starts after the 8-byte id/size prefix
            found.append(rec0[cursor + 8:cursor + getint(rec0, cursor + 4)])
        remaining -= 1
        cursor += getint(rec0, cursor + 4)
    return found
def write_exth(rec0,exth_num,exth_bytes):
    """Return record 0 with the first EXTH entry ``exth_num`` set to ``exth_bytes``.

    The EXTH length field and the value at ``title_offset`` are adjusted by
    the change in entry size.  ``rec0`` is returned unchanged when no entry
    with that id exists.
    """
    ebase, elen, enum = get_exth_params(rec0)
    cursor = ebase + 12
    remaining = enum
    while remaining > 0:
        if getint(rec0, cursor) == exth_num:
            old_size = getint(rec0, cursor + 4)
            new_size = len(exth_bytes) + 8
            growth = new_size - old_size
            patched = rec0
            if growth != 0:
                patched = writeint(patched, title_offset, getint(patched, title_offset) + growth)
            # the prefix comes from the patched copy; everything behind the
            # EXTH header is taken from the untouched original record
            return (patched[:ebase + 4]
                    + struct.pack(b'>L', elen + growth)
                    + struct.pack(b'>L', enum)
                    + rec0[ebase + 12:cursor + 4]
                    + struct.pack(b'>L', new_size)
                    + exth_bytes
                    + rec0[cursor + old_size:])
        remaining -= 1
        cursor += getint(rec0, cursor + 4)
    return rec0
def del_exth(rec0,exth_num):
    """Return record 0 with the first EXTH entry ``exth_num`` removed.

    The EXTH length and count fields and the value at ``title_offset`` are
    reduced accordingly.  ``rec0`` is returned unchanged when no entry with
    that id exists.
    """
    ebase, elen, enum = get_exth_params(rec0)
    cursor = ebase + 12
    remaining = enum
    while remaining > 0:
        entry_id = getint(rec0, cursor)
        entry_size = getint(rec0, cursor + 4)
        if entry_id == exth_num:
            shrunk = writeint(rec0, title_offset, getint(rec0, title_offset) - entry_size)
            shrunk = shrunk[:cursor] + shrunk[cursor + entry_size:]
            exth_header = struct.pack(b'>L', elen - entry_size) + struct.pack(b'>L', enum - 1)
            return shrunk[0:ebase + 4] + exth_header + shrunk[ebase + 12:]
        remaining -= 1
        cursor += entry_size
    return rec0
class mobi_split:
    """Split a combined Mobi7/KF8 file into a standalone Mobi7 and a standalone KF8 image.

    After construction ``self.combo`` tells whether the input really was a
    combined file.  ``self.result_file7`` and ``self.result_file8`` are only
    set when ``self.combo`` is True.
    """

    def __init__(self, infile):
        """Read ``infile`` and, if it is a combo file, build both result images."""
        with open(pathof(infile), 'rb') as f:
            datain = f.read()
        datain_rec0 = readsection(datain,0)
        ver = getint(datain_rec0,mobi_version)
        self.combo = (ver!=8)
        if not self.combo:
            return
        exth121 = read_exth(datain_rec0,121)
        if len(exth121) == 0:
            self.combo = False
            return
        # only pay attention to first exth121
        # (there should only be one)
        datain_kf8, = struct.unpack_from(b'>L',exth121[0],0)
        if datain_kf8 == 0xffffffff:
            self.combo = False
            return
        datain_kfrec0 = readsection(datain,datain_kf8)

        # create the standalone mobi7
        num_sec = getint(datain,number_of_pdb_records,b'H')
        # remove BOUNDARY up to but not including ELF record
        self.result_file7 = deletesectionrange(datain,datain_kf8-1,num_sec-2)
        # check if there are SRCS records and delete them
        srcs = getint(datain_rec0,srcs_index)
        num_srcs = getint(datain_rec0,srcs_count)
        if srcs != 0xffffffff and num_srcs > 0:
            self.result_file7 = deletesectionrange(self.result_file7,srcs,srcs+num_srcs-1)
            datain_rec0 = writeint(datain_rec0,srcs_index,0xffffffff)
            datain_rec0 = writeint(datain_rec0,srcs_count,0)
        # reset the EXTH 121 KF8 Boundary meta data to 0xffffffff
        datain_rec0 = write_exth(datain_rec0,121, struct.pack(b'>L', 0xffffffff))
        # don't remove the EXTH 125 KF8 Count of Resources, seems to be present in mobi6 files as well
        # set the EXTH 129 KF8 Masthead / Cover Image string to the null string
        datain_rec0 = write_exth(datain_rec0,129, b'')
        # don't remove the EXTH 131 KF8 Unidentified Count, seems to be present in mobi6 files as well

        # need to reset flags stored in 0x80-0x83
        # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
        # Bit Flags
        # 0x1000 = Bit 12 indicates if embedded fonts are used or not
        # 0x0800 = means this Header points to *shared* images/resource/fonts ??
        # 0x0080 = unknown new flag, why is this now being set by Kindlegen 2.8?
        # 0x0040 = exth exists
        # 0x0010 = Not sure but this is always set so far
        fval, = struct.unpack_from(b'>L',datain_rec0, 0x80)
        # need to remove flag 0x0800 for KindlePreviewer 2.8 and unset Bit 12 for embedded fonts
        fval = fval & 0x07FF
        datain_rec0 = datain_rec0[:0x80] + struct.pack(b'>L',fval) + datain_rec0[0x84:]
        self.result_file7 = writesection(self.result_file7,0,datain_rec0)
        # no need to replace kf8 style fcis with mobi 7 one

        firstimage = getint(datain_rec0,first_resc_record)
        lastimage = getint(datain_rec0,last_content_index,b'H')
        if lastimage == 0xffff:
            # find the lowest of the next sections and copy up to that.
            ofs_list = [(fcis_index,b'L'),(flis_index,b'L'),(datp_index,b'L'),(hufftbloff, b'L')]
            for ofs,sz in ofs_list:
                n = getint(datain_rec0,ofs,sz)
                if n > 0 and n < lastimage:
                    lastimage = n-1
        print("First Image, last Image", firstimage,lastimage)

        # Try to null out FONT and RES, but leave the (empty) PDB record so image refs remain valid
        for i in range(firstimage,lastimage):
            imgsec = readsection(self.result_file7,i)
            if imgsec[0:4] in [b'RESC',b'FONT']:
                self.result_file7 = nullsection(self.result_file7,i)
        # mobi7 finished

        # create standalone mobi8
        self.result_file8 = deletesectionrange(datain,0,datain_kf8-1)
        target = getint(datain_kfrec0,first_resc_record)
        self.result_file8 = insertsectionrange(datain,firstimage,lastimage,self.result_file8,target)
        datain_kfrec0 = readsection(self.result_file8,0)

        # Only keep the correct EXTH 116 StartOffset, KG 2.5 carries over the one from the mobi7 part,
        # which then points at garbage in the mobi8 part, and confuses FW 3.4
        kf8starts = read_exth(datain_kfrec0,116)
        # If we have multiple StartOffset, keep only the last one
        kf8start_count = len(kf8starts)
        while kf8start_count > 1:
            kf8start_count -= 1
            datain_kfrec0 = del_exth(datain_kfrec0,116)

        # update the EXTH 125 KF8 Count of Images/Fonts/Resources
        datain_kfrec0 = write_exth(datain_kfrec0,125,struct.pack(b'>L',lastimage-firstimage+1))

        # need to reset flags stored in 0x80-0x83
        # old mobi with exth: 0x50, mobi7 part with exth: 0x1850, mobi8 part with exth: 0x1050
        # standalone mobi8 with exth: 0x0050
        # (see the bit flag list above)
        # the format must be a bytestring like every other struct call here:
        # with unicode_literals a plain '>L' is rejected by struct on older
        # Python 2.7 releases
        fval, = struct.unpack_from(b'>L',datain_kfrec0, 0x80)
        fval = fval & 0x1FFF
        fval |= 0x0800
        datain_kfrec0 = datain_kfrec0[:0x80] + struct.pack(b'>L',fval) + datain_kfrec0[0x84:]

        # properly update other index pointers that have been shifted by the insertion of images
        ofs_list = [(kf8_fdst_index,b'L'),(fcis_index,b'L'),(flis_index,b'L'),(datp_index,b'L'),(hufftbloff, b'L')]
        for ofs,sz in ofs_list:
            n = getint(datain_kfrec0,ofs,sz)
            if n != 0xffffffff:
                datain_kfrec0 = writeint(datain_kfrec0,ofs,n+lastimage-firstimage+1,sz)
        self.result_file8 = writesection(self.result_file8,0,datain_kfrec0)
        # no need to replace kf8 style fcis with mobi 7 one
        # mobi8 finished

    def getResult8(self):
        """Return the standalone KF8 image (only set when ``self.combo`` is True)."""
        return self.result_file8

    def getResult7(self):
        """Return the standalone Mobi7 image (only set when ``self.combo`` is True)."""
        return self.result_file7

View File

@@ -0,0 +1,131 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, bchr, lmap, bstr
if PY2:
range = xrange
import struct
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
class unpackException(Exception):
    """Raised when compressed data carries an invalid header."""
class UncompressedReader:
    """Reader for records that are stored without compression."""

    def unpack(self, data):
        """Return ``data`` unchanged."""
        return data
class PalmdocReader:
    """Reader for records compressed with the byte-oriented Palmdoc scheme."""

    def unpack(self, i):
        """Return the decompressed form of the bytestring ``i``.

        Each control byte ``c`` is interpreted as follows:

        * ``1..8``     -- copy the next ``c`` input bytes verbatim
        * ``0, 9..127``-- emit ``c`` itself
        * ``192..255`` -- emit a space followed by ``c ^ 128``
        * ``128..191`` -- together with the next byte forms a back reference
          (distance ``m``, length ``n``) into the output produced so far; a
          trailing control byte with no partner byte is ignored

        The output is collected in a ``bytearray`` so that appending is
        amortised O(1) instead of re-copying an immutable bytestring on every
        step.
        """
        out = bytearray()
        p = 0
        end = len(i)
        while p < end:
            # use a slice, not an index: i[p] is an int on python 3 but a
            # 1-char string on python 2, a slice works with ord() on both
            c = ord(i[p:p+1])
            p += 1
            if 1 <= c <= 8:
                out += i[p:p+c]
                p += c
            elif c < 128:
                out.append(c)
            elif c >= 192:
                out.append(0x20)
                out.append(c ^ 128)
            elif p < end:
                c = (c << 8) | ord(i[p:p+1])
                p += 1
                m = (c >> 3) & 0x07ff
                n = (c & 7) + 3
                if m > n:
                    # source and destination do not overlap: copy in one go
                    out += out[-m:n-m]
                else:
                    # overlapping copy has to proceed one byte at a time so
                    # that freshly written bytes can be re-read
                    for _ in range(n):
                        if m == 1:
                            out += out[-m:]
                        else:
                            out += out[-m:-m+1]
        return bytes(out)
class HuffcdicReader:
    """Reader for records compressed with the HUFF/CDIC scheme.

    ``loadHuff`` must be called once with the HUFF record and ``loadCdic``
    once per CDIC record before ``unpack`` is used.
    """

    # reads one big-endian unsigned 64-bit value at a given offset
    q = struct.Struct(b'>Q').unpack_from

    def loadHuff(self, huff):
        """Load the code tables from a HUFF record.

        Raises ``unpackException`` when the 8-byte header does not match.
        """
        if huff[0:8] != b'HUFF\x00\x00\x00\x18':
            raise unpackException('invalid huff header')
        off1, off2 = struct.unpack_from(b'>LL', huff, 8)

        def dict1_unpack(v):
            codelen, term, maxcode = v&0x1f, v&0x80, v>>8
            assert codelen != 0
            if codelen <= 8:
                assert term
            maxcode = ((maxcode + 1) << (32 - codelen)) - 1
            return (codelen, term, maxcode)
        # 256 entries, indexed in unpack() by the top 8 bits of the code
        self.dict1 = lmap(dict1_unpack, struct.unpack_from(b'>256L', huff, off1))

        # 32 (mincode, maxcode) pairs; index 0 is a filler so that the tuple
        # index equals the code length
        dict2 = struct.unpack_from(b'>64L', huff, off2)
        self.mincode = tuple(
            mincode << (32 - codelen)
            for codelen, mincode in enumerate((0,) + dict2[0::2]))
        self.maxcode = tuple(
            ((maxcode + 1) << (32 - codelen)) - 1
            for codelen, maxcode in enumerate((0,) + dict2[1::2]))

        self.dictionary = []

    def loadCdic(self, cdic):
        """Append the phrases of one CDIC record to ``self.dictionary``.

        Raises ``unpackException`` when the 8-byte header does not match.
        """
        if cdic[0:8] != b'CDIC\x00\x00\x00\x10':
            raise unpackException('invalid cdic header')
        phrases, bits = struct.unpack_from(b'>LL', cdic, 8)
        n = min(1<<bits, phrases-len(self.dictionary))
        h = struct.Struct(b'>H').unpack_from

        def getslice(off):
            # low 15 bits: phrase length, top bit: stored as the entry's flag
            blen, = h(cdic, 16+off)
            phrase = cdic[18+off:18+off+(blen&0x7fff)]
            return (phrase, blen&0x8000)
        self.dictionary += lmap(getslice, struct.unpack_from(bstr('>%dH' % n), cdic, 16))

    def unpack(self, data):
        """Return the decompressed form of the bytestring ``data``.

        Dictionary entries whose flag is not set are themselves unpacked
        recursively on first use and cached back into ``self.dictionary``.
        """
        q = HuffcdicReader.q
        bitsleft = len(data) * 8
        # pad on a copy so the 64-bit reads near the end never run past the
        # buffer (and a caller-owned mutable buffer is never extended)
        data = data + b"\x00\x00\x00\x00\x00\x00\x00\x00"
        pos = 0
        x, = q(data, pos)
        n = 32
        # collect the pieces and join once instead of re-copying the result
        # on every appended phrase
        chunks = []
        while True:
            if n <= 0:
                pos += 4
                x, = q(data, pos)
                n += 32
            code = (x >> n) & ((1 << 32) - 1)

            codelen, term, maxcode = self.dict1[code >> 24]
            if not term:
                while code < self.mincode[codelen]:
                    codelen += 1
                maxcode = self.maxcode[codelen]

            n -= codelen
            bitsleft -= codelen
            if bitsleft < 0:
                break

            r = (maxcode - code) >> (32 - codelen)
            phrase, flag = self.dictionary[r]
            if not flag:
                # entry is cleared while it is being expanded -- presumably
                # so that a self-referencing entry fails instead of
                # recursing forever
                self.dictionary[r] = None
                phrase = self.unpack(phrase)
                self.dictionary[r] = (phrase, 1)
            chunks.append(phrase)
        return b''.join(chunks)

View File

@@ -0,0 +1,191 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# flake8: noqa
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, text_type, bchr, bord
import binascii
if PY2:
range = xrange
from itertools import cycle
def getLanguage(langID, sublangID):
    """Return the language tag for a Mobipocket language / sub-language id pair.

    Unknown ``langID`` values give ``'en'``; an unknown ``sublangID`` falls
    back to the base entry (key 0) of the language.
    """
    mobilangdict = {
        54: {0: 'af'},  # Afrikaans
        28: {0: 'sq'},  # Albanian
        # Arabic, Arabic (Algeria), Arabic (Bahrain), Arabic (Egypt), Arabic
        # (Iraq), Arabic (Jordan), Arabic (Kuwait), Arabic (Lebanon), Arabic
        # (Libya), Arabic (Morocco), Arabic (Oman), Arabic (Qatar), Arabic
        # (Saudi Arabia), Arabic (Syria), Arabic (Tunisia), Arabic (United Arab
        # Emirates), Arabic (Yemen)
        1: {0: 'ar', 5: 'ar-dz', 15: 'ar-bh', 3: 'ar-eg', 2: 'ar-iq',
            11: 'ar-jo', 13: 'ar-kw', 12: 'ar-lb', 4: 'ar-ly', 6: 'ar-ma',
            8: 'ar-om', 16: 'ar-qa', 1: 'ar-sa', 10: 'ar-sy', 7: 'ar-tn',
            14: 'ar-ae', 9: 'ar-ye'},
        43: {0: 'hy'},  # Armenian
        77: {0: 'as'},  # Assamese
        44: {0: 'az'},  # "Azeri" (IANA: Azerbaijani)
        45: {0: 'eu'},  # Basque
        35: {0: 'be'},  # Belarusian
        69: {0: 'bn'},  # Bengali
        2: {0: 'bg'},  # Bulgarian
        3: {0: 'ca'},  # Catalan
        # Chinese, Chinese (Hong Kong), Chinese (PRC), Chinese (Singapore), Chinese (Taiwan)
        4: {0: 'zh', 3: 'zh-hk', 2: 'zh-cn', 4: 'zh-sg', 1: 'zh-tw'},
        26: {0: 'hr', 3: 'sr'},  # Croatian, Serbian
        5: {0: 'cs'},  # Czech
        6: {0: 'da'},  # Danish
        19: {0: 'nl', 1: 'nl', 2: 'nl-be'},  # Dutch / Flemish, Dutch (Belgium)
        # English, English (Australia), English (Belize), English (Canada),
        # English (Ireland), English (Jamaica), English (New Zealand), English
        # (Philippines), English (South Africa), English (Trinidad), English
        # (United Kingdom), English (United States), English (Zimbabwe)
        # (key 1 was listed twice in the literal; the later 'en-us' is the
        # value that takes effect)
        9: {0: 'en', 3: 'en-au', 40: 'en-bz', 4: 'en-ca', 6: 'en-ie',
            8: 'en-jm', 5: 'en-nz', 13: 'en-ph', 7: 'en-za', 11: 'en-tt',
            2: 'en-gb', 1: 'en-us', 12: 'en-zw'},
        37: {0: 'et'},  # Estonian
        56: {0: 'fo'},  # Faroese
        41: {0: 'fa'},  # Farsi / Persian
        11: {0: 'fi'},  # Finnish
        # French, French (Belgium), French (Canada), French (Luxembourg), French (Monaco), French (Switzerland)
        12: {0: 'fr', 1: 'fr', 2: 'fr-be', 3: 'fr-ca', 5: 'fr-lu', 6: 'fr-mc', 4: 'fr-ch'},
        55: {0: 'ka'},  # Georgian
        # German, German (Austria), German (Liechtenstein), German (Luxembourg), German (Switzerland)
        7: {0: 'de', 1: 'de', 3: 'de-at', 5: 'de-li', 4: 'de-lu', 2: 'de-ch'},
        8: {0: 'el'},  # Greek, Modern (1453-)
        71: {0: 'gu'},  # Gujarati
        13: {0: 'he'},  # Hebrew (also code 'iw'?)
        57: {0: 'hi'},  # Hindi
        14: {0: 'hu'},  # Hungarian
        15: {0: 'is'},  # Icelandic
        33: {0: 'id'},  # Indonesian
        16: {0: 'it', 1: 'it', 2: 'it-ch'},  # Italian, Italian (Switzerland)
        17: {0: 'ja'},  # Japanese
        75: {0: 'kn'},  # Kannada
        63: {0: 'kk'},  # Kazakh
        87: {0: 'x-kok'},  # Konkani (real language code is 'kok'?)
        18: {0: 'ko'},  # Korean
        38: {0: 'lv'},  # Latvian
        39: {0: 'lt'},  # Lithuanian
        47: {0: 'mk'},  # Macedonian
        62: {0: 'ms'},  # Malay
        76: {0: 'ml'},  # Malayalam
        58: {0: 'mt'},  # Maltese
        78: {0: 'mr'},  # Marathi
        97: {0: 'ne'},  # Nepali
        20: {0: 'no'},  # Norwegian
        72: {0: 'or'},  # Oriya
        21: {0: 'pl'},  # Polish
        22: {0: 'pt', 2: 'pt', 1: 'pt-br'},  # Portuguese, Portuguese (Brazil)
        70: {0: 'pa'},  # Punjabi
        23: {0: 'rm'},  # "Rhaeto-Romanic" (IANA: Romansh)
        24: {0: 'ro'},  # Romanian
        25: {0: 'ru'},  # Russian
        59: {0: 'sz'},  # "Sami (Lappish)" (not an IANA language code)
                        # IANA code for "Northern Sami" is 'se'
                        # 'SZ' is the IANA region code for Swaziland
        79: {0: 'sa'},  # Sanskrit
        27: {0: 'sk'},  # Slovak
        36: {0: 'sl'},  # Slovenian
        46: {0: 'sb'},  # "Sorbian" (not an IANA language code)
                        # 'SB' is IANA region code for 'Solomon Islands'
                        # Lower Sorbian = 'dsb'
                        # Upper Sorbian = 'hsb'
                        # Sorbian Languages = 'wen'
        # Spanish, Spanish (Mobipocket bug?), Spanish (Argentina), Spanish
        # (Bolivia), Spanish (Chile), Spanish (Colombia), Spanish (Costa Rica),
        # Spanish (Dominican Republic), Spanish (Ecuador), Spanish (El
        # Salvador), Spanish (Guatemala), Spanish (Honduras), Spanish (Mexico),
        # Spanish (Nicaragua), Spanish (Panama), Spanish (Paraguay), Spanish
        # (Peru), Spanish (Puerto Rico), Spanish (Uruguay), Spanish (Venezuela)
        10: {0: 'es', 4: 'es', 44: 'es-ar', 64: 'es-bo', 52: 'es-cl',
             36: 'es-co', 20: 'es-cr', 28: 'es-do', 48: 'es-ec', 68: 'es-sv',
             16: 'es-gt', 72: 'es-hn', 8: 'es-mx', 76: 'es-ni', 24: 'es-pa',
             60: 'es-py', 40: 'es-pe', 80: 'es-pr', 56: 'es-uy', 32: 'es-ve'},
        48: {0: 'sx'},  # "Sutu" (not an IANA language code)
                        # "Sutu" is another name for "Southern Sotho"?
                        # IANA code for "Southern Sotho" is 'st'
        65: {0: 'sw'},  # Swahili
        29: {0: 'sv', 1: 'sv', 8: 'sv-fi'},  # Swedish, Swedish (Finland)
        73: {0: 'ta'},  # Tamil
        68: {0: 'tt'},  # Tatar
        74: {0: 'te'},  # Telugu
        30: {0: 'th'},  # Thai
        49: {0: 'ts'},  # Tsonga
        50: {0: 'tn'},  # Tswana
        31: {0: 'tr'},  # Turkish
        34: {0: 'uk'},  # Ukrainian
        32: {0: 'ur'},  # Urdu
        67: {0: 'uz', 2: 'uz'},  # Uzbek
        42: {0: 'vi'},  # Vietnamese
        52: {0: 'xh'},  # Xhosa
        53: {0: 'zu'},  # Zulu
    }
    variants = mobilangdict.get(langID)
    if variants is None:
        return "en"
    # key 0 is the base entry of every language and serves as the fallback
    return variants.get(sublangID, variants[0])
def toHex(byteList):
    """Return the hexadecimal representation of ``byteList`` as a bytestring."""
    hex_digits = binascii.hexlify(byteList)
    return hex_digits
# returns base32 bytestring
def toBase32(value, npad=4):
    """Return ``value`` as a base-32 bytestring (digits ``0-9A-V``).

    The result is left-padded with ``b'0'`` to at least ``npad`` characters.

    Raises ``ValueError`` for a negative ``value``: with floor division a
    negative number never reaches 0, so the conversion loop would not
    terminate.
    """
    if value < 0:
        raise ValueError('toBase32 requires a non-negative value')
    digits = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
    # digits are produced least-significant first and reversed at the end
    reversed_digits = []
    current = value
    while current != 0:
        current, remainder = divmod(current, 32)
        reversed_digits.append(digits[remainder:remainder+1])
    num_string = b''.join(reversed(reversed_digits))
    if num_string == b'':
        num_string = b'0'
    pad = npad - len(num_string)
    if pad > 0:
        num_string = b'0' * pad + num_string
    return num_string
# converts base32 string to value
def fromBase32(str_num):
    """Return the integer value of a base-32 number given as text or bytes.

    Text input is encoded as latin-1 first.  The characters ``0-9`` map to
    0..9; any other character ``c`` maps to ``ord(c) - ord('A') + 10``.
    """
    if isinstance(str_num, text_type):
        str_num = str_num.encode('latin-1')
    total = 0
    weight = 1                                 # 32 ** (position from the right)
    for pos in range(len(str_num) - 1, -1, -1):
        # slice rather than index so a 1-byte bytestring is obtained on
        # both python 2 and python 3
        ch = str_num[pos:pos+1]
        if ch in b'0123456789':
            digit = ord(ch) - ord(b'0')
        else:
            digit = ord(ch) - ord(b'A') + 10
        if digit != 0:
            total += digit * weight
        weight *= 32
    return total
# note: decoding a bytestring using 'latin-1' (or any other 0-255 encoding)
# in place of ascii gives a one-to-one mapping of each byte to a
# half-word or integer value in the range 0 - 255
def mangle_fonts(encryption_key, data):
    """XOR the first 1024 bytes of ``data`` with the repeating ``encryption_key``.

    A text key is encoded as latin-1 first.  Bytes after the first 1024 are
    returned unchanged.
    """
    if isinstance(encryption_key, text_type):
        encryption_key = encryption_key.encode('latin-1')
    key_stream = cycle(iter(map(bord, encryption_key)))
    scrambled = []
    for ch in data[:1024]:
        scrambled.append(bchr(bord(ch) ^ next(key_stream)))
    return b''.join(scrambled) + data[1024:]

View File

@@ -0,0 +1,525 @@
#! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# this program works in concert with the output from KindleUnpack
'''
Convert from Mobi ML to XHTML
'''
import os
import sys
import re
# Tags that are not ordinary elements.  Maps the tag name as produced by
# parsetag() to (tag type, end index); parsetag() stores s[p:end index] of
# the raw tag text under the 'special' attribute key.
SPECIAL_HANDLING_TAGS = {
    '?xml' : ('xmlheader', -1),
    '!--' : ('comment', -3),
    '!DOCTYPE' : ('doctype', -1),
}
# Tag types that taginfo_tostring() re-emits from their 'special' attribute.
SPECIAL_HANDLING_TYPES = ['xmlheader', 'doctype', 'comment']
# Tags that processml() forces to be self-closing; their end tags are dropped.
SELF_CLOSING_TAGS = ['br' , 'hr', 'input', 'img', 'image', 'meta', 'spacer', 'link', 'frame', 'base', 'col', 'reference']
class MobiMLConverter(object):
PAGE_BREAK_PAT = re.compile(r'(<[/]{0,1}mbp:pagebreak\s*[/]{0,1}>)+', re.IGNORECASE)
IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
def __init__(self, filename):
self.base_css_rules = 'blockquote { margin: 0em 0em 0em 1.25em }\n'
self.base_css_rules += 'p { margin: 0em }\n'
self.base_css_rules += '.bold { font-weight: bold }\n'
self.base_css_rules += '.italic { font-style: italic }\n'
self.base_css_rules += '.mbp_pagebreak { page-break-after: always; margin: 0; display: block }\n'
self.tag_css_rules = {}
self.tag_css_rule_cnt = 0
self.path = []
self.filename = filename
self.wipml = open(self.filename, 'rb').read()
self.pos = 0
self.opfname = self.filename.rsplit('.',1)[0] + '.opf'
self.opos = 0
self.meta = ''
self.cssname = os.path.join(os.path.dirname(self.filename),'styles.css')
self.current_font_size = 3
self.font_history = []
def cleanup_html(self):
self.wipml = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.wipml)
self.wipml = self.wipml.replace('\r\n', '\n')
self.wipml = self.wipml.replace('> <', '>\n<')
self.wipml = self.wipml.replace('<mbp: ', '<mbp:')
# self.wipml = re.sub(r'<?xml[^>]*>', '', self.wipml)
self.wipml = self.wipml.replace('<br></br>','<br/>')
def replace_page_breaks(self):
self.wipml = self.PAGE_BREAK_PAT.sub(
'<div class="mbp_pagebreak" />',
self.wipml)
# parse leading text of ml and tag
def parseml(self):
p = self.pos
if p >= len(self.wipml):
return None
if self.wipml[p] != '<':
res = self.wipml.find('<',p)
if res == -1 :
res = len(self.wipml)
self.pos = res
return self.wipml[p:res], None
# handle comment as a special case to deal with multi-line comments
if self.wipml[p:p+4] == '<!--':
te = self.wipml.find('-->',p+1)
if te != -1:
te = te+2
else :
te = self.wipml.find('>',p+1)
ntb = self.wipml.find('<',p+1)
if ntb != -1 and ntb < te:
self.pos = ntb
return self.wipml[p:ntb], None
self.pos = te + 1
return None, self.wipml[p:te+1]
# parses string version of tag to identify its name,
# its type 'begin', 'end' or 'single',
# plus build a hashtable of its attributes
# code is written to handle the possibility of very poor formatting
def parsetag(self, s):
    """Split the raw tag text ``s`` into ``(ttype, tname, tattr)``.

    ``s`` is the complete tag including its leading ``<``.  ``ttype`` is
    'end', 'begin', 'single', 'single_ext' or one of the types listed in
    SPECIAL_HANDLING_TAGS; ``tname`` is the lower-cased tag name (except
    '!DOCTYPE'); ``tattr`` maps lower-cased attribute names to their values.

    NOTE(review): the scanning loops only stop on a delimiter character, so
    they appear to rely on ``s`` ending with '>' and on quotes being
    balanced -- confirm that callers guarantee this.
    """
    # skip the leading '<'
    p = 1
    # get the tag name
    tname = None
    ttype = None
    tattr = {}
    while s[p:p+1] == ' ' :
        p += 1
    if s[p:p+1] == '/':
        ttype = 'end'
        p += 1
        while s[p:p+1] == ' ' :
            p += 1
    b = p
    while s[p:p+1] not in ('>', '/', ' ', '"', "'", "\r", "\n") :
        p += 1
    tname=s[b:p].lower()
    # restore the spelling used as key in SPECIAL_HANDLING_TAGS
    if tname == '!doctype':
        tname = '!DOCTYPE'
    # special cases
    if tname in SPECIAL_HANDLING_TAGS.keys():
        ttype, backstep = SPECIAL_HANDLING_TAGS[tname]
        # keep everything between the tag name and the closing delimiter
        tattr['special'] = s[p:backstep]
    if ttype is None:
        # parse any attributes
        while s.find('=',p) != -1 :
            while s[p:p+1] == ' ' :
                p += 1
            b = p
            while s[p:p+1] != '=' :
                p += 1
            aname = s[b:p].lower()
            aname = aname.rstrip(' ')
            p += 1
            while s[p:p+1] == ' ' :
                p += 1
            if s[p:p+1] in ('"', "'") :
                # quoted value: runs up to the next quote of either kind
                p = p + 1
                b = p
                while s[p:p+1] not in ('"', "'") :
                    p += 1
                val = s[b:p]
                p += 1
            else :
                # unquoted value: runs up to the next '>', '/' or space
                b = p
                while s[p:p+1] not in ('>', '/', ' ') :
                    p += 1
                val = s[b:p]
            tattr[aname] = val
    # label beginning and single tags
    if ttype is None:
        ttype = 'begin'
        if s.find(' /',p) >= 0:
            ttype = 'single_ext'
        elif s.find('/',p) >= 0:
            ttype = 'single'
    return ttype, tname, tattr
# main routine to convert from mobi markup language to html
def processml(self):
    """Convert the loaded Mobi ML into XHTML.

    Runs replace_page_breaks() and cleanup_html(), then walks the markup
    with parseml()/parsetag(), rewriting each tag through processtag().

    Returns the tuple ``(htmlstr, css, self.cssname)`` where ``css`` is the
    base CSS followed by one rule per entry of ``self.tag_css_rules``.
    """
    # are these really needed
    html_done = False
    head_done = False
    body_done = False
    skip = False
    htmlstr = ''
    self.replace_page_breaks()
    self.cleanup_html()
    # now parse the cleaned up ml into standard xhtml
    while True:
        r = self.parseml()
        if not r:
            break
        text, tag = r
        if text:
            if not skip:
                htmlstr += text
        if tag:
            ttype, tname, tattr = self.parsetag(tag)
            # If we run into a DTD or xml declarations inside the body ... bail.
            # NOTE(review): parsetag() returns 'comment' as the tag *type*;
            # tname for a comment is '!--', so the `tname != 'comment'`
            # clause does not exclude comments -- confirm intent.
            if tname in SPECIAL_HANDLING_TAGS.keys() and tname != 'comment' and body_done:
                htmlstr += '\n</body></html>'
                break
            # make sure self-closing tags actually self-close
            if ttype == 'begin' and tname in SELF_CLOSING_TAGS:
                ttype = 'single'
            # make sure any end tags of self-closing tags are discarded
            if ttype == 'end' and tname in SELF_CLOSING_TAGS:
                continue
            # remove embedded guide and references from old mobis
            if tname in ('guide', 'ncx', 'reference') and ttype in ('begin', 'single', 'single_ext'):
                tname = 'removeme:{0}'.format(tname)
                tattr = None
            if tname in ('guide', 'ncx', 'reference', 'font', 'span') and ttype == 'end':
                # NOTE(review): self.path[-1] raises IndexError when the
                # nesting path is empty (end tag with nothing open).
                if self.path[-1] == 'removeme:{0}'.format(tname):
                    tname = 'removeme:{0}'.format(tname)
                    tattr = None
            # Get rid of font tags that only have a color attribute.
            if tname == 'font' and ttype in ('begin', 'single', 'single_ext'):
                if 'color' in tattr.keys() and len(tattr.keys()) == 1:
                    tname = 'removeme:{0}'.format(tname)
                    tattr = None
            # Get rid of empty spans in the markup.
            if tname == 'span' and ttype in ('begin', 'single', 'single_ext') and not len(tattr):
                tname = 'removeme:{0}'.format(tname)
            # need to handle fonts outside of the normal methods
            # so fonts tags won't be added to the self.path since we keep track
            # of font tags separately with self.font_history
            if tname == 'font' and ttype == 'begin':
                # check for nested font start tags
                if len(self.font_history) > 0 :
                    # inject a font end tag
                    taginfo = ('end', 'font', None)
                    htmlstr += self.processtag(taginfo)
                self.font_history.append((ttype, tname, tattr))
                # handle the current font start tag
                taginfo = (ttype, tname, tattr)
                htmlstr += self.processtag(taginfo)
                continue
            # check for nested font tags and unnest them
            if tname == 'font' and ttype == 'end':
                # NOTE(review): pop() raises IndexError for a font end tag
                # without a recorded font start tag.
                self.font_history.pop()
                # handle this font end tag
                taginfo = ('end', 'font', None)
                htmlstr += self.processtag(taginfo)
                # check if we were nested
                if len(self.font_history) > 0:
                    # inject a copy of the most recent font start tag from history
                    taginfo = self.font_history[-1]
                    htmlstr += self.processtag(taginfo)
                continue
            # keep track of nesting path
            if ttype == 'begin':
                self.path.append(tname)
            elif ttype == 'end':
                if tname != self.path[-1]:
                    print ('improper nesting: ', self.path, tname, ttype)
                    if tname not in self.path:
                        # handle case of end tag with no beginning by injecting empty begin tag
                        taginfo = ('begin', tname, None)
                        htmlstr += self.processtag(taginfo)
                        print " - fixed by injecting empty start tag ", tname
                        self.path.append(tname)
                    elif len(self.path) > 1 and tname == self.path[-2]:
                        # handle case of dangling missing end
                        taginfo = ('end', self.path[-1], None)
                        htmlstr += self.processtag(taginfo)
                        print " - fixed by injecting end tag ", self.path[-1]
                        self.path.pop()
                self.path.pop()
            # NOTE(review): a string never equals itself with a 'removeme:'
            # prefix added, so this condition is always False; `skip` is
            # never set and the else branch always runs (processtag()
            # returns '' for removeme tags).  Confirm the intended check.
            if tname == 'removeme:{0}'.format(tname):
                if ttype in ('begin', 'single', 'single_ext'):
                    skip = True
                else:
                    skip = False
            else:
                taginfo = (ttype, tname, tattr)
                htmlstr += self.processtag(taginfo)
            # handle potential issue of multiple html, head, and body sections
            if tname == 'html' and ttype == 'begin' and not html_done:
                htmlstr += '\n'
                html_done = True
            if tname == 'head' and ttype == 'begin' and not head_done:
                htmlstr += '\n'
                # also add in metadata and style link tags
                htmlstr += self.meta
                htmlstr += '<link href="styles.css" rel="stylesheet" type="text/css" />\n'
                head_done = True
            if tname == 'body' and ttype == 'begin' and not body_done:
                htmlstr += '\n'
                body_done = True
    # handle issue of possibly missing html, head, and body tags
    # I have not seen this but the original did something like this so ...
    if not body_done:
        htmlstr = '<body>\n' + htmlstr + '</body>\n'
    if not head_done:
        headstr = '<head>\n'
        headstr += self.meta
        headstr += '<link href="styles.css" rel="stylesheet" type="text/css" />\n'
        headstr += '</head>\n'
        htmlstr = headstr + htmlstr
    if not html_done:
        htmlstr = '<html>\n' + htmlstr + '</html>\n'
    # finally add DOCTYPE info
    htmlstr = '<?xml version="1.0"?>\n<!DOCTYPE HTML PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n' + htmlstr
    # one CSS class rule per entry collected in self.tag_css_rules
    css = self.base_css_rules
    for cls, rule in self.tag_css_rules.items():
        css += '.%s { %s }\n' % (cls, rule)
    return (htmlstr, css, self.cssname)
def ensure_unit(self, raw, unit='px'):
    """Return ``raw`` with ``unit`` appended when ``raw`` ends in digits."""
    ends_in_digits = re.search(r'\d+$', raw) is not None
    return raw + unit if ends_in_digits else raw
# flatten possibly modified tag back to string
def taginfo_tostring(self, taginfo):
    """Flatten a ``(ttype, tname, tattr)`` tuple back into tag text.

    Returns '' when the type or the name is None.  End tags are written as
    ``</name>``.  Tags of a type listed in SPECIAL_HANDLING_TYPES that carry
    a 'special' attribute are rebuilt from that attribute.  Every other tag
    is written with its attributes and closed with ``/>`` ('single'),
    `` />`` ('single_ext') or ``>``.
    """
    (ttype, tname, tattr) = taginfo
    if ttype is None or tname is None:
        return ''
    if ttype == 'end':
        return '</%s>' % tname
    if ttype in SPECIAL_HANDLING_TYPES and tattr is not None and 'special' in tattr:
        info = tattr['special']
        # both values must be passed to % as ONE tuple; `fmt % tname, info`
        # formats with tname alone and raises TypeError
        if ttype == 'comment':
            return '<%s %s-->' % (tname, info)
        return '<%s %s>' % (tname, info)
    res = ['<%s' % tname]
    if tattr is not None:
        for key in tattr:
            res.append(' %s="%s"' % (key, tattr[key]))
    if ttype == 'single':
        res.append('/>')
    elif ttype == 'single_ext':
        res.append(' />')
    else:
        res.append('>')
    return "".join(res)
# routines to convert from mobi ml tag attributes to xhtml attributes and styles
def processtag(self, taginfo):
# Converting mobi font sizes to numerics
size_map = {
'xx-small': '1',
'x-small': '2',
'small': '3',
'medium': '4',
'large': '5',
'x-large': '6',
'xx-large': '7',
}
size_to_em_map = {
'1': '.65em',
'2': '.75em',
'3': '1em',
'4': '1.125em',
'5': '1.25em',
'6': '1.5em',
'7': '2em',
}
# current tag to work on
(ttype, tname, tattr) = taginfo
if not tattr:
tattr = {}
styles = []
if tname is None or tname.startswith('removeme'):
return ''
# have not seen an example of this yet so keep it here to be safe
# until this is better understood
if tname in ('country-region', 'place', 'placetype', 'placename',
'state', 'city', 'street', 'address', 'content'):
tname = 'div' if tname == 'content' else 'span'
for key in tattr.keys():
tattr.pop(key)
# handle general case of style, height, width, bgcolor in any tag
if 'style' in tattr.keys():
style = tattr.pop('style').strip()
if style:
styles.append(style)
if 'align' in tattr.keys():
align = tattr.pop('align').strip()
if align:
if tname in ('table', 'td', 'tr'):
pass
else:
styles.append('text-align: %s' % align)
if 'height' in tattr.keys():
height = tattr.pop('height').strip()
if height and '<' not in height and '>' not in height and re.search(r'\d+', height):
if tname in ('table', 'td', 'tr'):
pass
elif tname == 'img':
tattr['height'] = height
else:
styles.append('margin-top: %s' % self.ensure_unit(height))
if 'width' in tattr.keys():
width = tattr.pop('width').strip()
if width and re.search(r'\d+', width):
if tname in ('table', 'td', 'tr'):
pass
elif tname == 'img':
tattr['width'] = width
else:
styles.append('text-indent: %s' % self.ensure_unit(width))
if width.startswith('-'):
styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
if 'bgcolor' in tattr.keys():
# no proprietary html allowed
if tname == 'div':
del tattr['bgcolor']
elif tname == 'font':
# Change font tags to span tags
tname = 'span'
if ttype in ('begin', 'single', 'single_ext'):
# move the face attribute to css font-family
if 'face' in tattr.keys():
face = tattr.pop('face').strip()
styles.append('font-family: "%s"' % face)
# Monitor the constantly changing font sizes, change them to ems and move
# them to css. The following will work for 'flat' font tags, but nested font tags
# will cause things to go wonky. Need to revert to the parent font tag's size
# when a closing tag is encountered.
if 'size' in tattr.keys():
sz = tattr.pop('size').strip().lower()
try:
float(sz)
except ValueError:
if sz in size_map.keys():
sz = size_map[sz]
else:
if sz.startswith('-') or sz.startswith('+'):
sz = self.current_font_size + float(sz)
if sz > 7:
sz = 7
elif sz < 1:
sz = 1
sz = str(int(sz))
styles.append('font-size: %s' % size_to_em_map[sz])
self.current_font_size = int(sz)
elif tname == 'img':
for attr in ('width', 'height'):
if attr in tattr:
val = tattr[attr]
if val.lower().endswith('em'):
try:
nval = float(val[:-2])
nval *= 16 * (168.451/72) # Assume this was set using the Kindle profile
tattr[attr] = "%dpx"%int(nval)
except:
del tattr[attr]
elif val.lower().endswith('%'):
del tattr[attr]
# convert the anchor tags
if 'filepos-id' in tattr:
tattr['id'] = tattr.pop('filepos-id')
if 'name' in tattr and tattr['name'] != tattr['id']:
tattr['name'] = tattr['id']
if 'filepos' in tattr:
filepos = tattr.pop('filepos')
try:
tattr['href'] = "#filepos%d" % int(filepos)
except ValueError:
pass
if styles:
ncls = None
rule = '; '.join(styles)
for sel, srule in self.tag_css_rules.items():
if srule == rule:
ncls = sel
break
if ncls is None:
self.tag_css_rule_cnt += 1
ncls = 'rule_%d' % self.tag_css_rule_cnt
self.tag_css_rules[ncls] = rule
cls = tattr.get('class', '')
cls = cls + (' ' if cls else '') + ncls
tattr['class'] = cls
# convert updated tag back to string representation
if len(tattr) == 0:
tattr = None
taginfo = (ttype, tname, tattr)
return self.taginfo_tostring(taginfo)
def main(argv=sys.argv):
    """Command-line entry point; only left in for testing outside of the plugin.

    Expects exactly one argument, the input file.  Writes the converted
    XHTML next to it as ``<name>_converted.html`` and the stylesheet to the
    name reported by the converter.  Returns 0 on success and 1 on a wrong
    argument count or when the conversion raises ValueError.
    """
    if len(argv) != 2:
        return 1
    infile = argv[1]
    try:
        print('Converting Mobi Markup Language to XHTML')
        mlc = MobiMLConverter(infile)
        print('Processing ...')
        htmlstr, css, cssname = mlc.processml()
        outname = infile.rsplit('.', 1)[0] + '_converted.html'
        for path, payload in ((outname, htmlstr), (cssname, css)):
            # the files are opened in binary mode, so text must be encoded
            if not isinstance(payload, bytes):
                payload = payload.encode('utf-8')
            # 'with' closes the handle; the old file(...).write(...) leaked it
            with open(path, 'wb') as out:
                out.write(payload)
        print('Completed')
        print('XHTML version of book can be found at: ' + outname)
    except ValueError as e:
        print("Error: %s" % e)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())

93
lector/KindleUnpack/unipath.py Executable file
View File

@@ -0,0 +1,93 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# Copyright (c) 2014 Kevin B. Hendricks, John Schember, and Doug Massay
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of
# conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list
# of conditions and the following disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
# WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, text_type, binary_type
import sys
import os
# utility routines to convert all paths to be full unicode
# Under Python 2, if a bytestring, try to convert it to unicode using sys.getfilesystemencoding
# Under Python 3, if bytes, try to convert it to unicode the same way, by decoding with sys.getfilesystemencoding()
# Mac OS X and Windows will happily support full unicode paths
# Linux can support full unicode paths but allows arbitrary byte paths which may be inconsistent with unicode
fsencoding = sys.getfilesystemencoding()


def pathof(s, enc=fsencoding):
    """Return the path ``s`` as unicode text where possible.

    None is passed through, text is returned unchanged, and a bytestring is
    decoded with ``enc`` (the filesystem encoding by default).  When the
    bytes cannot be decoded the original bytestring is returned as a best
    effort, as is any value of another type.
    """
    if s is None:
        return None
    if isinstance(s, text_type):
        return s
    if isinstance(s, binary_type):
        try:
            return s.decode(enc)
        except (UnicodeError, LookupError, TypeError):
            # undecodable bytes, unknown codec or unusable codec name:
            # fall through and hand back the raw bytes (a bare "except:"
            # would also have swallowed KeyboardInterrupt/SystemExit)
            pass
    return s
def exists(s):
    """Return whether the path ``s`` exists, after normalising it with pathof()."""
    unicode_path = pathof(s)
    return os.path.exists(unicode_path)
def isfile(s):
    """Return whether the path ``s`` is a regular file, after normalising it with pathof()."""
    unicode_path = pathof(s)
    return os.path.isfile(unicode_path)
def isdir(s):
    """Return whether the path ``s`` is a directory, after normalising it with pathof()."""
    unicode_path = pathof(s)
    return os.path.isdir(unicode_path)
def mkdir(s):
    """Create the directory ``s`` (normalised with pathof()) and return os.mkdir's result."""
    unicode_path = pathof(s)
    return os.mkdir(unicode_path)
def listdir(s):
    """Return the entries of directory ``s``, each passed through pathof()."""
    return [pathof(entry) for entry in os.listdir(pathof(s))]
def getcwd():
    """Return the current working directory (os.getcwdu() under Python 2)."""
    return os.getcwdu() if PY2 else os.getcwd()
def walk(top):
    """Return every file found below ``top`` as a path relative to ``top``.

    Directories themselves are not listed; all paths go through pathof().
    """
    root = pathof(top)
    return [
        relpath(os.path.join(pathof(folder), pathof(entry)), root)
        for folder, _subdirs, entries in os.walk(root)
        for entry in entries
    ]
def relpath(path, start=None):
    """Return ``path`` relative to ``start`` (the current directory when None).

    Both arguments are normalised with pathof().
    """
    if start is None:
        # Let os.path.relpath apply its own default: Python 2 declares
        # start=os.curdir and fails when None is passed explicitly.
        return os.path.relpath(pathof(path))
    return os.path.relpath(pathof(path), pathof(start))
def abspath(path):
    """Return the absolute form of ``path`` after normalising it with pathof()."""
    unicode_path = pathof(path)
    return os.path.abspath(unicode_path)

View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import text_type
from . import unipath
from .unipath import pathof
DUMP = False
""" Set to True to dump all possible information. """
import os
import re
# note: under python3, re requires the pattern to be exactly the same type as the data being searched,
# but a u"" prefix is not allowed on a raw pattern literal, only b""
import zipfile
import binascii
from .mobi_utils import mangle_fonts
class unpackException(Exception):
    """Exception type defined for the unpacking code; adds nothing to Exception."""
class ZipInfo(zipfile.ZipInfo):
    """zipfile.ZipInfo that also accepts ``compress_type`` as a keyword.

    When the keyword is omitted the compression type chosen by the base
    class is kept; previously that case raised UnboundLocalError.
    """

    def __init__(self, *args, **kwargs):
        compress_type = kwargs.pop('compress_type', None)
        super(ZipInfo, self).__init__(*args, **kwargs)
        if compress_type is not None:
            self.compress_type = compress_type
class fileNames:
    """Creates and remembers the output directory layout for an unpacked book.

    The constructor makes sure ``outdir``, ``outdir/mobi7``,
    ``outdir/mobi7/Images`` and ``outdir/HDImages`` exist.  makeK8Struct()
    adds the ``mobi8`` tree and makeEPUB() assembles an epub from it.
    """

    def __init__(self, infile, outdir):
        self.infile = infile
        self.outdir = outdir
        self._ensure_dir(self.outdir)
        self.mobi7dir = os.path.join(self.outdir, 'mobi7')
        self._ensure_dir(self.mobi7dir)
        self.imgdir = os.path.join(self.mobi7dir, 'Images')
        self._ensure_dir(self.imgdir)
        self.hdimgdir = os.path.join(self.outdir, 'HDImages')
        self._ensure_dir(self.hdimgdir)
        self.outbase = os.path.join(self.outdir, os.path.splitext(os.path.split(infile)[1])[0])

    def _ensure_dir(self, path):
        """Create the directory ``path`` unless it already exists."""
        if not unipath.exists(path):
            unipath.mkdir(path)

    def getInputFileBasename(self):
        """Return the input file's name without directory and extension."""
        return os.path.splitext(os.path.basename(self.infile))[0]

    def makeK8Struct(self):
        """Create the mobi8 directory tree (META-INF and OEBPS with its subfolders)."""
        self.k8dir = os.path.join(self.outdir, 'mobi8')
        self._ensure_dir(self.k8dir)
        self.k8metainf = os.path.join(self.k8dir, 'META-INF')
        self._ensure_dir(self.k8metainf)
        self.k8oebps = os.path.join(self.k8dir, 'OEBPS')
        self._ensure_dir(self.k8oebps)
        self.k8images = os.path.join(self.k8oebps, 'Images')
        self._ensure_dir(self.k8images)
        self.k8fonts = os.path.join(self.k8oebps, 'Fonts')
        self._ensure_dir(self.k8fonts)
        self.k8styles = os.path.join(self.k8oebps, 'Styles')
        self._ensure_dir(self.k8styles)
        self.k8text = os.path.join(self.k8oebps, 'Text')
        self._ensure_dir(self.k8text)

    # recursive zip creation support routine
    def zipUpDir(self, myzip, tdir, localname):
        """Add everything below ``tdir/localname`` to ``myzip``, deflated.

        Archive names are relative to ``tdir``; sub-directories are handled
        recursively.
        """
        currentdir = tdir
        if localname != "":
            currentdir = os.path.join(currentdir, localname)
        for entry in unipath.listdir(currentdir):
            localfilePath = os.path.join(localname, entry)
            realfilePath = os.path.join(currentdir, entry)
            if unipath.isfile(realfilePath):
                myzip.write(pathof(realfilePath), pathof(localfilePath), zipfile.ZIP_DEFLATED)
            elif unipath.isdir(realfilePath):
                self.zipUpDir(myzip, tdir, localfilePath)

    def makeEPUB(self, usedmap, obfuscate_data, uid):
        """Assemble ``<input basename>.epub`` inside the mobi8 directory.

        ``usedmap`` maps resource names to 'used' for the resources to copy;
        ``obfuscate_data`` holds the font names to obfuscate (falsy for
        none); ``uid`` is the epub uid from which the obfuscation key is
        derived.  Requires makeK8Struct() to have been called.
        """
        bname = os.path.join(self.k8dir, self.getInputFileBasename() + '.epub')

        # Create an encryption key for Adobe font obfuscation
        # based on the epub's uid
        if isinstance(uid, text_type):
            uid = uid.encode('ascii')
        if obfuscate_data:
            key = re.sub(br'[^a-fA-F0-9]', b'', uid)
            key = binascii.unhexlify((key + key)[:32])

        # copy over all images and fonts that are actually used in the ebook
        # and remove the used font files from mobi7 since not supported there
        for name in unipath.listdir(self.imgdir):
            if usedmap.get(name, 'not used') != 'used':
                continue
            filein = os.path.join(self.imgdir, name)
            if name.endswith((".ttf", ".otf", ".failed")):
                fileout = os.path.join(self.k8fonts, name)
            else:
                fileout = os.path.join(self.k8images, name)
            with open(pathof(filein), 'rb') as f:
                data = f.read()
            if obfuscate_data and name in obfuscate_data:
                data = mangle_fonts(key, data)
            # context manager so the output handle is closed deterministically
            with open(pathof(fileout), 'wb') as f:
                f.write(data)
            if name.endswith((".ttf", ".otf")):
                os.remove(pathof(filein))

        # opf file name hard coded to "content.opf"
        container = '<?xml version="1.0" encoding="UTF-8"?>\n'
        container += '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n'
        container += ' <rootfiles>\n'
        container += '<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>'
        container += ' </rootfiles>\n</container>\n'
        fileout = os.path.join(self.k8metainf, 'container.xml')
        with open(pathof(fileout), 'wb') as f:
            f.write(container.encode('utf-8'))

        if obfuscate_data:
            encryption = ('<encryption xmlns="urn:oasis:names:tc:opendocument:xmlns:container" '
                          'xmlns:enc="http://www.w3.org/2001/04/xmlenc#" xmlns:deenc="http://ns.adobe.com/digitaleditions/enc">\n')
            for font in obfuscate_data:
                encryption += ' <enc:EncryptedData>\n'
                encryption += ' <enc:EncryptionMethod Algorithm="http://ns.adobe.com/pdf/enc#RC"/>\n'
                encryption += ' <enc:CipherData>\n'
                encryption += ' <enc:CipherReference URI="OEBPS/Fonts/' + font + '"/>\n'
                encryption += ' </enc:CipherData>\n'
                encryption += ' </enc:EncryptedData>\n'
            encryption += '</encryption>\n'
            fileout = os.path.join(self.k8metainf, 'encryption.xml')
            with open(pathof(fileout), 'wb') as f:
                f.write(encryption.encode('utf-8'))

        # ready to build epub
        self.outzip = zipfile.ZipFile(pathof(bname), 'w')
        try:
            # add the mimetype file uncompressed
            mimetype = b'application/epub+zip'
            fileout = os.path.join(self.k8dir, 'mimetype')
            with open(pathof(fileout), 'wb') as f:
                f.write(mimetype)
            nzinfo = ZipInfo('mimetype', compress_type=zipfile.ZIP_STORED)
            nzinfo.external_attr = 0o600 << 16  # make this a normal file
            self.outzip.writestr(nzinfo, mimetype)
            self.zipUpDir(self.outzip, self.k8dir, 'META-INF')
            self.zipUpDir(self.outzip, self.k8dir, 'OEBPS')
        finally:
            # close the archive even when adding a member fails
            self.outzip.close()

1197
lector/__main__.py Executable file

File diff suppressed because it is too large Load Diff

222
lector/database.py Normal file
View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import pickle
import sqlite3
from PyQt5 import QtCore
class DatabaseInit:
    """Creates ``Lector.db`` with its tables inside ``location_prefix``.

    Nothing is done when the database file already exists.  After creation
    the connection is closed again.
    """

    def __init__(self, location_prefix):
        os.makedirs(location_prefix, exist_ok=True)
        database_path = os.path.join(location_prefix, 'Lector.db')

        if not os.path.exists(database_path):
            self.database = sqlite3.connect(database_path)
            self.create_database()

    def create_database(self):
        """Create the ``books`` and ``directories`` tables, then close the connection."""
        # TODO
        # Add separate columns for:
        # addition mode
        try:
            self.database.execute(
                "CREATE TABLE books "
                "(id INTEGER PRIMARY KEY, Title TEXT, Author TEXT, Year INTEGER, DateAdded BLOB, "
                "Path TEXT, Position BLOB, ISBN TEXT, Tags TEXT, Hash TEXT, LastAccessed BLOB,"
                "Bookmarks BLOB, CoverImage BLOB)")

            # CheckState is the standard QtCore.Qt.Checked / Unchecked
            self.database.execute(
                "CREATE TABLE directories (id INTEGER PRIMARY KEY, Path TEXT, "
                "Name TEXT, Tags TEXT, CheckState INTEGER)")

            self.database.commit()
        finally:
            # release the connection even when table creation fails
            self.database.close()
class DatabaseFunctions:
    """Single-use helper around the ``Lector.db`` SQLite database.

    Every method except vacuum_database() closes the connection when it is
    done, so a new instance is needed for each operation.
    """

    def __init__(self, location_prefix):
        database_path = os.path.join(location_prefix, 'Lector.db')
        self.database = sqlite3.connect(database_path)

    def set_library_paths(self, data_iterable):
        """Replace the ``directories`` table with the rows in ``data_iterable``.

        Each row is indexable as (path, name, tags, check state).
        """
        # TODO
        # INSERT OR REPLACE is not working
        # So this is the old fashion kitchen sink approach
        self.database.execute("DELETE FROM directories")

        sql_command = (
            "INSERT OR REPLACE INTO directories (ID, Path, Name, Tags, CheckState) "
            "VALUES ((SELECT ID FROM directories WHERE Path = ?), ?, ?, ?, ?)")
        for i in data_iterable:
            path = i[0]
            name = i[1]
            tags = i[2]
            is_checked = i[3]
            self.database.execute(sql_command, [path, path, name, tags, is_checked])

        self.database.commit()
        self.database.close()

    def add_to_database(self, data):
        """Insert one ``books`` row per entry of ``data``.

        ``data`` is a dictionary keyed by book hash; each value is a dict
        with the keys 'title', 'author', 'year', 'path', 'cover_image',
        'isbn' and 'tags'.  The current date/time is stored pickled in
        DateAdded for every row.
        """
        current_datetime = QtCore.QDateTime().currentDateTime()
        current_datetime_bin = sqlite3.Binary(pickle.dumps(current_datetime))

        sql_command_add = (
            "INSERT OR REPLACE INTO "
            "books (Title, Author, Year, DateAdded, Path, ISBN, Tags, Hash, CoverImage) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)")

        for book_hash, book in data.items():
            title = book['title']
            author = book['author']
            year = book['year']
            path = book['path']
            cover = book['cover_image']
            isbn = book['isbn']
            tags = book['tags']
            if tags:
                # Is a list. Needs to be a string
                tags = ', '.join([str(j) for j in tags])
            else:
                # Is still a list. Needs to be None.
                tags = None

            cover_insert = None
            if cover:
                cover_insert = sqlite3.Binary(cover)

            self.database.execute(
                sql_command_add,
                [title, author, year, current_datetime_bin,
                 path, isbn, tags, book_hash, cover_insert])

        self.database.commit()
        self.database.close()

    def fetch_data(self, columns, table, selection_criteria, equivalence, fetch_one=False):
        """Select ``columns`` from ``table`` and return the matching rows.

        columns is a tuple that will be passed as a comma separated list.
        table is a string that will be used as is.
        selection_criteria is a dictionary mapping column names to the value
        to look for; several entries are combined with OR.
        equivalence is 'EQUALS' (exact match against the value's string
        form) or 'LIKE' (substring match).  Any other value applies no
        filter at all.

        Example:
        Name and AltName are expected to be the same
        sel_dict = {
            'Name': 'sav',
            'AltName': 'sav'
        }
        data = DatabaseFunctions(prefix).fetch_data(('Name',), 'books', sel_dict, 'EQUALS')

        Returns a list of tuples, the first column of the first row when
        fetch_one is set, or None when nothing matched or SQLite reported
        an error.  Column and table names are inserted into the statement
        as given and must come from trusted code.
        """
        try:
            column_list = ','.join(columns)
            sql_command_fetch = f"SELECT {column_list} FROM {table}"

            # Values are bound as parameters instead of being pasted into
            # the statement, so quotes in e.g. a title no longer break it.
            clauses = []
            parameters = []
            if selection_criteria:
                if equivalence == 'EQUALS':
                    for column, value in selection_criteria.items():
                        clauses.append(f"{column} = ?")
                        parameters.append(str(value))
                elif equivalence == 'LIKE':
                    for column, value in selection_criteria.items():
                        clauses.append(f"{column} LIKE ?")
                        parameters.append('%' + value + '%')
            if clauses:
                sql_command_fetch += " WHERE " + " OR ".join(clauses)

            # book data is returned as a list of tuples
            data = self.database.execute(sql_command_fetch, parameters).fetchall()
        except (KeyError, sqlite3.OperationalError):
            print('SQLite is in wretched rebellion @ data fetching handling')
            return None
        finally:
            self.database.close()

        if not data:
            return None
        # Because this is the result of a fetchall(), we need an
        # ugly hack (tm) to get correct results
        if fetch_one:
            return data[0][0]
        return data

    def fetch_covers_only(self, hash_list):
        """Return (Hash, CoverImage) rows for every hash in ``hash_list``."""
        parameter_marks = ','.join(['?' for i in hash_list])
        sql_command = f"SELECT Hash, CoverImage from books WHERE Hash IN ({parameter_marks})"
        data = self.database.execute(sql_command, hash_list).fetchall()
        self.database.close()
        return data

    def modify_metadata(self, metadata_dict, book_hash):
        """Update the columns named in ``metadata_dict`` for the book with ``book_hash``.

        Position, LastAccessed and Bookmarks values are pickled; CoverImage
        is stored as a blob; everything else is stored as given.
        """
        def generate_binary(column, data):
            if column in ('Position', 'LastAccessed', 'Bookmarks'):
                return sqlite3.Binary(pickle.dumps(data))
            elif column == 'CoverImage':
                return sqlite3.Binary(data)
            else:
                return data

        sql_command = 'UPDATE books SET '
        update_data = []
        for column, value in metadata_dict.items():
            sql_command += column + ' = ?, '
            update_data.append(generate_binary(column, value))

        sql_command = sql_command[:-2]  # drop the trailing ', '
        sql_command += ' WHERE Hash = ?'
        update_data.append(book_hash)

        try:
            self.database.execute(
                sql_command, update_data)
        except sqlite3.OperationalError:
            print('SQLite is in wretched rebellion @ metadata handling')

        self.database.commit()
        self.database.close()

    def delete_from_database(self, column_name, target_data):
        """Delete books whose ``column_name`` equals any value in ``target_data``.

        target_data is an iterable.  A column_name of '*' empties the table.
        """
        if column_name == '*':
            self.database.execute('DELETE FROM books')
        else:
            sql_command = f'DELETE FROM books WHERE {column_name} = ?'
            for i in target_data:
                self.database.execute(sql_command, (i,))

        self.database.commit()
        self.database.close()

    def vacuum_database(self):
        """Run VACUUM on the database and return True; the connection stays open."""
        self.database.execute("VACUUM")
        return True

164
lector/definitionsdialog.py Normal file
View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import requests
from PyQt5 import QtWidgets, QtCore, QtGui, QtMultimedia
from resources import definitions
class DefinitionsUI(QtWidgets.QDialog, definitions.Ui_Dialog):
    """Frameless popup that looks a word up online and shows its definitions.

    ``parent`` must provide a ``settings`` mapping (keys
    'dictionary_language' and 'dialog_background' are read here) and a
    ``get_color()`` method.
    """

    # Seconds to wait for the dictionary service before giving up
    REQUEST_TIMEOUT = 10

    def __init__(self, parent):
        super(DefinitionsUI, self).__init__()
        self.setupUi(self)

        self.parent = parent

        self.setWindowFlags(
            QtCore.Qt.Popup |
            QtCore.Qt.FramelessWindowHint)

        # Clip the dialog to a rounded rectangle
        radius = 15
        path = QtGui.QPainterPath()
        path.addRoundedRect(QtCore.QRectF(self.rect()), radius, radius)
        mask = QtGui.QRegion(path.toFillPolygon().toPolygon())
        self.setMask(mask)

        # NOTE(review): API credentials are hard-coded in source; consider
        # loading them from configuration instead.
        self.app_id = 'bb7a91f9'
        self.app_key = 'fefacdf6775c347b52e9efa2efe642ef'

        self.root_url = 'https://od-api.oxforddictionaries.com:443/api/v1/inflections/'
        self.define_url = 'https://od-api.oxforddictionaries.com:443/api/v1/entries/'

        self.pronunciation_mp3 = None

        self.okButton.clicked.connect(self.hide)
        self.pronounceButton.clicked.connect(self.play_pronunciation)

    def api_call(self, url, word):
        """GET ``url`` + language + '/' + lower-cased ``word``.

        Returns the decoded JSON body, or None when the request fails or
        the status code is not 200.
        """
        language = self.parent.settings['dictionary_language']
        url = url + language + '/' + word.lower()

        try:
            # A timeout keeps an unreachable service from blocking forever
            r = requests.get(
                url,
                headers={'app_id': self.app_id, 'app_key': self.app_key},
                timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            print('A firm nope on the dictionary finding thing')
            return None

        if r.status_code != 200:
            print('A firm nope on the dictionary finding thing')
            return None

        return r.json()

    def find_definition(self, word):
        """Look ``word`` up and display its definitions grouped by category."""
        word_root_json = self.api_call(self.root_url, word)
        if not word_root_json:
            self.set_text(word, None, None, True)
            return

        try:
            word_root = word_root_json['results'][0]['lexicalEntries'][0]['inflectionOf'][0]['id']
        except (KeyError, IndexError):
            # Response without a usable root: treat it as "nothing found"
            self.set_text(word, None, None, True)
            return

        self.pronounceButton.setToolTip(f'Pronounce "{word_root}"')

        definition_json = self.api_call(self.define_url, word_root)
        if not definition_json:
            return

        definitions = {}
        for i in definition_json['results'][0]['lexicalEntries']:
            category = i['lexicalCategory']

            try:
                self.pronunciation_mp3 = i['pronunciations'][0]['audioFile']
            except (KeyError, IndexError):
                self.pronounceButton.setEnabled(False)

            this_sense = i['entries'][0]['senses']
            for j in this_sense:
                try:
                    this_definition = j['definitions'][0].capitalize()
                except (KeyError, IndexError):
                    # The API also reports crossReferenceMarkers here.
                    # Skip the sense: falling through used to re-add the
                    # previous definition (or fail on the very first sense).
                    continue

                definitions.setdefault(category, set()).add(this_definition)

        self.set_text(word, word_root, definitions)

    def set_text(self, word, word_root, definitions, nothing_found=False):
        """Render the lookup result as HTML in the view and show the dialog."""
        html_string = ''

        # Word heading
        html_string += f'<h2><em><strong>{word}</strong></em></h2>\n'

        if nothing_found:
            language = self.parent.settings['dictionary_language'].upper()
            html_string += f'<p><em>No definitions found in {language}</em></p>\n'
        else:
            # Word root
            html_string += f'<p><em>Word root: </em>{word_root}</p>\n'

            # Definitions per category as an ordered list
            for category, category_definitions in definitions.items():
                html_string += f'<p><strong>{category}</strong>:</p>\n<ol>\n'
                for j in category_definitions:
                    html_string += f'<li>{j}</li>\n'
                html_string += '</ol>\n'

        self.definitionView.setHtml(html_string)
        self.show()

    def color_background(self, set_initial=False):
        """Apply the background colour to the dialog and the text view.

        With set_initial the colour comes from the parent's settings;
        otherwise the current position is remembered, the colour is taken
        from ``parent.get_color()`` and the dialog is shown again.
        """
        if set_initial:
            background = self.parent.settings['dialog_background']
        else:
            self.previous_position = self.pos()
            background = self.parent.get_color()

        self.setStyleSheet(
            "QDialog {{background-color: {0}}}".format(background.name()))
        self.definitionView.setStyleSheet(
            "QTextBrowser {{background-color: {0}}}".format(background.name()))

        if not set_initial:
            self.show()

    def play_pronunciation(self):
        """Play the pronunciation audio of the last looked-up word, if any."""
        if not self.pronunciation_mp3:
            return

        media_content = QtMultimedia.QMediaContent(
            QtCore.QUrl(self.pronunciation_mp3))

        player = QtMultimedia.QMediaPlayer(self)
        player.setMedia(media_content)
        player.play()

    def showEvent(self, event):
        """Colour the dialog and centre it on the screen when it is shown."""
        self.color_background(True)

        size = self.size()
        desktop_size = QtWidgets.QDesktopWidget().screenGeometry()
        # move() takes integers; true division produced floats here
        top = (desktop_size.height() - size.height()) // 2
        left = (desktop_size.width() - size.width()) // 2
        self.move(left, top)

109
lector/delegates.py Normal file
View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from PyQt5 import QtWidgets, QtGui, QtCore
from resources import pie_chart
class LibraryDelegate(QtWidgets.QStyledItemDelegate):
    """Item delegate for the library view.

    On top of the default painting it optionally draws a shadow behind the
    cover and overlays a small pie-chart icon built by pie_chart.pixmapper.
    """

    def __init__(self, temp_dir, parent=None):
        super(LibraryDelegate, self).__init__(parent)
        self.temp_dir = temp_dir
        self.parent = parent

    def paint(self, painter, option, index):
        # This is a hint for the future
        # Color icon slightly red
        # if option.state & QtWidgets.QStyle.State_Selected:
        #     painter.fillRect(option.rect, QtGui.QColor().fromRgb(255, 0, 0, 20))

        option = option.__class__(option)
        file_exists = index.data(QtCore.Qt.UserRole + 5)
        metadata = index.data(QtCore.Qt.UserRole + 3)

        position = metadata['position']
        if position:
            is_read = position['is_read']

        # The shadow pixmap currently is set to 420 x 600
        # Only draw the cover shadow in case the setting is enabled
        if self.parent.settings['cover_shadows']:
            shadow_pixmap = QtGui.QPixmap()
            shadow_pixmap.load(':/images/gray-shadow.png')
            shadow_pixmap = shadow_pixmap.scaled(160, 230, QtCore.Qt.IgnoreAspectRatio)
            shadow_origin = option.rect.topLeft()
            shadow_x = shadow_origin.x() + 10
            shadow_y = option.rect.topLeft().y() - 5

            painter.setOpacity(.7)
            painter.drawPixmap(shadow_x, shadow_y, shadow_pixmap)
            painter.setOpacity(1)

        base_paint = QtWidgets.QStyledItemDelegate.paint

        if not file_exists:
            # Dimmed item with the icon pixmapper returns for -1
            painter.setOpacity(.7)
            base_paint(self, painter, option, index)
            read_icon = pie_chart.pixmapper(-1, None, None, 36)
            x_draw = option.rect.bottomRight().x() - 30
            y_draw = option.rect.bottomRight().y() - 35
            painter.drawPixmap(x_draw, y_draw, read_icon)
            painter.setOpacity(1)
            return

        base_paint(self, painter, option, index)
        if not position:
            return

        if is_read:
            current_chapter = total_chapters = 100
        else:
            try:
                current_chapter = position['current_chapter']
                total_chapters = position['total_chapters']
            except KeyError:
                return

        read_icon = pie_chart.pixmapper(
            current_chapter, total_chapters, self.temp_dir, 36)

        x_draw = option.rect.bottomRight().x() - 30
        y_draw = option.rect.bottomRight().y() - 35
        # No icon is drawn while the book is still on its first chapter
        if current_chapter != 1:
            painter.drawPixmap(x_draw, y_draw, read_icon)
class BookmarkDelegate(QtWidgets.QStyledItemDelegate):
    """Item delegate for bookmarks: default painting plus the chapter name."""

    def __init__(self, parent=None):
        super(BookmarkDelegate, self).__init__(parent)
        self.parent = parent

    def sizeHint(self, *args):
        # Fixed height of 50, width 20 less than the parent widget
        return QtCore.QSize(self.parent.width() - 20, 50)

    def paint(self, painter, option, index):
        # TODO
        # Alignment of the painted item

        option = option.__class__(option)

        chapter_index = index.data(QtCore.Qt.UserRole)
        toc_box = self.parent.window().bookToolBar.tocBox
        chapter_name = toc_box.itemText(chapter_index - 1)
        # Names longer than 25 characters are cut and given an ellipsis
        if len(chapter_name) > 25:
            chapter_name = chapter_name[:25] + '...'

        QtWidgets.QStyledItemDelegate.paint(self, painter, option, index)

        text_flags = QtCore.Qt.AlignBottom | QtCore.Qt.AlignRight | QtCore.Qt.TextWordWrap
        painter.drawText(option.rect, text_flags, ' ' + chapter_name)

1
lector/ePub/__init__.py Normal file
View File

@@ -0,0 +1 @@

276
lector/ePub/read_epub.py Normal file
View File

@@ -0,0 +1,276 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import zipfile
from urllib.parse import unquote
from bs4 import BeautifulSoup
class EPUB:
def __init__(self, filename):
self.filename = filename
self.zip_file = None
self.book = {}
def read_epub(self):
# This is the function that should error out in
# case the module cannot process the file
self.load_zip()
contents_path = self.get_file_path(
None, True)
if not contents_path:
return False # No opf was found so processing cannot continue
self.generate_book_metadata(contents_path)
self.parse_toc()
return True
def load_zip(self):
try:
self.zip_file = zipfile.ZipFile(
self.filename, mode='r', allowZip64=True)
except (KeyError, AttributeError, zipfile.BadZipFile):
print('Cannot parse ' + self.filename)
return
def parse_xml(self, filename, parser):
try:
this_xml = self.zip_file.read(filename).decode()
except KeyError:
print(str(filename) + ' not found in zip')
return
root = BeautifulSoup(this_xml, parser)
return root
def get_file_path(self, filename, is_content_file=False):
# Use this to get the location of the content.opf file
# And maybe some other file that has a more well formatted
# We're going to all this trouble because there really is
# no going forward without a toc
if is_content_file:
container_location = self.get_file_path('container.xml')
xml = self.parse_xml(container_location, 'xml')
if xml:
root_item = xml.find('rootfile')
return root_item.get('full-path')
else:
possible_filenames = ('content.opf', 'package.opf')
for i in possible_filenames:
presumptive_location = self.get_file_path(i)
if presumptive_location:
return presumptive_location
for i in self.zip_file.filelist:
if os.path.basename(i.filename) == os.path.basename(filename):
return i.filename
return None
def read_from_zip(self, filename):
filename = unquote(filename)
try:
file_data = self.zip_file.read(filename)
return file_data
except KeyError:
file_path_actual = self.get_file_path(filename)
if file_path_actual:
return self.zip_file.read(file_path_actual)
else:
print('ePub module can\'t find ' + filename)
#______________________________________________________
def generate_book_metadata(self, contents_path):
self.book['title'] = 'Unknown'
self.book['author'] = 'Unknown'
self.book['isbn'] = None
self.book['tags'] = None
self.book['cover'] = None
self.book['toc_file'] = 'toc.ncx' # Overwritten if another one exists
# Parse XML
xml = self.parse_xml(contents_path, 'xml')
# Parse metadata
item_dict = {
'title': 'title',
'author': 'creator',
'year': 'date'}
for i in item_dict.items():
item = xml.find(i[1])
if item:
self.book[i[0]] = item.text
try:
self.book['year'] = int(self.book['year'][:4])
except (TypeError, KeyError, IndexError):
self.book['year'] = 9999
# Get identifier
identifier_items = xml.find_all('identifier')
for i in identifier_items:
scheme = i.get('scheme')
try:
if scheme.lower() == 'isbn':
self.book['isbn'] = i.text
except AttributeError:
self.book['isbn'] = None
# Tags
tag_items = xml.find_all('subject')
tag_list = [i.text for i in tag_items]
self.book['tags'] = tag_list
# Get items
self.book['content_dict'] = {}
all_items = xml.find_all('item')
for i in all_items:
media_type = i.get('media-type')
this_id = i.get('id')
if media_type == 'application/xhtml+xml' or media_type == 'text/html':
self.book['content_dict'][this_id] = i.get('href')
if media_type == 'application/x-dtbncx+xml':
self.book['toc_file'] = i.get('href')
# Cover image
if 'cover' in this_id and media_type.split('/')[0] == 'image':
cover_href = i.get('href')
try:
self.book['cover'] = self.zip_file.read(cover_href)
except KeyError:
# The cover cannot be found according to the
# path specified in the content reference
self.book['cover'] = self.zip_file.read(
self.get_file_path(cover_href))
if not self.book['cover']:
# If no cover is located the conventioanl way,
# we go looking for the largest image in the book
biggest_image_size = 0
biggest_image = None
for j in self.zip_file.filelist:
if os.path.splitext(j.filename)[1] in ['.jpg', '.jpeg', '.png', '.gif']:
if j.file_size > biggest_image_size:
biggest_image = j.filename
biggest_image_size = j.file_size
if biggest_image:
self.book['cover'] = self.read_from_zip(biggest_image)
else:
print('No cover found for: ' + self.filename)
# Parse spine and arrange chapter paths acquired from the opf
# according to the order IN THE SPINE
spine_items = xml.find_all('itemref')
spine_order = []
for i in spine_items:
spine_order.append(i.get('idref'))
self.book['chapters_in_order'] = []
for i in spine_order:
chapter_path = self.book['content_dict'][i]
self.book['chapters_in_order'].append(chapter_path)
def parse_toc(self):
    """Collect chapter titles from the book's table-of-contents file.

    Fills ``self.book['navpoint_dict']`` with a mapping from each
    navPoint's source path (URL-unquoted, ``#fragment`` removed) to the
    text of its label. This has no bearing on the actual reading order;
    it is only used to look up chapter names.
    """
    self.book['navpoint_dict'] = {}

    toc_file = self.book['toc_file']
    if toc_file:
        toc_file = self.get_file_path(toc_file)

    xml = self.parse_xml(toc_file, 'xml')
    if not xml:
        return

    for navpoint in xml.find_all('navPoint'):
        label = navpoint.find('text')
        content = navpoint.find('content')
        # A navPoint without a label or a target cannot be mapped to a
        # chapter; skip it instead of failing the whole table of contents
        if label is None or content is None:
            continue

        source = content.get('src')
        if not source:
            continue

        chapter_source = unquote(source.split('#')[0])
        self.book['navpoint_dict'][chapter_source] = label.text
def parse_chapters(self, temp_dir=None, split_large_xml=False):
    """Build ``self.book['book_list']`` as a list of ``(title, markup)``.

    Every path in ``self.book['chapters_in_order']`` is read from the
    archive and decoded.

    * ``split_large_xml`` false: one entry per file, titled from
      ``self.book['navpoint_dict']`` or, failing that, with a number.
      Afterwards the first entry is replaced by a generated page that
      shows the cover image, which is written to ``temp_dir``.
    * ``split_large_xml`` true: each file is cut at its ``pagebreak``
      elements and every piece becomes a numbered entry.

    ``temp_dir`` falls back to the system temporary directory when it is
    not supplied.
    """
    no_title_chapter = 0
    book_list = self.book['book_list'] = []
    # A missing 'navpoint_dict' just means that no chapter has a title
    chapter_titles = self.book.get('navpoint_dict', {})

    def next_element(elem):
        # Advance to the next sibling that exposes a ``name`` attribute;
        # falls through (returning None) once the siblings run out
        while elem is not None:
            elem = elem.next_sibling
            if hasattr(elem, 'name'):
                return elem

    for chapter_path in self.book['chapters_in_order']:
        chapter_data = self.read_from_zip(chapter_path).decode()

        if not split_large_xml:
            try:
                title = chapter_titles[chapter_path]
            except KeyError:
                title = str(no_title_chapter)
            book_list.append((title, chapter_data))
            # NOTE(review): the counter advances for every chapter, so a
            # fallback title is the chapter's position in the book -
            # confirm this is the intended numbering
            no_title_chapter += 1
            continue

        # https://stackoverflow.com/questions/14444732/how-to-split-a-html-page-to-multiple-pages-using-python-and-beautiful-soup
        markup = BeautifulSoup(chapter_data, 'xml')
        for pbreak in markup.find_all('pagebreak'):
            chapter = [str(pbreak)]
            elem = next_element(pbreak)
            while elem and elem.name != 'pagebreak':
                chapter.append(str(elem))
                elem = next_element(elem)

            book_list.append((str(no_title_chapter), '\n'.join(chapter)))
            no_title_chapter += 1

    # The cover page replaces the first chapter. This is done once, after
    # the loop, rather than rewriting the cover file for every chapter
    if split_large_xml or not book_list:
        return

    cover = self.book.get('cover')
    if not cover:
        # Nothing to show; keep the first chapter as it is
        return

    if temp_dir is None:
        import tempfile
        temp_dir = tempfile.gettempdir()

    cover_path = os.path.join(temp_dir, os.path.basename(self.filename)) + '- cover'
    with open(cover_path, 'wb') as cover_temp:
        cover_temp.write(cover)

    book_list[0] = (
        'Cover', f'<center><img src="{cover_path}" alt="Cover"></center>')
def main():
    """Parse the EPUB named by the first command line argument.

    Exits with an error message when no path is given or when the book
    cannot be read.
    """
    import tempfile

    if len(sys.argv) < 2:
        raise SystemExit('An EPUB file path is required as the first argument')

    book = EPUB(sys.argv[1])
    # NOTE(review): read_epub() is treated as returning a falsy value on
    # failure, the same way lector/parsers/epub.py uses it - confirm
    if not book.read_epub():
        raise SystemExit('Cannot process: ' + sys.argv[1])

    # parse_chapters writes the cover image into this directory
    book.parse_chapters(temp_dir=tempfile.gettempdir())
if __name__ == '__main__':
main()

296
lector/library.py Normal file
View File

@@ -0,0 +1,296 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import pickle
import pathlib
from PyQt5 import QtGui, QtCore
import database
from models import TableProxyModel, ItemProxyModel
class Library:
    """Owns the library's item model and the proxy models shown in the views.

    ``parent`` is the main window object. The attributes read from it
    here are ``database_path``, ``settings``, ``listView``, ``tableView``,
    ``temp_dir``, ``libraryToolBar``, ``statusMessage`` and
    ``active_library_filters``; the methods called on it are
    ``load_all_covers`` and ``start_culling_timer``.
    """

    def __init__(self, parent):
        self.parent = parent
        self.view_model = None
        self.item_proxy_model = None
        self.table_proxy_model = None

    @staticmethod
    def _build_tooltip(title, author, year):
        """Return the tooltip text for a book.

        Missing (None) titles and authors are shown as 'Unknown'; comic
        archive parsers, for instance, have no author to report.
        """
        return '{0}\nAuthor: {1}\nYear: {2}'.format(
            'Unknown' if title is None else title,
            'Unknown' if author is None else author,
            year)

    @staticmethod
    def _position_percentage(position):
        """Return the read percentage for an unpickled position dict.

        Returns None when the dictionary lacks the required keys or
        reports zero total chapters.
        """
        try:
            if position['is_read']:
                return 100
            return position['current_chapter'] * 100 / position['total_chapters']
        except (KeyError, ZeroDivisionError):
            return None

    @staticmethod
    def _directory_tags(book_path, library_directories):
        """Return ``(directory_name, directory_tags)`` for a book path.

        ``library_directories`` maps a library directory path to a
        ``(name, tags)`` tuple. The first directory that is, or contains,
        the book's folder wins. A directory without a name is named after
        the book's folder. Books outside every library directory are
        reported as ``('manually added', None)``.
        """
        path = os.path.dirname(book_path)
        path_ref = pathlib.Path(path)

        for directory, (name, tags) in library_directories.items():
            if directory != path and pathlib.Path(directory) not in path_ref.parents:
                continue

            if name:
                name = name.lower()
            else:
                name = os.path.basename(path).lower()

            if tags:
                tags = tags.lower()

            return name, tags

        return 'manually added', None

    def generate_model(self, mode, parsed_books=None, is_database_ready=True):
        """Create ('build') or extend ('addition') the item model.

        * ``'build'`` creates a new model from the books table of the
          database.
        * ``'addition'`` appends ``parsed_books`` (a mapping of file hash
          to metadata dict) to the existing model.

        Any other mode does nothing. ``is_database_ready`` must be false
        when a background thread sends books here, so that covers are not
        loaded yet.
        """
        if mode == 'build':
            self.view_model = QtGui.QStandardItemModel()
            self.view_model.setColumnCount(10)

            books = database.DatabaseFunctions(
                self.parent.database_path).fetch_data(
                    ('Title', 'Author', 'Year', 'DateAdded', 'Path',
                     'Position', 'ISBN', 'Tags', 'Hash', 'LastAccessed'),
                    'books',
                    {'Title': ''},
                    'LIKE')

            if not books:
                print('Database returned nothing')
                return

        elif mode == 'addition':
            # Assumes self.view_model already exists and may be extended
            # Because any additional books have already been added to the
            # database using background threads
            books = []
            current_qdatetime = QtCore.QDateTime().currentDateTime()
            for book_hash, book in (parsed_books or {}).items():
                _tags = book['tags']
                if _tags:
                    _tags = ', '.join([j for j in _tags if j])

                books.append([
                    book['title'], book['author'], book['year'], current_qdatetime,
                    book['path'], None, book['isbn'], _tags, book_hash, None])

        else:
            return

        # No covers are set at this time
        # That is to be achieved by way of the culling function
        # The placeholder is identical for every book, so build it once
        blank_pixmap = QtGui.QPixmap()
        blank_pixmap.load(':/images/blank.png')
        blank_pixmap = blank_pixmap.scaled(
            420, 600, QtCore.Qt.IgnoreAspectRatio)
        blank_icon = QtGui.QIcon(blank_pixmap)

        for i in books:
            # The database query returns (or the extension data is)
            # an iterable with the following indices:
            title = i[0]
            author = i[1]
            year = i[2]
            path = i[4]
            isbn = i[6]
            book_hash = i[8]
            last_accessed = i[9]

            tags = i[7]
            if isinstance(tags, list):  # When files are added for the first time
                if tags:
                    tags = ', '.join(str(this_tag) for this_tag in tags)
                else:
                    tags = None

            # NOTE(review): pickle.loads is only safe as long as the
            # database contents are trusted
            try:
                date_added = pickle.loads(i[3])
            except TypeError:  # Newly added books carry a QDateTime, not bytes
                date_added = i[3]

            position_perc = None
            position = i[5]
            if position:
                position = pickle.loads(position)
                position_perc = self._position_percentage(position)

            file_exists = os.path.exists(path)

            all_metadata = {
                'title': title,
                'author': author,
                'year': year,
                'date_added': date_added,
                'path': path,
                'position': position,
                'isbn': isbn,
                'tags': tags,
                'hash': book_hash,
                'last_accessed': last_accessed,
                'file_exists': file_exists}

            # Additional data can be set using an incrementing
            # QtCore.Qt.UserRole
            # QtCore.Qt.DisplayRole is the same as item.setText()
            # The model is a single row and has no columns
            item = QtGui.QStandardItem()
            item.setToolTip(self._build_tooltip(title, author, year))

            # Just keep the following order. It's way too much trouble otherwise
            item.setData(title, QtCore.Qt.UserRole)
            item.setData(author, QtCore.Qt.UserRole + 1)
            item.setData(year, QtCore.Qt.UserRole + 2)
            item.setData(all_metadata, QtCore.Qt.UserRole + 3)
            item.setData(tags, QtCore.Qt.UserRole + 4)
            item.setData(file_exists, QtCore.Qt.UserRole + 5)
            item.setData(book_hash, QtCore.Qt.UserRole + 6)  # File hash
            item.setData(position_perc, QtCore.Qt.UserRole + 7)
            item.setData(False, QtCore.Qt.UserRole + 8)  # Is the cover being displayed?
            item.setData(date_added, QtCore.Qt.UserRole + 9)
            item.setData(last_accessed, QtCore.Qt.UserRole + 12)
            item.setIcon(blank_icon)

            self.view_model.appendRow(item)

        # The is_database_ready boolean is required when a new thread sends
        # books here for model generation.
        if not self.parent.settings['perform_culling'] and is_database_ready:
            self.parent.load_all_covers()

    def generate_proxymodels(self):
        """Create both proxy models and attach them to the parent's views."""
        self.item_proxy_model = ItemProxyModel()
        self.item_proxy_model.setSourceModel(self.view_model)
        self.item_proxy_model.setSortCaseSensitivity(QtCore.Qt.CaseInsensitive)
        s = QtCore.QSize(160, 250)  # Set icon sizing here
        self.parent.listView.setIconSize(s)
        self.parent.listView.setModel(self.item_proxy_model)

        self.table_proxy_model = TableProxyModel(self.parent.temp_dir.path())
        self.table_proxy_model.setSourceModel(self.view_model)
        self.table_proxy_model.setSortCaseSensitivity(QtCore.Qt.CaseInsensitive)
        self.parent.tableView.setModel(self.table_proxy_model)

        self.update_proxymodels()

    def update_proxymodels(self):
        """Re-apply the search text, library filters and sorting."""
        toolbar = self.parent.libraryToolBar
        search_text = toolbar.searchBar.text()
        sorting_position = toolbar.sortingBox.currentIndex()

        # Table proxy model
        self.table_proxy_model.invalidateFilter()
        self.table_proxy_model.setFilterParams(
            search_text,
            self.parent.active_library_filters,
            0)  # This doesn't need to know the sorting box position
        self.table_proxy_model.setFilterFixedString(search_text)
        # ^^^ This isn't needed, but it forces a model update every time the
        # text in the line edit changes. So I guess it is needed.

        # Item proxy model
        self.item_proxy_model.invalidateFilter()
        self.item_proxy_model.setFilterParams(
            search_text,
            self.parent.active_library_filters,
            sorting_position)
        self.item_proxy_model.setFilterFixedString(search_text)

        self.parent.statusMessage.setText(
            str(self.item_proxy_model.rowCount()) + ' books')

        # TODO
        # Allow sorting by type

        # Index of the sorting drop down corresponding to the
        # UserRole of the item model
        # This keeps from having to rearrange all the UserRoles in the
        # existing model
        sort_roles = {
            0: 0,
            1: 1,
            2: 2,
            3: 9,
            4: 12}

        # Sorting according to roles and the drop down in the library toolbar
        # An index without an entry (such as -1) falls back to the first role
        self.item_proxy_model.setSortRole(
            QtCore.Qt.UserRole + sort_roles.get(sorting_position, 0))

        # This can be expanded to other fields by appending to the list
        sort_order = QtCore.Qt.AscendingOrder
        if sorting_position in [3, 4]:
            sort_order = QtCore.Qt.DescendingOrder

        self.item_proxy_model.sort(0, sort_order)
        self.parent.start_culling_timer()

    def generate_library_tags(self):
        """Store each book's library directory name and tags on its item."""
        db_library_directories = database.DatabaseFunctions(
            self.parent.database_path).fetch_data(
                ('Path', 'Name', 'Tags'),
                'directories',  # This checks the directories table NOT the book one
                {'Path': ''},
                'LIKE')

        if not db_library_directories:  # Empty database / table
            return

        library_directories = {
            i[0]: (i[1], i[2]) for i in db_library_directories}

        # Generate tags for the QStandardItemModel
        for row in range(self.view_model.rowCount()):
            this_item = self.view_model.item(row, 0)
            all_metadata = this_item.data(QtCore.Qt.UserRole + 3)
            directory_name, directory_tags = self._directory_tags(
                all_metadata['path'], library_directories)

            this_item.setData(directory_name, QtCore.Qt.UserRole + 10)
            this_item.setData(directory_tags, QtCore.Qt.UserRole + 11)

    def prune_models(self, valid_paths):
        """Drop every book whose path is not in ``valid_paths``.

        To be executed when the library is updated by folder. The books
        are removed from the model (and therefore from both views) and
        from the database.
        """
        valid_paths = set(valid_paths)

        invalid_paths = set()
        deletable_persistent_indexes = []
        for row in range(self.view_model.rowCount()):
            item = self.view_model.item(row, 0)
            path = item.data(QtCore.Qt.UserRole + 3)['path']
            if path not in valid_paths:
                invalid_paths.add(path)
                # Persistent indexes stay valid while rows are removed
                deletable_persistent_indexes.append(
                    QtCore.QPersistentModelIndex(item.index()))

        for index in deletable_persistent_indexes:
            self.view_model.removeRow(index.row())

        # Remove invalid paths from the database as well
        if invalid_paths:
            database.DatabaseFunctions(
                self.parent.database_path).delete_from_database('Path', invalid_paths)

167
lector/metadatadialog.py Normal file
View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from PyQt5 import QtWidgets, QtCore, QtGui
import database
from resources import metadata
from widgets import PliantQGraphicsScene
class MetadataUI(QtWidgets.QDialog, metadata.Ui_Dialog):
    """Frameless popup dialog for editing a book's metadata and cover."""

    def __init__(self, parent):
        super(MetadataUI, self).__init__()
        self.setupUi(self)

        self.setWindowFlags(
            QtCore.Qt.Popup |
            QtCore.Qt.FramelessWindowHint)

        # Clip the dialog to a rounded rectangle
        radius = 15
        path = QtGui.QPainterPath()
        path.addRoundedRect(QtCore.QRectF(self.rect()), radius, radius)
        mask = QtGui.QRegion(path.toFillPolygon().toPolygon())
        self.setMask(mask)

        # NOTE: this attribute shadows QWidget.parent()
        self.parent = parent
        self.database_path = self.parent.database_path

        self.book_index = None
        self.book_year = None
        self.previous_position = None
        self.cover_for_database = None

        self.coverView.setHorizontalScrollBarPolicy(QtCore.Qt.ScrollBarAlwaysOff)
        self.coverView.setVerticalScrollBarPolicy(QtCore.Qt.ScrollBarAlwaysOff)

        self.okButton.clicked.connect(self.ok_pressed)
        self.cancelButton.clicked.connect(self.cancel_pressed)
        self.dialogBackground.clicked.connect(self.color_background)

        self.titleLine.returnPressed.connect(self.ok_pressed)
        self.authorLine.returnPressed.connect(self.ok_pressed)
        self.yearLine.returnPressed.connect(self.ok_pressed)
        self.tagsLine.returnPressed.connect(self.ok_pressed)

    @staticmethod
    def _as_text(value):
        """Return ``value`` as text for a line edit; None becomes ''."""
        return '' if value is None else str(value)

    def load_book(self, cover, title, author, year, tags, book_index):
        """Fill the dialog with a book's current metadata.

        ``book_index`` is the model index of the book; its row is used to
        find the item to update when the dialog is accepted.
        """
        self.previous_position = None
        self.cover_for_database = None

        self.book_index = book_index
        self.book_year = year

        self.load_cover(cover)

        # Line edits only accept strings; the year may be a number and
        # any of the fields may be missing
        self.titleLine.setText(self._as_text(title))
        self.authorLine.setText(self._as_text(author))
        self.yearLine.setText(self._as_text(year))
        self.tagsLine.setText(self._as_text(tags))

    def load_cover(self, cover, use_as_is=False):
        """Show ``cover`` in the cover view.

        ``cover`` is used directly when ``use_as_is`` is true; otherwise
        a 140x205 pixmap is requested from it.
        """
        if use_as_is:
            image_pixmap = cover
        else:
            image_pixmap = cover.pixmap(QtCore.QSize(140, 205))

        graphics_scene = PliantQGraphicsScene(self)
        graphics_scene.addPixmap(image_pixmap)
        self.coverView.setScene(graphics_scene)

    def ok_pressed(self, event=None):
        """Apply the edited metadata to the model and the database.

        ``event`` is optional because this slot is connected both to
        ``clicked`` (which sends a boolean) and to ``returnPressed``
        (which sends nothing).
        """
        book_item = self.parent.lib_ref.view_model.item(self.book_index.row())

        title = self.titleLine.text()
        author = self.authorLine.text()
        tags = self.tagsLine.text()

        try:
            year = int(self.yearLine.text())
        except ValueError:
            # Keep the previous year when the field is not a number
            year = self.book_year

        tooltip_string = title + '\nAuthor: ' + author + '\nYear: ' + str(year)

        book_item.setData(title, QtCore.Qt.UserRole)
        book_item.setData(author, QtCore.Qt.UserRole + 1)
        book_item.setData(year, QtCore.Qt.UserRole + 2)
        book_item.setData(tags, QtCore.Qt.UserRole + 4)
        book_item.setToolTip(tooltip_string)

        book_hash = book_item.data(QtCore.Qt.UserRole + 6)
        database_dict = {
            'Title': title,
            'Author': author,
            'Year': year,
            'Tags': tags}

        if self.cover_for_database:
            database_dict['CoverImage'] = self.cover_for_database
            self.parent.cover_loader(
                book_item, self.cover_for_database)

        self.parent.lib_ref.update_proxymodels()
        self.hide()

        database.DatabaseFunctions(self.database_path).modify_metadata(
            database_dict, book_hash)

    def cancel_pressed(self, event=None):
        """Close the dialog without saving anything."""
        self.hide()

    def generate_display_position(self, mouse_cursor_position):
        """Return where to show the dialog for the given cursor position.

        The dialog opens at the cursor unless it would extend past the
        right or bottom edge of the screen geometry, in which case it is
        moved back so that it ends at that edge.
        """
        size = self.size()
        desktop_size = QtWidgets.QDesktopWidget().screenGeometry()

        display_x = min(
            mouse_cursor_position.x(), desktop_size.width() - size.width())
        display_y = min(
            mouse_cursor_position.y(), desktop_size.height() - size.height())

        return QtCore.QPoint(display_x, display_y)

    def color_background(self, set_initial=False):
        """Set the background color of the dialog and the cover view.

        With ``set_initial`` the color comes from the parent's settings.
        Otherwise it comes from ``parent.get_color()``; the current
        position is remembered first and the dialog is shown again
        afterwards.
        """
        if set_initial:
            background = self.parent.settings['dialog_background']
        else:
            self.previous_position = self.pos()
            background = self.parent.get_color()

        self.setStyleSheet(
            "QDialog {{background-color: {0}}}".format(background.name()))
        self.coverView.setStyleSheet(
            "QGraphicsView {{background-color: {0}}}".format(background.name()))

        if not set_initial:
            self.show()

    def showEvent(self, event):
        """Position the dialog, focus the title field and apply the color."""
        if self.previous_position:
            self.move(self.previous_position)
        else:
            display_position = self.generate_display_position(QtGui.QCursor.pos())
            self.move(display_position)

        self.titleLine.setFocus()
        self.color_background(True)

343
lector/models.py Normal file
View File

@@ -0,0 +1,343 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import pathlib
from PyQt5 import QtCore, QtWidgets
from resources import pie_chart
class BookmarkProxyModel(QtCore.QSortFilterProxyModel):
    """Proxy model for bookmarks; edits are written back to the parent."""

    def __init__(self, parent=None):
        super(BookmarkProxyModel, self).__init__(parent)
        # NOTE: this attribute shadows QObject.parent()
        self.parent = parent
        self.filter_string = None
        # Written by setFilterParams; defined here so that it can be read
        # before the first call
        self.filter_text = None

    def setFilterParams(self, filter_text):
        """Remember the text to filter bookmarks by."""
        self.filter_text = filter_text

    def filterAcceptsRow(self, row, parent):
        # TODO
        # Connect this to the search bar
        return True

    def setData(self, index, value, role=QtCore.Qt.EditRole):
        """Store an edited description in the model and the metadata.

        Only ``EditRole`` is handled; every other role is reported as
        not set.
        """
        if role != QtCore.Qt.EditRole:
            return False

        source_index = self.mapToSource(index)
        identifier = self.sourceModel().data(source_index, QtCore.Qt.UserRole + 2)

        self.sourceModel().setData(source_index, value, QtCore.Qt.DisplayRole)
        self.parent.metadata['bookmarks'][identifier]['description'] = value

        return True
class ItemProxyModel(QtCore.QSortFilterProxyModel):
    """Proxy model for the cover view.

    Filtering is delegated to a ProxyModelsCommonFunctions helper, which
    stores the filter parameters on this object.
    """

    def __init__(self, parent=None):
        super(ItemProxyModel, self).__init__(parent)
        self.common_functions = ProxyModelsCommonFunctions(self)

        # Set through setFilterParams
        self.sorting_box_position = None
        self.active_library_filters = None
        self.filter_text = None

    def setFilterParams(self, filter_text, active_library_filters, sorting_box_position):
        """Store the search text, library filters and sorting position."""
        self.common_functions.setFilterParams(
            filter_text, active_library_filters, sorting_box_position)

    def filterAcceptsRow(self, row, parent):
        """Return whether the shared filter logic accepts the row."""
        return self.common_functions.filterAcceptsRow(row, parent)
class TableProxyModel(QtCore.QSortFilterProxyModel):
    """Proxy model presenting the library as a six column table.

    Column 0 is the cover, columns 1-3 and 5 show text taken from the
    source item's user roles and column 4 shows a reading progress
    pixmap.
    """

    def __init__(self, temp_dir, parent=None):
        super(TableProxyModel, self).__init__(parent)

        self.header_data = [
            None, 'Title', 'Author', 'Year', '%', 'Tags']

        self.temp_dir = temp_dir
        self.filter_text = None
        self.active_library_filters = None
        self.sorting_box_position = None

        # Table column -> role holding that column's data on the item
        self.role_dictionary = {
            1: QtCore.Qt.UserRole,       # Title
            2: QtCore.Qt.UserRole + 1,   # Author
            3: QtCore.Qt.UserRole + 2,   # Year
            4: QtCore.Qt.UserRole + 7,   # Position percentage
            5: QtCore.Qt.UserRole + 4}   # Tags

        self.common_functions = ProxyModelsCommonFunctions(self)

    def columnCount(self, parent):
        return 6

    def headerData(self, column, orientation, role=QtCore.Qt.DisplayRole):
        """Return the title of a horizontal header section.

        Vertical sections are row numbers, not columns, so they get no
        text; indexing the titles with them used to raise IndexError
        from the seventh row on.
        """
        if role != QtCore.Qt.DisplayRole or orientation != QtCore.Qt.Horizontal:
            return None

        try:
            return self.header_data[column]
        except IndexError:
            return None

    def flags(self, index):
        # Tag editing will take place by way of a right click menu
        # These tags denote clickable and that's about it
        return QtCore.Qt.ItemIsEnabled | QtCore.Qt.ItemIsSelectable

    def data(self, index, role):
        """Return the alignment, progress pixmap or text for a cell."""
        source_index = self.mapToSource(index)
        item = self.sourceModel().item(source_index.row(), 0)

        if role == QtCore.Qt.TextAlignmentRole and index.column() == 3:
            return QtCore.Qt.AlignHCenter

        if role == QtCore.Qt.DecorationRole:
            if index.column() != 4:
                return None

            file_exists = item.data(QtCore.Qt.UserRole + 5)
            if not file_exists:
                # Checked before the position is inspected, so a missing
                # file is reported regardless of the position's contents
                return pie_chart.pixmapper(
                    -1, None, None, QtCore.Qt.SizeHintRole + 10)

            metadata = item.data(QtCore.Qt.UserRole + 3)
            position = metadata['position']
            if not position:
                return None

            if position['is_read']:
                current_chapter = total_chapters = 100
            else:
                try:
                    current_chapter = position['current_chapter']
                    total_chapters = position['total_chapters']
                except KeyError:
                    return None

                # TODO
                # See if there's any rationale for this
                if current_chapter == 1:
                    return None

            return pie_chart.pixmapper(
                current_chapter, total_chapters, self.temp_dir,
                QtCore.Qt.SizeHintRole + 10)

        elif role == QtCore.Qt.DisplayRole or role == QtCore.Qt.EditRole:
            if index.column() in (0, 4):  # Cover and Status
                return QtCore.QVariant()

            return item.data(self.role_dictionary[index.column()])

        else:
            return QtCore.QVariant()

    def setFilterParams(self, filter_text, active_library_filters, sorting_box_position):
        """Store the search text, library filters and sorting position."""
        self.common_functions.setFilterParams(
            filter_text, active_library_filters, sorting_box_position)

    def filterAcceptsRow(self, row, parent):
        """Return whether the shared filter logic accepts the row."""
        return self.common_functions.filterAcceptsRow(row, parent)

    def sort_table_columns(self, column):
        """Sort by the clicked header column.

        Meant to be connected to a signal of the header view, whose sort
        indicator supplies the order. Columns without an entry in
        ``role_dictionary`` (the cover column) are ignored.
        """
        sort_role = self.role_dictionary.get(column)
        if sort_role is None:
            return

        sorting_order = self.sender().sortIndicatorOrder()
        self.setSortRole(sort_role)
        self.sort(0, sorting_order)
class ProxyModelsCommonFunctions:
    """Filtering logic shared by ItemProxyModel and TableProxyModel.

    The filter state is stored on ``parent_model`` itself, in its
    ``filter_text``, ``active_library_filters`` and
    ``sorting_box_position`` attributes.
    """

    def __init__(self, parent_model):
        self.parent_model = parent_model

    def setFilterParams(self, filter_text, active_library_filters, sorting_box_position):
        """Store the filter state on the parent model.

        Library filter names are lowercased; None is treated as an empty
        list.
        """
        self.parent_model.filter_text = filter_text
        self.parent_model.active_library_filters = [
            i.lower() for i in (active_library_filters or [])]
        self.parent_model.sorting_box_position = sorting_box_position

    def filterAcceptsRow(self, row, parent):
        """Return True when the source row passes every active filter."""
        model = self.parent_model.sourceModel()

        this_index = model.index(row, 0)

        title = model.data(this_index, QtCore.Qt.UserRole)
        author = model.data(this_index, QtCore.Qt.UserRole + 1)
        tags = model.data(this_index, QtCore.Qt.UserRole + 4)
        directory_name = model.data(this_index, QtCore.Qt.UserRole + 10)
        directory_tags = model.data(this_index, QtCore.Qt.UserRole + 11)
        last_accessed = model.data(this_index, QtCore.Qt.UserRole + 12)

        # Hide untouched files when sorting by last accessed
        if self.parent_model.sorting_box_position == 4 and not last_accessed:
            return False

        # With no library filter active, nothing is shown at all
        active_filters = self.parent_model.active_library_filters
        if not active_filters or directory_name not in active_filters:
            return False

        filter_text = self.parent_model.filter_text
        if not filter_text:
            return True

        # Case-insensitive substring search over every text field
        needle = filter_text.lower()
        searchable = (title, author, tags, directory_name, directory_tags)
        return any(
            needle in value.lower() for value in searchable if value is not None)
class MostExcellentFileSystemModel(QtWidgets.QFileSystemModel):
    """File system model with a check box, a name and tags per directory.

    Directories are tracked on the basis of their paths. Poll the
    ``tag_data`` dictionary (path -> {'name', 'tags', 'check_state'}) to
    get the user's selection.
    """

    def __init__(self, tag_data, parent=None):
        super(MostExcellentFileSystemModel, self).__init__(parent)
        self.tag_data = tag_data
        # Column -> key of the per-directory dictionary shown / edited there
        self.field_dict = {0: 'check_state', 4: 'name', 5: 'tags'}

    def columnCount(self, parent):
        # The QFileSystemModel returns 4 columns by default
        # Columns 1, 2, 3 will be present but hidden
        return 6

    def headerData(self, col, orientation, role):
        # Columns not mentioned here will be hidden
        if orientation != QtCore.Qt.Horizontal or role != QtCore.Qt.DisplayRole:
            return None

        titles = {0: 'Path', 4: 'Name', 5: 'Tags'}
        return titles.get(col)

    def data(self, index, role):
        """Return name / tags / check state, or defer to the base model."""
        wants_text = role == QtCore.Qt.DisplayRole or role == QtCore.Qt.EditRole
        if index.column() in (4, 5) and wants_text:
            read_field = self.field_dict[index.column()]
            try:
                return self.tag_data[self.filePath(index)][read_field]
            except KeyError:
                # Nothing has been recorded for this directory
                return QtCore.QVariant()

        if role == QtCore.Qt.CheckStateRole and index.column() == 0:
            return self.checkState(index)

        return super(MostExcellentFileSystemModel, self).data(index, role)

    def flags(self, index):
        if index.column() in (4, 5):
            return (
                QtCore.Qt.ItemIsEnabled
                | QtCore.Qt.ItemIsSelectable
                | QtCore.Qt.ItemIsEditable)

        base_flags = super(MostExcellentFileSystemModel, self).flags(index)
        return base_flags | QtCore.Qt.ItemIsUserCheckable

    def checkState(self, index):
        """Return the check state of the nearest tracked ancestor or self."""
        current = index
        while current.isValid():
            tracked = self.tag_data.get(self.filePath(current))
            if tracked is not None:
                return tracked['check_state']
            current = current.parent()

        return QtCore.Qt.Unchecked

    def setData(self, index, value, role):
        """Record an edited name, tags or check state for a directory."""
        is_write_role = role in (QtCore.Qt.EditRole, QtCore.Qt.CheckStateRole)
        if is_write_role and index.isValid():
            write_field = self.field_dict[index.column()]
            self.layoutAboutToBeChanged.emit()

            this_path = self.filePath(index)
            if this_path not in self.tag_data:
                self.populate_dictionary(this_path)
            self.tag_data[this_path][write_field] = value

            self.depopulate_dictionary()
            self.layoutChanged.emit()

        return True

    def populate_dictionary(self, path):
        """Start tracking ``path``: checked, without name or tags."""
        self.tag_data[path] = {
            'name': None,
            'tags': None,
            'check_state': QtCore.Qt.Checked}

    def depopulate_dictionary(self):
        """Forget directories that no longer carry any information.

        This keeps the tag_data dictionary manageable as well as
        preventing weird behaviour when something is deselected and its
        tags are cleared.
        """
        # Entries in which every value is None or 0
        deletable = {
            path for path, fields in self.tag_data.items()
            if all(value is None or value == 0 for value in fields.values())}

        # Get untagged subdirectories too
        all_dirs = sorted(self.tag_data)

        def is_child(this_dir):
            # If a parent folder has tags, we only want the deletion
            # to kick in in case the parent is also checked
            ancestors = pathlib.Path(this_dir).parents
            return any(
                pathlib.Path(other) in ancestors
                and self.tag_data[other]['check_state'] == QtCore.Qt.Checked
                for other in all_dirs)

        for this_dir in all_dirs:
            if not is_child(this_dir):
                continue

            entry = self.tag_data[this_dir]
            dir_tags = (entry['name'], entry['tags'])
            if all(tag is None or tag == '' for tag in dir_tags):
                deletable.add(this_dir)

        for path in deletable:
            del self.tag_data[path]
# TODO
# Unbork this
class FileSystemProxyModel(QtCore.QSortFilterProxyModel):
    """Proxy over the file system model; currently accepts every row."""

    # Directory names the filter is meant to hide once it is enabled
    # again (see filterAcceptsRow). Nothing reads this yet.
    SYSTEM_DIRECTORIES = (
        'boot', 'dev', 'etc', 'lost+found', 'opt', 'pdb',
        'proc', 'root', 'run', 'srv', 'sys', 'tmp', 'twonky',
        'usr', 'var', 'bin', 'kdeinit5__0', 'lib', 'lib64', 'sbin')

    def __init__(self, parent=None):
        super(FileSystemProxyModel, self).__init__(parent)

    def filterAcceptsRow(self, row_num, parent):
        """Accept every row.

        The name based exclusion was switched off by an early return that
        left the check unreachable and printed every row's name. That
        accept-everything behaviour is kept, without the debug output.
        """
        # TODO: reject rows whose name is in SYSTEM_DIRECTORIES. The row
        # has to be looked up below ``parent``, and only entries directly
        # under the file system root should be considered.
        return True

View File

106
lector/parsers/cbr.py Normal file
View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# TODO
# Account for files with passwords
import os
import time
import collections
from rarfile import rarfile
class ParseCBR:
    """Read metadata and page images from a CBR (rar) comic archive."""

    def __init__(self, filename, temp_dir, file_md5):
        self.filename = filename
        self.book = None
        self.temp_dir = temp_dir
        self.file_md5 = file_md5

    def read_book(self):
        """Open the archive; ``self.book`` stays None when that fails."""
        try:
            self.book = rarfile.RarFile(self.filename)
        except Exception:
            # Deliberately broad: any failure to open the archive is
            # reported the same way. KeyboardInterrupt and SystemExit
            # are no longer swallowed.
            print('Cannot parse ' + self.filename)

    def get_title(self):
        """Return the file name without its directory and extension."""
        return os.path.splitext(os.path.basename(self.filename))[0]

    def get_author(self):
        return None

    def get_year(self):
        """Return the year of the file's ctime as a string."""
        creation_time = time.ctime(os.path.getctime(self.filename))
        return creation_time.split()[-1]

    def get_cover_image(self):
        """Return the bytes of the first file in the archive, by name.

        The first image in the archive may not be the cover. It is
        implied, however, that the first image in order will be the
        cover. Returns None for an archive without any file.
        """
        files = [i for i in self.book.infolist() if not i.isdir()]
        if not files:
            return None

        first_file = min(files, key=lambda entry: entry.filename)
        return self.book.read(first_file)

    def get_isbn(self):
        return

    def get_tags(self):
        return

    def get_contents(self):
        """Extract the archive and return ``(pages, file_settings)``.

        ``pages`` is a list of ``('Page N', image_path)`` tuples sorted
        by path, or None when the archive holds no file. Files are
        extracted to ``temp_dir/file_md5``.
        """
        file_settings = {
            'images_only': True}

        extract_path = os.path.join(self.temp_dir, self.file_md5)

        # I'm currently choosing not to keep multiple files in memory
        self.book.extractall(extract_path)

        # os.walk already yields directories that start with extract_path,
        # so the joined paths are complete as they are; joining
        # extract_path once more doubles the prefix for relative paths
        found_images = sorted(
            os.path.join(image_dir, image_name)
            for image_dir, _, image_names in os.walk(extract_path)
            for image_name in image_names)

        if not found_images:
            print('Found nothing in ' + self.filename)
            return None, file_settings

        contents = [
            ('Page ' + str(count), image_path)
            for count, image_path in enumerate(found_images, start=1)]

        return contents, file_settings

109
lector/parsers/cbz.py Normal file
View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# TODO
# Account for files with passwords
import os
import time
import zipfile
import collections
class ParseCBZ:
    """Read metadata and page images from a CBZ (zip) comic archive."""

    def __init__(self, filename, temp_dir, file_md5):
        self.filename = filename
        self.book = None
        self.temp_dir = temp_dir
        self.file_md5 = file_md5

    def read_book(self):
        """Open the archive; ``self.book`` stays None when that fails."""
        try:
            self.book = zipfile.ZipFile(self.filename, mode='r', allowZip64=True)
        except FileNotFoundError:
            print('Invalid path for ' + self.filename)
        except (KeyError, AttributeError, zipfile.BadZipFile):
            print('Cannot parse ' + self.filename)

    def get_title(self):
        """Return the file name without its directory and extension.

        Derived from ``self.filename`` so that it also works when the
        archive could not be opened.
        """
        return os.path.splitext(os.path.basename(self.filename))[0]

    def get_author(self):
        return None

    def get_year(self):
        """Return the year of the file's ctime as a string."""
        creation_time = time.ctime(os.path.getctime(self.filename))
        return creation_time.split()[-1]

    def get_cover_image(self):
        """Return the bytes of the first file in the archive, by name.

        The first image in the archive may not be the cover. It is
        implied, however, that the first image in order will be the
        cover. Returns None for an archive without any file.
        """
        files = [i for i in self.book.infolist() if not i.is_dir()]
        if not files:
            return None

        first_file = min(files, key=lambda entry: entry.filename)
        return self.book.read(first_file)

    def get_isbn(self):
        return

    def get_tags(self):
        return

    def get_contents(self):
        """Extract the archive and return ``(pages, file_settings)``.

        ``pages`` is a list of ``('Page N', image_path)`` tuples sorted
        by path, or None when the archive holds no file. Files are
        extracted to ``temp_dir/file_md5``.
        """
        file_settings = {
            'images_only': True}

        extract_path = os.path.join(self.temp_dir, self.file_md5)

        # I'm currently choosing not to keep multiple files in memory
        self.book.extractall(extract_path)

        # os.walk already yields directories that start with extract_path,
        # so the joined paths are complete as they are; joining
        # extract_path once more doubles the prefix for relative paths
        found_images = sorted(
            os.path.join(image_dir, image_name)
            for image_dir, _, image_names in os.walk(extract_path)
            for image_name in image_names)

        if not found_images:
            print('Found nothing in ' + self.filename)
            return None, file_settings

        contents = [
            ('Page ' + str(count), image_path)
            for count, image_path in enumerate(found_images, start=1)]

        return contents, file_settings

68
lector/parsers/epub.py Normal file
View File

@@ -0,0 +1,68 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import zipfile
from ePub.read_epub import EPUB
class ParseEPUB:
    """Expose the metadata and contents of an EPUB file.

    ``read_book()`` must succeed before any ``get_*`` method is called:
    until then ``self.book`` is ``None`` and the getters cannot index it.
    """

    def __init__(self, filename, temp_dir, file_md5):
        # TODO
        # Maybe also include book description
        self.book_ref = None
        self.book = None
        self.temp_dir = temp_dir
        self.filename = filename
        self.file_md5 = file_md5

    def read_book(self):
        """Parse ``self.filename`` and store the result in ``self.book``.

        When the EPUB reader reports no contents a message is printed and
        ``self.book`` is left as ``None``.
        """
        self.book_ref = EPUB(self.filename)
        contents_found = self.book_ref.read_epub()
        if not contents_found:
            print('Cannot process: ' + self.filename)
            return
        self.book = self.book_ref.book

    def get_title(self):
        """Return the ``'title'`` entry of the parsed book."""
        return self.book['title']

    def get_author(self):
        """Return the ``'author'`` entry of the parsed book."""
        return self.book['author']

    def get_year(self):
        """Return the ``'year'`` entry of the parsed book."""
        return self.book['year']

    def get_cover_image(self):
        """Return the ``'cover'`` entry of the parsed book."""
        return self.book['cover']

    def get_isbn(self):
        """Return the ``'isbn'`` entry of the parsed book."""
        return self.book['isbn']

    def get_tags(self):
        """Return the ``'tags'`` entry of the parsed book."""
        return self.book['tags']

    def get_contents(self):
        """Extract the EPUB and return its chapter list.

        The archive is unpacked into ``<temp_dir>/<file_md5>`` before the
        reader is asked to parse the chapters.

        Returns:
            A ``(book_list, file_settings)`` tuple where ``file_settings``
            is ``{'images_only': False}``.
        """
        extract_path = os.path.join(self.temp_dir, self.file_md5)
        # Close the archive handle as soon as extraction is done instead of
        # leaving it to the garbage collector.
        with zipfile.ZipFile(self.filename) as archive:
            archive.extractall(extract_path)

        self.book_ref.parse_chapters(temp_dir=self.temp_dir)
        file_settings = {
            'images_only': False}
        return self.book['book_list'], file_settings

98
lector/parsers/mobi.py Normal file
View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This module parses Amazon ebooks using KindleUnpack to first create an
# epub that is then read the usual way
import os
import sys
import shutil
import zipfile
from ePub.read_epub import EPUB
import KindleUnpack.kindleunpack as KindleUnpack
class ParseMOBI:
    """Expose the metadata and contents of an Amazon (MOBI) ebook.

    The book is first unpacked with KindleUnpack into an EPUB, which is
    then read by the regular EPUB reader. ``read_book()`` must succeed
    before any ``get_*`` method is called: until then ``self.book`` is
    ``None`` and the getters cannot index it.
    """

    def __init__(self, filename, temp_dir, file_md5):
        self.book_ref = None
        self.book = None
        self.filename = filename
        self.epub_filepath = None
        self.split_large_xml = False
        self.temp_dir = temp_dir
        self.extract_dir = os.path.join(temp_dir, file_md5)

    def read_book(self):
        """Unpack the MOBI file and parse the resulting EPUB.

        KindleUnpack output is looked for at
        ``<extract_dir>/mobi8/<basename>.epub``. When that file does not
        exist, the ``mobi7`` directory is zipped into an archive instead
        and ``split_large_xml`` is switched on for later chapter parsing.

        When the EPUB reader reports no contents a message is printed and
        ``self.book`` is left as ``None``.
        """
        with HidePrinting():
            KindleUnpack.unpackBook(self.filename, self.extract_dir)

        epub_filename = os.path.splitext(
            os.path.basename(self.filename))[0] + '.epub'

        self.epub_filepath = os.path.join(
            self.extract_dir, 'mobi8', epub_filename)
        if not os.path.exists(self.epub_filepath):
            zip_dir = os.path.join(self.extract_dir, 'mobi7')
            zip_file = os.path.join(
                self.extract_dir, epub_filename)
            # make_archive() returns the path of the archive it wrote.
            self.epub_filepath = shutil.make_archive(zip_file, 'zip', zip_dir)
            self.split_large_xml = True

        self.book_ref = EPUB(self.epub_filepath)
        contents_found = self.book_ref.read_epub()
        if not contents_found:
            print('Cannot process: ' + self.filename)
            return
        self.book = self.book_ref.book

    def get_title(self):
        """Return the ``'title'`` entry of the parsed book."""
        return self.book['title']

    def get_author(self):
        """Return the ``'author'`` entry of the parsed book."""
        return self.book['author']

    def get_year(self):
        """Return the ``'year'`` entry of the parsed book."""
        return self.book['year']

    def get_cover_image(self):
        """Return the ``'cover'`` entry of the parsed book."""
        return self.book['cover']

    def get_isbn(self):
        """Return the ``'isbn'`` entry of the parsed book."""
        return self.book['isbn']

    def get_tags(self):
        """Return the ``'tags'`` entry of the parsed book."""
        return self.book['tags']

    def get_contents(self):
        """Extract the generated EPUB and return its chapter list.

        The archive at ``self.epub_filepath`` is unpacked into
        ``self.extract_dir`` before the reader parses the chapters.

        Returns:
            A ``(book_list, file_settings)`` tuple where ``file_settings``
            is ``{'images_only': False}``.
        """
        # Close the archive handle as soon as extraction is done instead of
        # leaving it to the garbage collector.
        with zipfile.ZipFile(self.epub_filepath) as archive:
            archive.extractall(self.extract_dir)

        self.book_ref.parse_chapters(
            temp_dir=self.temp_dir, split_large_xml=self.split_large_xml)
        file_settings = {
            'images_only': False}
        return self.book['book_list'], file_settings
class HidePrinting:
    """Context manager that discards everything written to ``sys.stdout``.

    On entry ``sys.stdout`` is redirected to the null device; on exit the
    previous stream is restored. Exceptions raised inside the ``with``
    body are not suppressed.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Use a real sink rather than None: print() tolerates a None stdout,
        # but direct sys.stdout.write()/flush() calls would raise
        # AttributeError.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout = self._original_stdout
        self._devnull.close()

15
lector/rarfile/LICENSE Normal file
View File

@@ -0,0 +1,15 @@
Copyright (c) 2005-2016 Marko Kreen <markokr@gmail.com>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

View File

556
lector/rarfile/dumprar.py Executable file
View File

@@ -0,0 +1,556 @@
#! /usr/bin/env python
"""Dump archive contents, test extraction."""
from __future__ import division, absolute_import, print_function
import io
import sys
import getopt
from datetime import datetime
import rarfile as rf
# Help text printed by main() for -h and when no archive names are given.
usage = """
dumprar [switches] [ARC1 ARC2 ...] [@ARCLIST]
switches:
@file read archive names from file
-pPSW set password
-Ccharset set fallback charset
-v increase verbosity
-t attempt to read all files
-x write read files out
-c show archive comment
-h show usage
-- stop switch parsing
""".strip()
# Host OS names, indexed by the host_os value of a RAR3 record.
os_list = ['DOS', 'OS2', 'WIN', 'UNIX', 'MACOS', 'BEOS']

# RAR3 block type names, indexed by (type code - rf.RAR_BLOCK_MARK).
block_strs = ['MARK', 'MAIN', 'FILE', 'OLD_COMMENT', 'OLD_EXTRA',
              'OLD_SUB', 'OLD_RECOVERY', 'OLD_AUTH', 'SUB', 'ENDARC']

# RAR5 block type code -> display name, looked up by rar5_type().
r5_block_types = {
    rf.RAR5_BLOCK_MAIN: 'R5_MAIN',
    rf.RAR5_BLOCK_FILE: 'R5_FILE',
    rf.RAR5_BLOCK_SERVICE: 'R5_SVC',
    rf.RAR5_BLOCK_ENCRYPTION: 'R5_ENC',
    rf.RAR5_BLOCK_ENDARC: 'R5_ENDARC',
}
def rar3_type(btype):
    """Return the display name of a RAR3 block type code.

    Codes outside the MARK..ENDARC range map to "*UNKNOWN*".
    """
    if rf.RAR_BLOCK_MARK <= btype <= rf.RAR_BLOCK_ENDARC:
        return block_strs[btype - rf.RAR_BLOCK_MARK]
    return "*UNKNOWN*"
def rar5_type(btype):
    """Return the display name of a RAR5 block type code, or '*UNKNOWN*'."""
    try:
        return r5_block_types[btype]
    except KeyError:
        return '*UNKNOWN*'
# Each *_bits / *_flags table below is a sequence of (bit mask, name) pairs
# in the form expected by render_flags().

# Flags of a RAR3 MAIN block.
main_bits = (
    (rf.RAR_MAIN_VOLUME, "VOL"),
    (rf.RAR_MAIN_COMMENT, "COMMENT"),
    (rf.RAR_MAIN_LOCK, "LOCK"),
    (rf.RAR_MAIN_SOLID, "SOLID"),
    (rf.RAR_MAIN_NEWNUMBERING, "NEWNR"),
    (rf.RAR_MAIN_AUTH, "AUTH"),
    (rf.RAR_MAIN_RECOVERY, "RECOVERY"),
    (rf.RAR_MAIN_PASSWORD, "PASSWORD"),
    (rf.RAR_MAIN_FIRSTVOLUME, "FIRSTVOL"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flags of a RAR3 ENDARC block.
endarc_bits = (
    (rf.RAR_ENDARC_NEXT_VOLUME, "NEXTVOL"),
    (rf.RAR_ENDARC_DATACRC, "DATACRC"),
    (rf.RAR_ENDARC_REVSPACE, "REVSPACE"),
    (rf.RAR_ENDARC_VOLNR, "VOLNR"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flags of a RAR3 FILE or SUB block (dictionary-size bits are handled
# separately in get_file_flags()).
file_bits = (
    (rf.RAR_FILE_SPLIT_BEFORE, "SPLIT_BEFORE"),
    (rf.RAR_FILE_SPLIT_AFTER, "SPLIT_AFTER"),
    (rf.RAR_FILE_PASSWORD, "PASSWORD"),
    (rf.RAR_FILE_COMMENT, "COMMENT"),
    (rf.RAR_FILE_SOLID, "SOLID"),
    (rf.RAR_FILE_LARGE, "LARGE"),
    (rf.RAR_FILE_UNICODE, "UNICODE"),
    (rf.RAR_FILE_SALT, "SALT"),
    (rf.RAR_FILE_VERSION, "VERSION"),
    (rf.RAR_FILE_EXTTIME, "EXTTIME"),
    (rf.RAR_FILE_EXTFLAGS, "EXTFLAGS"),
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Flags shown for every other RAR3 block type.
generic_bits = (
    (rf.RAR_SKIP_IF_UNKNOWN, "SKIP"),
    (rf.RAR_LONG_BLOCK, "LONG"),
)

# Names for the RAR3 dictionary-size field, indexed by
# (flags & rf.RAR_FILE_DICTMASK) >> 5 in get_file_flags().
file_parms = ("D64", "D128", "D256", "D512",
              "D1024", "D2048", "D4096", "DIR")

# Header flags common to all RAR5 blocks.
r5_block_flags = (
    (rf.RAR5_BLOCK_FLAG_EXTRA_DATA, 'EXTRA'),
    (rf.RAR5_BLOCK_FLAG_DATA_AREA, 'DATA'),
    (rf.RAR5_BLOCK_FLAG_SKIP_IF_UNKNOWN, 'SKIP'),
    (rf.RAR5_BLOCK_FLAG_SPLIT_BEFORE, 'SPLIT_BEFORE'),
    (rf.RAR5_BLOCK_FLAG_SPLIT_AFTER, 'SPLIT_AFTER'),
    (rf.RAR5_BLOCK_FLAG_DEPENDS_PREV, 'DEPENDS'),
    (rf.RAR5_BLOCK_FLAG_KEEP_WITH_PARENT, 'KEEP'),
)

# Flags of a RAR5 MAIN block.
r5_main_flags = (
    (rf.RAR5_MAIN_FLAG_ISVOL, 'ISVOL'),
    (rf.RAR5_MAIN_FLAG_HAS_VOLNR, 'VOLNR'),
    (rf.RAR5_MAIN_FLAG_SOLID, 'SOLID'),
    (rf.RAR5_MAIN_FLAG_RECOVERY, 'RECOVERY'),
    (rf.RAR5_MAIN_FLAG_LOCKED, 'LOCKED'),
)

# File flags of a RAR5 FILE or SERVICE block.
r5_file_flags = (
    (rf.RAR5_FILE_FLAG_ISDIR, 'DIR'),
    (rf.RAR5_FILE_FLAG_HAS_MTIME, 'MTIME'),
    (rf.RAR5_FILE_FLAG_HAS_CRC32, 'CRC32'),
    (rf.RAR5_FILE_FLAG_UNKNOWN_SIZE, 'NOSIZE'),
)

# Flags of a RAR5 ENCRYPTION block.
r5_enc_flags = (
    (rf.RAR5_ENC_FLAG_HAS_CHECKVAL, 'CHECKVAL'),
)

# Flags of a RAR5 ENDARC block.
r5_endarc_flags = (
    (rf.RAR5_ENDARC_FLAG_NEXT_VOL, 'NEXTVOL'),
)

# Flags of the per-file encryption record of a RAR5 file.
r5_file_enc_flags = (
    (rf.RAR5_XENC_CHECKVAL, 'CHECKVAL'),
    (rf.RAR5_XENC_TWEAKED, 'TWEAKED'),
)

# RAR5 redirection type code -> display name.
r5_file_redir_types = {
    rf.RAR5_XREDIR_UNIX_SYMLINK: 'UNIX_SYMLINK',
    rf.RAR5_XREDIR_WINDOWS_SYMLINK: 'WINDOWS_SYMLINK',
    rf.RAR5_XREDIR_WINDOWS_JUNCTION: 'WINDOWS_JUNCTION',
    rf.RAR5_XREDIR_HARD_LINK: 'HARD_LINK',
    rf.RAR5_XREDIR_FILE_COPY: 'FILE_COPY',
}

# Flags of a RAR5 redirection record.
r5_file_redir_flags = (
    (rf.RAR5_XREDIR_ISDIR, 'DIR'),
)
def xprint(m, *args):
    """Write one line to stdout, %-formatting *m* with *args* when given.

    On Python 2 the template is decoded from UTF-8 before formatting and
    the result is encoded back to UTF-8 before it is written.
    """
    legacy_python = sys.hexversion < 0x3000000
    if legacy_python:
        m = m.decode('utf8')
    text = m % args if args else m
    if legacy_python:
        text = text.encode('utf8')
    out = sys.stdout
    out.write(text)
    out.write('\n')
def render_flags(flags, bit_list):
    """Return the comma-separated names of the bits set in *flags*.

    *bit_list* holds (mask, name) entries. Set bits that no entry covers
    are reported as "UNK_<hex value>" after the known names. Returns '-'
    when nothing is set.
    """
    names = []
    recognised = 0
    for entry in bit_list:
        mask = entry[0]
        recognised |= mask
        if flags & mask:
            names.append(entry[1])

    # Walk the leftover bits from least to most significant.
    leftover = flags & ~recognised
    bit_value = 1
    while leftover:
        if leftover & 1:
            names.append("UNK_%04x" % bit_value)
        leftover >>= 1
        bit_value <<= 1

    return ",".join(names) if names else '-'
def get_file_flags(flags):
    """Return RAR3 file flag names followed by the dictionary-size name.

    The bits selected by rf.RAR_FILE_DICTMASK are excluded from the flag
    names and rendered separately through the file_parms table.
    """
    dict_bits = flags & rf.RAR_FILE_DICTMASK
    names = render_flags(flags & ~rf.RAR_FILE_DICTMASK, file_bits)
    return names + "," + file_parms[dict_bits >> 5]
def fmt_time(t):
    """Format a timestamp for display.

    None gives '(-)', a datetime gives its ISO form with a 'T' separator,
    and anything else is formatted as a six-field
    (year, month, day, hour, minute, second) tuple.
    """
    if t is None:
        return '(-)'
    if not isinstance(t, datetime):
        return "%04d-%02d-%02d %02d:%02d:%02d" % t
    return t.isoformat('T')
def show_item(h):
    """Print one header record, dispatching on its RAR3/RAR5 info class."""
    if isinstance(h, rf.Rar3Info):
        show_item_v3(h)
        return
    if isinstance(h, rf.Rar5Info):
        show_item_v5(h)
        return
    xprint('Unknown info record')
def show_item_v3(h):
    """Print a description of one RAR3 header record.

    FILE and SUB blocks get the full file details; MAIN, ENDARC, MARK and
    all other block types get only their flags. A comment, when present,
    is printed last for every block type.
    """
    block_type = h.type
    xprint("%s: hdrlen=%d datlen=%d", rar3_type(block_type), h.header_size, h.add_size)

    if block_type in (rf.RAR_BLOCK_FILE, rf.RAR_BLOCK_SUB):
        # Unix modes are shown in octal, all others in hex.
        mode_format = "0%o" if h.host_os == rf.RAR_OS_UNIX else "0x%x"
        s_mode = mode_format % h.mode
        xprint(" flags=0x%04x:%s", h.flags, get_file_flags(h.flags))
        s_os = os_list[h.host_os] if 0 <= h.host_os < len(os_list) else "?"
        xprint(" os=%d:%s ver=%d mode=%s meth=%c cmp=%d dec=%d vol=%d",
               h.host_os, s_os,
               h.extract_version, s_mode, h.compress_type,
               h.compress_size, h.file_size, h.volume)
        # Show the CRC as an unsigned 32-bit value next to the raw one.
        unsigned_crc = (h.CRC + 0x100000000) & 0xFFFFFFFF
        xprint(" crc=0x%08x (%d) date_time=%s", unsigned_crc, h.CRC, fmt_time(h.date_time))
        xprint(" name=%s", h.filename)
        for attr, line_format in (("mtime", " mtime=%s"),
                                  ("ctime", " ctime=%s"),
                                  ("atime", " atime=%s"),
                                  ("arctime", " arctime=%s")):
            stamp = getattr(h, attr)
            if stamp:
                xprint(line_format, fmt_time(stamp))
    elif block_type == rf.RAR_BLOCK_MARK:
        xprint(" flags=0x%04x:", h.flags)
    else:
        if block_type == rf.RAR_BLOCK_MAIN:
            bit_names = main_bits
        elif block_type == rf.RAR_BLOCK_ENDARC:
            bit_names = endarc_bits
        else:
            bit_names = generic_bits
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.flags, bit_names))

    if h.comment is not None:
        cm = repr(h.comment)
        # Drop the Python 2 unicode-literal prefix from the repr.
        if cm.startswith('u'):
            cm = cm[1:]
        xprint(" comment=%s", cm)
def show_item_v5(h):
    """Show any RAR5 record.

    Prints the block header summary and block flags, then details that
    depend on the block type: FILE/SERVICE blocks get name, flags,
    compression, hashes, timestamps and any encryption, redirection,
    owner and version records; MAIN, ENDARC and ENCRYPTION blocks get
    their own flag lines; anything else is reported as missing info.
    A comment, when present, is printed last for every block type.
    """
    st = rar5_type(h.block_type)
    xprint("%s: hdrlen=%d datlen=%d hdr_extra=%d", st, h.header_size,
           h.compress_size, h.block_extra_size)
    xprint(" block_flags=0x%04x:%s", h.block_flags, render_flags(h.block_flags, r5_block_flags))
    if h.block_type in (rf.RAR5_BLOCK_FILE, rf.RAR5_BLOCK_SERVICE):
        xprint(" name=%s", h.filename)
        # Unix modes are shown in octal; every other host is labelled
        # WINDOWS and shown in hex.
        if h.file_host_os == rf.RAR5_OS_UNIX:
            s_os = 'UNIX'
            s_mode = "0%o" % h.mode
        else:
            s_os = 'WINDOWS'
            s_mode = "0x%x" % h.mode
        xprint(" file_flags=0x%04x:%s", h.file_flags, render_flags(h.file_flags, r5_file_flags))
        # The compression flags word is split into bit fields here:
        # bits 0-5, bits 7-9, bits 10 and up, plus the solid bit.
        cmp_flags = h.file_compress_flags
        xprint(" cmp_algo=%d cmp_meth=%d dict=%d solid=%r",
               cmp_flags & 0x3f,
               (cmp_flags >> 7) & 0x07,
               cmp_flags >> 10,
               cmp_flags & rf.RAR5_COMPR_SOLID > 0)
        xprint(" os=%d:%s mode=%s cmp=%r dec=%r vol=%r",
               h.file_host_os, s_os, s_mode,
               h.compress_size, h.file_size, h.volume)
        if h.CRC is not None:
            xprint(" crc=0x%08x (%d)", h.CRC, h.CRC)
        if h.blake2sp_hash is not None:
            xprint(" blake2sp=%s", rf.tohex(h.blake2sp_hash))
        if h.date_time is not None:
            xprint(" date_time=%s", fmt_time(h.date_time))
        if h.mtime:
            xprint(" mtime=%s", fmt_time(h.mtime))
        if h.ctime:
            xprint(" ctime=%s", fmt_time(h.ctime))
        if h.atime:
            xprint(" atime=%s", fmt_time(h.atime))
        if h.arctime:
            xprint(" arctime=%s", fmt_time(h.arctime))
        if h.flags & rf.RAR_FILE_PASSWORD:
            enc_algo, enc_flags, kdf_count, salt, iv, checkval = h.file_encryption
            algo_name = 'AES256' if enc_algo == rf.RAR5_XENC_CIPHER_AES256 else 'UnknownAlgo'
            # The stored value is printed as kdf_lg and 1 << value as
            # kdf_count.
            xprint(' algo=%d:%s enc_flags=%04x:%s kdf_lg=%d kdf_count=%d salt=%s iv=%s checkval=%s',
                   enc_algo, algo_name, enc_flags, render_flags(enc_flags, r5_file_enc_flags),
                   kdf_count, 1 << kdf_count, rf.tohex(salt), rf.tohex(iv),
                   checkval and rf.tohex(checkval) or '-')
        if h.file_redir:
            redir_type, redir_flags, redir_name = h.file_redir
            xprint(' redir: type=%s flags=%d:%s destination=%s',
                   r5_file_redir_types.get(redir_type, 'Unknown'),
                   redir_flags, render_flags(redir_flags, r5_file_redir_flags),
                   redir_name)
        if h.file_owner:
            uname, gname, uid, gid = h.file_owner
            xprint(' owner: name=%r group=%r uid=%r gid=%r',
                   uname, gname, uid, gid)
        if h.file_version:
            flags, version = h.file_version
            xprint(' version: flags=%r version=%r', flags, version)
    elif h.block_type == rf.RAR5_BLOCK_MAIN:
        # The hex value comes from h.flags, the names from h.main_flags.
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.main_flags, r5_main_flags))
    elif h.block_type == rf.RAR5_BLOCK_ENDARC:
        # The hex value comes from h.flags, the names from h.endarc_flags.
        xprint(" flags=0x%04x:%s", h.flags, render_flags(h.endarc_flags, r5_endarc_flags))
    elif h.block_type == rf.RAR5_BLOCK_ENCRYPTION:
        algo_name = 'AES256' if h.encryption_algo == rf.RAR5_XENC_CIPHER_AES256 else 'UnknownAlgo'
        xprint(" algo=%d:%s flags=0x%04x:%s", h.encryption_algo, algo_name, h.flags,
               render_flags(h.encryption_flags, r5_enc_flags))
        xprint(" kdf_lg=%d kdf_count=%d", h.encryption_kdf_count, 1 << h.encryption_kdf_count)
        xprint(" salt=%s", rf.tohex(h.encryption_salt))
    else:
        xprint(" - missing info -")
    if h.comment is not None:
        cm = repr(h.comment)
        # Drop the Python 2 unicode-literal prefix from the repr.
        if cm[0] == 'u':
            cm = cm[1:]
        xprint(" comment=%s", cm)
# Run-time switches; main() overwrites them from the command line.
cf_show_comment = 0  # -c: print the archive comment line by line
cf_verbose = 0  # -v: incremented once per occurrence
cf_charset = None  # -C: passed to rf.RarFile as charset
cf_extract = 0  # -x: extract the archive entries
cf_test_read = 0  # -t: incremented per occurrence; >1 also tests seek()/readinto()
cf_test_unrar = 0  # set by the '-T' branch of main(); makes test_real() call r.testrar()
cf_test_memory = 0  # -M: load the archive into an io.BytesIO before opening it
def check_crc(f, inf, desc):
    """Report a mismatch between the digest of *f* and the one *inf* expects.

    Nothing is checked when *inf* carries no expected digest. *desc*
    labels the operation in the printed error line.
    """
    expected = inf._md_expect
    if expected is not None:
        actual = f._md_context.digest()
        if actual != expected:
            print('crc error - %s - exp=%r got=%r' % (desc, expected, actual))
def test_read_long(r, inf):
    """Test read and readinto.

    Reads the archive member described by *inf* from *r* in 8 KiB chunks,
    reports a short read when the byte count differs from
    ``inf.file_size``, and compares the stream's digest with the expected
    one. When ``cf_test_read`` is above 1, the member is rewound and read
    again through ``readinto()`` to check the byte count a second time.

    The member is closed even when reading or checking raises.
    """
    md_class = inf._md_class or rf.NoHashContext
    bctx = md_class()
    f = r.open(inf.filename)
    try:
        total = 0
        while 1:
            data = f.read(8192)
            if not data:
                break
            bctx.update(data)
            total += len(data)
        if total != inf.file_size:
            xprint("\n *** %s has corrupt file: %s ***", r.rarfile, inf.filename)
            xprint(" *** short read: got=%d, need=%d ***\n", total, inf.file_size)
        check_crc(f, inf, 'read')
        bhash = bctx.hexdigest()
        if cf_verbose > 1:
            # Only mismatches are worth a line; matching hashes stay quiet.
            if f._md_context.digest() != inf._md_expect:
                xprint(" checkhash: %r got=%r exp=%r cls=%r\n",
                       bhash, f._md_context.digest(), inf._md_expect, inf._md_class)

        # test .seek() & .readinto()
        if cf_test_read > 1:
            f.seek(0, 0)

            total = 0
            buf = bytearray(rf.ZERO * 1024)
            while 1:
                res = f.readinto(buf)
                if not res:
                    break
                total += res
            if inf.file_size != total:
                xprint(" *** readinto failed: got=%d, need=%d ***\n", total, inf.file_size)
            # NOTE: the digest is not re-checked after the readinto pass.
    finally:
        f.close()
def test_read(r, inf):
    """Test file read.

    Thin wrapper: delegates to test_read_long() with the same archive
    *r* and member info *inf*.
    """
    test_read_long(r, inf)
def test_real(fn, psw):
    """Actual archive processing.

    Opens the archive *fn* (from memory when ``cf_test_memory`` is set),
    applies the password *psw* when the archive needs one, optionally
    shows the archive comment, then walks every non-directory entry and
    shows and/or test-reads it according to the ``cf_*`` switches.
    With ``cf_extract`` every entry is extracted twice: once through
    ``extractall()`` and once individually through ``extract()``.
    With ``cf_test_unrar`` the archive's ``testrar()`` is run last.

    Returns early, after printing a note, when *fn* is not a RAR file or
    when a password is required but none was supplied.
    """
    xprint("Archive: %s", fn)

    cb = None
    if cf_verbose > 1:
        cb = show_item

    rfarg = fn
    if cf_test_memory:
        # Read through a context manager so the file handle is released
        # immediately instead of whenever the file object gets collected.
        with open(fn, 'rb') as src:
            rfarg = io.BytesIO(src.read())

    # check if rar
    if not rf.is_rarfile(rfarg):
        xprint(" --- %s is not a RAR file ---", fn)
        return

    # open
    r = rf.RarFile(rfarg, charset=cf_charset, info_callback=cb)
    # set password
    if r.needs_password():
        if psw:
            r.setpassword(psw)
        else:
            xprint(" --- %s requires password ---", fn)
            return

    # show comment
    if cf_show_comment and r.comment:
        for ln in r.comment.split('\n'):
            xprint(" %s", ln)
    elif cf_verbose > 0 and r.comment:
        cm = repr(r.comment)
        # Drop the Python 2 unicode-literal prefix from the repr.
        if cm[0] == 'u':
            cm = cm[1:]
        xprint(" comment=%s", cm)

    # process
    for n in r.namelist():
        inf = r.getinfo(n)
        if inf.isdir():
            continue
        if cf_verbose == 1:
            show_item(inf)
        if cf_test_read:
            test_read(r, inf)

    if cf_extract:
        r.extractall()
        for inf in r.infolist():
            r.extract(inf)

    if cf_test_unrar:
        r.testrar()
def test(fn, psw):
    """Run test_real() on one archive and report archive errors.

    A middle volume of a multi-volume archive gets its own note; any
    other rarfile error or IOError is printed as "<class name>: <message>"
    instead of propagating.
    """
    try:
        test_real(fn, psw)
    except rf.NeedFirstVolume:
        xprint(" --- %s is middle part of multi-vol archive ---", fn)
    except (rf.Error, IOError) as ex:
        xprint("\n *** %s: %s ***\n", type(ex).__name__, ex)
def main():
    """Program entry point.

    Parses the command-line switches into the module-level ``cf_*``
    settings, expands ``@file`` arguments into the archive names listed
    in that file (one per line), and runs test() on every archive.
    Prints the usage text when -h is given or no archive is named, and
    exits with status 1 on a switch that getopt rejects.
    """
    global cf_verbose, cf_show_comment, cf_charset
    global cf_extract, cf_test_read, cf_test_unrar
    global cf_test_memory

    psw = None

    # parse args
    try:
        # 'T' must be listed for the '-T' branch below to be reachable.
        # 'R' stays accepted as an alias because the option string has
        # always listed it.
        opts, args = getopt.getopt(sys.argv[1:], 'p:C:hvcxtTRM')
    except getopt.error as ex:
        print(str(ex), file=sys.stderr)
        sys.exit(1)

    for o, v in opts:
        if o == '-p':
            psw = v
        elif o == '-h':
            xprint(usage)
            return
        elif o == '-v':
            cf_verbose += 1
        elif o == '-c':
            cf_show_comment = 1
        elif o == '-x':
            cf_extract = 1
        elif o == '-t':
            cf_test_read += 1
        elif o in ('-T', '-R'):
            cf_test_unrar = 1
        elif o == '-M':
            cf_test_memory = 1
        elif o == '-C':
            cf_charset = v
        else:
            raise Exception("unhandled switch: " + o)

    args2 = []
    for a in args:
        # startswith() also copes with an empty argument, where a[0]
        # would raise IndexError.
        if a.startswith("@"):
            with open(a[1:], 'r') as arclist:
                for ln in arclist:
                    # Strip only the line terminator: a final line without
                    # a newline must keep its last character.
                    fn = ln.rstrip('\r\n')
                    if fn:
                        args2.append(fn)
        else:
            args2.append(a)
    args = args2

    if not args:
        xprint(usage)

    # pypy .readinto()+memoryview() is buggy
    #if cf_test_read > 1 and hasattr(sys, 'pypy_version_info'):
    #    cf_test_read = 1

    for fn in args:
        test(fn, psw)
if __name__ == '__main__':
    # Run the CLI only when executed as a script; Ctrl-C exits quietly
    # without a traceback.
    try:
        main()
    except KeyboardInterrupt:
        pass

3011
lector/rarfile/rarfile.py Normal file

File diff suppressed because it is too large Load Diff

View File

View File

@@ -0,0 +1,14 @@
<!-- Static page describing Lector: name, tagline, author contact, project page and license links. -->
<html>
<head>
<title></title>
<meta content="HTML is not a programming language">
<style></style>
</head>
<body><h1 style="text-align: center;">Lector</h1>
<h2 style="text-align: center;">A Qt Based ebook reader</h2>
<p>&nbsp;</p>
<p>Author: BasioMeusPuga <a href="mailto:disgruntled.mob@gmail.com">disgruntled.mob@gmail.com</a></p>
<p>Page:&nbsp;<a href="https://github.com/BasioMeusPuga/Lector">https://github.com/BasioMeusPuga/Lector</a></p>
<p>License: GPLv3&nbsp;<a href="https://www.gnu.org/licenses/gpl-3.0.en.html">https://www.gnu.org/licenses/gpl-3.0.en.html</a></p>
<p>&nbsp;</p></body>
</html>

View File

@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'raw/definition.ui'
#
# Created by: PyQt5 UI code generator 5.10.1
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_Dialog(object):
    """Widget layout generated by the PyQt5 UI code generator from 'raw/definition.ui'.

    Builds a dialog holding a text browser (``definitionView``) above a
    button row with ``okButton``, ``pronounceButton`` and
    ``dialogBackground``.
    """

    def setupUi(self, Dialog):
        """Create the widgets and layouts on *Dialog* and store them on self."""
        Dialog.setObjectName("Dialog")
        Dialog.resize(729, 318)
        self.gridLayout = QtWidgets.QGridLayout(Dialog)
        self.gridLayout.setObjectName("gridLayout")
        self.verticalLayout = QtWidgets.QVBoxLayout()
        self.verticalLayout.setObjectName("verticalLayout")
        # Frameless text browser that fills the top of the dialog.
        self.definitionView = QtWidgets.QTextBrowser(Dialog)
        self.definitionView.setFrameShape(QtWidgets.QFrame.NoFrame)
        self.definitionView.setFrameShadow(QtWidgets.QFrame.Plain)
        self.definitionView.setObjectName("definitionView")
        self.verticalLayout.addWidget(self.definitionView)
        # Button row: spacer, OK, pronounce, spacer, background.
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout.addItem(spacerItem)
        self.okButton = QtWidgets.QPushButton(Dialog)
        self.okButton.setText("")
        icon = QtGui.QIcon()
        icon.addPixmap(QtGui.QPixmap(":/images/checkmark.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        self.okButton.setIcon(icon)
        self.okButton.setIconSize(QtCore.QSize(24, 24))
        self.okButton.setFlat(True)
        self.okButton.setObjectName("okButton")
        self.horizontalLayout.addWidget(self.okButton)
        self.pronounceButton = QtWidgets.QPushButton(Dialog)
        self.pronounceButton.setText("")
        icon1 = QtGui.QIcon()
        icon1.addPixmap(QtGui.QPixmap(":/images/QMPlay2.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        self.pronounceButton.setIcon(icon1)
        self.pronounceButton.setIconSize(QtCore.QSize(24, 24))
        self.pronounceButton.setFlat(True)
        self.pronounceButton.setObjectName("pronounceButton")
        self.horizontalLayout.addWidget(self.pronounceButton)
        spacerItem1 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout.addItem(spacerItem1)
        self.dialogBackground = QtWidgets.QPushButton(Dialog)
        self.dialogBackground.setText("")
        icon2 = QtGui.QIcon()
        icon2.addPixmap(QtGui.QPixmap(":/images/color.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        self.dialogBackground.setIcon(icon2)
        self.dialogBackground.setIconSize(QtCore.QSize(27, 27))
        self.dialogBackground.setFlat(True)
        self.dialogBackground.setObjectName("dialogBackground")
        self.horizontalLayout.addWidget(self.dialogBackground)
        self.verticalLayout.addLayout(self.horizontalLayout)
        self.gridLayout.addLayout(self.verticalLayout, 0, 0, 1, 1)

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Set the translatable window title and tooltips."""
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Dialog"))
        # NOTE(review): "WERDS" looks like a placeholder tooltip - confirm
        # against raw/definition.ui.
        self.okButton.setToolTip(_translate("Dialog", "WERDS"))
        self.pronounceButton.setToolTip(_translate("Dialog", "Play pronunciation of root word"))

View File

@@ -0,0 +1,96 @@
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'raw/main.ui'
#
# Created by: PyQt5 UI code generator 5.9.2
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
    """Widget layout generated by the PyQt5 UI code generator from 'raw/main.ui'.

    Builds a main window whose central widget is a closable-tab widget.
    Its first tab holds a stacked widget with two pages: an icon-mode
    list view (``listView``) and a table view (``tableView``).
    """

    def setupUi(self, MainWindow):
        """Create the widgets and layouts on *MainWindow* and store them on self."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1049, 720)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.gridLayout = QtWidgets.QGridLayout(self.centralwidget)
        self.gridLayout.setObjectName("gridLayout")
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.tabWidget = QtWidgets.QTabWidget(self.centralwidget)
        self.tabWidget.setTabsClosable(True)
        self.tabWidget.setObjectName("tabWidget")
        self.tab = QtWidgets.QWidget()
        self.tab.setObjectName("tab")
        self.gridLayout_2 = QtWidgets.QGridLayout(self.tab)
        self.gridLayout_2.setObjectName("gridLayout_2")
        self.stackedWidget = QtWidgets.QStackedWidget(self.tab)
        self.stackedWidget.setFrameShape(QtWidgets.QFrame.NoFrame)
        self.stackedWidget.setLineWidth(0)
        self.stackedWidget.setObjectName("stackedWidget")
        # First stacked page: icon-mode list view.
        self.listPage = QtWidgets.QWidget()
        self.listPage.setObjectName("listPage")
        self.gridLayout_4 = QtWidgets.QGridLayout(self.listPage)
        self.gridLayout_4.setContentsMargins(0, 0, 0, 0)
        self.gridLayout_4.setSpacing(0)
        self.gridLayout_4.setObjectName("gridLayout_4")
        self.listView = QtWidgets.QListView(self.listPage)
        self.listView.setFrameShape(QtWidgets.QFrame.NoFrame)
        self.listView.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
        self.listView.setProperty("showDropIndicator", False)
        self.listView.setSelectionMode(QtWidgets.QAbstractItemView.ExtendedSelection)
        self.listView.setMovement(QtWidgets.QListView.Static)
        self.listView.setProperty("isWrapping", True)
        self.listView.setResizeMode(QtWidgets.QListView.Fixed)
        self.listView.setLayoutMode(QtWidgets.QListView.SinglePass)
        self.listView.setViewMode(QtWidgets.QListView.IconMode)
        self.listView.setUniformItemSizes(True)
        self.listView.setWordWrap(True)
        self.listView.setObjectName("listView")
        self.gridLayout_4.addWidget(self.listView, 0, 0, 1, 1)
        self.stackedWidget.addWidget(self.listPage)
        # Second stacked page: sortable table view that selects whole rows.
        self.tablePage = QtWidgets.QWidget()
        self.tablePage.setObjectName("tablePage")
        self.gridLayout_3 = QtWidgets.QGridLayout(self.tablePage)
        self.gridLayout_3.setContentsMargins(0, 0, 0, 0)
        self.gridLayout_3.setSpacing(0)
        self.gridLayout_3.setObjectName("gridLayout_3")
        self.tableView = QtWidgets.QTableView(self.tablePage)
        self.tableView.setFrameShape(QtWidgets.QFrame.Box)
        self.tableView.setFrameShadow(QtWidgets.QFrame.Plain)
        self.tableView.setSizeAdjustPolicy(QtWidgets.QAbstractScrollArea.AdjustToContentsOnFirstShow)
        self.tableView.setEditTriggers(QtWidgets.QAbstractItemView.DoubleClicked|QtWidgets.QAbstractItemView.EditKeyPressed|QtWidgets.QAbstractItemView.SelectedClicked)
        self.tableView.setAlternatingRowColors(True)
        self.tableView.setSelectionBehavior(QtWidgets.QAbstractItemView.SelectRows)
        self.tableView.setGridStyle(QtCore.Qt.NoPen)
        self.tableView.setSortingEnabled(True)
        self.tableView.setWordWrap(False)
        self.tableView.setObjectName("tableView")
        self.tableView.horizontalHeader().setVisible(True)
        self.tableView.verticalHeader().setVisible(False)
        self.gridLayout_3.addWidget(self.tableView, 0, 0, 1, 1)
        self.stackedWidget.addWidget(self.tablePage)
        self.gridLayout_2.addWidget(self.stackedWidget, 0, 0, 1, 1)
        self.tabWidget.addTab(self.tab, "")
        self.horizontalLayout.addWidget(self.tabWidget)
        self.gridLayout.addLayout(self.horizontalLayout, 0, 0, 1, 1)
        MainWindow.setCentralWidget(self.centralwidget)
        self.statusBar = QtWidgets.QStatusBar(MainWindow)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Preferred)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.statusBar.sizePolicy().hasHeightForWidth())
        self.statusBar.setSizePolicy(sizePolicy)
        self.statusBar.setMinimumSize(QtCore.QSize(0, 0))
        self.statusBar.setObjectName("statusBar")
        MainWindow.setStatusBar(self.statusBar)

        self.retranslateUi(MainWindow)
        self.tabWidget.setCurrentIndex(0)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the translatable window title and the first tab's label."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "Lector"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab), _translate("MainWindow", "Library"))

View File

@@ -0,0 +1,99 @@
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'raw/metadata.ui'
#
# Created by: PyQt5 UI code generator 5.10.1
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_Dialog(object):
    """Widget layout generated by the PyQt5 UI code generator from 'raw/metadata.ui'.

    Builds a modal dialog with a cover view on the left and, on the
    right, line edits for title, author, year and tags above a button
    row with ``okButton``, ``cancelButton`` and ``dialogBackground``.
    """

    def setupUi(self, Dialog):
        """Create the widgets and layouts on *Dialog* and store them on self."""
        Dialog.setObjectName("Dialog")
        Dialog.resize(728, 234)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(Dialog.sizePolicy().hasHeightForWidth())
        Dialog.setSizePolicy(sizePolicy)
        Dialog.setMaximumSize(QtCore.QSize(16777215, 16777215))
        Dialog.setModal(True)
        self.gridLayout = QtWidgets.QGridLayout(Dialog)
        self.gridLayout.setObjectName("gridLayout")
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        # Cover view, capped at 165 px wide.
        self.coverView = QtWidgets.QGraphicsView(Dialog)
        self.coverView.setMaximumSize(QtCore.QSize(165, 16777215))
        self.coverView.setFrameShadow(QtWidgets.QFrame.Plain)
        self.coverView.setObjectName("coverView")
        self.horizontalLayout.addWidget(self.coverView)
        # Metadata line edits, stacked vertically.
        self.verticalLayout = QtWidgets.QVBoxLayout()
        self.verticalLayout.setObjectName("verticalLayout")
        self.titleLine = QtWidgets.QLineEdit(Dialog)
        self.titleLine.setObjectName("titleLine")
        self.verticalLayout.addWidget(self.titleLine)
        self.authorLine = QtWidgets.QLineEdit(Dialog)
        self.authorLine.setObjectName("authorLine")
        self.verticalLayout.addWidget(self.authorLine)
        self.yearLine = QtWidgets.QLineEdit(Dialog)
        self.yearLine.setObjectName("yearLine")
        self.verticalLayout.addWidget(self.yearLine)
        self.tagsLine = QtWidgets.QLineEdit(Dialog)
        self.tagsLine.setMinimumSize(QtCore.QSize(0, 0))
        self.tagsLine.setObjectName("tagsLine")
        self.verticalLayout.addWidget(self.tagsLine)
        # Button row: spacer, OK, cancel, spacer, background.
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout_2.addItem(spacerItem)
        self.okButton = QtWidgets.QPushButton(Dialog)
        self.okButton.setText("")
        icon = QtGui.QIcon()
        icon.addPixmap(QtGui.QPixmap(":/images/checkmark.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        self.okButton.setIcon(icon)
        self.okButton.setIconSize(QtCore.QSize(24, 24))
        self.okButton.setFlat(True)
        self.okButton.setObjectName("okButton")
        self.horizontalLayout_2.addWidget(self.okButton)
        self.cancelButton = QtWidgets.QPushButton(Dialog)
        self.cancelButton.setText("")
        icon1 = QtGui.QIcon()
        icon1.addPixmap(QtGui.QPixmap(":/images/error.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        self.cancelButton.setIcon(icon1)
        self.cancelButton.setIconSize(QtCore.QSize(24, 24))
        self.cancelButton.setFlat(True)
        self.cancelButton.setObjectName("cancelButton")
        self.horizontalLayout_2.addWidget(self.cancelButton)
        spacerItem1 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout_2.addItem(spacerItem1)
        self.dialogBackground = QtWidgets.QPushButton(Dialog)
        self.dialogBackground.setText("")
        icon2 = QtGui.QIcon()
        icon2.addPixmap(QtGui.QPixmap(":/images/color.svg"), QtGui.QIcon.Normal, QtGui.QIcon.Off)
        self.dialogBackground.setIcon(icon2)
        self.dialogBackground.setIconSize(QtCore.QSize(27, 27))
        self.dialogBackground.setFlat(True)
        self.dialogBackground.setObjectName("dialogBackground")
        self.horizontalLayout_2.addWidget(self.dialogBackground)
        self.verticalLayout.addLayout(self.horizontalLayout_2)
        self.horizontalLayout.addLayout(self.verticalLayout)
        self.gridLayout.addLayout(self.horizontalLayout, 0, 0, 1, 1)

        self.retranslateUi(Dialog)
        QtCore.QMetaObject.connectSlotsByName(Dialog)

    def retranslateUi(self, Dialog):
        """Set the translatable window title, tooltips and placeholder texts."""
        _translate = QtCore.QCoreApplication.translate
        Dialog.setWindowTitle(_translate("Dialog", "Edit metadata"))
        self.coverView.setToolTip(_translate("Dialog", "Cover (click to change)"))
        self.titleLine.setToolTip(_translate("Dialog", "Title"))
        self.titleLine.setPlaceholderText(_translate("Dialog", "Title"))
        self.authorLine.setToolTip(_translate("Dialog", "Author"))
        self.authorLine.setPlaceholderText(_translate("Dialog", "Author"))
        self.yearLine.setToolTip(_translate("Dialog", "Year"))
        self.yearLine.setPlaceholderText(_translate("Dialog", "Year"))
        self.tagsLine.setToolTip(_translate("Dialog", "Tags (comma separated)"))
        self.tagsLine.setPlaceholderText(_translate("Dialog", "Tags"))
        self.okButton.setToolTip(_translate("Dialog", "OK"))
        self.cancelButton.setToolTip(_translate("Dialog", "Cancel"))

View File

@@ -0,0 +1,120 @@
# Modified from: http://drumcoder.co.uk/blog/2010/nov/16/python-code-generate-svg-pie-chart/
import os
import math
from PyQt5 import QtGui
def generate_pie(progress_percent, temp_dir=None):
    """Build a two-slice SVG pie chart visualising a progress percentage.

    The first slice (gradient ``myRadialGradientGreen``) covers
    ``progress_percent`` of the circle and the second slice (gradient
    ``myRadialGradientOrange``) covers the remainder. Drawing starts at the
    west point of the circle and proceeds clockwise.

    Args:
        progress_percent: Progress value. It is converted with ``int()`` and
            clamped to the range 0..100 so a slice can never be negative.
        temp_dir: Optional directory. When truthy, the SVG is written to
            ``<temp_dir>/lector_progress.svg`` instead of being returned.

    Returns:
        The SVG markup as a string when ``temp_dir`` is falsy, otherwise
        ``None`` (the markup is only written to disk).
    """
    progress_percent = max(0, min(100, int(progress_percent)))
    lSlices = (progress_percent, 100 - progress_percent)  # percentages to show in pie

    lOffsetX = 150
    lOffsetY = 150
    lRadius = 100

    def endpoint(pAngleInRadians, pRadius, pCentreOffsetX, pCentreOffsetY):
        """
        Calculate position of point on circle given an angle, a radius,
        and the location of the center of the circle
        Zero line points west.

        The result is rounded to the nearest integer: formatting the raw
        floats with "%d" would truncate, turning a value such as
        149.99999999999997 into 149.
        """
        lX = pCentreOffsetX - (pRadius * math.cos(pAngleInRadians))
        lY = pCentreOffsetY - (pRadius * math.sin(pAngleInRadians))
        return (int(round(lX)), int(round(lY)))

    # One gradient per slice, in slice order.
    GRADIENTS = ('myRadialGradientGreen', 'myRadialGradientOrange')
    DEGREES_IN_CIRCLE = 360.0

    lTotalSlices = sum(lSlices)
    lCurrentAngle = 0
    lShapes = []

    for lSlice, lGradient in zip(lSlices, GRADIENTS):
        if lSlice == 0:
            # Nothing to draw, and the running angle does not move.
            continue

        lStyle = "stroke:#097b8c; fill:url(#%s);" % lGradient

        if lSlice == lTotalSlices:
            # A 360 degree arc starts and ends on the same point, and SVG
            # omits an arc whose endpoints coincide, so the slice would not
            # be filled at all. Draw the full disc explicitly instead.
            lShapes.append("<circle cx='%d' cy='%d' r='%d' style='%s'/>" % (
                lOffsetX, lOffsetY, lRadius, lStyle))
            continue

        lLineOneX, lLineOneY = endpoint(lCurrentAngle, lRadius, lOffsetX, lOffsetY)

        lDegrees = (DEGREES_IN_CIRCLE / lTotalSlices) * lSlice
        lCurrentAngle += math.radians(lDegrees)
        lLineTwoX, lLineTwoY = endpoint(lCurrentAngle, lRadius, lOffsetX, lOffsetY)

        # large-arc-flag: 1 when the slice spans more than half the circle.
        lRoute = 1 if lDegrees > 180 else 0

        # Centre -> arc start, arc to the slice end, then back to the centre.
        lPath = "M%d,%d L%d,%d A%d,%d 0 %d,1 %d %d L%d,%d" % (
            lOffsetX, lOffsetY, lLineOneX, lLineOneY,
            lRadius, lRadius, lRoute, lLineTwoX, lLineTwoY,
            lOffsetX, lOffsetY)
        lShapes.append("<path d='%s' style='%s'/>" % (lPath, lStyle))

    lSvgPath = "".join(lShapes)

    lSvg = """
<svg xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink">
<defs>
<radialGradient id="myRadialGradientGreen" r="65%%" cx="0" cy="0" spreadMethod="pad">
<stop offset="0%%" stop-color="#11e0ff" stop-opacity="1"/>
<stop offset="100%%" stop-color="#11e0ff" stop-opacity="1" />
</radialGradient>
</defs>
<defs>
<radialGradient id="myRadialGradientOrange" r="65%%" cx="0" cy="0" spreadMethod="pad">
<stop offset="0%%" stop-color="#097b8c" stop-opacity="1"/>
<stop offset="100%%" stop-color="#097b8c" stop-opacity="1" />
</radialGradient>
</defs>
%s
<!-- <circle cx="%d" cy="%d" r="100" style="stroke:#097b8c; fill:none;"/> -->
</svg>
""" % (lSvgPath, lOffsetX, lOffsetY)

    if temp_dir:
        svg_path = os.path.join(temp_dir, 'lector_progress.svg')
        # The context manager closes the file even if the write fails.
        with open(svg_path, 'w') as lFile:
            lFile.write(lSvg)
        return None

    return lSvg
def pixmapper(current_chapter, total_chapters, temp_dir, size):
    """Return the emblem pixmap that matches a book's reading position.

    Three cases are distinguished:

    * ``current_chapter == -1``: the ``error.svg`` resource icon. The
      original note says this value means the file does not exist.
    * ``current_chapter == total_chapters``: the ``checkmark.svg`` resource
      icon. The original note describes this case as "the file is unread".
      NOTE(review): a check mark suggests "finished" - confirm.
    * anything else: a progress pie rendered by ``generate_pie`` into
      ``<temp_dir>/lector_progress.svg`` and loaded back from there.

    Args:
        current_chapter: Current position, or -1 (see above).
        total_chapters: Chapter count used as the divisor for the percentage.
        temp_dir: Directory in which the temporary pie SVG is written.
        size: Value passed to ``QIcon.pixmap``; the pie uses ``size - 4``.

    Returns:
        The pixmap produced by ``QIcon.pixmap`` for the selected emblem.
    """
    if current_chapter == -1:
        return QtGui.QIcon(':/images/error.svg').pixmap(size)

    if current_chapter == total_chapters:
        return QtGui.QIcon(':/images/checkmark.svg').pixmap(size)

    # TODO
    # See if saving the svg to disk can be avoided
    # Shift to lines to track progress
    # Maybe make the alignment a little more uniform across emblems
    percent_done = int(current_chapter * 100 / total_chapters)
    generate_pie(percent_done, temp_dir)
    pie_file = os.path.join(temp_dir, 'lector_progress.svg')

    # The -4 looks more proportional
    return QtGui.QIcon(pie_file).pixmap(size - 4)

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@@ -0,0 +1,7 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" version="1">
<path fill="#4f4f4f" d="m3.8102 2.0006a1.7691 1.5694 0 0 0 -1.8102 1.5688v16.863a1.7691 1.5694 0 0 0 2.6536 1.358l8.2314-4.215 8.231-4.215a1.7691 1.5694 0 0 0 0 -2.7182l-8.231-4.2161-8.2314-4.2156a1.7691 1.5694 0 0 0 -0.8434 -0.2097z"/>
<path opacity=".2" d="m5 5.5v14l6.832-3.679 6.168-3.321-6.168-3.3213-6.832-3.6787z"/>
<path fill="#00e382" d="m5 5v14l6.832-3.679 6.168-3.321-6.168-3.3213-6.832-3.6787z"/>
<path fill="#fff" opacity=".1" d="m3.8105 2.0002a1.7691 1.5694 0 0 0 -1.8105 1.5693v0.5a1.7691 1.5694 0 0 1 1.8105 -1.5693 1.7691 1.5694 0 0 1 0.8428 0.2099l8.2314 4.2158 8.2305 4.2158a1.7691 1.5694 0 0 1 0.86133 1.1074 1.7691 1.5694 0 0 0 -0.86133 -1.6074l-8.23-4.2161-8.2317-4.2158a1.7691 1.5694 0 0 0 -0.8428 -0.2099z"/>
<path opacity=".2" d="m21.977 12.249a1.7691 1.5694 0 0 1 -0.86133 1.1104l-8.2305 4.2158-8.2314 4.2148a1.7691 1.5694 0 0 1 -2.654 -1.358v0.5a1.7691 1.5694 0 0 0 2.6533 1.3584l8.2314-4.2148 8.2305-4.2158a1.7691 1.5694 0 0 0 0.86133 -1.6104z"/>
</svg>

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 663 B

View File

@@ -0,0 +1,6 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" version="1.1" viewBox="0 0 16 16">
<g transform="translate(0 -1036.4)">
<circle fill="#4caf50" cx="8" cy="1044.4" r="7"/>
<path fill="#fff" d="m11.535 1040.8-4.2422 4.2422-2.8281-2.8281-1.4141 1.4141 2.8281 2.8281 1.4141 1.4141 5.6563-5.6563-1.4141-1.4141z"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 341 B

View File

@@ -0,0 +1,426 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="22"
height="22"
version="1.1"
viewBox="0 0 22 22"
id="svg90"
sodipodi:docname="color-management.svg"
inkscape:version="0.92.2 5c3e80d, 2017-08-06">
<metadata
id="metadata94">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<sodipodi:namedview
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1"
objecttolerance="10"
gridtolerance="10"
guidetolerance="10"
inkscape:pageopacity="0"
inkscape:pageshadow="2"
inkscape:window-width="1225"
inkscape:window-height="767"
id="namedview92"
showgrid="false"
inkscape:zoom="10.727273"
inkscape:cx="0.27966102"
inkscape:cy="11"
inkscape:window-x="578"
inkscape:window-y="148"
inkscape:window-maximized="0"
inkscape:current-layer="svg90" />
<defs
id="defs46">
<linearGradient
id="linearGradient4202"
x1="1"
x2="8"
y1="8"
y2="1"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
stop-color="#e81877"
id="stop2" />
<stop
offset=".313"
stop-color="#dd1d8c"
id="stop4" />
<stop
offset=".563"
stop-color="#6d57b1"
id="stop6" />
<stop
offset=".75"
stop-color="#2a78c1"
id="stop8" />
<stop
offset="1"
stop-color="#018dcb"
id="stop10" />
</linearGradient>
<linearGradient
id="linearGradient4204"
x1="1"
x2="8"
y1="8"
y2="15"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
stop-color="#e51561"
id="stop13" />
<stop
offset=".313"
stop-color="#e4156c"
id="stop15" />
<stop
offset=".563"
stop-color="#e71e2c"
id="stop17" />
<stop
offset=".75"
stop-color="#e8301e"
id="stop19" />
<stop
offset="1"
stop-color="#e6320e"
id="stop21" />
</linearGradient>
<linearGradient
id="linearGradient4206"
x1="15"
x2="8"
y1="8"
y2="1"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
stop-color="#bec900"
id="stop24" />
<stop
offset=".313"
stop-color="#9ec80a"
id="stop26" />
<stop
offset=".563"
stop-color="#71b93d"
id="stop28" />
<stop
offset=".75"
stop-color="#35a48f"
id="stop30" />
<stop
offset="1"
stop-color="#018fca"
id="stop32" />
</linearGradient>
<linearGradient
id="linearGradient4208"
x1="15"
x2="8"
y1="8"
y2="15"
gradientUnits="userSpaceOnUse">
<stop
offset="0"
stop-color="#c1cc00"
id="stop35" />
<stop
offset=".313"
stop-color="#dfcd00"
id="stop37" />
<stop
offset=".563"
stop-color="#f0cc00"
id="stop39" />
<stop
offset=".75"
stop-color="#fd8c08"
id="stop41" />
<stop
offset="1"
stop-color="#f25c13"
id="stop43" />
</linearGradient>
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4206"
id="linearGradient4590"
gradientUnits="userSpaceOnUse"
x1="15"
y1="8"
x2="8"
y2="1"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4202"
id="linearGradient4592"
gradientUnits="userSpaceOnUse"
x1="1"
y1="8"
x2="8"
y2="1"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4208"
id="linearGradient4594"
gradientUnits="userSpaceOnUse"
x1="15"
y1="8"
x2="8"
y2="15"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4204"
id="linearGradient4596"
gradientUnits="userSpaceOnUse"
x1="1"
y1="8"
x2="8"
y2="15"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4206"
id="linearGradient4598"
gradientUnits="userSpaceOnUse"
x1="15"
y1="8"
x2="8"
y2="1"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4202"
id="linearGradient4600"
gradientUnits="userSpaceOnUse"
x1="1"
y1="8"
x2="8"
y2="1"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4208"
id="linearGradient4602"
gradientUnits="userSpaceOnUse"
x1="15"
y1="8"
x2="8"
y2="15"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4204"
id="linearGradient4604"
gradientUnits="userSpaceOnUse"
x1="1"
y1="8"
x2="8"
y2="15"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4206"
id="linearGradient4606"
gradientUnits="userSpaceOnUse"
x1="15"
y1="8"
x2="8"
y2="1"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4202"
id="linearGradient4608"
gradientUnits="userSpaceOnUse"
x1="1"
y1="8"
x2="8"
y2="1"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4208"
id="linearGradient4610"
gradientUnits="userSpaceOnUse"
x1="15"
y1="8"
x2="8"
y2="15"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4204"
id="linearGradient4612"
gradientUnits="userSpaceOnUse"
x1="1"
y1="8"
x2="8"
y2="15"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4206"
id="linearGradient4614"
gradientUnits="userSpaceOnUse"
x1="15"
y1="8"
x2="8"
y2="1"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4202"
id="linearGradient4616"
gradientUnits="userSpaceOnUse"
x1="1"
y1="8"
x2="8"
y2="1"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4208"
id="linearGradient4618"
gradientUnits="userSpaceOnUse"
x1="15"
y1="8"
x2="8"
y2="15"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
<linearGradient
inkscape:collect="always"
xlink:href="#linearGradient4204"
id="linearGradient4620"
gradientUnits="userSpaceOnUse"
x1="1"
y1="8"
x2="8"
y2="15"
gradientTransform="matrix(0.80906149,0,0,0.80906149,1.5275081,1.5275081)" />
</defs>
<g
transform="matrix(1.236,0,0,1.236,1.112,1.112)"
id="g88">
<g
transform="rotate(15,8,8)"
id="g56">
<path
style="fill:url(#linearGradient4590)"
d="m 8,1 v 7 h 7 A 7,7 0 0 0 8,1 Z"
id="path48"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4592)"
d="M 8,1 A 7,7 0 0 0 1,8 h 7 z"
id="path50"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4594)"
d="m 8,8 v 7 a 7,7 0 0 0 7,-7 z"
id="path52"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4596)"
d="m 1,8 a 7,7 0 0 0 7,7 V 8 Z"
id="path54"
inkscape:connector-curvature="0" />
</g>
<g
style="opacity:0.7"
id="g66">
<path
style="fill:url(#linearGradient4598)"
d="m 8,1 v 7 h 7 A 7,7 0 0 0 8,1 Z"
id="path58"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4600)"
d="M 8,1 A 7,7 0 0 0 1,8 h 7 z"
id="path60"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4602)"
d="m 8,8 v 7 a 7,7 0 0 0 7,-7 z"
id="path62"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4604)"
d="m 1,8 a 7,7 0 0 0 7,7 V 8 Z"
id="path64"
inkscape:connector-curvature="0" />
</g>
<g
style="opacity:0.35"
transform="rotate(45,8,8)"
id="g76">
<path
style="fill:url(#linearGradient4606)"
d="m 8,1 v 7 h 7 A 7,7 0 0 0 8,1 Z"
id="path68"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4608)"
d="M 8,1 A 7,7 0 0 0 1,8 h 7 z"
id="path70"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4610)"
d="m 8,8 v 7 a 7,7 0 0 0 7,-7 z"
id="path72"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4612)"
d="m 1,8 a 7,7 0 0 0 7,7 V 8 Z"
id="path74"
inkscape:connector-curvature="0" />
</g>
<g
style="opacity:0.3"
transform="rotate(60,8,8)"
id="g86">
<path
style="fill:url(#linearGradient4614)"
d="m 8,1 v 7 h 7 A 7,7 0 0 0 8,1 Z"
id="path78"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4616)"
d="M 8,1 A 7,7 0 0 0 1,8 h 7 z"
id="path80"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4618)"
d="m 8,8 v 7 a 7,7 0 0 0 7,-7 z"
id="path82"
inkscape:connector-curvature="0" />
<path
style="fill:url(#linearGradient4620)"
d="m 1,8 a 7,7 0 0 0 7,7 V 8 Z"
id="path84"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 12 KiB

View File

@@ -0,0 +1,133 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>729</width>
<height>318</height>
</rect>
</property>
<property name="windowTitle">
<string>Dialog</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QTextBrowser" name="definitionView">
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="frameShadow">
<enum>QFrame::Plain</enum>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="okButton">
<property name="toolTip">
<string>WERDS</string>
</property>
<property name="text">
<string/>
</property>
<property name="icon">
<iconset resource="resources.qrc">
<normaloff>:/images/checkmark.svg</normaloff>:/images/checkmark.svg</iconset>
</property>
<property name="iconSize">
<size>
<width>24</width>
<height>24</height>
</size>
</property>
<property name="flat">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="pronounceButton">
<property name="toolTip">
<string>Play pronunciation of root word</string>
</property>
<property name="text">
<string/>
</property>
<property name="icon">
<iconset resource="resources.qrc">
<normaloff>:/images/QMPlay2.svg</normaloff>:/images/QMPlay2.svg</iconset>
</property>
<property name="iconSize">
<size>
<width>24</width>
<height>24</height>
</size>
</property>
<property name="flat">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="dialogBackground">
<property name="text">
<string/>
</property>
<property name="icon">
<iconset resource="resources.qrc">
<normaloff>:/images/color.svg</normaloff>:/images/color.svg</iconset>
</property>
<property name="iconSize">
<size>
<width>27</width>
<height>27</height>
</size>
</property>
<property name="flat">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</item>
</layout>
</widget>
<resources>
<include location="resources.qrc"/>
</resources>
<connections/>
</ui>

View File

@@ -0,0 +1,9 @@
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" version="1.1" viewBox="0 0 16 16">
<g transform="translate(0 -1036.4)">
<circle fill="#f44336" cx="8" cy="1044.4" r="7"/>
<g fill="#fff" transform="matrix(.70711 .70711 -.70711 .70711 740.82 300.23)">
<rect width="2" height="10" x="1043.4" y="-13" transform="rotate(90)"/>
<rect width="2" height="10" x="-9" y="-1049.4" transform="scale(-1)"/>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 438 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 KiB

View File

@@ -0,0 +1,185 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>MainWindow</class>
<widget class="QMainWindow" name="MainWindow">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>1049</width>
<height>720</height>
</rect>
</property>
<property name="windowTitle">
<string>Lector</string>
</property>
<widget class="QWidget" name="centralwidget">
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QTabWidget" name="tabWidget">
<property name="currentIndex">
<number>0</number>
</property>
<property name="tabsClosable">
<bool>true</bool>
</property>
<widget class="QWidget" name="tab">
<attribute name="title">
<string>Library</string>
</attribute>
<layout class="QGridLayout" name="gridLayout_2">
<item row="0" column="0">
<widget class="QStackedWidget" name="stackedWidget">
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="lineWidth">
<number>0</number>
</property>
<widget class="QWidget" name="listPage">
<layout class="QGridLayout" name="gridLayout_4">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<property name="spacing">
<number>0</number>
</property>
<item row="0" column="0">
<widget class="QListView" name="listView">
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="editTriggers">
<set>QAbstractItemView::NoEditTriggers</set>
</property>
<property name="showDropIndicator" stdset="0">
<bool>false</bool>
</property>
<property name="selectionMode">
<enum>QAbstractItemView::ExtendedSelection</enum>
</property>
<property name="movement">
<enum>QListView::Static</enum>
</property>
<property name="isWrapping" stdset="0">
<bool>true</bool>
</property>
<property name="resizeMode">
<enum>QListView::Fixed</enum>
</property>
<property name="layoutMode">
<enum>QListView::SinglePass</enum>
</property>
<property name="spacing">
<number>0</number>
</property>
<property name="viewMode">
<enum>QListView::IconMode</enum>
</property>
<property name="uniformItemSizes">
<bool>true</bool>
</property>
<property name="wordWrap">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</widget>
<widget class="QWidget" name="tablePage">
<layout class="QGridLayout" name="gridLayout_3">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<property name="spacing">
<number>0</number>
</property>
<item row="0" column="0">
<widget class="QTableView" name="tableView">
<property name="frameShape">
<enum>QFrame::Box</enum>
</property>
<property name="frameShadow">
<enum>QFrame::Plain</enum>
</property>
<property name="sizeAdjustPolicy">
<enum>QAbstractScrollArea::AdjustToContentsOnFirstShow</enum>
</property>
<property name="editTriggers">
<set>QAbstractItemView::DoubleClicked|QAbstractItemView::EditKeyPressed|QAbstractItemView::SelectedClicked</set>
</property>
<property name="alternatingRowColors">
<bool>true</bool>
</property>
<property name="selectionBehavior">
<enum>QAbstractItemView::SelectRows</enum>
</property>
<property name="gridStyle">
<enum>Qt::NoPen</enum>
</property>
<property name="sortingEnabled">
<bool>true</bool>
</property>
<property name="wordWrap">
<bool>false</bool>
</property>
<attribute name="horizontalHeaderVisible">
<bool>false</bool>
</attribute>
<attribute name="verticalHeaderVisible">
<bool>false</bool>
</attribute>
</widget>
</item>
</layout>
</widget>
</widget>
</item>
</layout>
</widget>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
<widget class="QStatusBar" name="statusBar">
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="minimumSize">
<size>
<width>0</width>
<height>0</height>
</size>
</property>
</widget>
</widget>
<resources>
<include location="resources.qrc"/>
</resources>
<connections/>
</ui>

View File

@@ -0,0 +1,204 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>728</width>
<height>234</height>
</rect>
</property>
<property name="sizePolicy">
<sizepolicy hsizetype="Expanding" vsizetype="Expanding">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="maximumSize">
<size>
<width>16777215</width>
<height>16777215</height>
</size>
</property>
<property name="windowTitle">
<string>Edit metadata</string>
</property>
<property name="modal">
<bool>true</bool>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QGraphicsView" name="coverView">
<property name="maximumSize">
<size>
<width>165</width>
<height>16777215</height>
</size>
</property>
<property name="toolTip">
<string>Cover (click to change)</string>
</property>
<property name="frameShadow">
<enum>QFrame::Plain</enum>
</property>
</widget>
</item>
<item>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QLineEdit" name="titleLine">
<property name="toolTip">
<string>Title</string>
</property>
<property name="placeholderText">
<string>Title</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="authorLine">
<property name="toolTip">
<string>Author</string>
</property>
<property name="placeholderText">
<string>Author</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="yearLine">
<property name="toolTip">
<string>Year</string>
</property>
<property name="placeholderText">
<string>Year</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="tagsLine">
<property name="minimumSize">
<size>
<width>0</width>
<height>0</height>
</size>
</property>
<property name="toolTip">
<string>Tags (comma separated)</string>
</property>
<property name="placeholderText">
<string>Tags</string>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="okButton">
<property name="toolTip">
<string>OK</string>
</property>
<property name="text">
<string/>
</property>
<property name="icon">
<iconset resource="resources.qrc">
<normaloff>:/images/checkmark.svg</normaloff>:/images/checkmark.svg</iconset>
</property>
<property name="iconSize">
<size>
<width>24</width>
<height>24</height>
</size>
</property>
<property name="flat">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="cancelButton">
<property name="toolTip">
<string>Cancel</string>
</property>
<property name="text">
<string/>
</property>
<property name="icon">
<iconset resource="resources.qrc">
<normaloff>:/images/error.svg</normaloff>:/images/error.svg</iconset>
</property>
<property name="iconSize">
<size>
<width>24</width>
<height>24</height>
</size>
</property>
<property name="flat">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="dialogBackground">
<property name="text">
<string/>
</property>
<property name="icon">
<iconset resource="resources.qrc">
<normaloff>:/images/color.svg</normaloff>:/images/color.svg</iconset>
</property>
<property name="iconSize">
<size>
<width>27</width>
<height>27</height>
</size>
</property>
<property name="flat">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</item>
</layout>
</item>
</layout>
</widget>
<resources>
<include location="resources.qrc"/>
</resources>
<connections/>
</ui>

View File

@@ -0,0 +1,11 @@
<RCC>
<qresource prefix="images">
<file>QMPlay2.svg</file>
<file>color.svg</file>
<file>blank.png</file>
<file>gray-shadow.png</file>
<file>NotFound.png</file>
<file>checkmark.svg</file>
<file>error.svg</file>
</qresource>
</RCC>

View File

@@ -0,0 +1,157 @@
<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
<class>Dialog</class>
<widget class="QDialog" name="Dialog">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>1009</width>
<height>658</height>
</rect>
</property>
<property name="windowTitle">
<string>Settings</string>
</property>
<layout class="QGridLayout" name="gridLayout_3">
<item row="0" column="0">
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QGroupBox" name="groupBox_2">
<property name="title">
<string>Library</string>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<item row="0" column="0">
<widget class="QTreeView" name="treeView"/>
</item>
<item row="1" column="0">
<widget class="QTextBrowser" name="aboutBox">
<property name="openExternalLinks">
<bool>true</bool>
</property>
<property name="openLinks">
<bool>false</bool>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="groupBox">
<property name="title">
<string>Switches</string>
</property>
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QHBoxLayout" name="horizontalLayout_4">
<item>
<widget class="QCheckBox" name="refreshLibrary">
<property name="text">
<string>Startup: Refresh library</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="fileRemember">
<property name="text">
<string>Remember open files</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="autoTags">
<property name="text">
<string>Generate tags from files</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</item>
<item row="1" column="0">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QCheckBox" name="coverShadows">
<property name="text">
<string>Cover shadows</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="performCulling">
<property name="toolTip">
<string>Enabling reduces startup time and memory usage</string>
</property>
<property name="text">
<string>Load covers only when needed</string>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_3">
<item>
<widget class="QLabel" name="languageLabel">
<property name="sizePolicy">
<sizepolicy hsizetype="Maximum" vsizetype="Preferred">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="text">
<string>Dictionary:</string>
</property>
<property name="alignment">
<set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="languageBox"/>
</item>
</layout>
</item>
</layout>
</item>
</layout>
</widget>
</item>
</layout>
</item>
<item row="1" column="0">
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<widget class="QPushButton" name="okButton">
<property name="text">
<string>Scan Library</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="cancelButton">
<property name="text">
<string>Close</string>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="aboutButton">
<property name="text">
<string>About</string>
</property>
<property name="checkable">
<bool>true</bool>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
<resources/>
<connections/>
</ui>

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,109 @@
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'raw/settings.ui'
#
# Created by: PyQt5 UI code generator 5.10.1
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_Dialog(object):
def setupUi(self, Dialog):
Dialog.setObjectName("Dialog")
Dialog.resize(1009, 658)
self.gridLayout_3 = QtWidgets.QGridLayout(Dialog)
self.gridLayout_3.setObjectName("gridLayout_3")
self.verticalLayout_2 = QtWidgets.QVBoxLayout()
self.verticalLayout_2.setObjectName("verticalLayout_2")
self.groupBox_2 = QtWidgets.QGroupBox(Dialog)
self.groupBox_2.setObjectName("groupBox_2")
self.gridLayout_2 = QtWidgets.QGridLayout(self.groupBox_2)
self.gridLayout_2.setObjectName("gridLayout_2")
self.treeView = QtWidgets.QTreeView(self.groupBox_2)
self.treeView.setObjectName("treeView")
self.gridLayout_2.addWidget(self.treeView, 0, 0, 1, 1)
self.aboutBox = QtWidgets.QTextBrowser(self.groupBox_2)
self.aboutBox.setOpenExternalLinks(True)
self.aboutBox.setOpenLinks(False)
self.aboutBox.setObjectName("aboutBox")
self.gridLayout_2.addWidget(self.aboutBox, 1, 0, 1, 1)
self.verticalLayout_2.addWidget(self.groupBox_2)
self.groupBox = QtWidgets.QGroupBox(Dialog)
self.groupBox.setObjectName("groupBox")
self.gridLayout = QtWidgets.QGridLayout(self.groupBox)
self.gridLayout.setObjectName("gridLayout")
self.verticalLayout = QtWidgets.QVBoxLayout()
self.verticalLayout.setObjectName("verticalLayout")
self.horizontalLayout_4 = QtWidgets.QHBoxLayout()
self.horizontalLayout_4.setObjectName("horizontalLayout_4")
self.refreshLibrary = QtWidgets.QCheckBox(self.groupBox)
self.refreshLibrary.setObjectName("refreshLibrary")
self.horizontalLayout_4.addWidget(self.refreshLibrary)
self.fileRemember = QtWidgets.QCheckBox(self.groupBox)
self.fileRemember.setObjectName("fileRemember")
self.horizontalLayout_4.addWidget(self.fileRemember)
self.autoTags = QtWidgets.QCheckBox(self.groupBox)
self.autoTags.setObjectName("autoTags")
self.horizontalLayout_4.addWidget(self.autoTags)
self.verticalLayout.addLayout(self.horizontalLayout_4)
self.gridLayout.addLayout(self.verticalLayout, 0, 0, 1, 1)
self.horizontalLayout = QtWidgets.QHBoxLayout()
self.horizontalLayout.setObjectName("horizontalLayout")
self.coverShadows = QtWidgets.QCheckBox(self.groupBox)
self.coverShadows.setObjectName("coverShadows")
self.horizontalLayout.addWidget(self.coverShadows)
self.performCulling = QtWidgets.QCheckBox(self.groupBox)
self.performCulling.setObjectName("performCulling")
self.horizontalLayout.addWidget(self.performCulling)
self.horizontalLayout_3 = QtWidgets.QHBoxLayout()
self.horizontalLayout_3.setObjectName("horizontalLayout_3")
self.languageLabel = QtWidgets.QLabel(self.groupBox)
sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Maximum, QtWidgets.QSizePolicy.Preferred)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
sizePolicy.setHeightForWidth(self.languageLabel.sizePolicy().hasHeightForWidth())
self.languageLabel.setSizePolicy(sizePolicy)
self.languageLabel.setAlignment(QtCore.Qt.AlignLeading|QtCore.Qt.AlignLeft|QtCore.Qt.AlignVCenter)
self.languageLabel.setObjectName("languageLabel")
self.horizontalLayout_3.addWidget(self.languageLabel)
self.languageBox = QtWidgets.QComboBox(self.groupBox)
self.languageBox.setObjectName("languageBox")
self.horizontalLayout_3.addWidget(self.languageBox)
self.horizontalLayout.addLayout(self.horizontalLayout_3)
self.gridLayout.addLayout(self.horizontalLayout, 1, 0, 1, 1)
self.verticalLayout_2.addWidget(self.groupBox)
self.gridLayout_3.addLayout(self.verticalLayout_2, 0, 0, 1, 1)
self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
self.horizontalLayout_2.setObjectName("horizontalLayout_2")
self.okButton = QtWidgets.QPushButton(Dialog)
self.okButton.setObjectName("okButton")
self.horizontalLayout_2.addWidget(self.okButton)
self.cancelButton = QtWidgets.QPushButton(Dialog)
self.cancelButton.setObjectName("cancelButton")
self.horizontalLayout_2.addWidget(self.cancelButton)
self.aboutButton = QtWidgets.QPushButton(Dialog)
self.aboutButton.setCheckable(True)
self.aboutButton.setObjectName("aboutButton")
self.horizontalLayout_2.addWidget(self.aboutButton)
self.gridLayout_3.addLayout(self.horizontalLayout_2, 1, 0, 1, 1)
self.retranslateUi(Dialog)
QtCore.QMetaObject.connectSlotsByName(Dialog)
def retranslateUi(self, Dialog):
    """Set every user-visible string of the settings dialog through Qt's translator."""
    translate = QtCore.QCoreApplication.translate

    # Window and group box titles
    Dialog.setWindowTitle(translate("Dialog", "Settings"))
    self.groupBox_2.setTitle(translate("Dialog", "Library"))
    self.groupBox.setTitle(translate("Dialog", "Switches"))

    # The culling check box is the only widget that also carries a tooltip
    self.performCulling.setToolTip(
        translate("Dialog", "Enabling reduces startup time and memory usage"))

    # Captions for the check boxes, the dictionary label and the buttons
    captions = (
        (self.refreshLibrary, "Startup: Refresh library"),
        (self.fileRemember, "Remember open files"),
        (self.autoTags, "Generate tags from files"),
        (self.coverShadows, "Cover shadows"),
        (self.performCulling, "Load covers only when needed"),
        (self.languageLabel, "Dictionary:"),
        (self.okButton, "Scan Library"),
        (self.cancelButton, "Close"),
        (self.aboutButton, "About"),
    )
    for widget, caption in captions:
        widget.setText(translate("Dialog", caption))

172
lector/settings.py Normal file
View File

@@ -0,0 +1,172 @@
#!/usr/bin/env python3
# Keep in mind that all integer / boolean settings are returned as strings
import os
from ast import literal_eval
from PyQt5 import QtCore, QtGui
class Settings:
    """Read and write Lector's persistent configuration through ``QSettings``.

    Values are loaded onto, and saved from, the ``parent`` object handed to
    the constructor: its ``settings`` dict plus a few direct attributes such
    as ``database_path``, ``display_profiles`` and ``comic_profile``.
    """

    def __init__(self, parent):
        """Keep a reference to ``parent`` and build the default profiles."""
        self.parent = parent
        self.settings = QtCore.QSettings('Lector', 'Lector')

        default_profile1 = {
            'font': 'Noto Sans Fallback',
            'foreground': QtGui.QColor().fromRgb(0, 0, 0),
            'background': QtGui.QColor().fromRgb(216, 216, 216),
            'padding': 150,
            'font_size': 30,
            'line_spacing': 110,
            'text_alignment': 'justify'}

        default_profile2 = {
            'font': 'Roboto',
            'foreground': QtGui.QColor().fromRgb(194, 194, 194),
            'background': QtGui.QColor().fromRgb(22, 22, 22),
            'padding': 150,
            'font_size': 30,
            'line_spacing': 110,
            'text_alignment': 'justify'}

        default_profile3 = {
            'font': 'Clear Sans',
            'foreground': QtGui.QColor().fromRgb(101, 123, 131),
            'background': QtGui.QColor().fromRgb(0, 43, 54),
            'padding': 150,
            'font_size': 30,
            'line_spacing': 110,
            'text_alignment': 'justify'}

        # Used by read_settings() when no display profiles have been stored
        self.default_profiles = [
            default_profile1, default_profile2, default_profile3]

        # Used by read_settings() when no comic profile has been stored
        self.default_comic_profile = {
            'padding': 100,  # pixel padding on either side
            'background': QtGui.QColor().fromRgb(0, 0, 0),
            'zoom_mode': 'bestFit'}

    @staticmethod
    def _as_bool(value):
        """Convert a stored switch value to a Python boolean.

        Strings such as ``'true'`` / ``'False'`` are capitalized and passed
        through ``literal_eval``. Any other value (notably a value that is
        already a ``bool``) is converted with ``bool()`` instead of failing
        on the missing ``capitalize`` method.
        """
        if isinstance(value, str):
            return literal_eval(value.capitalize())
        return bool(value)

    def read_settings(self):
        """Load every stored value (or its default) onto ``self.parent``."""
        self.settings.beginGroup('mainWindow')
        self.parent.resize(self.settings.value('windowSize', QtCore.QSize(1299, 748)))
        self.parent.move(self.settings.value('windowPosition', QtCore.QPoint(0, 0)))
        self.parent.settings['current_view'] = int(self.settings.value('currentView', 0))
        self.parent.settings['main_window_headers'] = self.settings.value('tableHeaders', None)
        self.parent.settings['listview_background'] = self.settings.value(
            'listViewBackground', QtGui.QColor().fromRgb(76, 76, 76))
        self.settings.endGroup()

        self.settings.beginGroup('runtimeVariables')
        self.parent.settings['last_open_path'] = self.settings.value(
            'lastOpenPath', os.path.expanduser('~'))
        self.parent.database_path = self.settings.value(
            'databasePath',
            QtCore.QStandardPaths.writableLocation(QtCore.QStandardPaths.AppDataLocation))
        self.parent.display_profiles = self.settings.value(
            'displayProfiles', self.default_profiles)
        self.parent.current_profile_index = int(self.settings.value(
            'currentProfileIndex', 0))
        self.parent.comic_profile = self.settings.value(
            'comicProfile', self.default_comic_profile)
        self.settings.endGroup()

        self.settings.beginGroup('lastOpen')
        self.parent.settings['last_open_books'] = self.settings.value('lastOpenBooks', [])
        self.parent.settings['last_open_tab'] = self.settings.value('lastOpenTab', 'library')
        self.settings.endGroup()

        self.settings.beginGroup('settingsWindow')
        self.parent.settings['settings_dialog_size'] = self.settings.value(
            'windowSize', QtCore.QSize(700, 500))
        self.parent.settings['settings_dialog_position'] = self.settings.value(
            'windowPosition', QtCore.QPoint(0, 0))
        self.parent.settings['settings_dialog_headers'] = self.settings.value(
            'tableHeaders', [200, 150])
        self.settings.endGroup()

        self.settings.beginGroup('settingsSwitches')
        # (key in parent.settings, key in QSettings, default)
        # Defaults are strings because _as_bool() converts them anyway
        switches = (
            ('cover_shadows', 'coverShadows', 'True'),
            ('auto_tags', 'autoTags', 'True'),
            ('scan_library', 'scanLibraryAtStart', 'False'),
            ('remember_files', 'rememberFiles', 'True'),
            ('perform_culling', 'performCulling', 'True'),
        )
        for setting_name, stored_key, default in switches:
            self.parent.settings[setting_name] = self._as_bool(
                self.settings.value(stored_key, default))
        self.parent.settings['dictionary_language'] = self.settings.value(
            'dictionaryLanguage', 'en')
        self.settings.endGroup()

        self.settings.beginGroup('dialogSettings')
        self.parent.settings['dialog_background'] = self.settings.value(
            'dialogBackground', QtGui.QColor().fromRgb(0, 0, 0))
        self.settings.endGroup()

    def save_settings(self):
        """Write the current state of ``self.parent`` back to ``QSettings``."""
        print('Saving settings...')
        current_settings = self.parent.settings

        self.settings.beginGroup('mainWindow')
        self.settings.setValue('windowSize', self.parent.size())
        self.settings.setValue('windowPosition', self.parent.pos())
        self.settings.setValue('currentView', self.parent.stackedWidget.currentIndex())
        self.settings.setValue(
            'listViewBackground', self.parent.settings['listview_background'])

        # Widths of the first three columns of the table view
        table_headers = [
            self.parent.tableView.horizontalHeader().sectionSize(i)
            for i in range(3)]
        self.settings.setValue('tableHeaders', table_headers)
        self.settings.endGroup()

        self.settings.beginGroup('runtimeVariables')
        self.settings.setValue('lastOpenPath', self.parent.settings['last_open_path'])
        self.settings.setValue('databasePath', self.parent.database_path)

        # The three display profiles are kept as item data on the profile box
        profile_box = self.parent.bookToolBar.profileBox
        current_profiles = [
            profile_box.itemData(i, QtCore.Qt.UserRole) for i in range(3)]
        current_profile_index = profile_box.currentIndex()
        self.settings.setValue('displayProfiles', current_profiles)
        self.settings.setValue('currentProfileIndex', current_profile_index)
        self.settings.setValue('comicProfile', self.parent.comic_profile)
        self.settings.endGroup()

        # Tab 0 is stored as 'library'; any other tab is stored
        # as the path found in its widget's metadata
        current_tab_index = self.parent.tabWidget.currentIndex()
        if current_tab_index == 0:
            last_open_tab = 'library'
        else:
            last_open_tab = self.parent.tabWidget.widget(current_tab_index).metadata['path']

        self.settings.beginGroup('lastOpen')
        self.settings.setValue('lastOpenBooks', current_settings['last_open_books'])
        self.settings.setValue('lastOpenTab', last_open_tab)
        self.settings.endGroup()

        self.settings.beginGroup('settingsWindow')
        self.settings.setValue('windowSize', current_settings['settings_dialog_size'])
        self.settings.setValue('windowPosition', current_settings['settings_dialog_position'])
        self.settings.setValue('tableHeaders', current_settings['settings_dialog_headers'])
        self.settings.endGroup()

        self.settings.beginGroup('settingsSwitches')
        self.settings.setValue('rememberFiles', current_settings['remember_files'])
        self.settings.setValue('coverShadows', current_settings['cover_shadows'])
        self.settings.setValue('autoTags', current_settings['auto_tags'])
        self.settings.setValue('scanLibraryAtStart', current_settings['scan_library'])
        self.settings.setValue('performCulling', current_settings['perform_culling'])
        self.settings.setValue('dictionaryLanguage', current_settings['dictionary_language'])
        self.settings.endGroup()

        self.settings.beginGroup('dialogSettings')
        self.settings.setValue('dialogBackground', current_settings['dialog_background'])
        self.settings.endGroup()

270
lector/settingsdialog.py Normal file
View File

@@ -0,0 +1,270 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# TODO
# Get Cancel working with the file system model
import os
import copy
from PyQt5 import QtWidgets, QtCore
import database
from resources import settingswindow
from models import MostExcellentFileSystemModel, FileSystemProxyModel
from threaded import BackGroundBookSearch, BackGroundBookAddition
class SettingsUI(QtWidgets.QDialog, settingswindow.Ui_Dialog):
    """Settings dialog: library folder tree, switches, dictionary language, about box.

    ``parent`` is the main window; switch values and the dialog geometry are
    read from and written to ``parent.settings``.
    """

    def __init__(self, parent):
        """Build the dialog and initialise its widgets from ``parent.settings``."""
        super(SettingsUI, self).__init__()
        self.setupUi(self)

        self.parent = parent
        self.database_path = self.parent.database_path

        self.resize(self.parent.settings['settings_dialog_size'])
        self.move(self.parent.settings['settings_dialog_position'])

        self.aboutBox.setVisible(False)
        aboutfile_path = os.path.join('resources', 'about.html')
        with open(aboutfile_path) as about_html:
            self.aboutBox.setHtml(about_html.read())

        self.paths = None
        self.thread = None
        self.filesystem_model = None
        self.tag_data_copy = None

        languages = ['English', 'Spanish', 'Hindi']
        self.languageBox.addItems(languages)
        current_language = self.parent.settings['dictionary_language']
        if current_language == 'en':
            self.languageBox.setCurrentIndex(0)
        elif current_language == 'es':
            self.languageBox.setCurrentIndex(1)
        else:
            self.languageBox.setCurrentIndex(2)
        self.languageBox.activated.connect(self.change_dictionary_language)

        self.okButton.setToolTip('Save changes and start library scan')
        self.okButton.clicked.connect(self.start_library_scan)
        self.cancelButton.clicked.connect(self.cancel_pressed)
        self.aboutButton.clicked.connect(self.about_pressed)

        # Check boxes
        self.autoTags.setChecked(self.parent.settings['auto_tags'])
        self.coverShadows.setChecked(self.parent.settings['cover_shadows'])
        self.refreshLibrary.setChecked(self.parent.settings['scan_library'])
        self.fileRemember.setChecked(self.parent.settings['remember_files'])
        self.performCulling.setChecked(self.parent.settings['perform_culling'])

        self.autoTags.clicked.connect(self.manage_checkboxes)
        self.coverShadows.clicked.connect(self.manage_checkboxes)
        self.refreshLibrary.clicked.connect(self.manage_checkboxes)
        self.fileRemember.clicked.connect(self.manage_checkboxes)
        self.performCulling.clicked.connect(self.manage_checkboxes)

        # Generate the filesystem treeView
        self.generate_tree()

    def generate_tree(self):
        """Populate the tree view from the directories stored in the database."""
        # Fetch all directories in the database
        paths = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Path', 'Name', 'Tags', 'CheckState'),
                'directories',
                {'Path': ''},
                'LIKE')

        self.parent.generate_library_filter_menu(paths)
        directory_data = {}
        if not paths:
            print('Database returned no paths for settings...')
        else:
            # Convert to the dictionary format that is
            # to be fed into the QFileSystemModel
            for i in paths:
                directory_data[i[0]] = {
                    'name': i[1],
                    'tags': i[2],
                    'check_state': i[3]}

        self.filesystem_model = MostExcellentFileSystemModel(directory_data)
        self.filesystem_model.setFilter(QtCore.QDir.NoDotAndDotDot | QtCore.QDir.Dirs)
        self.treeView.setModel(self.filesystem_model)

        # TODO
        # This here might break on them pestilent non unixy OSes
        # Check and see
        root_directory = QtCore.QDir().rootPath()
        self.treeView.setRootIndex(
            self.filesystem_model.setRootPath(root_directory))

        # Set the treeView and QFileSystemModel to its desired state
        selected_paths = [
            i for i in directory_data if directory_data[i]['check_state'] == QtCore.Qt.Checked]
        expand_paths = set()
        for i in selected_paths:
            # Walk up from the selected path, collecting every ancestor;
            # dirname() returning its own input means the top was reached
            this_path = i
            while True:
                parent_path = os.path.dirname(this_path)
                if parent_path == this_path:
                    break
                expand_paths.add(parent_path)
                this_path = parent_path

        # Expand all the parent paths derived from the selected path
        if root_directory in expand_paths:
            expand_paths.remove(root_directory)

        for i in expand_paths:
            this_index = self.filesystem_model.index(i)
            self.treeView.expand(this_index)

        # The two stored widths belong to columns 0 and 4
        header_sizes = self.parent.settings['settings_dialog_headers']
        if header_sizes:
            for count, i in enumerate((0, 4)):
                self.treeView.setColumnWidth(i, int(header_sizes[count]))

        # TODO
        # Set a QSortFilterProxy model on top of the existing QFileSystem model
        # self.filesystem_proxy_model = FileSystemProxyModel()
        # self.filesystem_proxy_model.setSourceModel(self.filesystem_model)
        # self.treeView.setModel(self.filesystem_proxy_model)

        for i in range(1, 4):
            self.treeView.hideColumn(i)

    def start_library_scan(self):
        """Save the directory selection and start the background file search."""
        # TODO
        # return in case the treeView is not edited
        self.hide()

        data_pairs = []
        for i in self.filesystem_model.tag_data.items():
            data_pairs.append([
                i[0], i[1]['name'], i[1]['tags'], i[1]['check_state']
            ])

        database.DatabaseFunctions(
            self.database_path).set_library_paths(data_pairs)

        if not data_pairs:
            # Nothing is selected: bring the dialog back if the scan was
            # requested through the 'reloadLibrary' object, then empty
            # the library
            try:
                if self.sender().objectName() == 'reloadLibrary':
                    self.show()
            except AttributeError:
                # sender() returned None, so there is no objectName to check
                pass

            self.parent.lib_ref.view_model.clear()
            self.parent.lib_ref.table_rows = []

            # TODO
            # Change this to no longer include files added manually
            database.DatabaseFunctions(
                self.database_path).delete_from_database('*', '*')
            return

        # Update the main window library filter menu
        self.parent.generate_library_filter_menu(data_pairs)
        self.parent.set_library_filter()

        # Disallow rechecking until the first check completes
        self.okButton.setEnabled(False)
        self.parent.reloadLibrary.setEnabled(False)
        self.okButton.setToolTip('Library scan in progress...')

        # Traverse directories looking for files
        self.parent.statusMessage.setText('Checking library folders')
        self.thread = BackGroundBookSearch(data_pairs, self)
        self.thread.finished.connect(self.finished_iterating)
        self.thread.start()

    def finished_iterating(self):
        """Start adding the files found by the search thread to the database."""
        # The books the search thread has found
        # are now in self.thread.valid_files
        if not self.thread.valid_files:
            # No addition thread will be started, so nothing further down
            # this path runs: undo the lock-out applied by
            # start_library_scan() here, otherwise the scan controls
            # stay disabled.
            # NOTE(review): the status message is left untouched because
            # its idle text is not known from this file
            self.okButton.setEnabled(True)
            self.parent.reloadLibrary.setEnabled(True)
            self.okButton.setToolTip('Save changes and start library scan')
            return

        # Hey, messaging is important, okay?
        self.parent.sorterProgress.setVisible(True)
        self.parent.statusMessage.setText('Parsing files')

        # We now create a new thread to put those files into the database
        self.thread = BackGroundBookAddition(
            self.thread.valid_files, self.database_path, True, self.parent)
        self.thread.finished.connect(self.parent.move_on)
        self.thread.start()

    def cancel_pressed(self):
        """Restore the tag data saved when the dialog was shown, then hide."""
        self.filesystem_model.tag_data = copy.deepcopy(self.tag_data_copy)
        self.hide()

    def hideEvent(self, event):
        """Reset the dialog state whenever it gets hidden."""
        self.no_more_settings()
        event.accept()

    def showEvent(self, event):
        """Snapshot the tag data so that cancel_pressed() can restore it."""
        self.tag_data_copy = copy.deepcopy(self.filesystem_model.tag_data)
        event.accept()

    def no_more_settings(self):
        """Uncheck the settings button, show the tree again and store the geometry."""
        self.parent.libraryToolBar.settingsButton.setChecked(False)
        self.aboutBox.hide()
        self.treeView.show()
        self.resizeEvent()

    def resizeEvent(self, event=None):
        """Record the dialog size, position and column widths in parent.settings.

        ``event`` is unused; it defaults to None so that no_more_settings()
        can call this directly.
        """
        self.parent.settings['settings_dialog_size'] = self.size()
        self.parent.settings['settings_dialog_position'] = self.pos()
        table_headers = []
        for i in [0, 4]:
            table_headers.append(self.treeView.columnWidth(i))
        self.parent.settings['settings_dialog_headers'] = table_headers

    def change_dictionary_language(self, event):
        """Store the language code matching the language box's current index."""
        language_dict = {
            0: 'en',
            1: 'es',
            2: 'hi'}
        self.parent.settings['dictionary_language'] = language_dict[self.languageBox.currentIndex()]

    def manage_checkboxes(self, event=None):
        """Toggle the setting that belongs to the check box that was clicked."""
        sender = self.sender().objectName()

        # Object name of the check box -> key in parent.settings
        sender_dict = {
            'coverShadows': 'cover_shadows',
            'autoTags': 'auto_tags',
            'refreshLibrary': 'scan_library',
            'fileRemember': 'remember_files',
            'performCulling': 'perform_culling'}

        self.parent.settings[sender_dict[sender]] = not self.parent.settings[sender_dict[sender]]

        if not self.performCulling.isChecked():
            self.parent.load_all_covers()

    def about_pressed(self):
        """Swap the visibility of the tree view and the about box."""
        self.treeView.setVisible(not self.treeView.isVisible())
        self.aboutBox.setVisible(not self.aboutBox.isVisible())

301
lector/sorter.py Normal file
View File

@@ -0,0 +1,301 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# INSTRUCTIONS
# Every parser is supposed to have the following methods, even if they return None:
# read_book()
# get_title()
# get_author()
# get_year()
# get_cover_image()
# get_isbn()
# get_tags()
# get_contents() - Should return a tuple with 0: TOC 1: special_settings (dict)
# Parsers for files containing only images need to return only images_only = True
# TODO
# Maybe shift to insert or replace instead of hash checking
# See if you want to include a hash of the book's name and author
# Change thread niceness
import io
import os
import time
import pickle
import hashlib
import threading
from multiprocessing import Pool, Manager
from PyQt5 import QtCore, QtGui
import database
from parsers.cbz import ParseCBZ
from parsers.cbr import ParseCBR
from parsers.epub import ParseEPUB
from parsers.mobi import ParseMOBI
# Parser class responsible for each supported file extension
sorter = dict(
    epub=ParseEPUB,
    mobi=ParseMOBI,
    azw=ParseMOBI,
    azw3=ParseMOBI,
    azw4=ParseMOBI,
    prc=ParseMOBI,
    cbz=ParseCBZ,
    cbr=ParseCBR)

# The extensions above, in the same order
available_parsers = list(sorter)

progressbar = None  # This is populated by __main__
progress_emitter = None  # Replaced with an UpdateProgress object by progress_object_generator()
class UpdateProgress(QtCore.QObject):
    """Forward progress percentages to the progress bar through a Qt signal."""

    # Going through a signal is what provides thread safety here
    update_signal = QtCore.pyqtSignal(int)

    def connect_to_progressbar(self):
        """Connect the signal to setValue of the module-level ``progressbar``."""
        set_value = progressbar.setValue
        self.update_signal.connect(set_value)

    def update_progress(self, progress_percent):
        """Emit ``progress_percent`` on ``update_signal``."""
        signal = self.update_signal
        signal.emit(progress_percent)
class BookSorter:
    """Parse ebook files for addition to the library or for reading.

    ``mode`` is 'addition' (metadata plus a resized cover) or 'reading'
    (metadata, content, stored position and bookmarks). Files are parsed in
    a process pool while a second thread reports progress.
    """

    def __init__(self, file_list, mode, database_path, auto_tags=True, temp_dir=None):
        """Filter ``file_list`` down to existing paths and prepare the queue."""
        # Have the GUI pass a list of files straight to here
        # Then, on the basis of what is needed, pass the
        # filenames to the requisite functions
        # This includes getting file info for the database
        # Parsing for the reader proper
        # Caching upon closing
        self.file_list = [i for i in file_list if os.path.exists(i)]
        self.statistics = [0, (len(file_list))]
        self.hashes_and_paths = {}
        self.mode = mode
        self.database_path = database_path
        self.auto_tags = auto_tags
        self.temp_dir = temp_dir
        if database_path:
            self.database_hashes()

        self.threading_completed = []
        # Worker processes put each filename here once it has been hashed;
        # read_progress() consumes them
        self.queue = Manager().Queue()
        self.processed_books = []

        if self.mode == 'addition':
            progress_object_generator()

    def database_hashes(self):
        """Cache a {hash: path} mapping of the books already in the database."""
        all_hashes_and_paths = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Hash', 'Path'),
                'books',
                {'Hash': ''},
                'LIKE')

        if all_hashes_and_paths:
            self.hashes_and_paths = {
                i[0]: i[1] for i in all_hashes_and_paths}

    def database_entry_for_book(self, file_hash):
        """Return ``[position, bookmarks]`` stored for ``file_hash``.

        Either entry is None when nothing is stored for it. ``[None, None]``
        is returned when the database has no row for the hash at all,
        instead of failing while subscripting an empty result.
        """
        database_return = database.DatabaseFunctions(
            self.database_path).fetch_data(
                ('Position', 'Bookmarks'),
                'books',
                {'Hash': file_hash},
                'EQUALS')

        if not database_return:
            return [None, None]

        # All of these values are pickled and stored
        # NOTE(review): pickle.loads executes arbitrary code if the
        # database contents cannot be trusted
        return [pickle.loads(i) if i else None for i in database_return[0]]

    def read_book(self, filename):
        """Parse one file and return ``{md5: book_dict}``.

        Returns None for a file already in the database at the same path
        (addition mode), for an unsupported extension, or when the parser
        sets no ``book``.
        """
        # filename is expected as a string containing the
        # full path of the ebook file
        with open(filename, 'rb') as current_book:
            # This should speed up addition for larger files
            # without compromising the integrity of the process
            first_bytes = current_book.read(1024 * 32)  # First 32KB of the file
            file_md5 = hashlib.md5(first_bytes).hexdigest()

        # Update the progress queue
        self.queue.put(filename)

        # This should not get triggered in reading mode
        # IF the file is NOT being loaded into the reader,
        # Do not allow addition in case the file
        # is already in the database and it remains at its original path
        if self.mode == 'addition' and file_md5 in self.hashes_and_paths:
            if self.hashes_and_paths[file_md5] == filename:
                return

        # The sorter keys are lowercase, so fold the case to
        # accept names such as 'BOOK.EPUB' as well
        file_extension = os.path.splitext(filename)[1][1:].lower()
        try:
            # Get the requisite parser from the sorter dict
            book_ref = sorter[file_extension](filename, self.temp_dir, file_md5)
        except KeyError:
            print(filename + ' has an unsupported extension')
            return

        # Everything following this is standard
        # None values are accounted for here
        book_ref.read_book()
        if book_ref.book:
            title = book_ref.get_title()

            author = book_ref.get_author()
            if not author:
                author = 'Unknown'

            try:
                year = int(book_ref.get_year())
            except (TypeError, ValueError):
                # Placeholder for a missing or non numeric year
                year = 9999

            isbn = book_ref.get_isbn()

            tags = None
            if self.auto_tags:
                tags = book_ref.get_tags()

            this_book = {}
            this_book[file_md5] = {
                'title': title,
                'author': author,
                'year': year,
                'isbn': isbn,
                'hash': file_md5,
                'path': filename,
                'tags': tags}

            # Different modes require different values
            if self.mode == 'addition':
                # Reduce the size of the incoming image
                # if one is found
                cover_image_raw = book_ref.get_cover_image()
                if cover_image_raw:
                    cover_image = resize_image(cover_image_raw)
                else:
                    cover_image = None

                this_book[file_md5]['cover_image'] = cover_image

            if self.mode == 'reading':
                all_content = book_ref.get_contents()

                # get_contents() returns a tuple. Index 1 is a collection of
                # special settings that depend on the kind of data being parsed.
                # Currently, this includes:
                # Only images included      images_only     BOOL    Specify only paths to images
                #                                                   File will not be cached on exit
                content = all_content[0]
                images_only = all_content[1]['images_only']

                if not content:
                    content = [('Invalid', 'Something went horribly wrong')]

                book_data = self.database_entry_for_book(file_md5)
                position = book_data[0]
                bookmarks = book_data[1]

                this_book[file_md5]['position'] = position
                this_book[file_md5]['bookmarks'] = bookmarks
                this_book[file_md5]['content'] = content
                this_book[file_md5]['images_only'] = images_only

            return this_book

    def read_progress(self):
        """Consume the progress queue and emit percentages until done.

        Stops once every file in ``self.file_list`` has been reported, or
        when the ``None`` sentinel posted by initiate_threads() arrives.
        The sentinel covers an empty file list and workers that fail
        before reporting, both of which would otherwise block here forever.
        """
        total_number = len(self.file_list)
        while True:
            processed_file = self.queue.get()
            if processed_file is None:
                break

            self.threading_completed.append(processed_file)
            completed_number = len(self.threading_completed)

            # Skip update in reading mode (no emitter exists then)
            if total_number and progress_emitter:
                progress_emitter.update_progress(
                    completed_number * 100 // total_number)

            if completed_number >= total_number:
                break

    def initiate_threads(self):
        """Parse every file in a pool of 5 processes.

        Returns a single dict merging the per-file dicts produced by
        read_book(); None results are dropped.
        """
        def pool_creator():
            _pool = Pool(5)
            try:
                self.processed_books = _pool.map(
                    self.read_book, self.file_list)
            finally:
                _pool.close()
                _pool.join()
                # Release read_progress() even if it is still waiting
                self.queue.put(None)

        start_time = time.time()

        worker_thread = threading.Thread(target=pool_creator)
        progress_thread = threading.Thread(target=self.read_progress)
        worker_thread.start()
        progress_thread.start()

        worker_thread.join()
        progress_thread.join(timeout=.5)

        return_books = {}
        # Exclude None returns generated in case of duplication / parse errors
        self.processed_books = [i for i in self.processed_books if i]
        for i in self.processed_books:
            return_books.update(i)

        del self.processed_books
        print('Finished processing in', time.time() - start_time)
        return return_books
def progress_object_generator():
    """Create the module-wide progress emitter and connect it to the progress bar.

    Kept outside the BookSorter class because the QtObject
    inheritance disallows pickling.
    """
    global progress_emitter
    emitter = UpdateProgress()
    progress_emitter = emitter
    emitter.connect_to_progressbar()
def resize_image(cover_image_raw):
    """Scale raw cover image data to 420x600 and re-encode it as JPEG.

    The aspect ratio is ignored and the JPEG quality is 75.

    Returns the encoded image as bytes, or None when the data cannot be
    decoded or saved. None is the value read_book() already stores when a
    book has no cover at all.
    """
    cover_image = QtGui.QImage()
    if not cover_image.loadFromData(cover_image_raw):
        # Not an image Qt can read; do not hand back empty bytes as a cover
        return None

    cover_image = cover_image.scaled(
        420, 600, QtCore.Qt.IgnoreAspectRatio)

    byte_array = QtCore.QByteArray()
    image_buffer = QtCore.QBuffer(byte_array)
    image_buffer.open(QtCore.QIODevice.WriteOnly)
    saved = cover_image.save(image_buffer, 'jpg', 75)
    image_buffer.close()
    if not saved:
        return None

    return io.BytesIO(byte_array).getvalue()

117
lector/threaded.py Normal file
View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import pathlib
from multiprocessing.dummy import Pool
from PyQt5 import QtCore
import sorter
import database
class BackGroundTabUpdate(QtCore.QThread):
    """Thread that writes position, last access and bookmarks to the database."""

    def __init__(self, database_path, all_metadata, parent=None):
        """Remember the database path and the metadata dicts to store."""
        super().__init__(parent)
        self.database_path = database_path
        self.all_metadata = all_metadata

    def run(self):
        """Call modify_metadata once per metadata dict, keyed by its hash."""
        for metadata in self.all_metadata:
            book_hash = metadata['hash']
            database_dict = dict(
                Position=metadata['position'],
                LastAccessed=metadata['last_accessed'],
                Bookmarks=metadata['bookmarks'])

            db_functions = database.DatabaseFunctions(self.database_path)
            db_functions.modify_metadata(database_dict, book_hash)
class BackGroundBookAddition(QtCore.QThread):
    """Thread that parses a list of files and adds them to the library."""

    def __init__(self, file_list, database_path, prune_required, parent=None):
        """Store the files to add, the database path and the prune flag."""
        super().__init__(parent)
        self.file_list = file_list
        self.parent = parent
        self.database_path = database_path
        self.prune_required = prune_required

    def run(self):
        """Parse the files, update the library models, then store the results."""
        auto_tags = self.parent.settings['auto_tags']
        temp_dir = self.parent.temp_dir.path()
        books = sorter.BookSorter(
            self.file_list, 'addition', self.database_path, auto_tags, temp_dir)
        parsed_books = books.initiate_threads()

        library = self.parent.lib_ref
        library.generate_model('addition', parsed_books, False)

        if self.prune_required:
            self.parent.lib_ref.prune_models(self.file_list)

        db_functions = database.DatabaseFunctions(self.database_path)
        db_functions.add_to_database(parsed_books)
class BackGroundBookDeletion(QtCore.QThread):
    """Thread that deletes the books with the given hashes from the database."""

    def __init__(self, hash_list, database_path, parent=None):
        """Store the hashes to delete and the database path."""
        super().__init__(parent)
        self.parent = parent
        self.hash_list = hash_list
        self.database_path = database_path

    def run(self):
        """Delete every entry whose 'Hash' is in ``self.hash_list``."""
        db_functions = database.DatabaseFunctions(self.database_path)
        db_functions.delete_from_database('Hash', self.hash_list)
class BackGroundBookSearch(QtCore.QThread):
    """Thread that walks the checked library directories looking for books.

    Each entry of ``data_list`` is indexed as
    ``[path, <entry 1>, <entry 2>, check_state]``. The paths of the files
    found end up in ``self.valid_files``.
    """

    def __init__(self, data_list, parent=None):
        """Split ``data_list`` into directories to walk and directories to skip."""
        super(BackGroundBookSearch, self).__init__(parent)
        self.parent = parent
        self.valid_files = []

        # Filter for checked directories
        self.valid_directories = [
            [i[0], i[1], i[2]] for i in data_list if i[3] == QtCore.Qt.Checked]
        self.unwanted_directories = [
            pathlib.Path(i[0]) for i in data_list if i[3] == QtCore.Qt.Unchecked]

    def run(self):
        """Walk every checked directory in a pool of 5 threads."""
        def is_wanted(directory):
            # A directory is unwanted when it is an unchecked directory
            # itself or lies anywhere below one. Comparing only against
            # the parents would still descend into the unchecked
            # directory and pick up the files directly inside it.
            directory_path = pathlib.Path(directory)
            directory_parents = directory_path.parents
            for i in self.unwanted_directories:
                if i == directory_path or i in directory_parents:
                    return False
            return True

        def traverse_directory(incoming_data):
            root_directory = incoming_data[0]
            for directory, subdirs, files in os.walk(root_directory, topdown=True):
                # Editing subdirs in place makes a top-down os.walk
                # skip the subdir trees that are not wanted
                subdirs[:] = [d for d in subdirs if is_wanted(os.path.join(directory, d))]
                for filename in files:
                    if os.path.splitext(filename)[1][1:] in sorter.available_parsers:
                        self.valid_files.append(os.path.join(directory, filename))

        def initiate_threads():
            _pool = Pool(5)
            try:
                _pool.map(traverse_directory, self.valid_directories)
            finally:
                _pool.close()
                _pool.join()

        initiate_threads()
        print(len(self.valid_files), 'books found')

414
lector/toolbars.py Normal file
View File

@@ -0,0 +1,414 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2018 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from PyQt5 import QtWidgets, QtGui, QtCore
class BookToolBar(QtWidgets.QToolBar):
    """Toolbar displayed while a book is open.

    The toolbar owns three groups of actions whose visibility is toggled
    as a unit:

    * ``bookActions``  - bookmarks, fullscreen, table of contents, search
    * ``fontActions``  - profile, font, colors, line spacing, padding,
      alignment and the "reset profile" action
    * ``comicActions`` - zoom controls and the comic background color

    ``customize_view_on`` reads ``self.parent().tabWidget``; the current
    page of that tab widget must expose a ``metadata`` mapping with an
    ``images_only`` key.
    """

    def __init__(self, parent=None):
        super(BookToolBar, self).__init__(parent)

        # Spacer - pushes everything added after it to the right edge
        spacer = QtWidgets.QWidget()
        spacer.setSizePolicy(
            QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Expanding)

        sizePolicy = QtWidgets.QSizePolicy(
            QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)

        self.setMovable(False)
        self.setIconSize(QtCore.QSize(22, 22))
        self.setFloatable(False)
        self.setContextMenuPolicy(QtCore.Qt.PreventContextMenu)
        # NOTE(review): same object name as LibraryToolBar - kept as is
        # because code outside this block may look the toolbar up by name
        self.setObjectName("LibraryToolBar")

        # Buttons
        self.fontButton = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('gtk-select-font'),
            'View settings', self)
        self.fullscreenButton = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('view-fullscreen'),
            'Fullscreen', self)
        self.addBookmarkButton = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('bookmark-new'),
            'Add bookmark', self)
        self.bookmarkButton = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('bookmarks'),
            'Bookmarks', self)
        self.bookmarkButton.setObjectName('bookmarkButton')
        self.resetProfile = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('view-refresh'),
            'Reset profile', self)

        # Add buttons
        self.addAction(self.fontButton)
        self.fontButton.setCheckable(True)
        self.fontButton.triggered.connect(self.toggle_font_settings)
        self.addSeparator()
        self.addAction(self.addBookmarkButton)
        self.addAction(self.bookmarkButton)
        self.bookmarkButton.setCheckable(True)
        self.addSeparator()
        self.addAction(self.fullscreenButton)

        # Font modification
        font_sizes = [str(i) for i in range(8, 48, 2)]
        font_sizes.extend(['56', '64', '72'])
        self.fontSizeBox = QtWidgets.QComboBox(self)
        self.fontSizeBox.setObjectName('fontSizeBox')
        self.fontSizeBox.setToolTip('Font size')
        self.fontSizeBox.addItems(font_sizes)
        self.fontSizeBox.setEditable(True)

        self.paddingUp = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('format-indent-less'),
            'Increase padding', self)
        self.paddingUp.setObjectName('paddingUp')
        self.paddingDown = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('format-indent-more'),
            'Decrease padding', self)
        self.paddingDown.setObjectName('paddingDown')

        self.lineSpacingUp = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('format-line-spacing-triple'),
            'Increase line spacing', self)
        self.lineSpacingUp.setObjectName('lineSpacingUp')
        self.lineSpacingDown = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('format-line-spacing-double'),
            'Decrease line spacing', self)
        self.lineSpacingDown.setObjectName('lineSpacingDown')

        self.alignLeft = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('format-justify-left'),
            'Left align text', self)
        self.alignLeft.setObjectName('alignLeft')
        self.alignLeft.setCheckable(True)

        self.alignRight = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('format-justify-right'),
            'Right align text', self)
        self.alignRight.setObjectName('alignRight')
        self.alignRight.setCheckable(True)

        self.alignCenter = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('format-justify-center'),
            'Center align text', self)
        self.alignCenter.setObjectName('alignCenter')
        self.alignCenter.setCheckable(True)

        self.alignJustify = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('format-justify-fill'),
            'Justify text', self)
        self.alignJustify.setObjectName('alignJustify')
        self.alignJustify.setCheckable(True)

        # Only one alignment can be checked at a time
        self.alignButtons = QtWidgets.QActionGroup(self)
        self.alignButtons.setExclusive(True)
        self.alignButtons.addAction(self.alignLeft)
        self.alignButtons.addAction(self.alignRight)
        self.alignButtons.addAction(self.alignCenter)
        self.alignButtons.addAction(self.alignJustify)

        self.fontBox = QtWidgets.QFontComboBox()
        self.fontBox.setFontFilters(QtWidgets.QFontComboBox.ScalableFonts)
        self.fontBox.setObjectName('fontBox')

        self.colorBoxFG = FixedPushButton(self)
        self.colorBoxFG.setObjectName('fgColor')
        self.colorBoxFG.setToolTip('Text color')
        self.colorBoxBG = FixedPushButton(self)
        self.colorBoxBG.setToolTip('Background color')
        self.colorBoxBG.setObjectName('bgColor')

        profiles = ['Profile 1', 'Profile 2', 'Profile 3']
        self.profileBox = QtWidgets.QComboBox(self)
        self.profileBox.addItems(profiles)

        # addWidget / addSeparator return the QAction that controls the
        # visibility of the inserted item; keep them to toggle the group
        self.profileAction = self.addWidget(self.profileBox)
        self.fontSeparator1 = self.addSeparator()
        self.fontBoxAction = self.addWidget(self.fontBox)
        self.fontSizeBoxAction = self.addWidget(self.fontSizeBox)
        self.fontSeparator2 = self.addSeparator()
        self.fgColorAction = self.addWidget(self.colorBoxFG)
        self.bgColorAction = self.addWidget(self.colorBoxBG)
        self.fontSeparator3 = self.addSeparator()
        self.addAction(self.lineSpacingUp)
        self.addAction(self.lineSpacingDown)
        self.fontSeparator4 = self.addSeparator()
        self.addAction(self.paddingUp)
        self.addAction(self.paddingDown)
        # This separator used to overwrite fontSeparator4, which left the
        # previous separator untracked and therefore permanently visible
        self.fontSeparator5 = self.addSeparator()
        self.addAction(self.alignLeft)
        self.addAction(self.alignRight)
        self.addAction(self.alignCenter)
        self.addAction(self.alignJustify)

        self.fontActions = [
            self.fontBoxAction,
            self.fontSizeBoxAction,
            self.fgColorAction,
            self.bgColorAction,
            self.lineSpacingUp,
            self.lineSpacingDown,
            self.paddingUp,
            self.paddingDown,
            self.alignLeft,
            self.alignRight,
            self.alignCenter,
            self.alignJustify,
            self.profileAction,
            self.fontSeparator1,
            self.fontSeparator2,
            self.fontSeparator3,
            self.fontSeparator4,
            self.fontSeparator5,
            self.resetProfile]

        for i in self.fontActions:
            i.setVisible(False)

        # Comic view modification
        self.zoomIn = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('zoom-in'),
            'Zoom in', self)
        self.zoomIn.setObjectName('zoomIn')
        self.zoomOut = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('zoom-out'),
            'Zoom Out', self)
        self.zoomOut.setObjectName('zoomOut')

        self.fitWidth = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('zoom-fit-width'),
            'Fit Width', self)
        self.fitWidth.setObjectName('fitWidth')
        self.fitWidth.setCheckable(True)
        self.bestFit = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('zoom-fit-best'),
            'Best Fit', self)
        self.bestFit.setObjectName('bestFit')
        self.bestFit.setCheckable(True)
        self.originalSize = QtWidgets.QAction(
            QtGui.QIcon.fromTheme('zoom-original'),
            'Original size', self)
        self.originalSize.setObjectName('originalSize')
        self.originalSize.setCheckable(True)

        self.comicBGColor = FixedPushButton(self)
        self.comicBGColor.setToolTip('Background color')
        self.comicBGColor.setObjectName('comicBGColor')

        self.comicSeparator1 = self.addSeparator()
        self.addAction(self.zoomIn)
        self.addAction(self.zoomOut)
        self.addAction(self.fitWidth)
        self.addAction(self.bestFit)
        self.addAction(self.originalSize)
        self.comicSeparator2 = self.addSeparator()
        self.comicBGColorAction = self.addWidget(self.comicBGColor)

        self.comicActions = [
            self.comicBGColorAction,
            self.zoomIn,
            self.zoomOut,
            self.fitWidth,
            self.bestFit,
            self.originalSize,
            self.comicSeparator1,
            self.comicSeparator2]

        for i in self.comicActions:
            i.setVisible(False)

        # Other booktoolbar widgets
        self.searchBar = FixedLineEdit(self)
        self.searchBar.setPlaceholderText(
            'Search...')
        self.searchBar.setSizePolicy(sizePolicy)
        self.searchBar.setContentsMargins(10, 0, 0, 0)
        self.searchBar.setObjectName('searchBar')

        # Table of contents selector
        self.tocBox = FixedComboBox(self)
        self.tocBox.setObjectName('sortingBox')
        self.tocBox.setToolTip('Table of Contents')

        # All of these will be put after the spacer
        # This means that the buttons in the left side of
        # the toolbar have to split up and added here
        self.boxSpacer = self.addWidget(spacer)

        self.tocBoxAction = self.addWidget(self.tocBox)
        self.searchBarAction = self.addWidget(self.searchBar)

        self.bookActions = [
            self.addBookmarkButton,
            self.bookmarkButton,
            self.fullscreenButton,
            self.tocBoxAction,
            self.searchBarAction]

        for i in self.bookActions:
            i.setVisible(True)

        # Added last so that it sits at the far right; it was already
        # hidden together with the other font actions above
        self.addAction(self.resetProfile)

    def toggle_font_settings(self):
        """Show or hide the view settings according to ``fontButton``."""
        if self.fontButton.isChecked():
            self.customize_view_on()
        else:
            self.customize_view_off()

    def customize_view_on(self):
        """Replace the book actions with the settings of the current tab.

        Tabs whose metadata has ``images_only`` set get the comic
        controls, all other tabs get the font controls.
        """
        current_tab = self.parent().tabWidget.currentWidget()
        if current_tab.metadata['images_only']:
            # The following might seem redundant,
            # but it's necessary for tab switching
            for i in self.comicActions:
                i.setVisible(True)
            for i in self.fontActions:
                i.setVisible(False)
        else:
            for i in self.fontActions:
                i.setVisible(True)
            for i in self.comicActions:
                i.setVisible(False)

        for i in self.bookActions:
            i.setVisible(False)

    def customize_view_off(self):
        """Hide every settings control and bring the book actions back."""
        for i in self.fontActions:
            i.setVisible(False)
        for i in self.comicActions:
            i.setVisible(False)

        for i in self.bookActions:
            i.setVisible(True)
class LibraryToolBar(QtWidgets.QToolBar):
    """Toolbar of the library view.

    Holds the add / delete actions, the cover / table view switch, the
    library filter button, the background color and settings actions and,
    right-aligned, the sorting box and the search bar.
    """

    def __init__(self, parent=None):
        super().__init__(parent)

        def themed_action(icon_name, label):
            # Every action of this toolbar is a theme icon plus a label,
            # parented to the toolbar itself
            return QtWidgets.QAction(QtGui.QIcon.fromTheme(icon_name), label, self)

        # Expanding filler that pushes the sorter and search bar rightwards
        expanding = QtWidgets.QSizePolicy.Expanding
        filler = QtWidgets.QWidget()
        filler.setSizePolicy(expanding, expanding)

        self.setMovable(False)
        self.setIconSize(QtCore.QSize(22, 22))
        self.setFloatable(False)
        self.setContextMenuPolicy(QtCore.Qt.PreventContextMenu)
        self.setObjectName("LibraryToolBar")

        # Buttons
        self.addButton = themed_action('add', 'Add book')
        self.deleteButton = themed_action('remove', 'Delete book')
        self.colorButton = themed_action('color-picker', 'Library background color')
        self.colorButton.setObjectName('libraryBackground')
        self.settingsButton = themed_action('settings', 'Settings')
        self.settingsButton.setCheckable(True)

        self.coverViewButton = themed_action('view-grid', 'View as covers')
        self.coverViewButton.setCheckable(True)
        self.tableViewButton = themed_action('table', 'View as table')
        self.tableViewButton.setCheckable(True)

        self.libraryFilterButton = QtWidgets.QToolButton(self)
        self.libraryFilterButton.setIcon(QtGui.QIcon.fromTheme('view-readermode'))
        self.libraryFilterButton.setText('Filter library')
        self.libraryFilterButton.setToolTip('Filter library')

        # Exclusive group: checking one view action unchecks the other
        self.viewButtons = QtWidgets.QActionGroup(self)
        self.viewButtons.setExclusive(True)
        for view_action in (self.coverViewButton, self.tableViewButton):
            self.viewButtons.addAction(view_action)

        # Populate the left-hand side, one separator between groups
        for action in (self.addButton, self.deleteButton):
            self.addAction(action)
        self.addSeparator()
        for action in (self.coverViewButton, self.tableViewButton):
            self.addAction(action)
        self.addSeparator()
        self.addWidget(self.libraryFilterButton)
        self.addSeparator()
        for action in (self.colorButton, self.settingsButton):
            self.addAction(action)

        # Filter
        fixed = QtWidgets.QSizePolicy.Fixed
        fixed_policy = QtWidgets.QSizePolicy(fixed, fixed)

        self.searchBar = FixedLineEdit(self)
        self.searchBar.setClearButtonEnabled(True)
        self.searchBar.setPlaceholderText('Search for Title, Author, Tags...')
        self.searchBar.setSizePolicy(fixed_policy)
        self.searchBar.setContentsMargins(10, 0, 0, 0)
        self.searchBar.setObjectName('searchBar')

        # Sorter
        self.sortingBox = FixedComboBox(self)
        self.sortingBox.addItems(
            ['Title', 'Author', 'Year', 'Newest', 'Last read'])
        self.sortingBox.setObjectName('sortingBox')
        self.sortingBox.setSizePolicy(fixed_policy)
        self.sortingBox.setMinimumContentsLength(10)
        self.sortingBox.setToolTip('Sort by')

        # Right-hand side: filler first, then the sorter and the search bar
        self.addWidget(filler)
        self.sortingBoxAction = self.addWidget(self.sortingBox)
        self.addWidget(self.searchBar)
# Subclassing these widgets out prevents them from resizing
class FixedComboBox(QtWidgets.QComboBox):
    """Combo box that always reports a 400 x 22 size hint."""

    def __init__(self, parent=None):
        super().__init__(parent)

    def sizeHint(self):
        """Return the constant preferred size of the widget."""
        width, height = 400, 22
        return QtCore.QSize(width, height)
class FixedLineEdit(QtWidgets.QLineEdit):
    """Line edit that always reports a 400 x 22 size hint."""

    def __init__(self, parent=None):
        super().__init__(parent)

    def sizeHint(self):
        """Return the constant preferred size of the widget."""
        width, height = 400, 22
        return QtCore.QSize(width, height)
class FixedPushButton(QtWidgets.QPushButton):
    """Push button that always reports a 36 x 30 size hint."""

    def __init__(self, parent=None):
        super().__init__(parent)

    def sizeHint(self):
        """Return the constant preferred size of the widget."""
        width, height = 36, 30
        return QtCore.QSize(width, height)

785
lector/widgets.py Normal file
View File

@@ -0,0 +1,785 @@
#!/usr/bin/env python3
# This file is a part of Lector, a Qt based ebook reader
# Copyright (C) 2017 BasioMeusPuga
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# TODO
# Reading modes
# Double page, Continuous etc
# Especially for comics
import os
import uuid
from PyQt5 import QtWidgets, QtGui, QtCore
from resources import pie_chart
from models import BookmarkProxyModel
from sorter import resize_image
from delegates import BookmarkDelegate
class Tab(QtWidgets.QWidget):
    """A single open book: content view, bookmark dock and reading position.

    ``metadata`` is the dictionary describing the book. This class reads
    its ``content``, ``images_only``, ``hash``, ``title``, ``position`` and
    ``bookmarks`` entries and writes progress (``position``,
    ``last_accessed``, ``bookmarks``) back into the same dictionary.

    ``parent`` must provide ``addTab``; the tab registers itself with it
    at the end of construction.
    """

    def __init__(self, metadata, parent=None):
        super(Tab, self).__init__(parent)
        # NOTE: this shadows QWidget.parent(); the rest of the file relies
        # on ``self.parent`` being the attribute, so it is left untouched
        self.parent = parent
        self.metadata = metadata  # Save progress data into this dictionary

        self.masterLayout = QtWidgets.QHBoxLayout(self)
        self.horzLayout = QtWidgets.QSplitter(self)
        self.horzLayout.setOrientation(QtCore.Qt.Horizontal)
        self.masterLayout.addWidget(self.horzLayout)

        self.metadata['last_accessed'] = QtCore.QDateTime().currentDateTime()

        if self.metadata['position']:
            if self.metadata['position']['is_read']:
                # A finished book is reopened at its very end
                self.generate_position(True)
            current_chapter = self.metadata['position']['current_chapter']
        else:
            self.generate_position()
            current_chapter = 1

        # Chapters are 1-based; each content entry carries its payload
        # at index 1
        chapter_content = self.metadata['content'][current_chapter - 1][1]

        # The content display widget is, by default a QTextBrowser.
        # In case the incoming data is only images
        # such as in the case of comic book files,
        # we want a QGraphicsView widget doing all the heavy lifting
        # instead of a QTextBrowser
        self.are_we_doing_images_only = self.metadata['images_only']

        if self.are_we_doing_images_only:  # Boolean
            self.contentView = PliantQGraphicsView(self.window(), self)
            self.contentView.loadImage(chapter_content)
        else:
            self.contentView = PliantQTextBrowser(self.window(), self)

            relative_path_root = os.path.join(
                self.window().temp_dir.path(), self.metadata['hash'])
            relative_paths = []
            for i in os.walk(relative_path_root):
                # TODO
                # Rename the .css files to something else here and keep
                # a record of them
                # Currently, I'm just removing them for the sake of simplicity
                for j in i[2]:
                    file_extension = os.path.splitext(j)[1]
                    if file_extension == '.css':
                        file_path = os.path.join(i[0], j)
                        os.remove(file_path)

                relative_paths.append(os.path.join(relative_path_root, i[0]))
            self.contentView.setSearchPaths(relative_paths)

            self.contentView.setOpenLinks(False)  # TODO Change this when HTML navigation works
            self.contentView.setHtml(chapter_content)
            self.contentView.setReadOnly(True)

            # An invisible button is "clicked" 100 ms from now so that
            # set_scroll_value runs as a slot after construction returns
            tempHiddenButton = QtWidgets.QToolButton(self)
            tempHiddenButton.setVisible(False)
            tempHiddenButton.clicked.connect(self.set_scroll_value)
            tempHiddenButton.animateClick(100)

        # The following are common to both the text browser and
        # the graphics view
        self.contentView.setFrameShape(QtWidgets.QFrame.NoFrame)
        self.contentView.setObjectName('contentView')
        self.contentView.verticalScrollBar().setSingleStep(7)
        self.contentView.setHorizontalScrollBarPolicy(
            QtCore.Qt.ScrollBarAlwaysOff)

        # See bookmark availability
        if not self.metadata['bookmarks']:
            self.metadata['bookmarks'] = {}

        # Create the dock widget for context specific display
        self.dockWidget = PliantDockWidget(self)
        self.dockWidget.setWindowTitle('Bookmarks')
        self.dockWidget.setFeatures(QtWidgets.QDockWidget.DockWidgetClosable)
        self.dockWidget.setFloating(False)
        self.dockWidget.hide()

        self.dockListView = QtWidgets.QListView(self.dockWidget)
        self.dockListView.setResizeMode(QtWidgets.QListWidget.Adjust)
        self.dockListView.setMaximumWidth(350)
        self.dockListView.setItemDelegate(BookmarkDelegate(self.dockListView))
        self.dockListView.setUniformItemSizes(True)
        self.dockListView.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
        self.dockListView.customContextMenuRequested.connect(
            self.generate_bookmark_context_menu)
        self.dockListView.clicked.connect(self.navigate_to_bookmark)
        self.dockWidget.setWidget(self.dockListView)

        self.bookmark_model = QtGui.QStandardItemModel(self)
        self.proxy_model = BookmarkProxyModel(self)
        self.generate_bookmark_model()

        self.generate_keyboard_shortcuts()

        self.horzLayout.addWidget(self.contentView)
        self.horzLayout.addWidget(self.dockWidget)
        title = self.metadata['title']
        self.parent.addTab(self, title)

        # Hide mouse cursor timer
        self.mouse_hide_timer = QtCore.QTimer()
        self.mouse_hide_timer.setSingleShot(True)
        self.mouse_hide_timer.timeout.connect(self.hide_mouse)

        self.contentView.setFocus()

    def update_last_accessed_time(self):
        """Stamp the book with the current time, here and in the library model."""
        self.metadata['last_accessed'] = QtCore.QDateTime().currentDateTime()

        view_model = self.window().lib_ref.view_model
        start_index = view_model.index(0, 0)
        # The library item is looked up by the book hash (UserRole + 6)
        matching_item = view_model.match(
            start_index,
            QtCore.Qt.UserRole + 6,
            self.metadata['hash'],
            1, QtCore.Qt.MatchExactly)

        # The book may not be present in the library model; there is
        # nothing to update in that case
        if not matching_item:
            return

        view_model.setData(
            matching_item[0], self.metadata['last_accessed'], QtCore.Qt.UserRole + 12)

    def set_scroll_value(self, switch_widgets=True, search_data=None):
        """Restore a scroll position inside the current chapter.

        ``switch_widgets`` temporarily makes this tab the current one while
        scrolling. ``search_data`` is an optional
        ``(scroll_value, visible_text)`` pair that overrides the values
        saved in ``metadata['position']``.
        """
        # TODO
        # Bookmark navigation does not work in case 2 entries in the same
        # chapter are clicked successively
        # It plain refuses to work other times

        # sender() is None when this is called outside of a signal
        sender = self.sender()
        if sender is not None and sender.objectName() == 'tabWidget':
            return

        if switch_widgets:
            previous_widget = self.window().tabWidget.currentWidget()
            self.window().tabWidget.setCurrentWidget(self)

        scroll_value = self.metadata['position']['scroll_value']
        if search_data:
            scroll_value = search_data[0]

        # Scroll a little ahead
        # This avoids confusion with potentially duplicate phrases
        # And the found result is at the top of the window
        scroll_position = scroll_value * self.contentView.verticalScrollBar().maximum()
        # setValue only accepts an int; passing the float product raises
        # a TypeError on current Python versions
        self.contentView.verticalScrollBar().setValue(int(scroll_position * 1.1))

        # Positions saved without a 'last_visible_text' entry simply have
        # nothing to search for
        search_text = self.metadata['position'].get('last_visible_text')
        if search_data:
            search_text = search_data[1]

        if search_text:
            self.contentView.find(search_text)

            # find() selects the match; drop the selection again
            text_cursor = self.contentView.textCursor()
            text_cursor.clearSelection()
            self.contentView.setTextCursor(text_cursor)

        if switch_widgets:
            self.window().tabWidget.setCurrentWidget(previous_widget)

    def generate_position(self, is_read=False):
        """Create a fresh ``metadata['position']`` entry.

        With ``is_read`` the position points at the end of the last
        chapter, otherwise at the start of the first one.
        """
        # TODO
        # Calculate lines to incorporate into progress

        total_chapters = len(self.metadata['content'])

        current_chapter = 1
        scroll_value = 0
        if is_read:
            current_chapter = total_chapters
            scroll_value = 1

        self.metadata['position'] = {
            'current_chapter': current_chapter,
            'total_chapters': total_chapters,
            'scroll_value': scroll_value,
            'last_visible_text': None,
            'is_read': is_read}

    def generate_keyboard_shortcuts(self):
        """Install the chapter navigation and fullscreen shortcuts."""
        self.next_chapter = QtWidgets.QShortcut(
            QtGui.QKeySequence('Right'), self.contentView)
        self.next_chapter.setObjectName('nextChapter')
        self.next_chapter.activated.connect(self.sneaky_change)

        self.prev_chapter = QtWidgets.QShortcut(
            QtGui.QKeySequence('Left'), self.contentView)
        self.prev_chapter.setObjectName('prevChapter')
        self.prev_chapter.activated.connect(self.sneaky_change)

        self.go_fs = QtWidgets.QShortcut(
            QtGui.QKeySequence('F11'), self.contentView)
        self.go_fs.activated.connect(self.go_fullscreen)

        self.exit_fs = QtWidgets.QShortcut(
            QtGui.QKeySequence('Escape'), self.contentView)
        self.exit_fs.setContext(QtCore.Qt.ApplicationShortcut)
        self.exit_fs.activated.connect(self.exit_fullscreen)

        # TODO
        # See why Ctrl + Q won't work on a non fullscreened contentView
        # widget in case the following is in code

        # self.exit_all = QtWidgets.QShortcut(
        #     QtGui.QKeySequence('Ctrl+Q'), self.contentView)
        # self.exit_all.activated.connect(self.sneaky_exit)

    def go_fullscreen(self):
        """Detach the content view into a fullscreen window (toggles)."""
        if self.contentView.windowState() == QtCore.Qt.WindowFullScreen:
            self.exit_fullscreen()
            return

        self.contentView.setWindowFlags(QtCore.Qt.Window)
        self.contentView.setWindowState(QtCore.Qt.WindowFullScreen)
        self.contentView.show()
        self.window().hide()

    def exit_fullscreen(self):
        """Show the main window again and re-embed the content view."""
        self.window().show()
        self.contentView.setWindowFlags(QtCore.Qt.Widget)
        self.contentView.setWindowState(QtCore.Qt.WindowNoState)
        self.contentView.show()

    def change_chapter_tocBox(self):
        """Display the chapter currently selected in the toolbar's tocBox."""
        chapter_number = self.window().bookToolBar.tocBox.currentIndex()
        required_content = self.metadata['content'][chapter_number][1]

        if self.are_we_doing_images_only:
            self.contentView.loadImage(required_content)
        else:
            self.contentView.clear()
            self.contentView.setHtml(required_content)

    def format_view(self, font, font_size, foreground,
                    background, padding, line_spacing,
                    text_alignment):
        """Apply a display profile to the content view.

        Image-only tabs only use ``background``. For text tabs
        ``foreground`` and ``background`` must provide ``name()``, and
        ``text_alignment`` is one of ``'left'``, ``'right'``, ``'center'``
        or ``'justify'``.
        """
        if self.are_we_doing_images_only:
            # Tab color does not need to be set separately in case
            # no padding is set for the viewport of a QGraphicsView
            # and image resizing in done in the pixmap
            my_qbrush = QtGui.QBrush(QtCore.Qt.SolidPattern)
            my_qbrush.setColor(background)
            self.contentView.setBackgroundBrush(my_qbrush)
            self.contentView.resizeEvent()

        else:
            self.contentView.setStyleSheet(
                "QTextEdit {{font-family: {0}; font-size: {1}px; color: {2}; background-color: {3}}}".format(
                    font, font_size, foreground.name(), background.name()))

            # Line spacing
            # Set line spacing per a block format
            # This is proportional line spacing so assume a divisor of 100
            block_format = QtGui.QTextBlockFormat()
            block_format.setLineHeight(
                line_spacing, QtGui.QTextBlockFormat.ProportionalHeight)
            block_format.setTextIndent(50)

            # Give options for alignment
            alignment_dict = {
                'left': QtCore.Qt.AlignLeft,
                'right': QtCore.Qt.AlignRight,
                'center': QtCore.Qt.AlignCenter,
                'justify': QtCore.Qt.AlignJustify}

            # The first table of contents entry is always centered
            current_index = self.window().bookToolBar.tocBox.currentIndex()
            if current_index == 0:
                block_format.setAlignment(QtCore.Qt.AlignVCenter | QtCore.Qt.AlignHCenter)
            else:
                block_format.setAlignment(alignment_dict[text_alignment])

            # Also for padding
            # Using setViewPortMargins for this disables scrolling in the margins
            block_format.setLeftMargin(padding)
            block_format.setRightMargin(padding)

            this_cursor = self.contentView.textCursor()
            this_cursor.movePosition(QtGui.QTextCursor.Start, 0, 1)

            # Iterate over the entire document block by block
            # The document ends when the cursor position can no longer be incremented
            while True:
                old_position = this_cursor.position()
                this_cursor.mergeBlockFormat(block_format)
                this_cursor.movePosition(QtGui.QTextCursor.NextBlock, 0, 1)
                new_position = this_cursor.position()
                if old_position == new_position:
                    break

    def toggle_bookmarks(self):
        """Show the bookmark dock if it is hidden, hide it otherwise."""
        if self.dockWidget.isVisible():
            self.dockWidget.hide()
        else:
            self.dockWidget.show()

    def add_bookmark(self):
        """Bookmark the current position and reveal the bookmark dock."""
        # TODO
        # Start dockListView.edit(index) when something new is added

        identifier = uuid.uuid4().hex[:10]
        description = 'New bookmark'

        if self.are_we_doing_images_only:
            # Images have no scroll position or text to search for
            chapter = self.metadata['position']['current_chapter']
            search_data = (0, None)
        else:
            chapter, scroll_position, visible_text = self.contentView.record_scroll_position(True)
            search_data = (scroll_position, visible_text)

        self.metadata['bookmarks'][identifier] = {
            'chapter': chapter,
            'search_data': search_data,
            'description': description}

        self.add_bookmark_to_model(
            description, chapter, search_data, identifier)
        self.dockWidget.setVisible(True)

    def add_bookmark_to_model(self, description, chapter, search_data, identifier):
        """Append one bookmark row to ``bookmark_model``.

        Roles used: DisplayRole - description, UserRole - chapter,
        UserRole + 1 - search data, UserRole + 2 - identifier.
        """
        bookmark = QtGui.QStandardItem()
        bookmark.setData(description, QtCore.Qt.DisplayRole)

        bookmark.setData(chapter, QtCore.Qt.UserRole)
        bookmark.setData(search_data, QtCore.Qt.UserRole + 1)
        bookmark.setData(identifier, QtCore.Qt.UserRole + 2)

        self.bookmark_model.appendRow(bookmark)
        self.update_bookmark_proxy_model()

    def navigate_to_bookmark(self, index):
        """Jump to the bookmark behind ``index`` (an index of the proxy model)."""
        if not index.isValid():
            return

        chapter = self.proxy_model.data(index, QtCore.Qt.UserRole)
        search_data = self.proxy_model.data(index, QtCore.Qt.UserRole + 1)

        # Chapters are stored 1-based, tocBox entries are 0-based
        self.window().bookToolBar.tocBox.setCurrentIndex(chapter - 1)
        if not self.are_we_doing_images_only:
            self.set_scroll_value(False, search_data)

    def generate_bookmark_model(self):
        """Fill ``bookmark_model`` from ``metadata['bookmarks']``."""
        # TODO
        # Sorting is not working correctly

        for i in self.metadata['bookmarks'].items():
            self.add_bookmark_to_model(
                i[1]['description'],
                i[1]['chapter'],
                i[1]['search_data'],
                i[0])

        self.generate_bookmark_proxy_model()

    def generate_bookmark_proxy_model(self):
        """Wire the proxy model between ``bookmark_model`` and the list view."""
        self.proxy_model.setSourceModel(self.bookmark_model)
        self.proxy_model.setSortCaseSensitivity(False)
        self.proxy_model.setSortRole(QtCore.Qt.UserRole)
        self.dockListView.setModel(self.proxy_model)

    def update_bookmark_proxy_model(self):
        """Re-filter the bookmarks with the text of the toolbar's search bar."""
        self.proxy_model.invalidateFilter()
        self.proxy_model.setFilterParams(
            self.window().bookToolBar.searchBar.text())
        self.proxy_model.setFilterFixedString(
            self.window().bookToolBar.searchBar.text())

    def generate_bookmark_context_menu(self, position):
        """Show the Edit / Delete menu for the bookmark under ``position``."""
        index = self.dockListView.indexAt(position)
        if not index.isValid():
            return

        bookmark_menu = QtWidgets.QMenu()
        editAction = bookmark_menu.addAction(
            QtGui.QIcon.fromTheme('edit-rename'), 'Edit')
        deleteAction = bookmark_menu.addAction(
            QtGui.QIcon.fromTheme('trash-empty'), 'Delete')

        action = bookmark_menu.exec_(self.dockListView.mapToGlobal(position))

        if action == editAction:
            self.dockListView.edit(index)

        if action == deleteAction:
            # ``index`` belongs to the proxy model shown by the list view.
            # While a filter is active its row differs from the row in
            # bookmark_model, so translate it before touching the source.
            # NOTE(review): assumes BookmarkProxyModel derives from
            # QAbstractProxyModel (it is used with setSourceModel) - confirm
            source_row = self.proxy_model.mapToSource(index).row()
            delete_uuid = self.bookmark_model.item(source_row).data(QtCore.Qt.UserRole + 2)

            self.metadata['bookmarks'].pop(delete_uuid)
            self.bookmark_model.removeRow(source_row)

    def hide_mouse(self):
        """Blank the cursor over the content view (mouse_hide_timer slot)."""
        self.contentView.viewport().setCursor(QtCore.Qt.BlankCursor)

    def sneaky_change(self):
        """Change chapter according to which arrow-key shortcut fired."""
        direction = -1
        if self.sender().objectName() == 'nextChapter':
            direction = 1

        self.contentView.common_functions.change_chapter(
            direction, True)

    def sneaky_exit(self):
        """Hide the content view and forward to the main window's closeEvent."""
        self.contentView.hide()
        self.window().closeEvent()
class PliantQGraphicsView(QtWidgets.QGraphicsView):
    """Content view for image-only books (comics).

    ``main_window`` must expose ``comic_profile`` (a mapping with
    ``zoom_mode`` and ``padding``) and ``closeEvent``. ``parent`` is the
    owning tab; its ``metadata['content']`` and ``mouse_hide_timer`` are
    used here.
    """

    def __init__(self, main_window, parent=None):
        super(PliantQGraphicsView, self).__init__(parent)
        self.main_window = main_window
        # NOTE: shadows QWidget.parent(); other code in the file reads
        # this attribute, so it is kept
        self.parent = parent

        self.image_pixmap = None
        self.ignore_wheel_event = False
        self.ignore_wheel_event_number = 0
        self.setDragMode(QtWidgets.QGraphicsView.ScrollHandDrag)
        self.viewport().setCursor(QtCore.Qt.ArrowCursor)
        self.common_functions = PliantWidgetsCommonFunctions(
            self, self.main_window)
        self.setMouseTracking(True)
        # Four slots of (path, pixmap) tuples or None
        self.image_cache = [None for _ in range(4)]

    def loadImage(self, current_image):
        """Display ``current_image`` (a path), serving it from the cache.

        The cache is a window of four neighbouring images. When the
        requested image is not cached, the window is rebuilt around it and
        the lookup is repeated.
        """
        # TODO
        # For double page view: 1 before, 1 after
        # Image panning with mouse

        content = self.parent.metadata['content']
        image_paths = [i[1] for i in content]

        def generate_image_cache(current_image):
            # Rebuild the window as [previous, current, next, next + 1]
            print('Building image cache')
            current_image_index = image_paths.index(current_image)

            for i in (-1, 0, 1, 2):
                try:
                    # NOTE(review): for the first image the index -1 wraps
                    # around to the last image instead of raising
                    this_path = image_paths[current_image_index + i]
                    this_pixmap = QtGui.QPixmap()
                    this_pixmap.load(this_path)
                    self.image_cache[i + 1] = (this_path, this_pixmap)
                except IndexError:
                    self.image_cache[i + 1] = None

        def refill_cache(remove_value):
            # Slide the window depending on the slot the requested image
            # was found in
            remove_index = self.image_cache.index(remove_value)
            refill_pixmap = QtGui.QPixmap()

            if remove_index == 1:
                # Found in slot 1: drop the last slot and prepend the image
                # that precedes the first cached one
                first_path = self.image_cache[0][0]
                self.image_cache.pop(3)
                previous_path = image_paths[image_paths.index(first_path) - 1]
                refill_pixmap.load(previous_path)
                self.image_cache.insert(0, (previous_path, refill_pixmap))
            else:
                # Any other slot: drop the first entry and append the image
                # that follows the last cached one, or None past the end
                self.image_cache[0] = self.image_cache[1]
                self.image_cache.pop(1)
                try:
                    last_path = self.image_cache[2][0]
                    next_path = image_paths[image_paths.index(last_path) + 1]
                    refill_pixmap.load(next_path)
                    self.image_cache.append((next_path, refill_pixmap))
                except (IndexError, TypeError):
                    self.image_cache.append(None)

        def check_cache(current_image):
            for i in self.image_cache:
                if i:
                    if i[0] == current_image:
                        return_pixmap = i[1]
                        refill_cache(i)
                        return return_pixmap

            # No return happened so the image isn't in the cache
            generate_image_cache(current_image)

        # A cache miss rebuilds the cache and returns None, so the second
        # pass is guaranteed to hit
        return_pixmap = None
        while not return_pixmap:
            return_pixmap = check_cache(current_image)

        self.image_pixmap = return_pixmap
        self.resizeEvent()

    def resizeEvent(self, *args):
        """Rescale the current image for the viewport and show it.

        Also called directly, without an event, to refresh the display.
        The scaling follows ``comic_profile['zoom_mode']``; the
        ``originalSize`` and ``bestFit`` modes write the resulting
        horizontal padding back into ``comic_profile['padding']``.
        """
        if not self.image_pixmap:
            return

        zoom_mode = self.main_window.comic_profile['zoom_mode']
        padding = self.main_window.comic_profile['padding']

        if zoom_mode == 'fitWidth':
            available_width = self.viewport().width()
            image_pixmap = self.image_pixmap.scaledToWidth(
                available_width, QtCore.Qt.SmoothTransformation)

        elif zoom_mode == 'originalSize':
            image_pixmap = self.image_pixmap

            new_padding = (self.viewport().width() - image_pixmap.width()) // 2
            if new_padding < 0:  # The image is larger than the viewport
                self.main_window.comic_profile['padding'] = 0
            else:
                self.main_window.comic_profile['padding'] = new_padding

        elif zoom_mode == 'bestFit':
            available_width = self.viewport().width()
            available_height = self.viewport().height()

            image_pixmap = self.image_pixmap.scaled(
                available_width, available_height,
                QtCore.Qt.KeepAspectRatio, QtCore.Qt.SmoothTransformation)

            self.main_window.comic_profile['padding'] = (
                self.viewport().width() - image_pixmap.width()) // 2

        elif zoom_mode == 'manualZoom':
            available_width = self.viewport().width() - 2 * padding
            image_pixmap = self.image_pixmap.scaledToWidth(
                available_width, QtCore.Qt.SmoothTransformation)

        else:
            # An unrecognised zoom mode used to leave image_pixmap unbound
            # and crash with UnboundLocalError; show the image unscaled
            image_pixmap = self.image_pixmap

        graphics_scene = QtWidgets.QGraphicsScene()
        graphics_scene.addPixmap(image_pixmap)

        self.setScene(graphics_scene)
        self.show()

    def wheelEvent(self, event):
        """Delegate to the shared wheel handling (image mode)."""
        self.common_functions.wheelEvent(event, True)

    def keyPressEvent(self, event):
        """Space scrolls down half the scroll range, or turns the page at the end.

        All other keys are intentionally not forwarded to the base class.
        """
        # This function is sufficiently different to warrant
        # exclusion from the common functions class
        if event.key() == QtCore.Qt.Key_Space:
            vertical = self.verticalScrollBar().value()
            maximum = self.verticalScrollBar().maximum()

            if vertical == maximum:
                self.common_functions.change_chapter(1, True)
            else:
                # Increment by following value
                scroll_increment = maximum // 2
                self.verticalScrollBar().setValue(vertical + scroll_increment)

    def mouseMoveEvent(self, *args):
        """Show the cursor again and restart the 3 second hide timer."""
        self.viewport().setCursor(QtCore.Qt.ArrowCursor)
        self.parent.mouse_hide_timer.start(3000)

    def closeEvent(self, *args):
        """Forward to the main window's closeEvent."""
        # In case the program is closed when a contentView is fullscreened
        self.main_window.closeEvent()
class PliantQTextBrowser(QtWidgets.QTextBrowser):
def __init__(self, main_window, parent=None):
super(PliantQTextBrowser, self).__init__(parent)
self.main_window = main_window
self.parent = parent
self.ignore_wheel_event = False
self.ignore_wheel_event_number = 0
self.common_functions = PliantWidgetsCommonFunctions(
self, self.main_window)
self.verticalScrollBar().sliderMoved.connect(self.record_scroll_position)
self.setMouseTracking(True)
self.setContextMenuPolicy(QtCore.Qt.CustomContextMenu)
self.customContextMenuRequested.connect(
self.generate_textbrowser_context_menu)
self.viewport().setCursor(QtCore.Qt.IBeamCursor)
def wheelEvent(self, event):
self.record_scroll_position()
self.common_functions.wheelEvent(event, False)
def keyPressEvent(self, event):
if event.key() == 32:
self.record_scroll_position()
if self.verticalScrollBar().value() == self.verticalScrollBar().maximum():
self.common_functions.change_chapter(1, True)
else:
QtWidgets.QTextEdit.keyPressEvent(self, event)
else:
QtWidgets.QTextEdit.keyPressEvent(self, event)
def record_scroll_position(self, return_as_bookmark=False):
self.parent.metadata['position']['is_read'] = False
vertical = self.verticalScrollBar().value()
maximum = self.verticalScrollBar().maximum()
self.parent.metadata['position']['scroll_value'] = 1
if maximum != 0:
self.parent.metadata['position']['scroll_value'] = (vertical / maximum)
cursor = self.cursorForPosition(QtCore.QPoint(0, 0))
bottom_right = QtCore.QPoint(self.viewport().width() - 1, self.viewport().height())
bottom_right_cursor = self.cursorForPosition(bottom_right).position()
cursor.setPosition(bottom_right_cursor, QtGui.QTextCursor.KeepAnchor)
visible_text = cursor.selectedText()
if len(visible_text) > 50:
visible_text = visible_text[:51]
if return_as_bookmark:
return (self.parent.metadata['position']['current_chapter'],
self.parent.metadata['position']['scroll_value'],
visible_text)
else:
self.parent.metadata['position']['last_visible_text'] = visible_text
def generate_textbrowser_context_menu(self, position):
selected_word = self.textCursor().selection()
selected_word = selected_word.toPlainText()
context_menu = QtWidgets.QMenu()
defineAction = 'Caesar si viveret, ad remum dareris'
if selected_word and selected_word != '':
selected_word = selected_word.split()[0]
defineAction = context_menu.addAction(
QtGui.QIcon.fromTheme('view-readermode'), f'Define "{selected_word}"')
searchAction = context_menu.addAction(
QtGui.QIcon.fromTheme('search'), 'Search')
action = context_menu.exec_(self.sender().mapToGlobal(position))
if action == defineAction:
self.window().definitionDialog.find_definition(selected_word)
if action == searchAction:
self.window().bookToolBar.searchBar.setFocus()
def closeEvent(self, *args):
    """Forward a close request to the main window.

    Any positional arguments are accepted and ignored; the main window's
    ``closeEvent`` is called without arguments.
    """
    window = self.main_window
    window.closeEvent()
def mouseMoveEvent(self, event):
    """Accept the move event, show the I-beam cursor and restart the timer.

    The viewport cursor is set to ``QtCore.Qt.IBeamCursor`` and
    ``self.parent.mouse_hide_timer`` is (re)started with 3000.
    """
    event.accept()

    viewport = self.viewport()
    viewport.setCursor(QtCore.Qt.IBeamCursor)

    # NOTE(review): presumably a QTimer in milliseconds that hides the
    # cursor after inactivity - confirm against the parent widget.
    hide_timer = self.parent.mouse_hide_timer
    hide_timer.start(3000)
class PliantWidgetsCommonFunctions():
    """Wheel and chapter-change behaviour shared by the content widgets.

    Args:
        parent_widget: Widget whose scrollbar and ``ignore_wheel_event`` /
            ``ignore_wheel_event_number`` attributes are driven here.
        main_window: Object exposing ``bookToolBar.tocBox``.
    """

    def __init__(self, parent_widget, main_window):
        self.pw = parent_widget
        self.main_window = main_window

    def wheelEvent(self, event, are_we_doing_images_only):
        """Scroll the parent widget and change chapter at its boundaries.

        While ``self.pw.ignore_wheel_event`` is set, events are counted
        and dropped; the flag is cleared once the count exceeds 10
        (images only) or 20 (otherwise).

        Args:
            event: The wheel event to process.
            are_we_doing_images_only: Selects the QGraphicsView handler
                and the lower threshold when true, the QTextBrowser
                handler and the higher threshold otherwise.
        """
        widget = self.pw
        events_to_ignore = 10 if are_we_doing_images_only else 20

        if widget.ignore_wheel_event:
            widget.ignore_wheel_event_number += 1
            if widget.ignore_wheel_event_number > events_to_ignore:
                widget.ignore_wheel_event = False
                widget.ignore_wheel_event_number = 0
            return

        # Let the matching Qt base class perform the actual scrolling.
        if are_we_doing_images_only:
            base_class = QtWidgets.QGraphicsView
        else:
            base_class = QtWidgets.QTextBrowser
        base_class.wheelEvent(widget, event)

        vertical_pdelta = event.pixelDelta().y()
        if abs(vertical_pdelta) <= 80:  # Adjust sensitivity here
            return

        # Past the threshold the delta is non-zero, so its sign alone
        # gives the direction.
        moving_up = vertical_pdelta > 0

        scrollbar = widget.verticalScrollBar()
        value = scrollbar.value()
        at_top = value == 0
        at_bottom = value == scrollbar.maximum()

        # When no scrolling is possible both flags are true and the
        # direction alone decides.
        if moving_up and at_top:
            self.change_chapter(-1)
        elif not moving_up and at_bottom:
            self.change_chapter(1)

    def change_chapter(self, direction, was_button_pressed=None):
        """Move the table-of-contents selection by one entry.

        Nothing happens when the move would leave the range of the
        ``tocBox`` entries.

        Args:
            direction: 1 for the next entry, -1 for the previous one.
            was_button_pressed: Truthy when triggered by a key or button
                rather than the wheel; the scrollbar is then always set
                to 0 and wheel events are not suppressed.
        """
        toc_box = self.main_window.bookToolBar.tocBox
        current_toc_index = toc_box.currentIndex()
        max_toc_index = toc_box.count() - 1

        can_go_forward = direction == 1 and current_toc_index < max_toc_index
        can_go_back = direction == -1 and current_toc_index > 0
        if not (can_go_forward or can_go_back):
            return

        toc_box.setCurrentIndex(current_toc_index + direction)

        # Set page position depending on whether the chapter number is
        # increasing or decreasing.
        scrollbar = self.pw.verticalScrollBar()
        if direction == 1 or was_button_pressed:
            scrollbar.setValue(0)
        else:
            scrollbar.setValue(self.pw.verticalScrollBar().maximum())

        if not was_button_pressed:
            # Drop the wheel events that follow a wheel-driven change.
            self.pw.ignore_wheel_event = True
class PliantDockWidget(QtWidgets.QDockWidget):
    """Dock widget that mirrors its visibility on the bookmark button.

    Args:
        parent: Parent widget; also kept on ``self.parent`` and used to
            reach ``window().bookToolBar.bookmarkButton``.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.parent = parent

    def _set_bookmark_button(self, checked):
        """Set the checked state of the tool bar's bookmark button."""
        tool_bar = self.parent.window().bookToolBar
        tool_bar.bookmarkButton.setChecked(checked)

    def showEvent(self, event):
        """Check the bookmark button when the dock is shown."""
        self._set_bookmark_button(True)

    def hideEvent(self, event):
        """Uncheck the bookmark button when the dock is hidden."""
        self._set_bookmark_button(False)
class PliantQGraphicsScene(QtWidgets.QGraphicsScene):
    """Graphics scene that lets the user pick a new cover on click.

    Args:
        parent: Owner of the scene; must provide ``pos``, ``show``,
            ``load_cover`` and ``parent.settings``.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.parent = parent

    def mouseReleaseEvent(self, event):
        """Ask for an image file and load it as the new cover.

        The parent's position is remembered in ``previous_position``
        before the dialog opens. If no file is chosen the parent is
        simply shown again. Otherwise the file's bytes go through
        ``resize_image`` into ``cover_for_database`` and a 140x205 pixmap
        (aspect ratio ignored) is handed to ``load_cover``.
        """
        owner = self.parent
        owner.previous_position = owner.pos()

        image_files = '*.jpg *.png'
        start_directory = owner.parent.settings['last_open_path']
        # getOpenFileName returns a tuple; element 0 is the chosen path.
        dialog_result = QtWidgets.QFileDialog.getOpenFileName(
            None, 'Select new cover', start_directory,
            f'Images ({image_files})')
        new_cover = dialog_result[0]

        if not new_cover:
            owner.show()
            return

        with open(new_cover, 'rb') as cover_ref:
            cover_bytes = cover_ref.read()
        owner.cover_for_database = resize_image(cover_bytes)

        cover_pixmap = QtGui.QPixmap()
        cover_pixmap.load(new_cover)
        scaled_pixmap = cover_pixmap.scaled(
            140, 205, QtCore.Qt.IgnoreAspectRatio)

        owner.load_cover(scaled_pixmap, True)
        owner.show()