From db52b413ad157f52c41def7dd9a6be98478e7224 Mon Sep 17 00:00:00 2001
From: BasioMeusPuga
Date: Sat, 4 Nov 2017 12:20:18 +0530
Subject: [PATCH] Start epub parser

---
 parse_book.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/parse_book.py b/parse_book.py
index 479e17c..aae5a94 100644
--- a/parse_book.py
+++ b/parse_book.py
@@ -2,9 +2,29 @@
 
 import os
 import zipfile
-import xml.etree.ElementTree as ET
+import tempfile
+import xmltodict
 
 
 class ePUB:
     def __init__(self, filename):
-        pass
\ No newline at end of file
+        self.filename = filename
+        self.tmpdir = None
+
+    def extract(self):
+        self.tmpdir = tempfile.TemporaryDirectory()
+        with zipfile.ZipFile(self.filename, 'r') as zip_ref:
+            zip_ref.extractall(self.tmpdir.name)
+
+    def parse(self):
+        with open(self.tmpdir.name + os.sep + 'content.opf') as fd:
+            xml_dict = xmltodict.parse(fd.read())
+
+        metadata = xml_dict['package']['metadata']
+        book_title = metadata['dc:title']
+        book_description = metadata['dc:description']
+        book_author = metadata['dc:creator']['#text']
+
+        print(book_author)
+        print(book_title)
+        print(book_description)
\ No newline at end of file