# Source code for core.util.epub
import contextlib
import logging
import os, sys
from lxml import etree
from io import BytesIO
from zipfile import ZipFile
from .http import HTTP
class EpubAccessor(object):
    """Helpers for opening an EPUB archive and reading its package document."""

    # Path, inside the archive, of the container file that names the package
    # document.
    CONTAINER_FILE = "META-INF/container.xml"
    # XML namespace used to qualify element tags looked up in the package
    # document.
    IDPF_NAMESPACE = "http://www.idpf.org/2007/opf"

    @classmethod
    @contextlib.contextmanager
    def open_epub(cls, url, content=None):
        """Cracks open an EPUB to expose its contents

        This is a context manager: the underlying ZipFile is only open inside
        the ``with`` block and is closed when the block exits.

        :param url: A url representing the EPUB, only used for errors and in
            the absence of the `content` parameter
        :param content: A string representing the compressed EPUB
        :return: A tuple containing a ZipFile of the EPUB and the path to its
            package
        :raises ValueError: If neither `url` nor `content` is provided, or if
            the archive has no container file, or the container file does not
            name a package document.
        """
        if not (url or content):
            raise ValueError("Cannot open epub without url or content")
        if url and not content:
            # Get the epub from the url if no content has been made available.
            content = HTTP.get_with_timeout(url).content

        invalid_message = "Invalid EPUB file, not modifying: %s" % url
        container_namespace = "urn:oasis:names:tc:opendocument:xmlns:container"

        with ZipFile(BytesIO(content)) as zip_file:
            if cls.CONTAINER_FILE not in zip_file.namelist():
                raise ValueError(invalid_message)

            with zip_file.open(cls.CONTAINER_FILE) as container_file:
                container = container_file.read()

            rootfiles_element = etree.fromstring(container).find(
                "{%s}rootfiles" % container_namespace
            )
            if rootfiles_element is None:
                raise ValueError(invalid_message)

            rootfile_element = rootfiles_element.find(
                "{%s}rootfile" % container_namespace
            )
            if rootfile_element is None:
                raise ValueError(invalid_message)

            package_document_path = rootfile_element.get('full-path')
            if not package_document_path:
                # Without a path there is no package document to hand back;
                # fail here rather than yielding None to the caller.
                raise ValueError(invalid_message)

            # Yield inside the ``with`` so the archive stays open while the
            # caller works with it.
            yield zip_file, package_document_path

    @classmethod
    def get_element_from_package(cls, zip_file, package_document_path, element_tag):
        """Pulls a single element from the package_document

        :param zip_file: An open ZipFile of the EPUB
        :param package_document_path: Path of the package document inside
            `zip_file`
        :param element_tag: Tag name (without namespace) of the element to find
        :return: The matching element
        :raises ValueError: If the element cannot be found
        """
        [element] = cls.get_elements_from_package(
            zip_file, package_document_path, [element_tag]
        )
        return element

    @classmethod
    def get_elements_from_package(cls, zip_file, package_document_path, element_tags):
        """Pulls one or more elements from the package_document

        :param zip_file: An open ZipFile of the EPUB
        :param package_document_path: Path of the package document inside
            `zip_file`
        :param element_tags: A tag name, or a list/tuple of tag names (without
            namespace), to find as direct children of the document root
        :return: A list of the matching elements, in the order requested
        :raises ValueError: If any requested element cannot be found
        """
        if not isinstance(element_tags, (list, tuple)):
            element_tags = [element_tags]

        with zip_file.open(package_document_path) as package_file:
            # Parse once; every requested tag is looked up in the same tree.
            package = etree.fromstring(package_file.read())

        elements = []
        for element_tag in element_tags:
            element = package.find(
                "{%s}%s" % (cls.IDPF_NAMESPACE, element_tag)
            )
            if element is None:
                raise ValueError("Invalid EPUB file: '%s' could not be found" % element_tag)
            elements.append(element)
        return elements