# -*- coding: utf-8 -*- """ sphinx.builders.epub ~~~~~~~~~~~~~~~~~~~~ Build epub files. Originally derived from qthelp.py. :copyright: Copyright 2007-2014 by the Sphinx team, see AUTHORS. :license: BSD, see LICENSE for details. """ import os import re import time import codecs import zipfile from os import path try: from PIL import Image except ImportError: try: import Image except ImportError: Image = None from docutils import nodes from sphinx import addnodes from sphinx.builders.html import StandaloneHTMLBuilder from sphinx.util.osutil import ensuredir, copyfile, EEXIST from sphinx.util.smartypants import sphinx_smarty_pants as ssp from sphinx.util.console import brown # (Fragment) templates from which the metainfo files content.opf, toc.ncx, # mimetype, and META-INF/container.xml are created. # This template section also defines strings that are embedded in the html # output but that may be customized by (re-)setting module attributes, # e.g. from conf.py. _mimetype_template = 'application/epub+zip' # no EOL! _container_template = u'''\ ''' _toc_template = u'''\ %(title)s %(navpoints)s ''' _navpoint_template = u'''\ %(indent)s %(indent)s %(indent)s %(text)s %(indent)s %(indent)s %(indent)s ''' _navpoint_indent = ' ' _navPoint_template = 'navPoint%d' _content_template = u'''\ %(lang)s %(title)s %(author)s %(publisher)s %(copyright)s %(id)s %(date)s %(files)s %(spine)s %(guide)s ''' _cover_template = u'''\ ''' _coverpage_name = u'epub-cover.html' _file_template = u'''\ ''' _spine_template = u'''\ ''' _guide_template = u'''\ ''' _toctree_template = u'toctree-l%d' _link_target_template = u' [%(uri)s]' _footnote_label_template = u'#%d' _footnotes_rubric_name = u'Footnotes' _css_link_target_class = u'link-target' # XXX These strings should be localized according to epub_language _guide_titles = { 'toc': u'Table of Contents', 'cover': u'Cover' } _media_types = { '.html': 'application/xhtml+xml', '.css': 'text/css', '.png': 'image/png', '.gif': 'image/gif', '.svg': 'image/svg+xml', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.otf': 'application/x-font-otf', '.ttf': 'application/x-font-ttf', } _vector_graphics_extensions = ('.svg',) # Regular expression to match colons only in local fragment identifiers. # If the URI contains a colon before the #, # it is an external link that should not change. _refuri_re = re.compile("([^#:]*#)(.*)") # The epub publisher class EpubBuilder(StandaloneHTMLBuilder): """ Builder that outputs epub files. It creates the metainfo files container.opf, toc.ncx, mimetype, and META-INF/container.xml. Afterwards, all necessary files are zipped to an epub file. """ name = 'epub' # don't copy the reST source copysource = False supported_image_types = ['image/svg+xml', 'image/png', 'image/gif', 'image/jpeg'] # don't add links add_permalinks = False # don't add sidebar etc. embedded = True def init(self): StandaloneHTMLBuilder.init(self) # the output files for epub must be .html only self.out_suffix = '.html' self.playorder = 0 self.tocid = 0 def get_theme_config(self): return self.config.epub_theme, self.config.epub_theme_options # generic support functions def make_id(self, name, id_cache={}): # id_cache is intentionally mutable """Return a unique id for name.""" id = id_cache.get(name) if not id: id = 'epub-%d' % self.env.new_serialno('epub') id_cache[name] = id return id def esc(self, name): """Replace all characters not allowed in text an attribute values.""" # Like cgi.escape, but also replace apostrophe name = name.replace('&', '&') name = name.replace('<', '<') name = name.replace('>', '>') name = name.replace('"', '"') name = name.replace('\'', ''') return name def get_refnodes(self, doctree, result): """Collect section titles, their depth in the toc and the refuri.""" # XXX: is there a better way than checking the attribute # toctree-l[1-8] on the parent node? if isinstance(doctree, nodes.reference) and 'refuri' in doctree: refuri = doctree['refuri'] if refuri.startswith('http://') or refuri.startswith('https://') \ or refuri.startswith('irc:') or refuri.startswith('mailto:'): return result classes = doctree.parent.attributes['classes'] for level in range(8, 0, -1): # or range(1, 8)? if (_toctree_template % level) in classes: result.append({ 'level': level, 'refuri': self.esc(refuri), 'text': ssp(self.esc(doctree.astext())) }) break else: for elem in doctree.children: result = self.get_refnodes(elem, result) return result def get_toc(self): """Get the total table of contents, containing the master_doc and pre and post files not managed by sphinx. """ doctree = self.env.get_and_resolve_doctree(self.config.master_doc, self, prune_toctrees=False, includehidden=True) self.refnodes = self.get_refnodes(doctree, []) master_dir = path.dirname(self.config.master_doc) if master_dir: master_dir += '/' # XXX or os.sep? for item in self.refnodes: item['refuri'] = master_dir + item['refuri'] self.toc_add_files(self.refnodes) def toc_add_files(self, refnodes): """Add the master_doc, pre and post files to a list of refnodes. """ refnodes.insert(0, { 'level': 1, 'refuri': self.esc(self.config.master_doc + '.html'), 'text': ssp(self.esc( self.env.titles[self.config.master_doc].astext())) }) for file, text in reversed(self.config.epub_pre_files): refnodes.insert(0, { 'level': 1, 'refuri': self.esc(file), 'text': ssp(self.esc(text)) }) for file, text in self.config.epub_post_files: refnodes.append({ 'level': 1, 'refuri': self.esc(file), 'text': ssp(self.esc(text)) }) def fix_fragment(self, prefix, fragment): """Return a href/id attribute with colons replaced by hyphens.""" return prefix + fragment.replace(':', '-') def fix_ids(self, tree): """Replace colons with hyphens in href and id attributes. Some readers crash because they interpret the part as a transport protocol specification. """ for node in tree.traverse(nodes.reference): if 'refuri' in node: m = _refuri_re.match(node['refuri']) if m: node['refuri'] = self.fix_fragment(m.group(1), m.group(2)) if 'refid' in node: node['refid'] = self.fix_fragment('', node['refid']) for node in tree.traverse(addnodes.desc_signature): ids = node.attributes['ids'] newids = [] for id in ids: newids.append(self.fix_fragment('', id)) node.attributes['ids'] = newids def add_visible_links(self, tree, show_urls='inline'): """Add visible link targets for external links""" def make_footnote_ref(doc, label): """Create a footnote_reference node with children""" footnote_ref = nodes.footnote_reference('[#]_') footnote_ref.append(nodes.Text(label)) doc.note_autofootnote_ref(footnote_ref) return footnote_ref def make_footnote(doc, label, uri): """Create a footnote node with children""" footnote = nodes.footnote(uri) para = nodes.paragraph() para.append(nodes.Text(uri)) footnote.append(para) footnote.insert(0, nodes.label('', label)) doc.note_autofootnote(footnote) return footnote def footnote_spot(tree): """Find or create a spot to place footnotes. The function returns the tuple (parent, index).""" # The code uses the following heuristic: # a) place them after the last existing footnote # b) place them after an (empty) Footnotes rubric # c) create an empty Footnotes rubric at the end of the document fns = tree.traverse(nodes.footnote) if fns: fn = fns[-1] return fn.parent, fn.parent.index(fn) + 1 for node in tree.traverse(nodes.rubric): if len(node.children) == 1 and \ node.children[0].astext() == _footnotes_rubric_name: return node.parent, node.parent.index(node) + 1 doc = tree.traverse(nodes.document)[0] rub = nodes.rubric() rub.append(nodes.Text(_footnotes_rubric_name)) doc.append(rub) return doc, doc.index(rub) + 1 if show_urls == 'no': return if show_urls == 'footnote': doc = tree.traverse(nodes.document)[0] fn_spot, fn_idx = footnote_spot(tree) nr = 1 for node in tree.traverse(nodes.reference): uri = node.get('refuri', '') if (uri.startswith('http:') or uri.startswith('https:') or uri.startswith('ftp:')) and uri not in node.astext(): idx = node.parent.index(node) + 1 if show_urls == 'inline': uri = _link_target_template % {'uri': uri} link = nodes.inline(uri, uri) link['classes'].append(_css_link_target_class) node.parent.insert(idx, link) elif show_urls == 'footnote': label = _footnote_label_template % nr nr += 1 footnote_ref = make_footnote_ref(doc, label) node.parent.insert(idx, footnote_ref) footnote = make_footnote(doc, label, uri) fn_spot.insert(fn_idx, footnote) footnote_ref['refid'] = footnote['ids'][0] footnote.add_backref(footnote_ref['ids'][0]) fn_idx += 1 def write_doc(self, docname, doctree): """Write one document file. This method is overwritten in order to fix fragment identifiers and to add visible external links. """ self.fix_ids(doctree) self.add_visible_links(doctree, self.config.epub_show_urls) return StandaloneHTMLBuilder.write_doc(self, docname, doctree) def fix_genindex(self, tree): """Fix href attributes for genindex pages.""" # XXX: modifies tree inline # Logic modeled from themes/basic/genindex.html for key, columns in tree: for entryname, (links, subitems) in columns: for (i, (ismain, link)) in enumerate(links): m = _refuri_re.match(link) if m: links[i] = (ismain, self.fix_fragment(m.group(1), m.group(2))) for subentryname, subentrylinks in subitems: for (i, (ismain, link)) in enumerate(subentrylinks): m = _refuri_re.match(link) if m: subentrylinks[i] = (ismain, self.fix_fragment(m.group(1), m.group(2))) def is_vector_graphics(self, filename): """Does the filename extension indicate a vector graphic format?""" ext = path.splitext(filename)[-1] return ext in _vector_graphics_extensions def copy_image_files_pil(self): """Copy images using the PIL. The method tries to read and write the files with the PIL, converting the format and resizing the image if necessary/possible. """ ensuredir(path.join(self.outdir, '_images')) for src in self.status_iterator(self.images, 'copying images... ', brown, len(self.images)): dest = self.images[src] try: img = Image.open(path.join(self.srcdir, src)) except IOError: if not self.is_vector_graphics(src): self.warn('cannot read image file %r: copying it instead' % (path.join(self.srcdir, src), )) try: copyfile(path.join(self.srcdir, src), path.join(self.outdir, '_images', dest)) except (IOError, OSError) as err: self.warn('cannot copy image file %r: %s' % (path.join(self.srcdir, src), err)) continue if self.config.epub_fix_images: if img.mode in ('P',): # See PIL documentation for Image.convert() img = img.convert() if self.config.epub_max_image_width > 0: (width, height) = img.size nw = self.config.epub_max_image_width if width > nw: nh = (height * nw) / width img = img.resize((nw, nh), Image.BICUBIC) try: img.save(path.join(self.outdir, '_images', dest)) except (IOError, OSError) as err: self.warn('cannot write image file %r: %s' % (path.join(self.srcdir, src), err)) def copy_image_files(self): """Copy image files to destination directory. This overwritten method can use the PIL to convert image files. """ if self.images: if self.config.epub_fix_images or self.config.epub_max_image_width: if not Image: self.warn('PIL not found - copying image files') super(EpubBuilder, self).copy_image_files() else: self.copy_image_files_pil() else: super(EpubBuilder, self).copy_image_files() def handle_page(self, pagename, addctx, templatename='page.html', outfilename=None, event_arg=None): """Create a rendered page. This method is overwritten for genindex pages in order to fix href link attributes. """ if pagename.startswith('genindex'): self.fix_genindex(addctx['genindexentries']) StandaloneHTMLBuilder.handle_page(self, pagename, addctx, templatename, outfilename, event_arg) # Finish by building the epub file def handle_finish(self): """Create the metainfo files and finally the epub.""" self.get_toc() self.build_mimetype(self.outdir, 'mimetype') self.build_container(self.outdir, 'META-INF/container.xml') self.build_content(self.outdir, 'content.opf') self.build_toc(self.outdir, 'toc.ncx') self.build_epub(self.outdir, self.config.epub_basename + '.epub') def build_mimetype(self, outdir, outname): """Write the metainfo file mimetype.""" self.info('writing %s file...' % outname) f = codecs.open(path.join(outdir, outname), 'w', 'utf-8') try: f.write(_mimetype_template) finally: f.close() def build_container(self, outdir, outname): """Write the metainfo file META-INF/cointainer.xml.""" self.info('writing %s file...' % outname) fn = path.join(outdir, outname) try: os.mkdir(path.dirname(fn)) except OSError as err: if err.errno != EEXIST: raise f = codecs.open(path.join(outdir, outname), 'w', 'utf-8') try: f.write(_container_template) finally: f.close() def content_metadata(self, files, spine, guide): """Create a dictionary with all metadata for the content.opf file properly escaped. """ metadata = {} metadata['title'] = self.esc(self.config.epub_title) metadata['author'] = self.esc(self.config.epub_author) metadata['uid'] = self.esc(self.config.epub_uid) metadata['lang'] = self.esc(self.config.epub_language) metadata['publisher'] = self.esc(self.config.epub_publisher) metadata['copyright'] = self.esc(self.config.epub_copyright) metadata['scheme'] = self.esc(self.config.epub_scheme) metadata['id'] = self.esc(self.config.epub_identifier) metadata['date'] = self.esc(time.strftime('%Y-%m-%d')) metadata['files'] = files metadata['spine'] = spine metadata['guide'] = guide return metadata def build_content(self, outdir, outname): """Write the metainfo file content.opf It contains bibliographic data, a file list and the spine (the reading order). """ self.info('writing %s file...' % outname) # files if not outdir.endswith(os.sep): outdir += os.sep olen = len(outdir) projectfiles = [] self.files = [] self.ignored_files = ['.buildinfo', 'mimetype', 'content.opf', 'toc.ncx', 'META-INF/container.xml', self.config.epub_basename + '.epub'] + \ self.config.epub_exclude_files for root, dirs, files in os.walk(outdir): for fn in files: filename = path.join(root, fn)[olen:] if filename in self.ignored_files: continue ext = path.splitext(filename)[-1] if ext not in _media_types: # we always have JS and potentially OpenSearch files, don't # always warn about them if ext not in ('.js', '.xml'): self.warn('unknown mimetype for %s, ignoring' % filename) continue filename = filename.replace(os.sep, '/') projectfiles.append(_file_template % { 'href': self.esc(filename), 'id': self.esc(self.make_id(filename)), 'media_type': self.esc(_media_types[ext]) }) self.files.append(filename) # spine spine = [] for item in self.refnodes: if '#' in item['refuri']: continue if item['refuri'] in self.ignored_files: continue spine.append(_spine_template % { 'idref': self.esc(self.make_id(item['refuri'])) }) for info in self.domain_indices: spine.append(_spine_template % { 'idref': self.esc(self.make_id(info[0] + self.out_suffix)) }) if self.get_builder_config('use_index', 'epub'): spine.append(_spine_template % { 'idref': self.esc(self.make_id('genindex' + self.out_suffix)) }) # add the optional cover content_tmpl = _content_template html_tmpl = None if self.config.epub_cover: image, html_tmpl = self.config.epub_cover image = image.replace(os.sep, '/') mpos = content_tmpl.rfind('') cpos = content_tmpl.rfind('\n', 0 , mpos) + 1 content_tmpl = content_tmpl[:cpos] + \ _cover_template % {'cover': self.esc(self.make_id(image))} + \ content_tmpl[cpos:] if html_tmpl: spine.insert(0, _spine_template % { 'idref': self.esc(self.make_id(_coverpage_name))}) if _coverpage_name not in self.files: ext = path.splitext(_coverpage_name)[-1] self.files.append(_coverpage_name) projectfiles.append(_file_template % { 'href': self.esc(_coverpage_name), 'id': self.esc(self.make_id(_coverpage_name)), 'media_type': self.esc(_media_types[ext]) }) ctx = {'image': self.esc(image), 'title': self.config.project} self.handle_page( path.splitext(_coverpage_name)[0], ctx, html_tmpl) guide = [] auto_add_cover = True auto_add_toc = True if self.config.epub_guide: for type, uri, title in self.config.epub_guide: file = uri.split('#')[0] if file not in self.files: self.files.append(file) if type == 'cover': auto_add_cover = False if type == 'toc': auto_add_toc = False guide.append(_guide_template % { 'type': self.esc(type), 'title': self.esc(title), 'uri': self.esc(uri) }) if auto_add_cover and html_tmpl: guide.append(_guide_template % { 'type': 'cover', 'title': _guide_titles['cover'], 'uri': self.esc(_coverpage_name) }) if auto_add_toc and self.refnodes: guide.append(_guide_template % { 'type': 'toc', 'title': _guide_titles['toc'], 'uri': self.esc(self.refnodes[0]['refuri']) }) projectfiles = '\n'.join(projectfiles) spine = '\n'.join(spine) guide = '\n'.join(guide) # write the project file f = codecs.open(path.join(outdir, outname), 'w', 'utf-8') try: f.write(content_tmpl % \ self.content_metadata(projectfiles, spine, guide)) finally: f.close() def new_navpoint(self, node, level, incr=True): """Create a new entry in the toc from the node at given level.""" # XXX Modifies the node if incr: self.playorder += 1 self.tocid += 1 node['indent'] = _navpoint_indent * level node['navpoint'] = self.esc(_navPoint_template % self.tocid) node['playorder'] = self.playorder return _navpoint_template % node def insert_subnav(self, node, subnav): """Insert nested navpoints for given node. The node and subnav are already rendered to text. """ nlist = node.rsplit('\n', 1) nlist.insert(-1, subnav) return '\n'.join(nlist) def build_navpoints(self, nodes): """Create the toc navigation structure. Subelements of a node are nested inside the navpoint. For nested nodes the parent node is reinserted in the subnav. """ navstack = [] navlist = [] level = 1 lastnode = None for node in nodes: if not node['text']: continue file = node['refuri'].split('#')[0] if file in self.ignored_files: continue if node['level'] > self.config.epub_tocdepth: continue if node['level'] == level: navlist.append(self.new_navpoint(node, level)) elif node['level'] == level + 1: navstack.append(navlist) navlist = [] level += 1 if lastnode and self.config.epub_tocdup: # Insert starting point in subtoc with same playOrder navlist.append(self.new_navpoint(lastnode, level, False)) navlist.append(self.new_navpoint(node, level)) else: while node['level'] < level: subnav = '\n'.join(navlist) navlist = navstack.pop() navlist[-1] = self.insert_subnav(navlist[-1], subnav) level -= 1 navlist.append(self.new_navpoint(node, level)) lastnode = node while level != 1: subnav = '\n'.join(navlist) navlist = navstack.pop() navlist[-1] = self.insert_subnav(navlist[-1], subnav) level -= 1 return '\n'.join(navlist) def toc_metadata(self, level, navpoints): """Create a dictionary with all metadata for the toc.ncx file properly escaped. """ metadata = {} metadata['uid'] = self.config.epub_uid metadata['title'] = self.config.epub_title metadata['level'] = level metadata['navpoints'] = navpoints return metadata def build_toc(self, outdir, outname): """Write the metainfo file toc.ncx.""" self.info('writing %s file...' % outname) if self.config.epub_tocscope == 'default': doctree = self.env.get_and_resolve_doctree(self.config.master_doc, self, prune_toctrees=False, includehidden=False) refnodes = self.get_refnodes(doctree, []) self.toc_add_files(refnodes) else: # 'includehidden' refnodes = self.refnodes navpoints = self.build_navpoints(refnodes) level = max(item['level'] for item in self.refnodes) level = min(level, self.config.epub_tocdepth) f = codecs.open(path.join(outdir, outname), 'w', 'utf-8') try: f.write(_toc_template % self.toc_metadata(level, navpoints)) finally: f.close() def build_epub(self, outdir, outname): """Write the epub file. It is a zip file with the mimetype file stored uncompressed as the first entry. """ self.info('writing %s file...' % outname) projectfiles = ['META-INF/container.xml', 'content.opf', 'toc.ncx'] \ + self.files epub = zipfile.ZipFile(path.join(outdir, outname), 'w', \ zipfile.ZIP_DEFLATED) epub.write(path.join(outdir, 'mimetype'), 'mimetype', \ zipfile.ZIP_STORED) for file in projectfiles: fp = path.join(outdir, file) epub.write(fp, file, zipfile.ZIP_DEFLATED) epub.close()