7 files changed, 2342 insertions, 4360 deletions
diff --git a/tools/meson.build b/tools/meson.build
index b6b3956..35187ca 100644
--- a/tools/meson.build
+++ b/tools/meson.build
@@ -1,10 +1,7 @@
 yelp_tools_in = configuration_data()
 yelp_tools_in.set('DATADIR', pkgdir)
 
-yelp_tools_in.set('YELP_XSLT_DIR', yelp_xslt_dir)
-yelp_tools_in.set('YELP_TMPL_DIR', join_paths(yelp_templates_dir, '')) # Hack for trailing '/'
 yelp_tools_in.set('YELP_JS_DIR', yelp_js_dir)
-yelp_tools_in.set('YELP_ICON_DIR', yelp_icons_dir)
 
 yelp_tools_in.set('XSL_DB2HTML', yelp_db2html_path)
 yelp_tools_in.set('XSL_DB2XHTML', yelp_db2xhtml_path)
@@ -19,21 +16,6 @@ yelp_tools_in.set('XSL_MAL_CACHE', join_paths(
   )
 )
 
-yelp_tools_in.set('XSL_MAL_LINK', join_paths(
-  yelp_xslt_dir,
-  'mallard',
-  'common',
-  'mal-link.xsl',
-  )
-)
-
-yelp_tools_in.set('ID', '@ID@')
-yelp_tools_in.set('DATE', '@DATE@')
-yelp_tools_in.set('YEAR', '@YEAR@')
-yelp_tools_in.set('NAME', '@NAME@')
-yelp_tools_in.set('EMAIL', '@EMAIL@')
-yelp_tools_in.set('TITLE', '@TITLE@')
-
 configure_file(
   input: 'yelp-build.in',
   output: 'yelp-build',
@@ -65,4 +47,4 @@ if get_option('yelpm4') == true
       autoconfdatadir,
     )
   )
-endif
-\ No newline at end of file
+endif
diff --git a/tools/yelp-build.in b/tools/yelp-build.in
index f3e21d8..d53db52 100755
--- a/tools/yelp-build.in
+++ b/tools/yelp-build.in
@@ -1,8 +1,7 @@
-#!/bin/sh
-# -*- indent-tabs-mode: nil -*-
+#!/usr/bin/env python3
 #
 # yelp-build
-# Copyright (C) 2010-2015 Shaun McCance <shaunm@gnome.org>
+# Copyright (C) 2010-2020 Shaun McCance <shaunm@gnome.org>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -18,590 +17,816 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
-xsl_mal_cache='@XSL_MAL_CACHE@'
-xsl_db2html='@XSL_DB2HTML@'
-xsl_db2xhtml='@XSL_DB2XHTML@'
-xsl_mal2html='@XSL_MAL2HTML@'
-xsl_mal2xhtml='@XSL_MAL2XHTML@'
-yelp_icon_dir='@YELP_ICON_DIR@'
-yelp_js_dir='@YELP_JS_DIR@'
-xsl_mal_epub='@DATADIR@/xslt/mal-epub.xsl'
-xsl_mal_opf='@DATADIR@/xslt/mal-opf.xsl'
-xsl_mal_ncx='@DATADIR@/xslt/mal-ncx.xsl'
-xsl_mal_files='@DATADIR@/xslt/mal-files.xsl'
-
-mkdir_p () {
-    if [ ! -d "$1" ]; then
-        mkdir_p `dirname "$1"`
-        mkdir "$1"
-    fi
-}
-
-urlencode () {
-    LC_ALL=C awk '
-BEGIN {
-  for (i = 1; i <= 255; i++) chars[sprintf("%c", i)] = i;
-}
-{
-  ret = "";
-  for (i = 1; i <= length($0); i++) {
-    c = substr($0, i, 1);
-    if (c ~ /[\/a-zA-Z0-9._-]/)
-      ret = ret c;
-    else
-      ret = ret sprintf("%%%X%X", int(chars[c] / 16), chars[c] % 16);
-  }
-  print ret;
-}'
-}
-
-urldecode () {
-    LC_ALL=C awk '
-BEGIN {
-  for(i = 0; i < 10; i++) hex[i] = i;
-  hex["A"] = hex["a"] = 10;
-  hex["B"] = hex["b"] = 11;
-  hex["C"] = hex["c"] = 12;
-  hex["D"] = hex["d"] = 13;
-  hex["E"] = hex["e"] = 14;
-  hex["F"] = hex["f"] = 15;
-}
-{
-  ret = "";
-  for (i = 1; i <= length($0); i++) {
-    c = substr($0, i, 1);
-    if (c == "+") {
-      ret = ret " ";
-    }
-    else if (c == "%") {
-      c = sprintf("%c", hex[substr($0, i + 1, 1)] * 16 + hex[substr($0, i + 2, 1)]);
-      ret = ret c;
-      i += 2;
-    }
-    else {
-      ret = ret c;
-    }
-  }
-  print ret;
-}'
-}
-
-yelp_usage () {
-    (
-        echo "Usage: yelp-build <COMMAND> [OPTIONS] [FILES]"
-        echo ""
-        echo "Commands:"
-        echo "  cache         Create a Mallard cache file"
-        echo "  epub          Create an EPUB file for Mallard"
-        echo "  html          Convert input files to HTML"
-        echo "  xhtml         Convert input files to XHTML"
-    ) 1>&2
-}
-yelp_usage_cache () {
-    (
-        echo "Usage: yelp-build cache <FILES>"
-        echo ""
-        echo "  Create a Mallard cache file from the page files FILES."
-        echo "  If FILES contains directories, all .page files in those"
-        echo "  directories will be used."
-        echo ""
-        echo "Options:"
-        echo "  -o OUT        Output cache to the file OUT"
-        echo "  -s            Treat pages as belonging to a Mallard site"
-    ) 1>&2
-}
-yelp_usage_epub () {
-    (
-        echo "Usage: yelp-build epub [OPTIONS] <FILES>"
-        echo ""
-        echo "  Create and EPUB file from the Mallard page files FILES."
-        echo ""
-        echo "Options:"
-        echo "  -c CACHE      Use the existing Mallard cache CACHE"
-        echo "  -o OUT        Output en EPUB file named OUT"
-        echo "  -x CUSTOM     Import the custom XSLT file CUSTOM"
-        echo "  -p PATHS      Extra paths to search for files"
-        echo "  -i            Ignore missing media files"
-    ) 1>&2
-}
-yelp_usage_html () {
-    (
-        echo "Usage: yelp-build <html|xhtml> [OPTIONS] <FILES>"
-        echo ""
-        echo "  Create HTML or XHTML from the input files FILES."
-        echo "  FILES can be DocBook files, Mallard page files,"
-        echo "  or directories containing Mallard page files."
-        echo ""
-        echo "Options:"
-        echo "  -c CACHE      Use the existing Mallard cache CACHE"
-        echo "  -o OUT        Output files in the directory OUT"
-        echo "  -x CUSTOM     Import the custom XSLT file CUSTOM"
-        echo "  -p PATHS      Extra paths to search for files"
-        echo "  -i            Ignore missing media files"
-    ) 1>&2
-}
-
-if [ $# = 0 ]; then
-    yelp_usage
-    exit 1
-fi
-
-yelp_paths_normalize () {
-    for path in $(echo "$1" | sed -e 's/[: ]/\n/g'); do
-        (cd "$path" && pwd)
-    done | tr '\n' ' '
-}
-
-yelp_cache_in_page () {
-    fbase=$(basename "$1")
-    ext=$(echo "$fbase" | sed -e 's/.*\.//')
-    fdir=$( (cd $(dirname "$1") && pwd) )
-    sdir=${fdir##${cache_site_root}}/
-    url=file://$(echo "$fdir/$fbase" | urlencode)
-    if [ "x$cache_site" = "x1" ]; then
-        siteattr=' site:dir="'"$sdir"'"'
-    fi
-    if [ "x$ext" = "xstack" ]; then
-        echo '<stack cache:href="'"$url"'"'"$siteattr"'/>'
-    else
-        echo '<page cache:href="'"$url"'"'"$siteattr"'/>'
-    fi
-}
-
-yelp_cache_in_site () {
-    for dir in "$1"/*; do
-        if [ -d "$dir" ]; then
-            if [ $(basename "$dir") != "__pintail__" ]; then
-                yelp_cache_in_site "$dir"
-            fi
-        fi
-    done
-    for page in "$1"/*.page "$1"/*.stack; do
-        if [ -e "$page" ]; then
-            yelp_cache_in_page "$page"
-        fi
-    done
-}
-
-yelp_cache_in () {
-    echo '<cache:cache xmlns:cache="http://projectmallard.org/cache/1.0/"'
-    if [ "x$cache_site" = "x1" ]; then
-        echo '  xmlns:site="http://projectmallard.org/site/1.0/"'
-    fi
-    echo '  xmlns="http://projectmallard.org/1.0/">'
-    for page in "$@"; do
-        if [ -d "$page" ]; then
-            if [ "x$cache_site" = "x1" ]; then
-                yelp_cache_in_site "$page"
-            else
-                for sub in "$page"/*.page "$page"/*.stack; do
-                    if [ -e "$sub" ]; then
-                        yelp_cache_in_page "$sub"
-                    fi
-                done
-            fi
-        else
-            yelp_cache_in_page "$page"
-        fi
-    done
-    echo '</cache:cache>'
-}
-
-yelp_cache () {
-    cache_out="index.cache"
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-o")
-                shift
-                cache_out="$1"
-                shift
-                ;;
-            "-s")
-                cache_site="1"
-                cache_site_root=$(pwd)
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_cache
-        exit 1
-    fi
-    yelp_cache_in "$@" | xsltproc --path "$html_paths" \
-                                  --xinclude -o "$cache_out" \
-                                  "$xsl_mal_cache" -
-}
-
-yelp_html_xsl_common () {
-    if [ "x$html_internal_datadir" != "x" ]; then
-        echo '<xsl:variable name="yelp.internal.datadir" select="'"'$html_internal_datadir'"'"/>'
-        echo '<xsl:param name="html.css.root" select="$yelp.internal.datadir"/>'
-        echo '<xsl:param name="html.js.root" select="$yelp.internal.datadir"/>'
-    else
-        echo '<xsl:variable name="yelp.internal.datadir" select="'"''"'"/>'
-    fi
-    if [ "x$html_internal_xsl" != "x" ]; then
-        echo '<xsl:include href="file://'`echo "$html_internal_xsl" | urlencode`'"/>'
-    fi
-    if [ "x$html_custom" != "x" ]; then
-        echo '<xsl:include href="file://'`echo "$html_custom" | urlencode`'"/>'
-    fi
-    echo '<xsl:template name="html.css">'
-    echo ' <xsl:param name="node" select="."/>'
-    echo ' <xsl:variable name="yelp.locale">'
-    echo '  <xsl:choose>'
-    echo '   <xsl:when test="$node/@xml:lang != '"''"'">'
-    echo '    <xsl:value-of select="$node/@xml:lang"/>'
-    echo '   </xsl:when>'
-    echo '   <xsl:when test="$node/@lang != '"''"'">'
-    echo '    <xsl:value-of select="$node/@lang"/>'
-    echo '   </xsl:when>'
-    echo '   <xsl:otherwise>'
-    echo '    <xsl:text>C</xsl:text>'
-    echo '   </xsl:otherwise>'
-    echo '  </xsl:choose>'
-    echo ' </xsl:variable>'
-    echo ' <exsl:document href="{$yelp.internal.datadir}{$yelp.locale}.css" method="text">'
-    echo '  <xsl:call-template name="html.css.content">'
-    echo '   <xsl:with-param name="node" select="$node"/>'
-    echo '   <xsl:with-param name="direction">'
-    echo '    <xsl:call-template name="l10n.direction">'
-    echo '     <xsl:with-param name="lang" select="$yelp.locale"/>'
-    echo '    </xsl:call-template>'
-    echo '   </xsl:with-param>'
-    echo '  </xsl:call-template>'
-    echo ' </exsl:document>'
-    echo ' <link rel="stylesheet" type="text/css" href="{$html.css.root}{$yelp.locale}.css"/>'
-    echo '</xsl:template>'
-    echo '<xsl:template name="html.js.script">'
-    echo ' <xsl:param name="node" select="."/>'
-    echo ' <exsl:document href="{$yelp.internal.datadir}yelp.js" method="text">'
-    echo '  <xsl:call-template name="html.js.content">'
-    echo '   <xsl:with-param name="node" select="$node"/>'
-    echo '  </xsl:call-template>'
-    echo ' </exsl:document>'
-    echo ' <script type="text/javascript" src="{$html.js.root}yelp.js"/>'
-    echo '</xsl:template>'
-}
-yelp_html_db2html () {
-    for xml in "$@"; do
-        xmldir=`dirname "$xml"`
-        xmldir=`(cd "$xmldir" && pwd)`
-        # Output HTML
-        (
-            echo '<xsl:stylesheet'
-            echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-            echo ' xmlns:exsl="http://exslt.org/common"'
-            echo ' xmlns="http://www.w3.org/1999/xhtml"'
-            echo ' extension-element-prefixes="exsl"'
-            echo ' version="1.0">'
-            if [ "x$is_xhtml" = "x1" ]; then
-                xsl='file://'`echo "$xsl_db2xhtml" | urlencode`
-            else
-                xsl='file://'`echo "$xsl_db2html" | urlencode`
-            fi
-            echo '<xsl:import href="'"$xsl"'"/>'
-            yelp_html_xsl_common
-            echo '</xsl:stylesheet>'
-        ) | xsltproc --path "$html_paths" --xinclude -o "$html_out"/ - "$xml"
-        # Copy media
-        if [ "x$xmldir" != "x$html_out" ]; then
-            (
-                echo '<xsl:stylesheet'
-                echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-                echo ' xmlns:db="http://docbook.org/ns/docbook"'
-                echo ' exclude-result-prefixes="db"'
-                echo ' version="1.0">'
-                echo '<xsl:output method="text"/>'
-                echo '<xsl:template match="/">'
-                echo ' <xsl:for-each select="'
-                echo '   //audiodata | //imagedata | //videodata |'
-                echo '   //db:audiodata | //db:imagedata | //db:videodata">'
-                echo '  <xsl:value-of select="concat(@fileref, '"'&#x000A;'"')"/>'
-                echo ' </xsl:for-each>'
-                echo '</xsl:template>'
-                echo '</xsl:stylesheet>'
-            ) | xsltproc --path "$html_paths" --xinclude - "$xml" | while read media; do
-                mfile=`echo "$media" | urldecode`
-                minput="$xmldir/$mfile"
-                moutput="$html_out/$mfile"
-                mkdir_p `dirname "$moutput"`
-                if [ ! -f "$minput" -a "x$html_paths" != "x" ]; then
-                    minput_rel=${minput#"$(pwd)/"}
-                    if [ "x$minput_rel" != "x$minput_src" ]; then
-                        for path in $html_paths; do
-                            if [ -f "$path/$minput_rel" ]; then
-                                minput="$path/$minput_rel"
-                                break
-                            fi
-                        done
-                    fi
-                fi
-                if [ -f "$minput" -o "x$html_ignore_media" != "x1" ]; then
-                    cp "$minput" "$moutput"
-                fi
-            done
-        fi
-        # Copy JavaScript
-        cp "${yelp_js_dir}/highlight.pack.js" "$html_out/$html_internal_datadir"
-    done
-}
-
-yelp_html_mal2html () {
-    if [ "x$html_cache_file" != "x" ]; then
-        html_cache_file=`(cd $(dirname "$html_cache_file") && pwd)`/`basename "$html_cache_file"`
-    else
-        html_cache_file_is_tmp="yes"
-        html_cache_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-        yelp_cache -o "$html_cache_file" "$@"
-    fi
-    html_tmp_infile=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    yelp_cache_in "$@" > "$html_tmp_infile"
-    # Output HTML
-    (
-        echo '<xsl:stylesheet'
-        echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-        echo ' xmlns:mal="http://projectmallard.org/1.0/"'
-        echo ' xmlns:cache="http://projectmallard.org/cache/1.0/"'
-        echo ' xmlns:exsl="http://exslt.org/common"'
-        echo ' xmlns="http://www.w3.org/1999/xhtml"'
-        echo ' exclude-result-prefixes="mal cache"'
-        echo ' extension-element-prefixes="exsl"'
-        echo ' version="1.0">'
-        if [ "x$is_xhtml" = "x1" ]; then
-            xsl='file://'`echo "$xsl_mal2xhtml" | urlencode`
-        else
-            xsl='file://'`echo "$xsl_mal2html" | urlencode`
-        fi
-        echo '<xsl:import href="'"$xsl"'"/>'
-        echo '<xsl:include href="'"$xsl_mal_files"'"/>'
-        echo '<xsl:output method="text"/>'
-        yelp_html_xsl_common
-        html_cache_url='file://'`echo "$html_cache_file" | urlencode`
-        echo '<xsl:param name="mal.cache.file" select="'"'$html_cache_url'"'"/>'
-        echo '<xsl:template match="/">'
-        echo '<xsl:for-each select="cache:cache/mal:page | cache:cache/mal:stack">'
-        echo '<xsl:variable name="href" select="@cache:href"/>'
-        echo '<xsl:for-each select="document(@cache:href)">'
-        echo '<xsl:for-each select="mal:page | mal:stack/mal:page">'
-        echo '<xsl:call-template name="html.output"/>'
-        echo '<xsl:call-template name="mal.files.copy">'
-        echo ' <xsl:with-param name="href" select="substring-after($href, '\''file://'\'')"/>'
-        echo '</xsl:call-template>'
-        echo '</xsl:for-each>'
-        echo '</xsl:for-each>'
-        echo '</xsl:for-each>'
-        echo '</xsl:template>'
-        echo '</xsl:stylesheet>'
-    ) | (cd "$html_out" && xsltproc $html_profile \
-        --path "$html_paths" --xinclude \
-        - "$html_tmp_infile") | sort | uniq | \
-    while read line; do
-        # Copy media from paths output by HTML transform
-        line_src=$(echo "$line" | cut -d' ' -f1 | urldecode)
-        line_dest="$html_out/"$(echo "$line" | cut -d' ' -f2)
-        if [ ! -f "$line_src" -a "x$html_paths" != "x" ]; then
-            line_src_rel=${line_src#"$(pwd)/"}
-            if [ "x$line_src_rel" != "x$line_src" ]; then
-                for path in $html_paths; do
-                    if [ -f "$path/$line_src_rel" ]; then
-                        line_src="$path/$line_src_rel"
+import configparser
+import os
+import sys
+import shutil
+import subprocess
+import tempfile
+import urllib.parse
+import uuid
+
+import lxml.etree
+import lxml.ElementInclude
+
+
+XSL_DB2HTML = '@XSL_DB2HTML@'
+XSL_DB2XHTML = '@XSL_DB2XHTML@'
+XSL_MALCACHE = '@XSL_MAL_CACHE@'
+XSL_MAL2HTML = '@XSL_MAL2HTML@'
+XSL_MAL2XHTML = '@XSL_MAL2XHTML@'
+XSL_MAL_OPF='@DATADIR@/xslt/mal-opf.xsl'
+XSL_MAL_NCX='@DATADIR@/xslt/mal-ncx.xsl'
+YELP_JS_DIR = '@YELP_JS_DIR@'
+
+XSLCOMMON = ('''
+<xsl:variable name="yelp.internal.datadir" select="'{intdatadir}'"/>
+<xsl:param name="html.css.root" select="$yelp.internal.datadir"/>
+<xsl:param name="html.js.root" select="$yelp.internal.datadir"/>
+{includes}
+<xsl:template name="html.css">
+ <xsl:param name="node" select="."/>
+ <xsl:variable name="yelp.locale">
+  <xsl:choose>
+   <xsl:when test="$node/@xml:lang != ''">
+    <xsl:value-of select="$node/@xml:lang"/>
+   </xsl:when>
+   <xsl:when test="$node/@lang != ''">
+    <xsl:value-of select="$node/@lang"/>
+   </xsl:when>
+   <xsl:otherwise>
+    <xsl:text>C</xsl:text>
+   </xsl:otherwise>
+  </xsl:choose>
+ </xsl:variable>
+ <exsl:document href="{{$yelp.internal.datadir}}{{$yelp.locale}}.css" method="text">
+  <xsl:call-template name="html.css.content">
+   <xsl:with-param name="node" select="$node"/>
+   <xsl:with-param name="direction">
+    <xsl:call-template name="l10n.direction">
+     <xsl:with-param name="lang" select="$yelp.locale"/>
+    </xsl:call-template>
+   </xsl:with-param>
+  </xsl:call-template>
+ </exsl:document>
+ <link rel="stylesheet" type="text/css" href="{{$html.css.root}}{{$yelp.locale}}.css"/>
+</xsl:template>
+<xsl:template name="html.js.script">
+ <xsl:param name="node" select="."/>
+ <exsl:document href="{{$yelp.internal.datadir}}yelp.js" method="text">
+  <xsl:call-template name="html.js.content">
+   <xsl:with-param name="node" select="$node"/>
+  </xsl:call-template>
+ </exsl:document>
+ <script type="text/javascript" src="{{$html.js.root}}yelp.js"/>
+</xsl:template>
+''')
+
+DB2HTML = ('''
+<xsl:stylesheet
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:exsl="http://exslt.org/common"
+  xmlns="http://www.w3.org/1999/xhtml"
+  extension-element-prefixes="exsl"
+  version="1.0">
+<xsl:import href="file://{xslfile}"/>
+'''
++ XSLCOMMON + 
+'''
+</xsl:stylesheet>
+''')
+
+
+MAL2HTML = ('''
+<xsl:stylesheet
+  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:mal="http://projectmallard.org/1.0/"
+  xmlns:cache="http://projectmallard.org/cache/1.0/"
+  xmlns:exsl="http://exslt.org/common"
+  xmlns="http://www.w3.org/1999/xhtml"
+  exclude-result-prefixes="mal cache"
+  extension-element-prefixes="exsl"
+  version="1.0">
+<xsl:import href="file://{xslfile}"/>
+<xsl:param name="mal.cache.file" select="'file://{cachefile}'"/>
+'''
++ XSLCOMMON + 
+'''
+<xsl:template match="/">
+  <xsl:for-each select="cache:cache/mal:page | cache:cache/mal:stack">
+    <xsl:variable name="href" select="@cache:href"/>
+    <xsl:for-each select="document(@cache:href)">
+      <xsl:for-each select="mal:page | mal:stack/mal:page">
+        <xsl:call-template name="html.output"/>
+      </xsl:for-each>
+    </xsl:for-each>
+  </xsl:for-each>
+</xsl:template>
+</xsl:stylesheet>
+''')
+
+
+
+class InputFile:  # One input document plus the paths derived from where it was found.
+    def __init__(self, filepath, filename, sitedir=None):
+        self.filepath = filepath  # directory part, as supplied by the caller
+        self.filename = filename  # file name, joined onto filepath below
+        self.absfile = os.path.join(filepath, filename)  # only absolute if filepath is
+        self.absdir = os.path.dirname(self.absfile)
+        self.sitedir = sitedir or ''  # site directory prefix; '' when none was given
+        self.sitefilename = self.sitedir + self.filename
+
+
+class PathResolver(lxml.etree.Resolver):
+    """Resolve URIs directly, then relative to each extra search directory."""
+
+    def __init__(self, srcdir, path):
+        # Keep a trailing slash so resolve() only strips whole directory prefixes.
+        self.srcdir = srcdir if srcdir.endswith('/') else srcdir + '/'
+        # path can be None when no search path is configured; treat it as empty.
+        self.path = path or []
+
+    def resolve(self, uri, id, context):
+        """Return a resolved input for uri, or None if it cannot be found here."""
+        if os.path.exists(uri):
+            return self.resolve_filename(uri, context)
+        # Drop the source directory prefix so the reference can be retried elsewhere.
+        ref = uri[len(self.srcdir):] if uri.startswith(self.srcdir) else uri
+        for p in self.path:
+            tryfile = os.path.join(p, ref)
+            if os.path.exists(tryfile):
+                return self.resolve_filename(tryfile, context)
+        return None
+
+
+class Builder:
+    """Shared option parsing, config lookup and input discovery for subcommands."""
+    name = None
+    desc = None
+    blurb = None
+    formats = []
+    arguments = []
+    postblurb = None
+    config = None
+
+    def __init__(self, yelpbuild):
+        self.yelpbuild = yelpbuild
+        self.options = {}
+        self.fileargs = []
+        self.tmpdir = None  # set by create_tmpdir()
+
+    def __del__(self):
+        """Remove the temporary directory, if one was created."""
+        if self.tmpdir is not None:
+            shutil.rmtree(self.tmpdir, ignore_errors=True)
+            self.tmpdir = None
+
+    def parse_args(self, args):
+        """Fill self.options and self.fileargs from args, then load .yelp-tools.cfg.
+
+        Returns 1 after printing help for a bad option, otherwise 0.
+        """
+        while len(args) > 0:
+            argdef = None
+            if args[0].startswith('--'):
+                for arg_ in self.arguments:
+                    if args[0] == '--' + arg_[0]:
+                        argdef = arg_
+                        break
+                if argdef is None:
+                    self.print_help()
+                    return 1
+            elif args[0].startswith('-'):
+                for arg_ in self.arguments:
+                    if args[0] == arg_[1]:
+                        argdef = arg_
+                        break
+                if argdef is None:
+                    self.print_help()
+                    return 1
+            if argdef is not None:
+                takesarg = (argdef[2] is not None)
+                if takesarg:
+                    if len(args) < 2:
+                        self.print_help()
+                        return 1
+                    self.options.setdefault(argdef[0], [])
+                    self.options[argdef[0]].append(args[1])
+                    args = args[2:]
+                else:
+                    self.options[argdef[0]] = True
+                    args = args[1:]
+            else:
+                self.fileargs.append(args[0])
+                args = args[1:]
+        # Prefer a config file beside the first file argument, else one in the cwd.
+        cfgfile = None
+        if len(self.fileargs) > 0:
+            cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg')
+            if not os.path.exists(cfgfile):
+                cfgfile = None
+        if cfgfile is None:
+            cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg')
+        if os.path.exists(cfgfile):
+            self.config = configparser.ConfigParser()
+            try:
+                self.config.read(cfgfile)
+            except Exception as e:
+                print(e, file=sys.stderr)
+                sys.exit(1)
+        return 0
+
+    def _config_value(self, arg):
+        """Return arg from [build:NAME], else [build], else [default], else None."""
+        if self.config is not None:
+            for section in ('build:' + self.name, 'build', 'default'):
+                val = self.config.get(section, arg, fallback=None)
+                if val is not None:
+                    return val
+        return None
+
+    def get_option_bool(self, arg):
+        """Return True if arg was given as a flag, or is 'true' in the config."""
+        if arg in self.options:
+            return self.options[arg] is True
+        val = self._config_value(arg)
+        return val == 'true'
+
+    def get_option_str(self, arg):
+        """Return the last command-line value of arg, else the config value or None."""
+        if isinstance(self.options.get(arg), list):
+            return self.options[arg][-1]
+        return self._config_value(arg)
+
+    def get_option_list(self, arg):
+        """Return arg's values split on commas and whitespace, or None if unset."""
+        if isinstance(self.options.get(arg), list):
+            ret = []
+            for opt in self.options[arg]:
+                ret.extend(opt.replace(',', ' ').split())
+            return ret
+        val = self._config_value(arg)
+        if val is not None:
+            return val.replace(',', ' ').split()
+        return None
+
+    def get_xml(self, infile, path):
+        """Parse infile and expand XIncludes, also searching path (a list or None)."""
+        parser = lxml.etree.XMLParser()
+        parser.resolvers.add(PathResolver(os.path.realpath(infile.absdir), path or []))
+        tree = lxml.etree.parse(infile.absfile, parser=parser)
+        def pathloader(href, parse, encoding=None):
+            usefile = os.path.join(infile.absdir, href)
+            # NOTE(review): existence is tested on href, not on the joined usefile;
+            # confirm which of the two the include loader is expected to receive.
+            if not os.path.exists(href):
+                usefile = None
+            if usefile is None:
+                absdir = infile.absdir
+                if not absdir.endswith('/'):
+                    absdir = absdir + '/'
+                ref = href
+                if ref.startswith(absdir):
+                    ref = ref[len(absdir):]
+                for p in path or []:
+                    tryfile = os.path.join(p, ref)
+                    if os.path.exists(tryfile):
+                        usefile = tryfile
+                        break
+            if usefile is not None and parse == 'xml':
+                return lxml.etree.parse(usefile, parser=parser).getroot()
+            if usefile is not None and parse == 'text':
+                # Honour the requested encoding and close the file promptly.
+                with open(usefile, encoding=encoding) as textfile:
+                    return textfile.read()
+            return None
+        lxml.ElementInclude.include(tree, loader=pathloader)
+        return tree
+
+    def iter_files(self, sitedir=None):
+        """Yield an InputFile per file argument, expanding directories ('.' if none)."""
+        issite = self.get_option_bool('site')
+        if len(self.fileargs) == 0:
+            self.fileargs.append('.')
+        for filearg in self.fileargs:
+            if os.path.isdir(filearg):
+                if issite:
+                    for infile in self.iter_site(filearg, '/'):
+                        yield infile
+                else:
+                    for fname in os.listdir(filearg):
+                        if fname.endswith('.page'):
+                            yield InputFile(filearg, fname)
+            else:
+                if issite:
+                    # FIXME: should do some normalization here, I guess.
+                    # It's hard to get this perfect without a defined start dir
+                    yield InputFile(os.getcwd(), filearg, '/' + os.path.dirname(filearg))
+                else:
+                    yield InputFile(os.getcwd(), filearg)
+
+    def iter_site(self, filepath, sitedir):
+        """Recursively yield .page files below filepath, skipping __pintail__ dirs."""
+        for fname in os.listdir(filepath):
+            newpath = os.path.join(filepath, fname)
+            if os.path.isdir(newpath):
+                # FIXME https://github.com/projectmallard/pintail/issues/36
+                if fname == '__pintail__':
+                    continue
+                for infile in self.iter_site(newpath, sitedir + fname + '/'):
+                    yield infile
+            elif fname.endswith('.page'):
+                yield InputFile(filepath, fname, sitedir)
+
+    def create_tmpdir(self):
+        """Create self.tmpdir on first use; later calls do nothing."""
+        if self.tmpdir is None:
+            self.tmpdir = tempfile.mkdtemp()
+
+    def print_help(self):
+        """Print usage, formats, blurb and an aligned option table to stdout."""
+        print('Usage:   yelp-build ' + self.name + ' [OPTIONS] [FILES]')
+        print('Formats: ' + ' '.join(self.formats) + '\n')
+        #FIXME: prettify names of formats
+        if self.blurb is not None:
+            print(self.blurb + '\n')
+        print('Options:')
+        maxarglen = 2
+        args = []
+        for arg in self.arguments:
+            argkey = '--' + arg[0]
+            if arg[1] is not None:
+                argkey = arg[1] + ', ' + argkey
+            if arg[2] is not None:
+                argkey = argkey + ' ' + arg[2]
+            args.append((argkey, arg[3]))
+        for arg in args:
+            maxarglen = max(maxarglen, len(arg[0]) + 1)
+        for arg in args:
+            print('  ' + (arg[0]).ljust(maxarglen) + '  ' + arg[1])
+        if self.postblurb is not None:
+            print(self.postblurb)
+
+    def main(self, args):
+        """Run the subcommand; the base implementation does nothing."""
+        pass
+
+
+class CacheBuilder (Builder):
+    """The 'cache' subcommand: list the input pages and run the cache stylesheet."""
+    name = 'cache'
+    desc = 'Create a Mallard cache file'
+    blurb = ('Create a Mallard cache file from the page files FILES.\n' +
+             'If FILES contains directories, all .page files in those\n' +
+             'directories will be used.')
+    formats = ['mallard']
+    arguments = [
+        ('help',   '-h', None, 'Show this help and exit'),
+        ('output', '-o', 'OUT', 'Output cache to the file OUT'),
+        ('path',   '-p', 'PATH', 'Extra directories to search for files'),
+        ('site',   '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def build_cache_in(self, filename):
+        """Write the cache input document, one page/stack element per file, to filename."""
+        issite = self.get_option_bool('site')
+        with open(filename, 'w', encoding='utf-8') as cachein:
+            print('<cache:cache xmlns:cache="http://projectmallard.org/cache/1.0/"' +
+                  ' xmlns:site="http://projectmallard.org/site/1.0/"'
+                  ' xmlns="http://projectmallard.org/1.0/">',
+                  file=cachein)
+            for infile in self.iter_files():
+                if infile.filename.endswith('.page'):
+                    page = '<page'
+                elif infile.filename.endswith('.stack'):
+                    page = '<stack'
+                else:
+                    continue
+                page += ' cache:href="file://' + urllib.parse.quote(os.path.realpath(infile.absfile)) + '"'
+                if issite:
+                    page += ' site:dir="' + infile.sitedir + '"'
+                page += '/>'
+                print(page, file=cachein)
+            print('</cache:cache>', file=cachein)
+
+    def main(self, args, output=None, path=None):
+        """Build the cache with xsltproc; output/path override options and config."""
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        self.create_tmpdir()
+        cacheinfile = os.path.join(self.tmpdir, 'index.cache.in')
+        self.build_cache_in(cacheinfile)
+        if output is None:
+            output = self.get_option_str('output')
+        if output is None:
+            output = 'index.cache'
+        if path is None:
+            path = self.get_option_list('path')
+        # xsltproc gets one colon-separated --path value; ':' is passed when there is none.
+        path = ':' if path is None else ':'.join(path)
+        return subprocess.call(['xsltproc', '--xinclude', '-o', output,
+                                '--path', path,
+                                XSL_MALCACHE, cacheinfile])
+
+
+class XhtmlBuilder (Builder):
+    name = 'xhtml'
+    desc = 'Convert input files to XHTML'
+    blurb = ('Create XHTML output from the input files FILES.\n' +
+             'FILES can be DocBook files, Mallard page files,\n' +
+             'or directories containing Mallard page files.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help',   '-h', None, 'Show this help and exit'),
+        ('cache',  '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
+        ('output', '-o', 'OUT', 'Output files in the directory OUT'),
+        ('xsl',    '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'),
+        ('path',   '-p', 'PATH', 'Extra directories to search for files'),
+        ('ignore', '-i', None, 'Ignore missing media files')
+    ]
+
+    def __init__(self, yelpbuild, xhtml=True, epub=False):
+        """Set up a page builder for XHTML (default), HTML, or EPUB output.
+
+        yelpbuild -- object passed through to the Builder base class
+        xhtml     -- use the XHTML stylesheets when true, the HTML ones
+                     otherwise
+        epub      -- generate pages for an EPUB container
+        """
+        super().__init__(yelpbuild)
+        # Paths of the generated wrapper stylesheets; None until written.
+        self.mal2html = None
+        self.db2html = None
+        self.xhtml = xhtml
+        self.epub = epub
+        # Prefix for generated CSS and JavaScript files. The wrapper
+        # stylesheet splices this value directly in front of file names
+        # (e.g. "{$yelp.internal.datadir}{$yelp.locale}.css"), so a non-empty
+        # prefix has to end with a slash, as the shell implementation's
+        # "yelp/" did. os.path.join() in main() is unaffected by the slash.
+        self.intdatadir = 'yelp/' if self.epub else ''
+        # Document fed to the Mallard transform; set by build_mallard_all().
+        self.cacheinfile = None
+
+
+    def build_mallard_all(self, cache=None, output=None, xsl=None, path=None):
+        """Convert every Mallard input page in a single xsltproc run.
+
+        Only the first call does any work; later calls return 0 immediately
+        because self.mal2html is already set.
+
+        cache  -- existing Mallard cache file; falls back to the 'cache'
+                  option, and a cache is built in the temporary directory
+                  when neither is given
+        output -- existing directory to write into; falls back to the
+                  'output' option, then to the current directory
+        xsl    -- custom XSLT file to include; falls back to the 'xsl' option
+        path   -- list of extra search directories; falls back to the
+                  'path' option
+
+        Returns the exit status of xsltproc, the non-zero status of the
+        cache build if that failed, or 1 if output is not a directory.
+        """
+        if self.mal2html is not None:
+            # We build all the pages on the first call, because it's faster
+            return 0
+        if path is None:
+            path = self.get_option_list('path')
+        self.create_tmpdir()
+        if cache is None:
+            cachefile = self.get_option_str('cache')
+        else:
+            cachefile = cache
+        cachebuilder = CacheBuilder(self.yelpbuild)
+        if cachefile is None:
+            # No cache supplied: build one in our temporary directory and use
+            # it both as the cache and as the document to transform.
+            cachefile = os.path.join(self.tmpdir, 'index.cache')
+            retcode = cachebuilder.main(self.fileargs, output=cachefile, path=path)
+            if retcode != 0:
+                return retcode
+            self.cacheinfile = cachefile
+        else:
+            # Cache supplied: only write the cache input listing for our own
+            # input files and transform that, using the given cache for lookups.
+            cachefile = os.path.realpath(cachefile)
+            self.cacheinfile = os.path.join(self.tmpdir, 'index.cache.in')
+            cachebuilder.parse_args(self.fileargs)
+            cachebuilder.build_cache_in(self.cacheinfile)
+        # Write the wrapper stylesheet that imports the real one.
+        self.mal2html = os.path.join(self.tmpdir, 'mal2html.xsl')
+        with open(self.mal2html, 'w') as xslout:
+            if self.xhtml:
+                xslfile = XSL_MAL2XHTML
+            else:
+                xslfile = XSL_MAL2HTML
+            includes = ''
+            if xsl is None:
+                customxsl = self.get_option_str('xsl')
+            else:
+                customxsl = xsl
+            if customxsl is not None:
+                customxsl = urllib.parse.quote(os.path.realpath(customxsl))
+                includes += '<xsl:include href="file://' + customxsl + '"/>'
+            if self.epub:
+                # EPUB pages add 'target:epub' to the conditional-processing
+                # targets and get empty header and footer templates.
+                includes += '''<xsl:param name="mal.if.target" select="'target:epub target:html target:xhtml'"/>'''
+                includes += '''<xsl:template mode="html.header.mode" match="mal:page"/>'''
+                includes += '''<xsl:template mode="html.footer.mode" match="mal:page"/>'''
+            xslout.write(MAL2HTML.format(xslfile=xslfile,
+                                         cachefile=cachefile,
+                                         includes=includes,
+                                         intdatadir=self.intdatadir))
+        if output is None:
+            output = self.get_option_str('output')
+        if output is None:
+            output = os.getcwd()
+        else:
+            if not os.path.isdir(output):
+                print('Output must be a directory', file=sys.stderr)
+                return 1
+        if not output.endswith('/'):
+            # xsltproc is picky about this
+            output = output + '/'
+        if path is None:
+            pathstr = ':'
+        else:
+            pathstr = ':'.join(path)
+        retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output,
+                                   '--path', pathstr,
+                                   '--stringparam', 'mal.cache.file', cachefile,
+                                   self.mal2html, self.cacheinfile])
+        return retcode
+
+
+    def build_docbook(self, infile, output=None, xsl=None, path=None):
+        """Convert one DocBook input file with xsltproc.
+
+        infile -- input file object; its absfile attribute is transformed
+        output -- existing directory to write into; falls back to the
+                  'output' option, then to the current directory
+        xsl    -- custom XSLT file to include; falls back to the 'xsl' option
+        path   -- list of extra search directories; falls back to the
+                  'path' option
+
+        Returns the exit status of xsltproc, or 1 if output is not a
+        directory.
+        """
+        if self.db2html is None:
+            # The wrapper stylesheet is written once and reused for every
+            # DocBook file handled by this builder.
+            self.create_tmpdir()
+            self.db2html = os.path.join(self.tmpdir, 'db2html.xsl')
+            with open(self.db2html, 'w') as xslout:
+                if self.xhtml:
+                    xslfile = XSL_DB2XHTML
+                else:
+                    xslfile = XSL_DB2HTML
+                includes = ''
+                if xsl is not None:
+                    customxsl = xsl
+                else:
+                    customxsl = self.get_option_str('xsl')
+                if customxsl is not None:
+                    customxsl = urllib.parse.quote(os.path.realpath(customxsl))
+                    includes += '<xsl:include href="file://' + customxsl + '"/>'
+                xslout.write(DB2HTML.format(xslfile=xslfile,
+                                            includes=includes,
+                                            intdatadir=self.intdatadir))
+        if output is None:
+            output = self.get_option_str('output')
+        if output is None:
+            output = os.getcwd()
+        else:
+            if not os.path.isdir(output):
+                print('Output must be a directory', file=sys.stderr)
+                return 1
+        if not output.endswith('/'):
+            # xsltproc only treats the -o argument as a directory when it
+            # ends with a slash; without it, chunked output is resolved
+            # against the parent directory. build_mallard_all does the same.
+            output = output + '/'
+        if path is None:
+            path = self.get_option_list('path')
+        if path is None:
+            pathstr = ':'
+        else:
+            pathstr = ':'.join(path)
+        retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output,
+                                   '--path', pathstr,
+                                   self.db2html, infile.absfile])
+        return retcode
+
+
+    def main(self, args, cache=None, output=None, xsl=None, path=None, ignore=None):
+        """Convert all input files and copy the media they reference.
+
+        args   -- command-line arguments for this command
+        cache, output, xsl, path, ignore -- overrides for the options of the
+                  same name; None means "use the parsed option"
+
+        Mallard files (.page, .stack) are converted together on the first
+        one encountered; DocBook files (.docbook, .xml) are converted one by
+        one. When an output directory is known, files referenced through
+        'src' (Mallard) or 'fileref' (DocBook) attributes are copied into it.
+
+        Returns 0 on success, 1 on usage, parse, or missing-media errors, or
+        the non-zero status of a failed conversion.
+        """
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        if path is None:
+            pathopt = self.get_option_list('path')
+        else:
+            pathopt = path
+        # Each entry may itself be a colon-separated list; flatten it.
+        path = []
+        if pathopt is not None:
+            for p in pathopt:
+                path.extend(p.split(':'))
+        if output is None:
+            output = self.get_option_str('output')
+        # Maps each referenced media path to its candidate source files.
+        srcs = {}
+        for infile in self.iter_files():
+            if infile.filename.endswith('.page') or infile.filename.endswith('.stack'):
+                retcode = self.build_mallard_all(cache=cache, output=output, xsl=xsl, path=path)
+                if retcode != 0:
+                    return retcode
+                if output is not None:
+                    tree = self.get_xml(infile, path)
+                    if tree is None:
+                        return 1
+                    for el in tree.xpath('//*[@src]'):
+                        src = el.get('src')
+                        srcs.setdefault(src, [])
+                        orig = os.path.join(os.path.realpath(infile.absdir), src)
+                        if orig not in srcs[src]:
+                            srcs[src].append(orig)
+            elif infile.filename.endswith('.docbook') or infile.filename.endswith('.xml'):
+                retcode = self.build_docbook(infile, output=output, xsl=xsl, path=path)
+                if retcode != 0:
+                    return retcode
+                if output is not None:
+                    tree = self.get_xml(infile, path)
+                    if tree is None:
+                        return 1
+                    for el in tree.xpath('//*[@fileref]'):
+                        src = el.get('fileref')
+                        srcs.setdefault(src, [])
+                        orig = os.path.join(os.path.realpath(infile.absdir), src)
+                        if orig not in srcs[src]:
+                            srcs[src].append(orig)
+            else:
+                # Errors go to stderr, like every other error in this class.
+                print('Error: No builder for ' + infile.filename, file=sys.stderr)
+                return 1
+
+        if ignore is None:
+            ignore = self.get_option_bool('ignore')
+        for src in srcs:
+            # Prefer the first candidate next to an input file that exists.
+            useorig = None
+            for orig in srcs[src]:
+                if os.path.exists(orig):
+                    if useorig is None:
+                        useorig = orig
+                    else:
+                        print('Warning: Multiple sources for ' + src + '. Using first.',
+                              file=sys.stderr)
+            if useorig is None:
+                # Fall back to the extra search directories.
+                for p in path:
+                    tryorig = os.path.join(p, src)
+                    if os.path.exists(tryorig):
+                        useorig = tryorig
                         break
-                    fi
-                done
-            fi
-        fi
-        line_dest=`echo "$line_dest" | urldecode`
-        if [ "$line_src" != "$line_dest" ]; then
-            line_dir=`dirname "$line_dest"`
-            mkdir_p "$line_dir"
-            if [ -f "$line_src" -o "x$html_ignore_media" != "x1" ]; then
-                cp "$line_src" "$line_dest"
-            fi
-        fi
-    done
-    # Copy JavaScript
-    cp "${yelp_js_dir}/highlight.pack.js" "$html_out/$html_internal_datadir"
-    # Clean up
-    rm "$html_tmp_infile"
-    if [ "x$html_cache_file_is_tmp" = "xyes" ]; then
-        rm "$html_cache_file"
-    fi
-}
-
-yelp_html () {
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-c")
-                shift
-                html_cache_file="$1"
-                shift
-                ;;
-            "-o")
-                shift
-                html_out="$1"
-                shift
-                ;;
-            "-x")
-                shift
-                html_custom="$1"
-                shift
-                ;;
-            "-p")
-                shift
-                html_paths=$(yelp_paths_normalize "$1")
-                shift
-                ;;
-            "-i")
-                shift
-                html_ignore_media="1"
-                ;;
-            "--profile")
-                html_profile="--profile"
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "x$html_out" = "x" ]; then
-        html_out="."
-    elif [ ! -d "$html_out" ]; then
-        echo "Error: output must be a directory." 1>&2
-        exit 1
-    fi
-    html_out=`(cd "$html_out" && pwd)`
-    if [ "x$html_custom" != "x" ]; then
-        html_custom_dir=`dirname "$html_custom"`
-        html_custom_dir=`(cd "$html_custom_dir" && pwd)`
-        html_custom="$html_custom_dir"/`basename "$html_custom"`
-    fi
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_html
-        exit 1
-    fi
-    ext=`echo "$1" | sed -e 's/.*\.//'`
-    if [ "x$ext" = "xxml" -o "x$ext" = "xdocbook" ]; then
-        yelp_html_db2html "$@"
-    else
-        yelp_html_mal2html "$@"
-    fi
-}
-
-yelp_epub () {
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-c")
-                shift
-                epub_cache_file="$1"
-                shift
-                ;;
-            "-o")
-                shift
-                epub_out="$1"
-                shift
-                ;;
-            "-x")
-                shift
-                html_custom="$1"
-                shift
-                ;;
-            "-p")
-                shift
-                html_paths=$(yelp_paths_normalize "$1")
-                shift
-                ;;
-            "-i")
-                shift
-                html_ignore_media="1"
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_epub
-        exit 1
-    fi
-    if [ "x$epub_cache_file" != "x" ]; then
-        epub_cache_file=`(cd $(dirname "$epub_cache_file") && pwd)`/`basename "$epub_cache_file"`
-    else
-        epub_cache_file_is_tmp="yes"
-        epub_cache_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-        yelp_cache -o "$epub_cache_file" "$@"
-    fi
-    html_cache_file="$epub_cache_file"
-    epub_data_out=`mktemp -d "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    html_out="$epub_data_out/OPS"
-    mkdir "$html_out"
-    mkdir "$html_out/yelp"
-    html_internal_datadir="yelp/"
-    html_internal_xsl="$xsl_mal_epub"
-    yelp_html_mal2html "$@"
-
-    epub_id=`uuidgen`
-    epub_data=`(cd "$html_out" && ls yelp/*.*)`
-    xsltproc \
-        --path "$html_paths" \
-        -o "$html_out/opf.opf" \
-        --stringparam opf.id "$epub_id" \
-        --stringparam opf.data "$epub_data" \
-        "$xsl_mal_opf" "$epub_cache_file"
-    xsltproc \
-        --path "$html_paths" \
-        -o "$html_out/ncx.ncx" \
-        --stringparam ncx.id "$epub_id" \
-        "$xsl_mal_ncx" "$epub_cache_file"
-
-    echo "application/epub+zip" > "$epub_data_out/mimetype"
-
-    mkdir "$epub_data_out/META-INF"
-    (
-        echo "<?xml version='1.0' encoding='UTF-8'?>"
-        echo "<container version='1.0' xmlns='urn:oasis:names:tc:opendocument:xmlns:container'>"
-        echo "<rootfiles>"
-        echo "<rootfile full-path='OPS/opf.opf' media-type='application/oebps-package+xml'/>"
-        echo "</rootfiles>"
-        echo "</container>"
-    ) > "$epub_data_out/META-INF/container.xml"
-
-    if [ "x$epub_out" = "x" ]; then
-        epub_out=`pwd`/index.epub
-    else
-        epub_out=`(cd $(dirname "$epub_out") && pwd)`/`basename "$epub_out"`
-    fi
-    (cd "$epub_data_out" && zip -q -r "$epub_out" mimetype META-INF OPS)
-
-    if [ "x$epub_cache_file_is_tmp" = "xyes" ]; then
-        rm "$epub_cache_file"
-    fi
-    rm -rf "$html_out"
-}
-
-cmd="$1"
-shift
-case "x$cmd" in
-    "xcache")
-        yelp_cache "$@"
-        ;;
-    "xepub")
-        is_xhtml=1
-        yelp_epub "$@"
-        ;;
-    "xhtml")
-        is_xhtml=0
-        yelp_html "$@"
-        ;;
-    "xxhtml")
-        is_xhtml=1
-        yelp_html "$@"
-        ;;
-    *)
-        yelp_usage
-        ;;
-esac
+            if useorig is None:
+                if ignore:
+                    print('Warning: No source found for ' + src, file=sys.stderr)
+                else:
+                    print('Error: No source found for ' + src, file=sys.stderr)
+                    return 1
+            if useorig is not None:
+                destfile = os.path.join(output, src)
+                # Building into the source directory makes source and
+                # destination the same file; shutil.copyfile would raise
+                # SameFileError, so there is nothing to copy in that case.
+                if os.path.realpath(useorig) == os.path.realpath(destfile):
+                    continue
+                destdir = os.path.dirname(destfile)
+                os.makedirs(destdir, exist_ok=True)
+                shutil.copyfile(useorig, destfile)
+
+        # Ship the syntax-highlighting script next to the generated pages.
+        if output is None:
+            shutil.copyfile(os.path.join(YELP_JS_DIR, 'highlight.pack.js'),
+                            os.path.join(self.intdatadir, 'highlight.pack.js'))
+        else:
+            shutil.copyfile(os.path.join(YELP_JS_DIR, 'highlight.pack.js'),
+                            os.path.join(output, self.intdatadir, 'highlight.pack.js'))
+
+        return 0
+
+
+class HtmlBuilder (Builder):
+    """Builder for the 'html' command.
+
+    All conversion work is delegated to an XhtmlBuilder created with
+    xhtml=False.
+    """
+    # NOTE(review): the delegate's name is 'xhtml', so any lookup it keys on
+    # its own name presumably uses 'xhtml' rather than 'html' -- confirm.
+    name = 'html'
+    desc = 'Convert input files to HTML'
+    blurb = ('Create HTML output from the input files FILES.\n'
+             'FILES can be DocBook files, Mallard page files,\n'
+             'or directories containing Mallard page files.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help',   '-h', None, 'Show this help and exit'),
+        ('cache',  '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
+        ('output', '-o', 'OUT', 'Output files in the directory OUT'),
+        ('xsl',    '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'),
+        ('path',   '-p', 'PATH', 'Extra directories to search for files'),
+        ('ignore', '-i', None, 'Ignore missing media files')
+    ]
+
+    def __init__(self, yelpbuild):
+        super().__init__(yelpbuild)
+        self.xhtmlbuilder = XhtmlBuilder(yelpbuild, xhtml=False)
+
+    def main(self, args):
+        """Parse args, then hand the same args to the XHTML builder.
+
+        Returns 1 if the arguments cannot be parsed, 0 after printing help,
+        and otherwise whatever the delegate's main() returns.
+        """
+        status = self.parse_args(args)
+        if status != 0:
+            return 1
+        if 'help' not in self.options:
+            return self.xhtmlbuilder.main(args)
+        self.print_help()
+        return 0
+
+
+class EpubBuilder (Builder):
+    """Builder for the 'epub' command.
+
+    Pages are generated by an XhtmlBuilder in EPUB mode into an OPS
+    directory; the package (opf) and navigation (ncx) files are produced
+    with xsltproc, and the result is zipped unless --nozip is given.
+    """
+    name = 'epub'
+    desc = 'Create an EPUB file for Mallard'
+    blurb = ('Create an EPUB file from the Mallard page files FILES')
+    formats = ['mallard']
+    arguments = [
+        ('help',   '-h', None, 'Show this help and exit'),
+        ('cache',  '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
+        ('output', '-o', 'OUT', 'Output files in the directory OUT'),
+        ('xsl',    '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'),
+        ('path',   '-p', 'PATH', 'Extra directories to search for files'),
+        ('ignore', '-i', None, 'Ignore missing media files'),
+        ('nozip', None,  None, 'Do not zip the output directory')
+    ]
+
+    def __init__(self, yelpbuild):
+        super().__init__(yelpbuild)
+
+    def main(self, args):
+        """Build the EPUB directory tree and, unless --nozip, the archive.
+
+        With --nozip the output is a directory (default 'EPUB'); otherwise
+        it is a file (default 'index.epub') and the tree is assembled in the
+        temporary directory first.
+
+        Returns 0 on success, 1 on argument or output-type errors, or the
+        non-zero status of the first failing step.
+        """
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        output = self.get_option_str('output')
+        nozip = self.get_option_bool('nozip')
+        if nozip:
+            if output is None:
+                output = 'EPUB'
+            if os.path.isfile(output):
+                print('Error: Output must be a directory', file=sys.stderr)
+                # Report failure through the return code, like every other
+                # builder, instead of exiting from inside main().
+                return 1
+            epubdir = output
+        else:
+            self.create_tmpdir()
+            if output is None:
+                output = 'index.epub'
+            if os.path.isdir(output):
+                print('Error: Output must be a file', file=sys.stderr)
+                return 1
+            epubdir = os.path.join(self.tmpdir, 'EPUB')
+            os.makedirs(epubdir, exist_ok=True)
+        os.makedirs(os.path.join(epubdir, 'OPS', 'yelp'), exist_ok=True)
+
+        # Generate the pages, passing our own options on explicitly.
+        xhtmlbuilder = XhtmlBuilder(self.yelpbuild, epub=True)
+        retcode = xhtmlbuilder.main(self.fileargs,
+                                    cache=self.get_option_str('cache'),
+                                    output=os.path.join(epubdir, 'OPS'),
+                                    xsl=self.get_option_str('xsl'),
+                                    path=self.get_option_list('path'),
+                                    ignore=self.get_option_bool('ignore'))
+        if retcode != 0:
+            return retcode
+
+        with open(os.path.join(epubdir, 'mimetype'), 'w') as fd:
+            fd.write('application/epub+zip\n')
+
+        os.makedirs(os.path.join(epubdir, 'META-INF'), exist_ok=True)
+
+        # container.xml points readers at the package document.
+        with open(os.path.join(epubdir, 'META-INF', 'container.xml'), 'w') as fd:
+            fd.write('<?xml version="1.0" encoding="UTF-8"?>')
+            fd.write('<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">')
+            fd.write('<rootfiles>')
+            fd.write('<rootfile full-path="OPS/opf.opf" media-type="application/oebps-package+xml"/>')
+            fd.write('</rootfiles>')
+            fd.write('</container>\n')
+
+        path = self.get_option_list('path')
+        if path is None:
+            pathstr = ':'
+        else:
+            pathstr = ':'.join(path)
+        epubid = str(uuid.uuid4())
+        # Space-separated list of the generated data files for the manifest.
+        # NOTE(review): the shell implementation passed names relative to
+        # OPS ("yelp/..."); confirm mal-opf.xsl expects the "OPS/" prefix.
+        opfdata = ''.join(
+            ' OPS/yelp/' + urllib.parse.quote(fname)
+            for fname in os.listdir(os.path.join(epubdir, 'OPS', 'yelp')))
+        retcode = subprocess.call(['xsltproc', '--xinclude',
+                                   '-o', os.path.join(epubdir, 'OPS', 'opf.opf'),
+                                   '--path', pathstr,
+                                   '--stringparam', 'opf.id', epubid,
+                                   '--stringparam', 'opf.data', opfdata,
+                                   XSL_MAL_OPF, xhtmlbuilder.cacheinfile])
+        if retcode != 0:
+            return retcode
+        retcode = subprocess.call(['xsltproc', '--xinclude',
+                                   '-o', os.path.join(epubdir, 'OPS', 'ncx.ncx'),
+                                   '--path', pathstr,
+                                   '--stringparam', 'ncx.id', epubid,
+                                   XSL_MAL_NCX, xhtmlbuilder.cacheinfile])
+        if retcode != 0:
+            return retcode
+
+        if not nozip:
+            # zip runs inside the EPUB directory, so the output path must be
+            # absolute.
+            retcode = subprocess.call(['zip', '-q', '-r', os.path.realpath(output),
+                                       'mimetype', 'META-INF', 'OPS'],
+                                      cwd=os.path.realpath(epubdir))
+            if retcode != 0:
+                return retcode
+        return 0
+
+
+class YelpBuild:
+    """Command-line front end that dispatches to a Builder subclass."""
+
+    def __init__(self):
+        pass
+
+    def main(self):
+        """Run the command named by sys.argv[1] with the remaining arguments.
+
+        Returns 1 when no command is given or the command is unknown,
+        otherwise the value returned by the selected builder's main().
+        """
+        if len(sys.argv) < 2:
+            self.print_usage()
+            return 1
+
+        # Commands are the direct subclasses of Builder, matched by name.
+        builder = None
+        for cls in Builder.__subclasses__():
+            if sys.argv[1] == cls.name:
+                builder = cls(self)
+
+        if builder is None:
+            print('Unrecognized command: ' + sys.argv[1], file=sys.stderr)
+            return 1
+
+        return builder.main(sys.argv[2:])
+
+    def print_usage(self):
+        """Print the usage line and the list of commands, sorted by name."""
+        # The program is 'yelp-build', matching each builder's own help text.
+        print('Usage: yelp-build <COMMAND> [OPTIONS] [FILES]')
+        # Subclasses without a name are not commands and cannot be listed.
+        builders = sorted((cls for cls in Builder.__subclasses__() if cls.name),
+                          key=(lambda cls: cls.name))
+        namelen = 2
+        for cls in builders:
+            namelen = max(namelen, len(cls.name) + 2)
+
+        print('\nCommands:')
+        for cls in builders:
+            print('  ' + cls.name.ljust(namelen) + cls.desc)
+
+
+# Script entry point: the value returned by YelpBuild.main() becomes the
+# process exit status.
+if __name__ == '__main__':
+    try:
+        sys.exit(YelpBuild().main())
+    except KeyboardInterrupt:
+        # Interrupted by the user: exit with status 1 instead of a traceback.
+        sys.exit(1)
diff --git a/tools/yelp-build.py b/tools/yelp-build.py
deleted file mode 100644
index 1c52335..0000000
--- a/tools/yelp-build.py
+++ /dev/null
@@ -1,834 +0,0 @@
-#!/bin/python3
-#
-# yelp-build
-# Copyright (C) 2010-2020 Shaun McCance <shaunm@gnome.org>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-import configparser
-import os
-import sys
-import shutil
-import subprocess
-import tempfile
-import urllib.parse
-import uuid
-
-import lxml.etree
-import lxml.ElementInclude
-
-
-# FIXME: don't hardcode these
-XSL_DB2HTML = '/usr/share/yelp-xsl/xslt/docbook/html/db2html.xsl'
-XSL_DB2XHTML = '/usr/share/yelp-xsl/xslt/docbook/html/db2xhtml.xsl'
-XSL_MALCACHE = '/usr/share/yelp-xsl/xslt/mallard/cache/mal-cache.xsl'
-XSL_MAL2HTML = '/usr/share/yelp-xsl/xslt/mallard/html/mal2html.xsl'
-XSL_MAL2XHTML = '/usr/share/yelp-xsl/xslt/mallard/html/mal2xhtml.xsl'
-XSL_MAL_OPF='/usr/share/yelp-tools/xslt/mal-opf.xsl'
-XSL_MAL_NCX='/usr/share/yelp-tools/xslt/mal-ncx.xsl'
-DATADIR = '/usr/share/yelp-tools'
-YELP_JSDIR = '/usr/share/yelp-xsl/js'
-
-XSLCOMMON = ('''
-<xsl:variable name="yelp.internal.datadir" select="'{intdatadir}'"/>
-<xsl:param name="html.css.root" select="$yelp.internal.datadir"/>
-<xsl:param name="html.js.root" select="$yelp.internal.datadir"/>
-{includes}
-<xsl:template name="html.css">
- <xsl:param name="node" select="."/>
- <xsl:variable name="yelp.locale">
-  <xsl:choose>
-   <xsl:when test="$node/@xml:lang != ''">
-    <xsl:value-of select="$node/@xml:lang"/>
-   </xsl:when>
-   <xsl:when test="$node/@lang != ''">
-    <xsl:value-of select="$node/@lang"/>
-   </xsl:when>
-   <xsl:otherwise>
-    <xsl:text>C</xsl:text>
-   </xsl:otherwise>
-  </xsl:choose>
- </xsl:variable>
- <exsl:document href="{{$yelp.internal.datadir}}{{$yelp.locale}}.css" method="text">
-  <xsl:call-template name="html.css.content">
-   <xsl:with-param name="node" select="$node"/>
-   <xsl:with-param name="direction">
-    <xsl:call-template name="l10n.direction">
-     <xsl:with-param name="lang" select="$yelp.locale"/>
-    </xsl:call-template>
-   </xsl:with-param>
-  </xsl:call-template>
- </exsl:document>
- <link rel="stylesheet" type="text/css" href="{{$html.css.root}}{{$yelp.locale}}.css"/>
-</xsl:template>
-<xsl:template name="html.js.script">
- <xsl:param name="node" select="."/>
- <exsl:document href="{{$yelp.internal.datadir}}yelp.js" method="text">
-  <xsl:call-template name="html.js.content">
-   <xsl:with-param name="node" select="$node"/>
-  </xsl:call-template>
- </exsl:document>
- <script type="text/javascript" src="{{$html.js.root}}yelp.js"/>
-</xsl:template>
-''')
-
-DB2HTML = ('''
-<xsl:stylesheet
-  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-  xmlns:exsl="http://exslt.org/common"
-  xmlns="http://www.w3.org/1999/xhtml"
-  extension-element-prefixes="exsl"
-  version="1.0">
-<xsl:import href="file://{xslfile}"/>
-'''
-+ XSLCOMMON + 
-'''
-</xsl:stylesheet>
-''')
-
-
-MAL2HTML = ('''
-<xsl:stylesheet
-  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-  xmlns:mal="http://projectmallard.org/1.0/"
-  xmlns:cache="http://projectmallard.org/cache/1.0/"
-  xmlns:exsl="http://exslt.org/common"
-  xmlns="http://www.w3.org/1999/xhtml"
-  exclude-result-prefixes="mal cache"
-  extension-element-prefixes="exsl"
-  version="1.0">
-<xsl:import href="file://{xslfile}"/>
-<xsl:param name="mal.cache.file" select="'file://{cachefile}'"/>
-'''
-+ XSLCOMMON + 
-'''
-<xsl:template match="/">
-  <xsl:for-each select="cache:cache/mal:page | cache:cache/mal:stack">
-    <xsl:variable name="href" select="@cache:href"/>
-    <xsl:for-each select="document(@cache:href)">
-      <xsl:for-each select="mal:page | mal:stack/mal:page">
-        <xsl:call-template name="html.output"/>
-      </xsl:for-each>
-    </xsl:for-each>
-  </xsl:for-each>
-</xsl:template>
-</xsl:stylesheet>
-''')
-
-
-
-class InputFile:
-    def __init__(self, filepath, filename, sitedir=None):
-        self.filepath = filepath
-        self.filename = filename
-        self.absfile = os.path.join(filepath, filename)
-        self.absdir = os.path.dirname(self.absfile)
-        self.sitedir = sitedir or ''
-        self.sitefilename = self.sitedir + self.filename
-
-
-class PathResolver(lxml.etree.Resolver):
-    def __init__(self, srcdir, path):
-        if srcdir.endswith('/'):
-            self.srcdir = srcdir
-        else:
-            self.srcdir = srcdir + '/'
-        self.path = path
-
-    def resolve(self, uri, id, context):
-        if os.path.exists(uri):
-            return self.resolve_filename(uri, context)
-        if uri.startswith(self.srcdir):
-            ref = uri[len(self.srcdir):]
-        else:
-            ref = uri
-        for p in self.path:
-            tryfile = os.path.join(p, ref)
-            if os.path.exists(tryfile):
-                return self.resolve_filename(tryfile, context)
-        return None
-
-
-class Builder:
-    name = None
-    desc = None
-    blurb = None
-    formats = []
-    arguments = []
-    postblurb = None
-    config = None
-
-    def __init__(self, yelpbuild):
-        self.yelpbuild = yelpbuild
-        self.options = {}
-        self.fileargs = []
-        self.tmpdir = None
-
-    def __del__(self):
-        if self.tmpdir is not None:
-            shutil.rmtree(self.tmpdir)
-            self.tmpdir = None
-
-    def parse_args(self, args):
-        while len(args) > 0:
-            argdef = None
-            if args[0].startswith('--'):
-                for arg_ in self.arguments:
-                    if args[0] == '--' + arg_[0]:
-                        argdef = arg_
-                        break
-                if argdef is None:
-                    self.print_help()
-                    return 1
-            elif args[0].startswith('-'):
-                for arg_ in self.arguments:
-                    if args[0] == arg_[1]:
-                        argdef = arg_
-                        break
-                if argdef is None:
-                    self.print_help()
-                    return 1
-            if argdef is not None:
-                takesarg = (argdef[2] is not None)
-                if takesarg:
-                    if len(args) < 2:
-                        self.print_help()
-                        return 1
-                    self.options.setdefault(argdef[0], [])
-                    self.options[argdef[0]].append(args[1])
-                    args = args[2:]
-                else:
-                    self.options[argdef[0]] = True
-                    args = args[1:]
-            else:
-                self.fileargs.append(args[0])
-                args = args[1:]
-        cfgfile = None
-        if len(self.fileargs) > 0:
-            cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg')
-            if not os.path.exists(cfgfile):
-                cfgfile = None
-        if cfgfile is None:
-            cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg')
-        if os.path.exists(cfgfile):
-            self.config = configparser.ConfigParser()
-            try:
-                self.config.read(cfgfile)
-            except Exception as e:
-                print(e, file=sys.stderr)
-                sys.exit(1)
-        return 0
-
-    def get_option_bool(self, arg):
-        if arg in self.options:
-            return self.options[arg] == True
-        if self.config is not None:
-            val = self.config.get('build:' + self.name, arg, fallback=None)
-            if val is not None:
-                return (val == 'true')
-            val = self.config.get('build', arg, fallback=None)
-            if val is not None:
-                return (val == 'true')
-            val = self.config.get('default', arg, fallback=None)
-            if val is not None:
-                return (val == 'true')
-        return False
-
-    def get_option_str(self, arg):
-        if arg in self.options:
-            if isinstance(self.options[arg], list):
-                return self.options[arg][-1]
-        if self.config is not None:
-            val = self.config.get('build:' + self.name, arg, fallback=None)
-            if val is not None:
-                return val
-            val = self.config.get('build', arg, fallback=None)
-            if val is not None:
-                return val
-            val = self.config.get('default', arg, fallback=None)
-            if val is not None:
-                return val
-        return None
-
-    def get_option_list(self, arg):
-        if arg in self.options:
-            if isinstance(self.options[arg], list):
-                ret = []
-                for opt in self.options[arg]:
-                    ret.extend(opt.replace(',', ' ').split())
-                return ret
-        if self.config is not None:
-            val = self.config.get('build:' + self.name, arg, fallback=None)
-            if val is not None:
-                return val.replace(',', ' ').split()
-            val = self.config.get('build', arg, fallback=None)
-            if val is not None:
-                return val.replace(',', ' ').split()
-            val = self.config.get('default', arg, fallback=None)
-            if val is not None:
-                return val.replace(',', ' ').split()
-        return None
-
-    def get_xml(self, infile, path):
-        parser = lxml.etree.XMLParser()
-        parser.resolvers.add(PathResolver(os.path.realpath(infile.absdir), path))
-        tree = lxml.etree.parse(infile.absfile, parser=parser)
-        def pathloader(href, parse, encoding=None):
-            usefile = os.path.join(infile.absdir, href)
-            if not os.path.exists(href):
-                usefile = None
-            if usefile is None:
-                absdir = infile.absdir
-                if not absdir.endswith('/'):
-                    absdir = absdir + '/'
-                ref = href
-                if ref.startswith(absdir):
-                    ref = ref[len(absdir):]
-                for p in path:
-                    tryfile = os.path.join(p, ref)
-                    if os.path.exists(tryfile):
-                        usefile = tryfile
-                        break
-            if usefile is not None:
-                if parse == 'xml':
-                    return lxml.etree.parse(usefile, parser=parser).getroot()
-                elif parse == 'text':
-                    return open(usefile).read()
-            return None
-        lxml.ElementInclude.include(tree, loader=pathloader)
-        return tree
-
-    def iter_files(self, sitedir=None):
-        issite = self.get_option_bool('site')
-        if len(self.fileargs) == 0:
-            self.fileargs.append('.')
-        for filearg in self.fileargs:
-            if os.path.isdir(filearg):
-                if issite:
-                    for infile in self.iter_site(filearg, '/'):
-                        yield infile
-                else:
-                    for fname in os.listdir(filearg):
-                        if fname.endswith('.page'):
-                            yield InputFile(filearg, fname)
-            else:
-                if issite:
-                    # FIXME: should do some normalization here, I guess.
-                    # It's hard to get this perfect without a defined start dir
-                    yield InputFile(os.getcwd(), filearg, '/' + os.path.dirname(filearg))
-                else:
-                    yield InputFile(os.getcwd(), filearg)
-
-    def iter_site(self, filepath, sitedir):
-        for fname in os.listdir(filepath):
-            newpath = os.path.join(filepath, fname)
-            if os.path.isdir(newpath):
-                # FIXME https://github.com/projectmallard/pintail/issues/36
-                if fname == '__pintail__':
-                    continue
-                for infile in self.iter_site(newpath, sitedir + fname + '/'):
-                    yield infile
-            elif fname.endswith('.page'):
-                yield InputFile(filepath, fname, sitedir)
-
-    def create_tmpdir(self):
-        if self.tmpdir is None:
-            self.tmpdir = tempfile.mkdtemp()
-
-    def print_help(self):
-        print('Usage:   yelp-build ' + self.name + ' [OPTIONS] [FILES]')
-        print('Formats: ' + ' '.join(self.formats) + '\n')
-        #FIXME: prettify names of formats
-        if self.blurb is not None:
-            print(self.blurb + '\n')
-        print('Options:')
-        maxarglen = 2
-        args = []
-        for arg in self.arguments:
-            argkey = '--' + arg[0]
-            if arg[1] is not None:
-                argkey = arg[1] + ', ' + argkey
-            if arg[2] is not None:
-                argkey = argkey + ' ' + arg[2]
-            args.append((argkey, arg[3]))
-        for arg in args:
-            maxarglen = max(maxarglen, len(arg[0]) + 1)
-        for arg in args:
-            print('  ' + (arg[0]).ljust(maxarglen) + '  ' + arg[1])
-        if self.postblurb is not None:
-            print(self.postblurb)
-
-    def main(self, args):
-        pass
-
-
-class CacheBuilder (Builder):
-    name = 'cache'
-    desc = 'Convert a Mallard cache file'
-    blurb = ('Create a Mallard cache file from the page files FILES.\n' +
-             'If FILES contains directories, all .page files in those\n' +
-             'directories will be used.')
-    formats = ['mallard']
-    arguments = [
-        ('help',   '-h', None, 'Show this help and exit'),
-        ('output', '-o', 'OUT', 'Output files in the directory OUT'),
-        ('path',   '-p', 'PATH', 'Extra directories to search for files'),
-        ('site',   '-s', None, 'Treat pages as belonging to a Mallard site')
-    ]
-
-    def build_cache_in(self, filename):
-        with open(filename, 'w') as cachein:
-            print('<cache:cache xmlns:cache="http://projectmallard.org/cache/1.0/"' +
-                  ' xmlns:site="http://projectmallard.org/site/1.0/"'
-                  ' xmlns="http://projectmallard.org/1.0/">',
-                  file=cachein)
-            for infile in self.iter_files():
-                if infile.filename.endswith('.page'):
-                    page = '<page'
-                elif infile.filename.endswith('.stack'):
-                    page = '<stack'
-                else:
-                    continue
-                page += ' cache:href="file://' + urllib.parse.quote(os.path.realpath(infile.absfile)) + '"'
-                if self.get_option_bool('site'):
-                    page += ' site:dir="' + infile.sitedir + '"'
-                page += '/>'
-                print(page, file=cachein)
-            print('</cache:cache>', file=cachein)
-
-    def main(self, args, output=None, path=None):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        retcode = 0
-        self.create_tmpdir()
-        cacheinfile = os.path.join(self.tmpdir, 'index.cache.in')
-        self.build_cache_in(cacheinfile)
-        if output is None:
-            output = self.get_option_str('output')
-        if output is None:
-            output = 'index.cache'
-        if path is None:
-            path = self.get_option_list('path')
-        if path is None:
-            path = ':'
-        else:
-            path = ':'.join(path)
-        retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output,
-                                   '--path', path,
-                                   XSL_MALCACHE, cacheinfile])
-        return retcode
-
-
-class XhtmlBuilder (Builder):
-    name = 'xhtml'
-    desc = 'Convert input files to XHTML'
-    blurb = ('Create XHTML output from the input files FILES.\n' +
-             'FILES can be DocBook files, Mallard page files,\n' +
-             'or directories containing Mallard page files.')
-    formats = ['docbook4', 'docbook5', 'mallard']
-    arguments = [
-        ('help',   '-h', None, 'Show this help and exit'),
-        ('cache',  '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
-        ('output', '-o', 'OUT', 'Output files in the directory OUT'),
-        ('xsl',    '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'),
-        ('path',   '-p', 'PATH', 'Extra directories to search for files'),
-        ('ignore', '-i', None, 'Ignore missing media files')
-    ]
-
-    def __init__(self, yelpbuild, xhtml=True, epub=False):
-        super().__init__(yelpbuild)
-        self.mal2html = None
-        self.db2html = None
-        self.xhtml = xhtml
-        self.epub = epub
-        if self.epub:
-            self.intdatadir = 'yelp'
-        else:
-            self.intdatadir = ''
-        self.cacheinfile = None
-
-
-    def build_mallard_all(self, cache=None, output=None, xsl=None, path=None):
-        if self.mal2html is not None:
-            # We build all the pages on the first call, because it's faster
-            return 0
-        if path is None:
-            path = self.get_option_list('path')
-        self.create_tmpdir()
-        if cache is None:
-            cachefile = self.get_option_str('cache')
-        else:
-            cachefile = cache
-        cachebuilder = CacheBuilder(self.yelpbuild)
-        if cachefile is None:
-            cachefile = os.path.join(self.tmpdir, 'index.cache')
-            retcode = cachebuilder.main(self.fileargs, output=cachefile, path=path)
-            if retcode != 0:
-                return retcode
-            self.cacheinfile = cachefile
-        else:
-            cachefile = os.path.realpath(cachefile)
-            self.cacheinfile = os.path.join(self.tmpdir, 'index.cache.in')
-            cachebuilder.parse_args(self.fileargs)
-            cachebuilder.build_cache_in(self.cacheinfile)
-        self.mal2html = os.path.join(self.tmpdir, 'mal2html.xsl')
-        with open(self.mal2html, 'w') as xslout:
-            if self.xhtml:
-                xslfile = XSL_MAL2XHTML
-            else:
-                xslfile = XSL_MAL2HTML
-            includes = ''
-            if xsl is None:
-                customxsl = self.get_option_str('xsl')
-            else:
-                customxsl = xsl
-            if customxsl is not None:
-                customxsl = urllib.parse.quote(os.path.realpath(customxsl))
-                includes += '<xsl:include href="file://' + customxsl + '"/>'
-            if self.epub:
-                includes += '''<xsl:param name="mal.if.target" select="'target:epub target:html target:xhtml'"/>'''
-                includes += '''<xsl:template mode="html.header.mode" match="mal:page"/>'''
-                includes += '''<xsl:template mode="html.footer.mode" match="mal:page"/>'''
-            xslout.write(MAL2HTML.format(xslfile=xslfile,
-                                         cachefile=cachefile,
-                                         includes=includes,
-                                         intdatadir=self.intdatadir))
-        if output is None:
-            output = self.get_option_str('output')
-        if output is None:
-            output = os.getcwd()
-        else:
-            if not os.path.isdir(output):
-                print('Output must be a directory', file=sys.stderr)
-                return 1
-        if not output.endswith('/'):
-            # xsltproc is picky about this
-            output = output + '/'
-        if path is None:
-            pathstr = ':'
-        else:
-            pathstr = ':'.join(path)
-        retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output,
-                                   '--path', pathstr,
-                                   '--stringparam', 'mal.cache.file', cachefile,
-                                   self.mal2html, self.cacheinfile])
-        return retcode
-
-
-    def build_docbook(self, infile, output=None, xsl=None, path=None):
-        if self.db2html is None:
-            self.create_tmpdir()
-            self.db2html = os.path.join(self.tmpdir, 'db2html.xsl')
-            with open(self.db2html, 'w') as xslout:
-                if self.xhtml:
-                    xslfile = XSL_DB2XHTML
-                else:
-                    xslfile = XSL_DB2HTML
-                includes = ''
-                if xsl is not None:
-                    customxsl = xsl
-                else:
-                    customxsl = self.get_option_str('xsl')
-                if customxsl is not None:
-                    customxsl = urllib.parse.quote(os.path.realpath(customxsl))
-                    includes += '<xsl:include href="file://' + customxsl + '"/>'
-                xslout.write(DB2HTML.format(xslfile=xslfile,
-                                            includes=includes,
-                                            intdatadir=self.intdatadir))
-        if output is None:
-            output = self.get_option_str('output')
-        if output is None:
-            output = os.getcwd()
-        else:
-            if not os.path.isdir(output):
-                print('Output must be a directory', file=sys.stderr)
-                return 1
-        if path is None:
-            path = self.get_option_list('path')
-        if path is None:
-            pathstr = ':'
-        else:
-            pathstr = ':'.join(path)
-        retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output,
-                                   '--path', pathstr,
-                                   self.db2html, infile.absfile])
-        return retcode
-
-
-    def main(self, args, cache=None, output=None, xsl=None, path=None, ignore=None):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        if path is None:
-            pathopt = self.get_option_list('path')
-        else:
-            pathopt = path
-        path = []
-        if pathopt is not None:
-            for p in pathopt:
-                path.extend(p.split(':'))
-        if output is None:
-            output = self.get_option_str('output')
-        srcs = {}
-        for infile in self.iter_files():
-            if infile.filename.endswith('.page') or infile.filename.endswith('.stack'):
-                retcode = self.build_mallard_all(cache=cache, output=output, xsl=xsl, path=path)
-                if retcode != 0:
-                    return retcode
-                if output is not None:
-                    tree = self.get_xml(infile, path)
-                    if tree is None:
-                        return 1
-                    for el in tree.xpath('//*[@src]'):
-                        src = el.get('src')
-                        srcs.setdefault(src, [])
-                        orig = os.path.join(os.path.realpath(infile.absdir), src)
-                        if orig not in srcs[src]:
-                            srcs[src].append(orig)
-            elif infile.filename.endswith('.docbook') or infile.filename.endswith('.xml'):
-                retcode = self.build_docbook(infile, output=output, xsl=xsl, path=path)
-                if retcode != 0:
-                    return retcode
-                if output is not None:
-                    tree = self.get_xml(infile, path)
-                    if tree is None:
-                        return 1
-                    for el in tree.xpath('//*[@fileref]'):
-                        src = el.get('fileref')
-                        srcs.setdefault(src, [])
-                        orig = os.path.join(os.path.realpath(infile.absdir), src)
-                        if orig not in srcs[src]:
-                            srcs[src].append(orig)
-            else:
-                print('Error: No builder for ' + infile.filename)
-                return 1
-
-        if ignore is None:
-            ignore = self.get_option_bool('ignore')
-        tocopy = {}
-        for src in srcs:
-            useorig = None
-            for orig in srcs[src]:
-                if os.path.exists(orig):
-                    if useorig is None:
-                        useorig = orig
-                    else:
-                        print('Warning: Multiple sources for ' + src + '. Using first.',
-                              file=sys.stderr)
-            if useorig is None:
-                for p in path:
-                    tryorig = os.path.join(p, src)
-                    if os.path.exists(tryorig):
-                        useorig = tryorig
-                        break
-            if useorig is None:
-                if ignore:
-                    print('Warning: No source found for ' + src, file=sys.stderr)
-                else:
-                    print('Error: No source found for ' + src, file=sys.stderr)
-                    return 1
-            if useorig is not None:
-                destfile = os.path.join(output, src)
-                destdir = os.path.dirname(destfile)
-                os.makedirs(destdir, exist_ok=True)
-                shutil.copyfile(useorig, destfile)
-
-        if output is None:
-            shutil.copyfile(os.path.join(YELP_JSDIR, 'highlight.pack.js'),
-                            os.path.join(self.intdatadir, 'highlight.pack.js'))
-        else:
-            shutil.copyfile(os.path.join(YELP_JSDIR, 'highlight.pack.js'),
-                            os.path.join(output, self.intdatadir, 'highlight.pack.js'))
-
-        return 0
-
-
-class HtmlBuilder (Builder):
-    name = 'html'
-    desc = 'Convert input files to HTML'
-    blurb = ('Create HTML output from the input files FILES.\n' +
-             'FILES can be DocBook files, Mallard page files,\n' +
-             'or directories containing Mallard page files.')
-    formats = ['docbook4', 'docbook5', 'mallard']
-    arguments = [
-        ('help',   '-h', None, 'Show this help and exit'),
-        ('cache',  '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
-        ('output', '-o', 'OUT', 'Output files in the directory OUT'),
-        ('xsl',    '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'),
-        ('path',   '-p', 'PATH', 'Extra directories to search for files'),
-        ('ignore', '-i', None, 'Ignore missing media files')
-    ]
-
-    def __init__(self, yelpbuild):
-        super().__init__(yelpbuild)
-        self.xhtmlbuilder = XhtmlBuilder(yelpbuild, xhtml=False)
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        return self.xhtmlbuilder.main(args)
-
-
-class EpubBuilder (Builder):
-    name = 'epub'
-    desc = 'Create an EPUB file for Mallard'
-    blurb = ('Create an EPUB file from the Mallard page files FILES')
-    formats = ['mallard']
-    arguments = [
-        ('help',   '-h', None, 'Show this help and exit'),
-        ('cache',  '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
-        ('output', '-o', 'OUT', 'Output files in the directory OUT'),
-        ('xsl',    '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'),
-        ('path',   '-p', 'PATH', 'Extra directories to search for files'),
-        ('ignore', '-i', None, 'Ignore missing media files'),
-        ('nozip', None,  None, 'Do not zip the output directory')
-    ]
-
-    def __init__(self, yelpbuild):
-        super().__init__(yelpbuild)
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        output = self.get_option_str('output')
-        nozip = self.get_option_bool('nozip')
-        if nozip:
-            if output is None:
-                output = 'EPUB'
-            if os.path.isfile(output):
-                print('Error: Output must be a directory', file=sys.stderr)
-                sys.exit(1)
-            epubdir = output
-        else:
-            self.create_tmpdir()
-            if output is None:
-                output = 'index.epub'
-            if os.path.isdir(output):
-                print('Error: Output must be a file', file=sys.stderr)
-                sys.exit(1)
-            epubdir = os.path.join(self.tmpdir, 'EPUB')
-            os.makedirs(epubdir, exist_ok=True)
-        os.makedirs(os.path.join(epubdir, 'OPS', 'yelp'), exist_ok=True)
-
-        xhtmlbuilder = XhtmlBuilder(self.yelpbuild, epub=True)
-        retcode = xhtmlbuilder.main(self.fileargs,
-                                    cache=self.get_option_str('cache'),
-                                    output=os.path.join(epubdir, 'OPS'),
-                                    xsl=self.get_option_str('xsl'),
-                                    path=self.get_option_list('path'),
-                                    ignore=self.get_option_bool('ignore'))
-        if retcode != 0:
-            return retcode
-
-        with open(os.path.join(epubdir, 'mimetype'), 'w') as fd:
-            fd.write('application/epub+zip\n')
-
-        os.makedirs(os.path.join(epubdir, 'META-INF'), exist_ok=True)
-
-        with open(os.path.join(epubdir, 'META-INF', 'container.xml'), 'w') as fd:
-            fd.write('<?xml version="1.0" encoding="UTF-8"?>')
-            fd.write('<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">')
-            fd.write('<rootfiles>')
-            fd.write('<rootfile full-path="OPS/opf.opf" media-type="application/oebps-package+xml"/>')
-            fd.write('</rootfiles>')
-            fd.write('</container>\n')
-
-        path = self.get_option_list('path')
-        if path is None:
-            pathstr = ':'
-        else:
-            pathstr = ':'.join(path)
-        epubid = str(uuid.uuid4())
-        opfdata = ''
-        for fname in os.listdir(os.path.join(epubdir, 'OPS', 'yelp')):
-            opfdata += ' OPS/yelp/' + urllib.parse.quote(fname)
-        retcode = subprocess.call(['xsltproc', '--xinclude',
-                                   '-o', os.path.join(epubdir, 'OPS', 'opf.opf'),
-                                   '--path', pathstr,
-                                   '--stringparam', 'opf.id', epubid,
-                                   '--stringparam', 'opf.data', opfdata,
-                                   XSL_MAL_OPF, xhtmlbuilder.cacheinfile])
-        if retcode != 0:
-            return retcode
-        retcode = subprocess.call(['xsltproc', '--xinclude',
-                                   '-o', os.path.join(epubdir, 'OPS', 'ncx.ncx'),
-                                   '--path', pathstr,
-                                   '--stringparam', 'ncx.id', epubid,
-                                   XSL_MAL_NCX, xhtmlbuilder.cacheinfile])
-        if retcode != 0:
-            return retcode
-
-        if not nozip:
-            retcode = subprocess.call(['zip', '-q', '-r', os.path.realpath(output),
-                                       'mimetype', 'META-INF', 'OPS'],
-                                      cwd=os.path.realpath(epubdir))
-            if retcode != 0:
-                return retcode
-        return 0
-
-
-class YelpBuild:
-    def __init__(self):
-        pass
-
-    def main(self):
-        if len(sys.argv) < 2:
-            self.print_usage()
-            return 1
-
-        builder = None
-        for cls in Builder.__subclasses__():
-            if sys.argv[1] == cls.name:
-                builder = cls(self)
-
-        if builder is None:
-            print('Unrecognized command: ' + sys.argv[1], file=sys.stderr)
-            return 1
-
-        return builder.main(sys.argv[2:])
-
-    def print_usage(self):
-        print('Usage: yelp-builder <COMMAND> [OPTIONS] [FILES]')
-        namelen = 2
-        builders = []
-        for cls in sorted(Builder.__subclasses__(), key=(lambda cls: cls.name or '')):
-            namelen = max(namelen, len(cls.name) + 2)
-            builders.append(cls)
-
-        print('\nCommands:')
-        for cls in builders:
-            print('  ' + cls.name.ljust(namelen) + cls.desc)
-
-
-if __name__ == '__main__':
-    try:
-        sys.exit(YelpBuild().main())
-    except KeyboardInterrupt:
-        sys.exit(1)
diff --git a/tools/yelp-check.in b/tools/yelp-check.in
index d46e004..2578800 100755
--- a/tools/yelp-check.in
+++ b/tools/yelp-check.in
@@ -1,8 +1,7 @@
-#!/bin/sh
-# -*- indent-tabs-mode: nil -*-
+#!/bin/python3
 #
 # yelp-check
-# Copyright (C) 2011-2015 Shaun McCance <shaunm@gnome.org>
+# Copyright (C) 2011-2020 Shaun McCance <shaunm@gnome.org>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -18,1214 +17,1231 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
-xsl_mal_link='@XSL_MAL_LINK@'
-xsl_mal_license='@DATADIR@/xslt/mal-license.xsl'
-xsl_mal_status='@DATADIR@/xslt/mal-status.xsl'
-xsl_mal_rng='@DATADIR@/xslt/mal-rng.xsl'
-xsl_comments='@DATADIR@/xslt/comments.xsl'
-xsl_media='@DATADIR@/xslt/media.xsl'
-
-yelp_check_retval="0"
-
-urlencode () {
-    # We usually don't want to urlencode slashes, because we're
-    # usually converting file paths to URIs. But we do want to
-    # urlencode slases for names of RNG files in validate_page.
-    if [ "x$2" = "x/" ]; then
-        urlencode_slash=''
-    else
-        urlencode_slash='\/'
-    fi
-    echo "$1" | LC_ALL=C awk '
-BEGIN {
-  for (i = 1; i <= 255; i++) chars[sprintf("%c", i)] = i;
-}
-{
-  ret = "";
-  for (i = 1; i <= length($0); i++) {
-    c = substr($0, i, 1);
-    if (c ~ /['$urlencode_slash'a-zA-Z0-9._-]/)
-      ret = ret c;
-    else
-      ret = ret sprintf("%%%X%X", int(chars[c] / 16), chars[c] % 16);
-  }
-  print ret;
-}'
-}
-
-urldecode () {
-    echo "$1" | LC_ALL=C awk '
-BEGIN {
-  for(i = 0; i < 10; i++) hex[i] = i;
-  hex["A"] = hex["a"] = 10;
-  hex["B"] = hex["b"] = 11;
-  hex["C"] = hex["c"] = 12;
-  hex["D"] = hex["d"] = 13;
-  hex["E"] = hex["e"] = 14;
-  hex["F"] = hex["f"] = 15;
-}
-{
-  ret = "";
-  for (i = 1; i <= length($0); i++) {
-    c = substr($0, i, 1);
-    if (c == "+") {
-      ret = ret " ";
-    }
-    else if (c == "%") {
-      c = sprintf("%c", hex[substr($0, i + 1, 1)] * 16 + hex[substr($0, i + 2, 1)]);
-      ret = ret c;
-      i += 2;
-    }
-    else {
-      ret = ret c;
+import configparser
+import lxml.etree
+import os
+import sys
+import urllib.request
+import shutil
+import subprocess
+import tempfile
+import textwrap
+
+
+DATADIR = '@DATADIR@'
+
+XML_ID = '{http://www.w3.org/XML/1998/namespace}id'
+NAMESPACES = {
+    'mal':   'http://projectmallard.org/1.0/',
+    'cache': 'http://projectmallard.org/cache/1.0/',
+    'db':    'http://docbook.org/ns/docbook',
+    'e':     'http://projectmallard.org/experimental/',
+    'ui':    'http://projectmallard.org/ui/1.0/',
+    'uix':   'http://projectmallard.org/experimental/ui/',
+    'xlink': 'http://www.w3.org/1999/xlink'
     }
-  }
-  print ret;
-}'
-}
-
-docbook_version='
-<xsl:stylesheet
-    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-    xmlns:db="http://docbook.org/ns/docbook"
-    version="1.0">
-<xsl:output method="text"/>
-<xsl:template match="/">
-  <xsl:choose>
-    <xsl:when test="/db:*/@version">
-      <xsl:value-of select="/db:*/@version"/>
-    </xsl:when>
-    <xsl:when test="/db:*">
-      <xsl:text>5.0</xsl:text>
-    </xsl:when>
-    <xsl:otherwise>
-      <xsl:text>4</xsl:text>
-    </xsl:otherwise>
-  </xsl:choose>
-</xsl:template>
-</xsl:stylesheet>
-'
-
-mallard_style='
-<xsl:stylesheet
-    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-    xmlns:mal="http://projectmallard.org/1.0/"
-    version="1.0">
-<xsl:output method="text"/>
-<xsl:template match="/mal:page"><xsl:value-of select="@style"/></xsl:template>
-</xsl:stylesheet>'
-
-yelp_usage() {
-    (
-        echo "Usage: yelp-check <COMMAND> [OPTIONS] [FILES]"
-        echo ""
-        echo "Commands:"
-        echo "  comments      Print the editorial comments in a document"
-        echo "  hrefs         Find broken external links in a document"
-        echo "  ids           Find Mallard page IDs that do not match file names"
-        echo "  license       Report the license of Mallard pages"
-        echo "  links         Find broken xref or linkend links in a document"
-        echo "  media         Find broken references to media files"
-        echo "  orphans       Find orphaned pages in a Mallard document"
-        echo "  status        Report the status of Mallard pages"
-        echo "  style         Report the style attribute of Mallard pages"
-        echo "  validate      Validate files against a DTD or RNG"
-    ) 1>&2
-}
-yelp_usage_hrefs () {
-    (
-        echo "Usage: yelp-check hrefs <FILES>"
-        echo ""
-        echo "  Find broken href links in FILES in a Mallard document, or"
-        echo "  broken ulink or XLink links in FILES in a DocBook document."
-        echo ""
-        echo "Options:"
-        echo "  -s            Treat pages as belonging to a Mallard site"
-    ) 1>&2
-}
-yelp_usage_ids () {
-    (
-        echo "Usage: yelp-check ids <FILES>"
-        echo ""
-        echo "  Find pages in a Mallard document whose page ID does not match"
-        echo "  the base file name of the page file."
-        echo ""
-        echo "Options:"
-        echo "  -s            Treat pages as belonging to a Mallard site"
-    ) 1>&2
-}
-yelp_usage_links () {
-    (
-        echo "Usage: yelp-check links <FILES>"
-        echo ""
-        echo "  Find broken xref links in FILES in a Mallard document,"
-        echo "  or broken linkend links in FILES in a DocBook document."
-        echo ""
-        echo "Options:"
-        echo "  -c CACHE      Use the existing Mallard cache CACHE"
-        echo "  -s            Treat pages as belonging to a Mallard site"
-        echo "  -i            Ignore xrefs where href is present"
-    ) 1>&2
-}
-yelp_usage_media () {
-    (
-        echo "Usage: yelp-check media <FILES>"
-        echo ""
-        echo "  Find broken references to media files. In Mallard, this"
-        echo "  checks media and thumb elements. In DocBook, this checks"
-        echo "  audiodata, imagedata, and videodata elements."
-        echo ""
-        echo "Options:"
-        echo "  -s            Treat pages as belonging to a Mallard site"
-    ) 1>&2
-}
-yelp_usage_orphans () {
-    (
-        echo "Usage: yelp-check orphans <FILES>"
-        echo ""
-        echo "  Locate orphaned pages among FILES in a Mallard document."
-        echo "  Orphaned pages are any pages that cannot be reached by"
-        echo "  topic links alone from the index page."
-        echo ""
-        echo "Options:"
-        echo "  -c CACHE      Use the existing Mallard cache CACHE"
-        echo "  -s            Treat pages as belonging to a Mallard site"
-    ) 1>&2
-}
-yelp_usage_comments () {
-    (
-        echo "Usage: yelp-check comments <FILES>"
-        echo ""
-        echo "  Print the editorial comments in the files FILES, using the"
-        echo "  comment element in Mallard and the remark element in DocBook."
-        echo ""
-        echo "Options:"
-        echo "  -s            Treat pages as belonging to a Mallard site"
-    ) 1>&2
-}
-yelp_usage_license () {
-    (
-        echo "Usage: yelp-check license <FILES>"
-        echo ""
-        echo "  Report the license of the Mallard page files FILES. Each"
-        echo "  matching page is reporting along with its license, reported"
-        echo "  based on the href attribute of the license element. Common"
-        echo "  licenses use a shortened identifier. Pages with multiple"
-        echo "  licenses have the identifiers separated by spaces. Pages"
-        echo "  with no license element report 'none'. Licenses with no"
-        echo "  href attribute are reported as 'unknown'."
-        echo ""
-        echo "Options:"
-        echo "  -s                  Treat pages as belonging to a Mallard site"
-        echo "  --only LICENSES     Only show pages whose license is in LICENSES"
-        echo "  --except LICENSES   Exclude pages whose license is in LICENSES"
-        echo "  --totals            Show total counts for each license"
-        echo "LICENSES may be a comma- and/or space-separated list."
-    ) 1>&2
-}
-yelp_usage_style () {
-    (
-        echo "Usage: yelp-check style <FILES>"
-        echo ""
-        echo "  Report the page style attribute of the Mallard page files"
-        echo "  FILES. Each matching page is reporting along with its status."
-        echo ""
-        echo "Options:"
-        echo "  -s                  Treat pages as belonging to a Mallard site"
-        echo "  --only STYLES       Only show pages whose style is in STATUSES"
-        echo "  --except STYLES     Exclude pages whose style is in STATUSES"
-        echo "  --totals            Show total counts for each style"
-        echo "STYLES may be comma- and/or space-separated lists."
-    ) 1>&2
-}
-yelp_usage_status () {
-    (
-        echo "Usage: yelp-check status <FILES>"
-        echo ""
-        echo "  Report the status of the Mallard page files FILES. Each"
-        echo "  matching page is reporting along with its status."
-        echo ""
-        echo "Options:"
-        echo "  -s                  Treat pages as belonging to a Mallard site"
-        echo "  --version VER       Select revisions with the version attribute VER"
-        echo "  --docversion VER    Select revisions with the docversion attribute VER"
-        echo "  --pkgversion VER    Select revisions with the pkgversion attribute VER"
-        echo "  --older DATE        Only show pages older than DATE"
-        echo "  --newer DATE        Only show pages newer than DATE"
-        echo "  --only STATUSES     Only show pages whose status is in STATUSES"
-        echo "  --except STATUSES   Exclude pages whose status is in STATUSES"
-        echo "  --totals            Show total counts for each status"
-        echo "VER and STATUSES may be comma- and/or space-separated lists."
-    ) 1>&2
-}
-yelp_usage_validate () {
-    (
-        echo "Usage: yelp-check validate <FILES>"
-        echo ""
-        echo "  Validate FILES against the appropriate DTD or RNG."
-        echo "  For Mallard pages, perform automatic RNG merging"
-        echo "  based on the version attribute."
-        echo ""
-        echo "Options:"
-        echo "  -s                  Treat pages as belonging to a Mallard site"
-        echo "  --strict            Disallow unknown namespaces"
-        echo "  --allow NS          Explicitly allow namespace NS in strict mode"
-        echo "  --jing              Use jing instead of xmllint for RNG validation"
-    ) 1>&2
-}
-
-if [ $# = 0 ]; then
-    yelp_usage
-    exit 1
-fi
-
-yelp_check_iter_site () {
-    for dir in "$1"/*; do
-        if [ -d "$dir" ]; then
-            if [ $(basename "$dir") != "__pintail__" ]; then
-                yelp_check_iter_site "$dir"
-            fi
-        fi
-    done
-    for page in "$1"/*.page; do
-        if [ -e "$page" ]; then
-            $check_page "$page" || yelp_check_retval="$?"
-        fi
-    done
-}
-
-yelp_check_iter_args () {
-    for arg in "$@"; do
-        ext=$(echo "$arg" | sed -e 's/.*\.//')
-        if [ -d "$arg" ]; then
-            if [ "x$check_site" = "x1" ]; then
-                yelp_check_iter_site "$arg" 
-            else
-                for page in "${arg%%/}"/*.page; do
-                    if [ -e "$page" ]; then
-                        $check_page "$page"
-                    fi
-                done
-            fi
-        elif [ "x$ext" = "xpage" -o "x$ext" = "xstub" -o "x$ext" = "xcache" ]; then
-            $check_page "$arg" || yelp_check_retval="$?"
-	elif [ "x$check_db" != "x" -a \( "x$ext" = "xdocbook" -o "x$ext" = "xxml" \) ]; then
-	    $check_db "$arg" || yelp_check_retval="$?"
-        else
-            echo "Unrecognized page $arg" 1>&2
-            exit 1
-        fi
-    done
-    return $yelp_check_retval
-}
-
-yelp_hrefs_page () {
-    base=$(dirname "$1")
-    if [ "x$check_site" = "x1" ]; then
-        sdir=$(cd $(dirname "$1") && pwd)
-        sdir=${sdir##${check_site_root}}/
-    fi
-    (
-        echo '<xsl:stylesheet'
-        echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-        echo ' xmlns:mal="http://projectmallard.org/1.0/"'
-        echo ' xmlns:db="http://docbook.org/ns/docbook"'
-        echo ' xmlns:xlink="www.w3.org/1999/xlink"'
-        echo ' version="1.0">'
-        echo '<xsl:output method="text"/>'
-        echo '<xsl:template match="/mal:page">'
-        echo ' <xsl:for-each select="//*[@href]">'
-        echo '  <xsl:if test="not(starts-with(@href, '\''mailto:'\''))">'
-        echo '   <xsl:value-of select="/mal:page/@id"/>'
-        echo '   <xsl:text> </xsl:text>'
-        echo '   <xsl:value-of select="@href"/>'
-        echo '   <xsl:text>&#x000A;</xsl:text>'
-        echo '  </xsl:if>'
-        echo ' </xsl:for-each>'
-        echo '</xsl:template>'
-        echo '<xsl:template match="/*[namespace-uri(.) = '\'\''] | /db:*">'
-        echo ' <xsl:for-each select="//ulink/@url | //*/xlink:href">'
-        echo '  <xsl:if test="not(starts-with(string(.), '\''mailto:'\''))">'
-        echo '   <xsl:value-of select="(ancestor-or-self::*/@id | ancestor-or-self::*/@xml:id)[last()]"/>'
-        echo '   <xsl:text> </xsl:text>'
-        echo '   <xsl:value-of select="string(.)"/>'
-        echo '   <xsl:text>&#x000A;</xsl:text>'
-        echo '  </xsl:if>'
-        echo ' </xsl:for-each>'
-        echo '</xsl:template>'
-        echo '</xsl:stylesheet>'
-    ) | xsltproc --xinclude - "$1" | sort | uniq | \
-        while read id url; do
-            colon=`echo "$url" | cut -d: -f1`
-            if [ "x$colon" = "x$url" ]; then
-                test -f "$base/"$(urldecode "$url") || echo "$sdir$id: $url"
-            else
-                status=$(cat "$check_href_cache" | while read trystatus tryurl; do
-                                if [ "x$tryurl" = "x$url" ]; then echo "$trystatus"; break; fi
-                            done)
-                if [ "x$status" = "x1" ]; then
-                    true
-                elif [ "x$status" = "x0" ]; then
-                    echo "$sdir$id: $url"
-                else
-                    (curl -s -I -L "$url" | \
-                            grep '^HTTP/' | tail -n 1 | head -n 1 | \
-                            grep -q 'HTTP/.\.. 200 .*') \
-                        && (echo "1 $url" >> "$check_href_cache") \
-                        || (echo "0 $url" >> "$check_href_cache"; echo "$sdir$id: $url")
-                fi
-            fi
-        done
-}
-
-yelp_hrefs () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_hrefs
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_hrefs
-        exit 1
-    fi
-    check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    check_href_cache=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    echo > "$check_href_cache"
-    check_db=yelp_hrefs_page
-    check_page=yelp_hrefs_page
-    yelp_check_iter_args "$@" > "$check_out_file"
-    yelp_check_retval=$(wc -l < "$check_out_file")
-    if test "x$yelp_check_retval" != "x0"; then
-        yelp_check_retval=1
-    fi
-    cat "$check_out_file"
-    rm "$check_out_file"
-    rm "$check_href_cache"
-    exit $yelp_check_retval
-}
-
-yelp_ids_page () {
-    pageid=$(
-    (
-        echo '<xsl:stylesheet'
-        echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-        echo ' xmlns:mal="http://projectmallard.org/1.0/"'
-        echo ' version="1.0">'
-        echo '<xsl:output method="text"/>'
-        echo '<xsl:template match="/mal:page">'
-        echo '<xsl:value-of select="@id"/>'
-        echo '</xsl:template>'
-        echo '</xsl:stylesheet>'
-    ) | xsltproc --xinclude - "$1")
-    dname=$(dirname "$1")
-    bname=$(basename "$1")
-    if [ "x$pageid.page" != "x$bname" ]; then
-	if [ "x$check_site" = "x1" ]; then
-            sdir=$(cd $(dirname "$1") && pwd)
-            sdir=${sdir##${check_site_root}}/
-            echo $sdir$(basename "$1")": $pageid"
-	elif [ "x$dname" = 'x.' ]; then
-            echo "$bname: $pageid"
-        else
-            echo "$1: $pageid"
-        fi
-        yelp_check_retval=1
-    fi
-}
-
-yelp_ids () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_ids
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_ids
-        exit 1
-    fi
-    check_db=
-    check_page=yelp_ids_page
-    yelp_check_iter_args "$@"
-    exit $yelp_check_retval
-}
-
-yelp_links_db () {
-    (
-        echo '<xsl:stylesheet'
-        echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-        echo ' xmlns:db="http://docbook.org/ns/docbook"'
-        echo ' xmlns:exsl="http://exslt.org/common"'
-        echo ' extension-element-prefixes="exsl"'
-        echo ' version="1.0">'
-        echo '<xsl:output method="text"/>'
-        echo '<xsl:key name="idkey" match="*[@id or @xml:id]" use="@id | @xml:id"/>'
-        echo '<xsl:template match="/">'
-        echo ' <xsl:for-each select="//*[@linkend]">'
-        echo '  <xsl:if test="not(key('"'idkey'"', @linkend))">'
-        echo '   <xsl:value-of select="(ancestor-or-self::*/@id | ancestor-or-self::*/@xml:id)[last()]"/>'
-        echo '   <xsl:text>: </xsl:text>'
-        echo '   <xsl:value-of select="@linkend"/>'
-        echo '   <xsl:text>&#x000A;</xsl:text>'
-        echo '  </xsl:if>'
-        echo ' </xsl:for-each>'
-        echo '</xsl:template>'
-        echo '</xsl:stylesheet>'
-    ) | xsltproc --xinclude - "$1"
-}
-
-yelp_links_page () {
-    if [ "x$check_site" = "x1" ]; then
-        sdir=$(cd $(dirname "$1") && pwd)
-        sdir=${sdir##${check_site_root}}/
-    fi
-    (
-        echo '<xsl:stylesheet'
-        echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-        echo ' xmlns:mal="http://projectmallard.org/1.0/"'
-        echo ' xmlns:site="http://projectmallard.org/site/1.0/"'
-        echo ' xmlns:exsl="http://exslt.org/common"'
-        echo ' extension-element-prefixes="exsl"'
-        echo ' version="1.0">'
-        xsl='file://'`urlencode "$xsl_mal_link"`
-        echo '<xsl:import href="'"$xsl"'"/>'
-        check_cache_url='file://'`urlencode "$check_cache_file"`
-        echo '<xsl:param name="mal.cache.file" select="'"'$check_cache_url'"'"/>'
-        echo '<xsl:variable name="site.dir" select="'"'$sdir'"'"/>'
-        echo '<xsl:output method="text"/>'
-        echo '<xsl:key name="__site.cache.key" match="mal:page | mal:section"'
-        echo '         use="concat(ancestor-or-self::mal:page/@site:dir, @id)"/>'
-        echo '<xsl:template match="/mal:page">'
-        echo ' <xsl:variable name="page" select="@id"/>'
-        if [ "x$check_links_ignore" = "x1" ]; then
-            echo ' <xsl:for-each select="//*[@xref][not(@href)]">'
-        else
-            echo ' <xsl:for-each select="//*[@xref]">'
-        fi
-        echo '  <xsl:variable name="xref" select="@xref"/>'
-        echo '  <xsl:variable name="linkid">'
-        echo '   <xsl:call-template name="mal.link.xref.linkid"/>'
-        echo '  </xsl:variable>'
-        echo '  <xsl:for-each select="$mal.cache">'
-        echo '   <xsl:if test="count(key('"'mal.cache.key'"', $linkid) | '
-        echo '                       key('"'__site.cache.key'"', $linkid)) = 0">'
-        echo '    <xsl:value-of select="$site.dir"/>'
-        echo '    <xsl:value-of select="$page"/>'
-        echo '    <xsl:text>: </xsl:text>'
-        echo '    <xsl:value-of select="$xref"/>'
-        echo '    <xsl:text>&#x000A;</xsl:text>'
-        echo '   </xsl:if>'
-        echo '  </xsl:for-each>'
-        echo ' </xsl:for-each>'
-        echo '</xsl:template>'
-        echo '</xsl:stylesheet>'
-    ) | xsltproc --xinclude - "$1"
-}
-
-yelp_links () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_links
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-c")
-                shift
-                check_cache_file="$1"
-                shift
-                ;;
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            "-i")
-                shift
-                check_links_ignore="1"
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_links
-        exit 1
-    fi
-    if [ "x$check_cache_file" != "x" ]; then
-        check_cache_dir=$(dirname "$check_cache_file")
-        check_cache_dir=$(cd "$check_cache_dir" && pwd)
-        check_cache_file="$check_cache_dir/"$(basename "$check_cache_file")
-    elif [ -d "$1" ]; then
-        check_cache_file=1
-    else
-        case "$1" in
-            *.page | *.stub | *.cache)
-                check_cache_file=1
-                ;;
-            *)
-                break
-                ;;
-        esac
-    fi
-    if [ "x$check_cache_file" = "x1" ]; then
-        check_cache_file_is_tmp="yes"
-        check_cache_file=$(mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX)
-        if [ "x$check_site" = "x1" ]; then
-            yelp-build cache -s -o "$check_cache_file" "$@"
-        else
-            yelp-build cache -o "$check_cache_file" "$@"
-        fi
-    fi
-
-    check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    check_db=yelp_links_db
-    check_page=yelp_links_page
-    yelp_check_iter_args "$@" > "$check_out_file"
-    yelp_check_retval=$(wc -l < "$check_out_file")
-    if test "x$yelp_check_retval" != "x0"; then
-        yelp_check_retval=1
-    fi
-    cat "$check_out_file"
-    rm "$check_out_file"
-    if [ "x$check_cache_file_is_tmp" = "xyes" ]; then
-        rm "$check_cache_file"
-    fi
-    exit $yelp_check_retval
-}
-
-yelp_media_page () {
-    ext=$(echo "$1" | sed -e 's/.*\.//')
-    bname=$(basename "$1" ".$ext")
-    dname=$(dirname "$1")
-    if [ "x$dname" = "x." ]; then
-        dname=""
-    else
-        dname="$dname"/
-    fi;
-    if [ "x$check_site" = "x1" ]; then
-        sdir=$(cd "$dname" && pwd)
-        sdir=${sdir##${check_site_root}}/
-    else
-        sdir="$dname"
-    fi
-    xsltproc "$xsl_media" "$1" | \
-        sort | uniq | \
-        while read line; do
-            src=$(urldecode "$line")
-            if [ ! -f "$dname$src" ]; then
-                echo "$sdir$bname: $line"
-            fi
-        done
-}
-
-yelp_media () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_media
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_media
-        exit 1
-    fi
-    check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    check_db=yelp_media_page
-    check_page=yelp_media_page
-    yelp_check_iter_args "$@" > "$check_out_file"
-    yelp_check_retval=$(wc -l < "$check_out_file")
-    if test "x$yelp_check_retval" != "x0"; then
-        yelp_check_retval=1
-    fi
-    cat "$check_out_file"
-    rm "$check_out_file"
-    exit $yelp_check_retval
-}
-
-yelp_orphans_page () {
-    if [ "x$check_site" = "x1" ]; then
-        sdir=$(cd $(dirname "$1") && pwd)
-        sdir=${sdir##${check_site_root}}/
-    fi
-    (
-        echo '<xsl:stylesheet'
-        echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-        echo ' xmlns:mal="http://projectmallard.org/1.0/"'
-        echo ' xmlns:exsl="http://exslt.org/common"'
-        echo ' extension-element-prefixes="exsl"'
-        echo ' version="1.0">'
-        xsl='file://'`urlencode "$xsl_mal_link"`
-        echo '<xsl:import href="'"$xsl"'"/>'
-        check_cache_url='file://'`urlencode "$check_cache_file"`
-        echo '<xsl:param name="mal.cache.file" select="'"'$check_cache_url'"'"/>'
-        echo '<xsl:variable name="site.dir" select="'"'$sdir'"'"/>'
-        echo '<xsl:output method="text"/>'
-        echo '<xsl:template match="/mal:page">'
-        echo ' <xsl:variable name="trails">'
-        echo '  <xsl:call-template name="mal.link.linktrails"/>'
-        echo ' </xsl:variable>'
-        echo ' <xsl:if test="@id != '"'index'"' and count(exsl:node-set($trails)/*) = 0">'
-        echo '  <xsl:value-of select="$site.dir"/>'
-        echo '  <xsl:value-of select="@id"/>'
-        echo '  <xsl:text>&#x000A;</xsl:text>'
-        echo ' </xsl:if>'
-        echo '</xsl:template>'
-        echo '</xsl:stylesheet>'
-    ) | xsltproc --xinclude - "$1"
-}
-
-yelp_orphans () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_orphans
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            "-c")
-                shift
-                check_cache_file="$1"
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_orphans
-        exit 1
-    fi
-    if [ "x$check_cache_file" != "x" ]; then
-        check_cache_dir=$(dirname "$check_cache_file")
-        check_cache_dir=$(cd "$check_cache_dir" && pwd)
-        check_cache_file="$check_cache_dir/"$(basename "$check_cache_file")
-    elif [ -d "$1" ]; then
-        check_cache_file=1
-    else
-        case "$1" in
-            *.page | *.stub | *.cache)
-                check_cache_file=1
-                ;;
-            *)
-                break
-                ;;
-        esac
-    fi
-    if [ "x$check_cache_file" = "x1" ]; then
-        check_cache_file_is_tmp="yes"
-        check_cache_file=$(mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX)
-        if [ "x$check_site" = "x1" ]; then
-            yelp-build cache -s -o "$check_cache_file" "$@"
-        else
-            yelp-build cache -o "$check_cache_file" "$@"
-        fi
-    fi
-
-    check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    check_db=
-    check_page=yelp_orphans_page
-    yelp_check_iter_args "$@" > "$check_out_file"
-    yelp_check_retval=$(wc -l < "$check_out_file")
-    if test "x$yelp_check_retval" != "x0"; then
-        yelp_check_retval=1
-    fi
-    cat "$check_out_file"
-    rm "$check_out_file"
-    if [ "x$check_cache_file_is_tmp" = "xyes" ]; then
-        rm "$check_cache_file"
-    fi
-    exit $yelp_check_retval
-}
-
-yelp_comments_page () {
-    ext=$(echo "$1" | sed -e 's/.*\.//')
-    bname=$(basename "$1" ".$ext")
-    if [ "x$check_site" = "x1" ]; then
-        sdir=$(cd $(dirname "$1") && pwd)
-        sdir=${sdir##${check_site_root}}/
-    fi
-    xsltproc --stringparam basename "$bname" \
-             --stringparam site.dir "$sdir" \
-             "$xsl_comments" "$1"
-}
-
-yelp_comments () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_comments
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_comments
-        exit 1
-    fi
-    check_db=yelp_comments_page
-    check_page=yelp_comments_page
-    yelp_check_iter_args "$@"
-    exit $yelp_check_retval
-}
-
-yelp_license_page () {
-    if [ "x$check_site" = "x1" ]; then
-        sdir=$(cd $(dirname "$1") && pwd)
-        sdir=${sdir##${check_site_root}}/
-    fi
-    xsltproc --xinclude \
-        --stringparam only "$check_only" \
-        --stringparam except "$check_except" \
-        --stringparam totals "$check_totals" \
-        --stringparam site.dir "$sdir" \
-        "$xsl_mal_license" "$1"
-}
-
-yelp_license () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_license
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            "--only")
-                shift
-                check_only="$1"
-                shift
-                ;;
-            "--except")
-                shift
-                check_except="$1"
-                shift
-                ;;
-            "--totals")
-                check_totals="1"
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_license
-        exit 1
-    fi
-    check_db=
-    check_page=yelp_license_page
-    if [ "x$check_totals" = "x1" ]; then
-        yelp_check_iter_args "$@" | \
-            sort | uniq -c | sed -e 's/^ *//' | awk '{print $2 ": " $1}'
-    else
-        yelp_check_iter_args "$@" | sort
-    fi
-}
-
-yelp_style_page () {
-    ext=$(echo "$1" | sed -e 's/.*\.//')
-    bname=$(basename "$1" ".$ext")
-    if [ "x$check_site" = "x1" ]; then
-        sdir=$(cd $(dirname "$1") && pwd)
-        sdir=${sdir##${check_site_root}}/
-    fi
-    style=$(echo "$mallard_style" | xsltproc - "$1")
-    output=1
-    if [ "x$check_only_defined" = "x1" ]; then
-        output=0
-        if [ "x$check_only" = "x" ]; then
-            # We treat an empty --only '' as requesting pages with no style
-            if [ "x$style" = "x" ]; then output=1; fi
-        else
-            for pstyle in "$style"; do
-                for sstyle in $(echo "$check_only" | sed -e 's/,/ /g'); do
-                    if [ "$pstyle" = "$sstyle" ]; then
-                        output=1
+
+def _stringify(el):
+    """Return the text of el, all of its descendants, and el's own tail."""
+    # Each child's tail is appended by the recursive call for that child,
+    # so only el's own tail needs to be added here.
+    pieces = [el.text or '']
+    pieces.extend(_stringify(child) for child in el)
+    return ''.join(pieces) + (el.tail or '')
+
+def get_format(node):
+    """Map node's namespace to a format name, or None if unrecognized."""
+    namespace = lxml.etree.QName(node).namespace
+    if namespace is None:
+        # For now, a missing namespace is simply assumed to mean docbook4.
+        return 'docbook4'
+    if namespace == NAMESPACES['db']:
+        return 'docbook5'
+    if namespace in (NAMESPACES['mal'], NAMESPACES['cache']):
+        return 'mallard'
+    return None
+
+class InputFile:
+    """An input file plus the paths and site-relative names derived from it."""
+    def __init__(self, filepath, filename, sitedir=None):
+        self.filepath, self.filename = filepath, filename
+        self.absfile = os.path.join(filepath, filename)
+        self.absdir = os.path.dirname(self.absfile)
+        self.sitedir = '' if not sitedir else sitedir
+        self.sitefilename = self.sitedir + filename
+
+
+class Checker:
+    name = None
+    desc = None
+    blurb = None
+    formats = []
+    arguments = []
+    postblurb = None
+    xinclude = True
+    config = None
+
+    def __init__(self, yelpcheck):
+        """Keep yelpcheck; start with no options, file arguments or tmpdir."""
+        self.yelpcheck = yelpcheck
+        self.options, self.fileargs = {}, []
+        self.tmpdir = None
+
+    def __del__(self):
+        # Best-effort removal of the create_tmpdir() directory; never raise here.
+        if getattr(self, 'tmpdir', None) is not None:
+            shutil.rmtree(self.tmpdir, ignore_errors=True)
+
+    def parse_args(self, args):
+        """Sort args into self.options and self.fileargs, then load any config.
+
+        Entries of self.arguments are (long name, short flag or None, value
+        placeholder or None, help text). An option with a placeholder takes
+        the next argument and collects its values in a list; other options
+        are stored as True. Arguments not starting with '-' are file names.
+        Prints help and returns 1 for an unknown option or a missing value,
+        otherwise returns 0. Exits with status 1 if reading config raises.
+        """
+        pos = 0
+        while pos < len(args):
+            cur = args[pos]
+            if cur.startswith('--'):
+                found = [a for a in self.arguments if cur == '--' + a[0]]
+            elif cur.startswith('-'):
+                found = [a for a in self.arguments if cur == a[1]]
+            else:
+                self.fileargs.append(cur)
+                pos += 1
+                continue
+            if not found:
+                self.print_help()
+                return 1
+            argdef = found[0]
+            if argdef[2] is None:
+                self.options[argdef[0]] = True
+                pos += 1
+            elif pos + 1 < len(args):
+                self.options.setdefault(argdef[0], []).append(args[pos + 1])
+                pos += 2
+            else:
+                self.print_help()
+                return 1
+        # Prefer a config file next to the first file argument, else in cwd.
+        cfgfile = None
+        if self.fileargs:
+            nearby = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg')
+            cfgfile = nearby if os.path.exists(nearby) else None
+        if cfgfile is None:
+            cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg')
+        if os.path.exists(cfgfile):
+            self.config = configparser.ConfigParser()
+            try:
+                self.config.read(cfgfile)
+            except Exception as e:
+                print(e, file=sys.stderr)
+                sys.exit(1)
+        return 0
+
+    def get_option_bool(self, arg):
+        """Return the boolean option arg from the command line or config.
+
+        The command line wins; otherwise config sections 'check:<name>',
+        'check', 'default' are tried in order and only 'true' is true.
+        """
+        if arg in self.options:
+            return self.options[arg] == True
+        if self.config is not None:
+            for section in ('check:' + self.name, 'check', 'default'):
+                val = self.config.get(section, arg, fallback=None)
+                if val is not None:
+                    return val == 'true'
+        return False
+
+    def get_option_str(self, arg):
+        """Return the string option arg from the command line or config.
+
+        The last command-line value wins; otherwise config sections
+        'check:<name>', 'check', 'default' are tried in order, else None.
+        """
+        given = self.options.get(arg)
+        if isinstance(given, list):
+            return given[-1]
+        if self.config is not None:
+            for section in ('check:' + self.name, 'check', 'default'):
+                val = self.config.get(section, arg, fallback=None)
+                if val is not None:
+                    return val
+        return None
+
+    def get_option_list(self, arg):
+        """Return the list option arg from the command line or config.
+
+        Values may be comma- and/or space-separated and, on the command
+        line, repeated; all items are merged into one flat list. Without a
+        command-line value, config sections 'check:<name>', 'check' and
+        'default' are tried in that order. Returns None if the option is
+        not set anywhere.
+        """
+        given = self.options.get(arg)
+        if isinstance(given, list):
+            return [item for opt in given for item in opt.replace(',', ' ').split()]
+        if self.config is not None:
+            for section in ('check:' + self.name, 'check', 'default'):
+                val = self.config.get(section, arg, fallback=None)
+                if val is not None:
+                    return val.replace(',', ' ').split()
+        return None
+
+    def iter_files(self, sitedir=None):
+        """Yield an InputFile per file argument ('.' if none); sitedir is unused."""
+        issite = self.get_option_bool('site')
+        if not self.fileargs:
+            self.fileargs.append('.')
+        for filearg in self.fileargs:
+            dname, bname = os.path.split(filearg)
+            if not os.path.isdir(filearg):
+                if issite:
+                    # FIXME: not normalized. Like iter_site(): sitedir ends in '/'.
+                    yield InputFile(os.path.abspath(dname), bname,
+                                    ('/' + dname + '/') if dname else '/')
+                else:
+                    yield InputFile(os.getcwd(), filearg)
+            elif issite:
+                yield from self.iter_site(filearg, '/')
+            else:
+                for fname in sorted(os.listdir(filearg)):
+                    if fname.endswith('.page'):
+                        yield InputFile(filearg, fname)
+
+    def iter_site(self, filepath, sitedir):
+        """Recursively yield an InputFile for each .page file under filepath."""
+        # Entries are sorted so that reports come out in a stable order.
+        for fname in sorted(os.listdir(filepath)):
+            newpath = os.path.join(filepath, fname)
+            if not os.path.isdir(newpath):
+                if fname.endswith('.page'):
+                    yield InputFile(filepath, fname, sitedir)
+            # FIXME https://github.com/projectmallard/pintail/issues/36
+            elif fname != '__pintail__':
+                yield from self.iter_site(newpath, sitedir + fname + '/')
+
+    def get_xml(self, xmlfile):
+        """Parse xmlfile.absfile, expanding XIncludes when self.xinclude is set."""
+        # FIXME: cache these if we add a feature to run multiple checkers at once
+        doc = lxml.etree.parse(xmlfile.absfile)
+        if self.xinclude:
+            lxml.etree.XInclude()(doc.getroot())
+        return doc
+
+    def create_tmpdir(self):
+        # Create the temporary directory once; later calls reuse self.tmpdir.
+        self.tmpdir = tempfile.mkdtemp() if self.tmpdir is None else self.tmpdir
+
+    def print_help(self):
+        """Print usage, supported formats, blurb and an aligned option table."""
+        print('Usage:   yelp-check ' + self.name + ' [OPTIONS] [FILES]')
+        print('Formats: ' + ' '.join(self.formats) + '\n')
+        #FIXME: prettify names of formats
+        if self.blurb is not None:
+            print(self.blurb + '\n')
+        print('Options:')
+        rows = []
+        for argdef in self.arguments:
+            longname, shortflag, valname, helptext = argdef[:4]
+            label = '--' + longname
+            if shortflag is not None:
+                label = shortflag + ', ' + label
+            if valname is not None:
+                label = label + ' ' + valname
+            rows.append((label, helptext))
+        width = max([2] + [len(label) + 1 for label, _ in rows])
+        for label, helptext in rows:
+            print('  ' + label.ljust(width) + '  ' + helptext)
+        if self.postblurb is not None:
+            print(self.postblurb)
+
+    def main(self, args):
+        """Run the checker on args; this base implementation does nothing."""
+
+
+class HrefsChecker (Checker):
+    name = 'hrefs'
+    desc = 'Find broken external links in a document'
+    blurb = ('Find broken href links in FILES in a Mallard document, or\n' +
+             'broken ulink or XLink links in FILES in a DocBook document.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('allow', None, 'URL', 'Allow URL or list of URLs without checking')
+    ]
+    postblurb = 'URL may be a comma- and/or space-separated list, or specified\nmultiple times.'
+
+    def main(self, args):
+        """Print 'file: href' for each broken link; return 1 if any, else 0."""
+        from urllib.parse import unquote
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        # Cache of URL -> reachable, pre-seeded with URLs used as identifiers.
+        hrefs = {
+             'http://creativecommons.org/licenses/by-sa/3.0/': True,
+            'https://creativecommons.org/licenses/by-sa/3.0/': True,
+             'http://creativecommons.org/licenses/by-sa/3.0/us/': True,
+            'https://creativecommons.org/licenses/by-sa/3.0/us/': True
+        }
+        hrefs.update((url, True) for url in self.get_option_list('allow') or [])
+        xlinkattr = '{' + NAMESPACES['xlink'] + '}href'
+        retcode = 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            for el in xml.xpath('//*[@href | @xlink:href | self::ulink/@url]',
+                                namespaces=NAMESPACES):
+                href = el.get('href', el.get(xlinkattr, el.get('url')))
+                if href is None or href.startswith('mailto:'):
+                    continue
+                if href in hrefs:
+                    isok = hrefs[href]
+                elif ':' not in href:
+                    # No scheme: a local file relative to the page; not cached.
+                    isok = os.path.isfile(os.path.join(infile.absdir, unquote(href)))
+                else:
+                    try:
+                        with urllib.request.urlopen(href) as req:
+                            isok = (req.status == 200)
+                    except Exception:  # any failure counts as a broken link
+                        isok = False
+                    hrefs[href] = isok
+                if not isok:
+                    retcode = 1
+                    print(infile.sitefilename + ': ' + href)
+
+        return retcode
+
+
+class IdsChecker (Checker):
+    name = 'ids'
+    desc = 'Find Mallard page IDs that do not match file names'
+    blurb = ('Find pages in a Mallard document whose page ID does not match\n' +
+             'the base file name of the page file.')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def main(self, args):
+        """Print 'file: id' for each page whose ID differs; return 1 if any."""
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            isok, pageid = False, None
+            if infile.filename.endswith('.page'):
+                try:
+                    pageid = xml.getroot().get('id')
+                    isok = (pageid == os.path.basename(infile.filename)[:-5])
+                except Exception:
+                    # Not a bare except: Ctrl-C and sys.exit() must propagate.
+                    isok = False
+            if not isok:
+                retcode = 1
+                print(infile.sitefilename + ': ' + (pageid or ''))
+
+        return retcode
+
+
+class LinksChecker (Checker):
+    """Find xref (Mallard) or linkend (DocBook) links with no matching id."""
+    name = 'links'
+    desc = 'Find broken xref or linkend links in a document'
+    blurb = ('Find broken xref links in FILES in a Mallard document,\n' +
+             'or broken linkend links in FILES in a DocBook document.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
+        ('ignore', '-i', None, 'Ignore xrefs where href is present')
+    ]
+
+    def __init__(self, yelpcheck):
+        super().__init__(yelpcheck)
+        self.idstoxrefs = {}
+        self.idstolinkends = {}
+
+    def _accumulate_mal(self, node, pageid, sectid, xrefs, sitedir=None):
+        """Record node's full id in idstoxrefs (plus its xref if xrefs), recursively."""
+        thisid = node.get('id')
+        if thisid is not None:
+            if node.tag == '{' + NAMESPACES['mal'] + '}page':
+                pageid = thisid
+            else:
+                sectid = thisid
+        curid = pageid
+        ignore = self.get_option_bool('ignore')
+        if curid is not None:
+            if sectid is not None:
+                # id attrs in cache files are already fully formed
+                if '#' in sectid:
+                    curid = sectid
+                else:
+                    curid = curid + '#' + sectid
+            if sitedir is not None:
+                # id attrs in cache files already have sitedir prefixed
+                if not curid.startswith('/'):
+                    curid = sitedir + curid
+            found = self.idstoxrefs.setdefault(curid, [])
+            xref = node.get('xref') if xrefs else None
+            if xref is not None and not (ignore and node.get('href') is not None):
+                found.append(xref)
+        for child in node:
+            self._accumulate_mal(child, pageid, sectid, xrefs, sitedir)
+
+    def _accumulate_db(self, node, nodeid):
+        """Record ids and the linkends found under each id in idstolinkends."""
+        thisid = node.get('id')
+        if thisid is None:
+            thisid = node.get(XML_ID)
+        if thisid is not None:
+            nodeid = thisid
+            self.idstolinkends.setdefault(nodeid, [])
+        linkend = node.get('linkend')
+        if nodeid is not None and linkend is not None:
+            self.idstolinkends[nodeid].append(linkend)
+        for child in node:
+            self._accumulate_db(child, nodeid)
+
+    def main(self, args):
+        """Print each broken link as 'id: target'; return 1 if any, else 0."""
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+        cachefile = self.get_option_str('cache')
+        if cachefile is not None:
+            xml = self.get_xml(InputFile(os.getcwd(), cachefile))
+            self._accumulate_mal(xml.getroot(), None, None, False)
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            fmt = get_format(xml.getroot())
+            if fmt == 'mallard':
+                self._accumulate_mal(xml.getroot(), None, None, True, infile.sitedir)
+            elif fmt in ('docbook4', 'docbook5'):
+                # For DocBook, we assume each filearg is its own document, so
+                # we reset the dict each time and only check within the file.
+                # Note that XInclude and SYSTEM includes DO happen first.
+                self.idstolinkends = {}
+                self._accumulate_db(xml.getroot(), None)
+                for curid in self.idstolinkends:
+                    for linkend in self.idstolinkends[curid]:
+                        if linkend not in self.idstolinkends:
+                            print(curid + ': ' + linkend)
+                            retcode = 1
+
+        for curid in self.idstoxrefs:
+            for xref in self.idstoxrefs[curid]:
+                # startswith(), not [0]: an empty xref is reported instead of raising
+                checkref = xref
+                if checkref.startswith('#'):
+                    checkref = curid.split('#')[0] + checkref
+                if curid.startswith('/') and not checkref.startswith('/'):
+                    checkref = curid[:curid.rfind('/')+1] + checkref
+                if checkref not in self.idstoxrefs:
+                    print(curid + ': ' + xref)
+                    retcode = 1
+        return retcode
+
+
+class MediaChecker (Checker):
+    """Report media references whose target file does not exist."""
+    name = 'media'
+    desc = 'Find broken references to media files'
+    blurb = ('Find broken references to media files. In Mallard, this\n' +
+             'checks media and thumb elements. In DocBook, this checks\n' +
+             'audiodata, imagedata, and videodata elements.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def main(self, args):
+        """Print each missing media file as 'file: src'; return 1 if any, else 0."""
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            fmt = get_format(xml.getroot())
+            srcs = []
+            if fmt == 'mallard':
+                for el in xml.xpath('//mal:media[@src] | //uix:thumb | //ui:thumb | //e:mouseover',
+                                    namespaces=NAMESPACES):
+                    srcs.append(el.get('src'))
+            elif fmt in ('docbook4', 'docbook5'):
+                # FIXME: do we care about entityref?
+                dbxpath = ('//audiodata | //imagedata | //videodata' if fmt == 'docbook4'
+                           else '//db:audiodata | //db:imagedata | //db:videodata')
+                for el in xml.xpath(dbxpath, namespaces=NAMESPACES):
+                    srcs.append(el.get('fileref'))
+            for src in srcs:
+                # Only mal:media is selected on @src; other elements may lack
+                # the attribute, and os.path.join() would raise on None.
+                if src is None or os.path.exists(os.path.join(infile.absdir, src)):
+                    continue
+                print(infile.sitefilename + ': ' + src)
+                retcode = 1
+        return retcode
+
+
+class OrphansChecker (Checker):
+    """Find Mallard pages that no chain of guide links ties to the index page."""
+    name = 'orphans'
+    desc = 'Find orphaned pages in a Mallard document'
+    blurb = ('Locate orphaned pages among FILES in a Mallard document.\n' +
+             'Orphaned pages are any pages that cannot be reached by\n' +
+             'topic links alone from the index page.')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE')
+    ]
+
+    def __init__(self, yelpcheck):
+        super().__init__(yelpcheck)
+        self.guidelinks = {}
+        self.sitesubdirs = set()
+
+    def _collect_links(self, node, sitedir):
+        """Record the guide/topic links of one page in self.guidelinks.
+
+        node is a mal:page element with a non-empty id, taken either from
+        a page file or from a cache file. guidelinks maps each page id to
+        the set of pages acting as its guides. Pages that contain a
+        site-subdirs links element are added to self.sitesubdirs.
+        """
+        pageid = node.get('id')
+        if pageid[0] != '/':
+            # id attrs in cache files already have sitedir prefixed
+            pageid = sitedir + pageid
+        else:
+            sitedir = pageid[:pageid.rfind('/')+1]
+        self.guidelinks.setdefault(pageid, set())
+        # For the purposes of finding orphans, we'll just pretend that
+        # all links to or from sections are just to or from pages.
+        # The paths are relative ('.//'): an absolute '//' searches the whole
+        # document, which for a cache file means the links of every page.
+        for linktype in ('guide', 'topic'):
+            for el in node.xpath('.//mal:info/mal:link[@type="' + linktype + '"]',
+                                 namespaces=NAMESPACES):
+                xref = el.get('xref')
+                if xref is None or xref == '' or xref[0] == '#':
+                    continue
+                if '#' in xref:
+                    xref = xref[:xref.find('#')]
+                if sitedir is not None and sitedir != '':
+                    if xref[0] != '/':
+                        xref = sitedir + xref
+                if linktype == 'guide':
+                    self.guidelinks[pageid].add(xref)
+                else:
+                    self.guidelinks.setdefault(xref, set()).add(pageid)
+        for el in node.xpath('.//mal:links[@type="site-subdirs" or @type="site:subdirs"]',
+                             namespaces=NAMESPACES):
+            self.sitesubdirs.add(pageid)
+
+    def main(self, args):
+        """Print pages that guide links cannot trace back to the index; return 1 if any, else 0."""
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+
+        cachefile = self.get_option_str('cache')
+        if cachefile is not None:
+            xml = self.get_xml(InputFile(os.getcwd(), cachefile))
+            for page in xml.getroot():
+                if page.tag == '{' + NAMESPACES['mal'] + '}page':
+                    pageid = page.get('id')
+                    if pageid is None or pageid == '':
+                        continue
+                    self._collect_links(page, page.get('{http://projectmallard.org/site/1.0/}dir', ''))
+
+        pageids = set()
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            pageid = xml.getroot().get('id')
+            # An empty id is skipped too, as in the cache branch above.
+            if pageid is None or pageid == '':
+                continue
+            pageids.add(infile.sitedir + pageid)
+            self._collect_links(xml.getroot(), infile.sitedir)
+
+        siteupdirs = {}
+        for pageid in self.sitesubdirs:
+            dirname = pageid[:pageid.rfind('/')+1]
+            for subid in self.guidelinks:
+                if subid.startswith(dirname):
+                    if subid.endswith('/index'):
+                        mid = subid[len(dirname):-6]
+                        if mid != '' and '/' not in mid:
+                            siteupdirs[subid] = pageid
+
+        if self.get_option_bool('site'):
+            okpages = set(['/index'])
+        else:
+            okpages = set(['index'])
+        for pageid in sorted(pageids):
+            if pageid in okpages:
+                isok = True
+            else:
+                isok = False
+                guides = list(self.guidelinks[pageid])
+                if pageid in siteupdirs:
+                    updir = siteupdirs[pageid]
+                    if updir not in guides:
+                        guides.append(updir)
+                cur = 0
+                while cur < len(guides):
+                    if guides[cur] in okpages:
+                        isok = True
                         break
-                    fi
-                done
-                if [ "x$output" = "x1" ]; then break; fi
-            done
-        fi
-    fi
-    if [ "x$check_except_defined" = "x1" ]; then
-        if [ "x$check_except" = "x" ]; then
-            # We treat an empty --except '' as excluding pages with no style
-            if [ "x$style" = "x" ]; then output=0; fi
-        else
-            for pstyle in "$style"; do
-                for sstyle in $(echo "$check_except" | sed -e 's/,/ /g'); do
-                    if [ "$pstyle" = "$sstyle" ]; then
-                        output=0
+                    if guides[cur] in self.guidelinks:
+                        for guide in self.guidelinks[guides[cur]]:
+                            if guide not in guides:
+                                guides.append(guide)
+                    cur += 1
+            if isok:
+                okpages.add(pageid)
+            else:
+                print(pageid)
+                retcode = 1
+
+        return retcode
+
+
+class ValidateChecker (Checker):
+    """Validate files with xmllint or jing against the schema for their format."""
+    name = 'validate'
+    desc = 'Validate files against a DTD or RNG'
+    blurb = ('Validate FILES against the appropriate DTD or RNG.\n' +
+             'For Mallard pages, perform automatic RNG merging\n' +
+             'based on the version attribute.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('strict', None, None, 'Disallow unknown namespaces'),
+        ('allow', None, 'NS', 'Explicitly allow namespace NS in strict mode'),
+        ('jing', None, None, 'Use jing instead of xmllint for RNG validation')
+    ]
+    postblurb = 'NS may be a comma- and/or space-separated list, or specified\nmultiple times.'
+
+    def main(self, args):
+        """Print the validator output for each failing file; return non-zero if any fails."""
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            fmt = get_format(xml.getroot())
+            command = None
+            if fmt == 'mallard':
+                version = xml.getroot().get('version')
+                if version is None or version == '':
+                    tag = xml.getroot().tag
+                    if tag == '{' + NAMESPACES['mal'] + '}stack':
+                        # 1.2 isn't final yet as of 2020-01-09. Stacks will
+                        # likely be in 1.2, so we can assume at least that.
+                        version = '1.2'
+                    elif tag == '{' + NAMESPACES['cache'] + '}cache':
+                        version = 'cache/1.0'
+                    else:
+                        version = '1.0'
+                self.create_tmpdir()
+                rng = os.path.join(self.tmpdir,
+                                   version.replace('/', '__').replace(' ', '__'))
+                if not os.path.exists(rng):
+                    strict = 'true()' if self.get_option_bool('strict') else 'false()'
+                    allow = ' '.join(self.get_option_list('allow') or [])
+                    subprocess.call(['xsltproc', '-o', rng,
+                                    '--param', 'rng.strict', strict,
+                                    '--stringparam', 'rng.strict.allow', allow,
+                                    os.path.join(DATADIR, 'xslt', 'mal-rng.xsl'),
+                                    infile.absfile])
+                if self.get_option_bool('jing'):
+                    command = ['jing', '-i', rng, infile.filename]
+                else:
+                    command = ['xmllint', '--noout', '--xinclude', '--noent',
+                               '--relaxng', rng, infile.filename]
+            elif fmt == 'docbook4':
+                if xml.docinfo.doctype.startswith('<!DOCTYPE'):
+                    command = ['xmllint', '--noout', '--xinclude', '--noent',
+                               '--postvalid', infile.filename]
+                else:
+                    command = ['xmllint', '--noout', '--xinclude', '--noent',
+                               '--dtdvalid',
+                               'http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd',
+                               infile.filename]
+            elif fmt == 'docbook5':
+                version = xml.getroot().get('version')
+                if version is None or version == '':
+                    version = '5.0'
+                # Canonical URIs are http, but they 301 redirect to https. jing
+                # can handle https fine, but not the redirect. And jing doesn't
+                # look at catalogs. So just always feed jing an https URI.
+                rnghttp = 'http://docbook.org/xml/' + version + '/rng/docbook.rng'
+                rnghttps = 'https://docbook.org/xml/' + version + '/rng/docbook.rng'
+                if self.get_option_bool('jing'):
+                    command = ['jing', '-i', rnghttps, infile.filename]
+                else:
+                    # xmllint, on the other hand, does support catalogs. It also
+                    # doesn't do the redirect, but it wouldn't matter if it did
+                    # because it doesn't do https. So if the schema is available
+                    # locally in the catalog, hand xmllint the http URI so it
+                    # can use the local copy. Otherwise, we have to get curl
+                    # involved to do https.
+                    try:
+                        catfile = subprocess.check_output(['xmlcatalog',
+                                                           '/etc/xml/catalog',
+                                                           rnghttp],
+                                                          stderr=subprocess.DEVNULL,
+                                                          text=True)
+                        for catline in catfile.split('\n'):
+                            if catline.startswith('file://'):
+                                command = ['xmllint', '--noout', '--xinclude',  '--noent',
+                                           '--relaxng', rnghttp, infile.filename]
+                    except Exception:
+                        # Best effort: if the catalog lookup fails, fall back
+                        # to downloading the schema below.
+                        pass
+                    if command is None:
+                        self.create_tmpdir()
+                        rngfile = os.path.join(self.tmpdir, 'docbook-' + version + '.rng')
+                        if not os.path.exists(rngfile):
+                            urllib.request.urlretrieve(rnghttps, rngfile)
+                        command = ['xmllint', '--noout', '--xinclude',  '--noent',
+                                   '--relaxng', rngfile, infile.filename]
+            if command is not None:
+                try:
+                    subprocess.check_output(command,
+                                            cwd=infile.filepath,
+                                            stderr=subprocess.STDOUT,
+                                            text=True)
+                except subprocess.CalledProcessError as e:
+                    retcode = e.returncode
+                    print(e.output)
+            else:
+                retcode = 1
+
+        return retcode
+
+
+class CommentsChecker (Checker):
+    """Print editorial comments (Mallard comment, DocBook remark) found in files."""
+    name = 'comments'
+    desc = 'Print the editorial comments in a document'
+    blurb = ('Print the editorial comments in the files FILES, using the\n' +
+             'comment element in Mallard and the remark element in DocBook.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def main(self, args):
+        """Print each comment with its page or section id; return 0 unless parsing fails."""
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            fmt = get_format(xml.getroot())
+            if fmt == 'mallard':
+                for el in xml.xpath('//mal:comment', namespaces=NAMESPACES):
+                    # File-name fallback: a page without an id must not raise TypeError.
+                    thisid = xml.getroot().get('id') or infile.filename
+                    par = el
+                    while par is not None:
+                        if par.tag == '{' + NAMESPACES['mal'] + '}section':
+                            sectid = par.get('id')
+                            if sectid is not None:
+                                thisid = thisid + '#' + sectid
+                                break
+                        par = par.getparent()
+                    print('Page:  ' + infile.sitedir + thisid)
+                    for ch in el.xpath('mal:cite[1]', namespaces=NAMESPACES):
+                        name = _stringify(ch).strip()
+                        href = ch.get('href')
+                        if href is not None and href.startswith('mailto:'):
+                            name = name + ' <' + href[7:] + '>'
+                        print('From:  ' + name)
+                        date = ch.get('date')
+                        if date is not None:
+                            print('Date:  ' + date)
+                    print('')
+                    for ch in el:
+                        # XML comments and PIs have a non-string tag; skip them and cite.
+                        if not isinstance(ch.tag, str) or ch.tag == '{' + NAMESPACES['mal'] + '}cite':
+                            continue
+                        elif ch.tag in ('{' + NAMESPACES['mal'] + '}p',
+                                        '{' + NAMESPACES['mal'] + '}title'):
+                            for s in _stringify(ch).strip().split('\n'):
+                                print('  ' + s.strip())
+                            print('')
+                        else:
+                            name = lxml.etree.QName(ch).localname
+                            print('  <' + name + '>...</' + name + '>\n')
+            elif fmt in ('docbook4', 'docbook5'):
+                if fmt == 'docbook4':
+                    dbxpath = '//remark'
+                else:
+                    dbxpath = '//db:remark'
+                for el in xml.xpath(dbxpath, namespaces=NAMESPACES):
+                    thisid = infile.filename
+                    par = el
+                    while par is not None:
+                        # The xml:id value is used only when no id attribute exists.
+                        sectid = par.get('id', par.get(XML_ID))
+                        if sectid is not None:
+                            thisid = thisid + '#' + sectid
+                            break
+                        par = par.getparent()
+                    print('Page:  ' + thisid)
+                    flag = el.get('revisionflag')
+                    if flag is not None:
+                        print('Flag:  ' + flag)
+                    print('')
+                    for s in _stringify(el).strip().split('\n'):
+                        print('  ' + s.strip())
+                    print('')
+
+        return 0
+
+
+class LicenseChecker (Checker):
+    """Report the license identifiers declared by Mallard pages."""
+    name = 'license'
+    desc = 'Report the license of Mallard pages'
+    blurb = ('Report the license of the Mallard page files FILES. Each\n'
+             'matching page is reporting along with its license, reported\n'
+             'based on the href attribute of the license element. Common\n'
+             'licenses use a shortened identifier. Pages with multiple\n'
+             'licenses have the identifiers separated by spaces. Pages\n'
+             'with no license element report \'none\'. Licenses with no\n'
+             'href attribute are reported as \'unknown\'')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('only', None, 'LICENSES', 'Only show pages whose license is in LICENSES'),
+        ('except', None, 'LICENSES', 'Exclude pages whose license is in LICENSES'),
+        ('totals', None, None, 'Show total counts for each license')
+    ]
+    postblurb = 'LICENSES may be a comma- and/or space-separated list, or specified\nmultiple times.'
+
+    def get_license(self, href):
+        """Return a short identifier for the license URL href.
+
+        Creative Commons and GNU license URLs are abbreviated; None
+        and any other URL yield 'unknown'.
+        """
+        if href is None:
+            return 'unknown'
+        if href.startswith(('http://creativecommons.org/licenses/',
+                            'https://creativecommons.org/licenses/')):
+            parts = [part for part in href.split('/') if part]
+            return 'cc-' + '-'.join(parts[3:])
+        if href.startswith(('http://www.gnu.org/licenses/',
+                            'https://www.gnu.org/licenses/')):
+            return href.split('/')[-1].replace('.html', '')
+        return 'unknown'
+
+    def main(self, args):
+        """List each page with its licenses, or per-license totals.
+
+        Returns 0 unless argument parsing fails.
+        """
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        counts = {}
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            thisid = xml.getroot().get('id') or infile.filename
+            licenses = [self.get_license(el.get('href'))
+                        for el in xml.xpath('/mal:page/mal:info/mal:license',
+                                            namespaces=NAMESPACES)]
+            if not licenses:
+                licenses.append('none')
+
+            # --only keeps a page when at least one of its licenses is listed.
+            only = self.get_option_list('only')
+            if only is not None and not any(lic in only for lic in licenses):
+                continue
+            # --except drops a page when at least one of its licenses is listed.
+            cept = self.get_option_list('except')
+            if cept is not None and any(lic in cept for lic in licenses):
+                continue
+
+            if not self.get_option_bool('totals'):
+                print(infile.sitedir + thisid + ': ' + ' '.join(licenses))
+                continue
+            for lic in licenses:
+                counts[lic] = counts.get(lic, 0) + 1
+
+        if self.get_option_bool('totals'):
+            for lic in sorted(counts):
+                print(lic + ': ' + str(counts[lic]))
+
+        return 0
+
+
+class StatusChecker (Checker):
+    name = 'status'
+    desc = 'Report the status of Mallard pages'
+    blurb = ('Report the status of the Mallard page files FILES. Each\n' +
+             'matching page is reporting along with its status.')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('version',    None, 'VER', 'Select revisions with the version attribute VER'),
+        ('docversion', None, 'VER', 'Select revisions with the docversion attribute VER'),
+        ('pkgversion', None, 'VER', 'Select revisions with the pkgversion attribute VER'),
+        ('older',  None, 'DATE', 'Only show pages older than DATE'),
+        ('newer',  None, 'DATE', 'Only show pages newer than DATE'),
+        ('only',   None, 'STATUSES', 'Only show pages whose status is in STATUSES'),
+        ('except', None, 'STATUSES', 'Exclude pages whose status is in STATUSES'),
+        ('totals', None, None, 'Show total counts for each status')
+    ]
+    postblurb = 'VER and STATUSES may be comma- and/or space-separated lists, or specified\nmultiple times.'
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        totals = {}
+
+        checks = []
+        ver = self.get_option_list('version')
+        if ver is not None:
+            checks.append(ver)
+        ver = self.get_option_list('docversion')
+        if ver is not None:
+            checks.append(['doc:' + v for v in ver])
+        ver = self.get_option_list('pkgversion')
+        if ver is not None:
+            checks.append(['pkg:' + v for v in ver])
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            pageid = xml.getroot().get('id')
+            bestrev = None
+            for rev in xml.xpath('/mal:page/mal:info/mal:revision', namespaces=NAMESPACES):
+                revversion = (rev.get('version') or '').split()
+                docversion = rev.get('docversion')
+                if docversion is not None:
+                    revversion.append('doc:' + docversion)
+                pkgversion = rev.get('pkgversion')
+                if pkgversion is not None:
+                    revversion.append('pkg:' + pkgversion)
+                revok = True
+                for check in checks:
+                    checkok = False
+                    for v in check:
+                        if v in revversion:
+                            checkok = True
+                            break
+                    if not checkok:
+                        revok = False
                         break
-                    fi
-                done
-                #if [ "x$output" = "x0" ]; then break; fi
-            done
-        fi
-    fi
-    if [ "x$output" = "x1" ]; then
-        echo "$sdir$bname: $style"
-    fi
-}
-
-yelp_style () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_style
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            "--only")
-                shift
-                check_only_defined=1
-                check_only="$1"
-                shift
-                ;;
-            "--except")
-                shift
-                check_except_defined=1
-                check_except="$1"
-                shift
-                ;;
-            "--totals")
-                check_totals="1"
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_style
-        exit 1
-    fi
-    check_db=
-    check_page=yelp_style_page
-    if [ "x$check_totals" = "x1" ]; then
-        yelp_check_iter_args "$@" | \
-            while read line; do
-                styles=$(echo "$line" | sed -e 's/^[^:]*://')
-                if [ "x$styles" = "x" ]; then
-                    echo ""
-                else
-                    for style in $styles; do 
-                        echo "$style"
-                    done
-                fi
-            done | \
-            sort | uniq -c | sed -e 's/^ *//' | awk '{print $2 ": " $1}'
-    else
-        yelp_check_iter_args "$@" | sort
-    fi
-}
-
-yelp_status () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_status
-        exit 1
-    fi
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            "--version")
-                shift
-                check_version="$1"
-                shift
-                ;;
-            "--docversion")
-                shift
-                check_docversion="$1"
-                shift
-                ;;
-            "--pkgversion")
-                shift
-                check_pkgversion="$1"
-                shift
-                ;;
-            "--older")
-                shift
-                check_older="$1"
-                shift
-                ;;
-            "--newer")
-                shift
-                check_newer="$1"
-                shift
-                ;;
-            "--only")
-                shift
-                check_only="$1"
-                shift
-                ;;
-            "--except")
-                shift
-                check_except="$1"
-                shift
-                ;;
-            "--totals")
-                check_totals="1"
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_status
-        exit 1
-    fi
-    check_cache_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    if [ "x$check_site" = "x1" ]; then
-        yelp-build cache -s -o "$check_cache_file" "$@"
-    else
-        yelp-build cache -o "$check_cache_file" "$@"
-    fi
-    xsltproc \
-        --stringparam version "$check_version" \
-        --stringparam docversion "$check_docversion" \
-        --stringparam pkgversion "$check_pkgversion" \
-        --stringparam newer "$check_newer" \
-        --stringparam older "$check_older" \
-        --stringparam only "$check_only" \
-        --stringparam except "$check_except" \
-        --stringparam totals "$check_totals" \
-        "$xsl_mal_status" "$check_cache_file"
-    rm "$check_cache_file"
-    return 0
-}
-
-yelp_validate_db () {
-    version=$(echo "$docbook_version" | xsltproc - "$1")
-    major=$(echo "$version" | cut -c1)
-    if [ "x$major" = "x5" ]; then
-        check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-        # Canonical URIs are http, but they 301 redirect to https. jing can handle
-        # https fine, but not the redirect. And jing doesn't look at catalogs. So
-        # just always feed jing an https URI.
-        if [ "x$check_jing" = "x1" ]; then
-            rng_uri="https://docbook.org/xml/$version/rng/docbook.rng"
-            jing -i "$rng_uri" "$1" > "$check_out_file" 2>&1
-        else
-            # xmllint, on the other hand, does support catalogs. It also doesn't
-            # do the redirect, but it wouldn't matter if it did because it doesn't
-            # do https. So if the schema is available locally in the catalog, hand
-            # xmllint the http URI so it can use the local copy. Otherwise, we have
-            # to get curl involved to do https.
-            rng_uri="http://docbook.org/xml/$version/rng/docbook.rng"
-            incat=$(xmlcatalog /etc/xml/catalog "$rng_uri" | grep -c '^file:')
-            if [ "x$incat" != "x0" ]; then
-                xmllint --noout --xinclude --noent --relaxng "$rng_uri" "$1" > "$check_out_file" 2>&1
-            else
-                rng_uri="https://docbook.org/xml/$version/rng/docbook.rng"
-                check_rng_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-                curl -sL -o "$check_rng_file" "$rng_uri"
-                xmllint --noout --xinclude --noent --relaxng "$check_rng_file" "$1" > "$check_out_file" 2>&1
-                rm "$check_rng_file"
-            fi
-        fi
-        yelp_check_retval="$?"
-        cat "$check_out_file" | grep -v 'validates$'
-        rm "$check_out_file"
-    elif xmllint --nocdata "$1" | grep -q '<!DOCTYPE'; then
-        xmllint --noout --xinclude --noent --postvalid "$1" || yelp_check_retval="$?"
-    else
-        dtd_uri='http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd'
-        xmllint --noout --xinclude --noent --dtdvalid "$dtd_uri" "$1" || yelp_check_retval="$?"
-    fi
-}
-
-yelp_validate_page () {
-    # Using temp files because pipes create subshells, making it really
-    # hard to return the right exit status in a portable way.
-    if [ "x$check_rng_dir" = "x" ]; then
-        check_rng_dir=`mktemp -d "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    fi
-    check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-    check_rng_file=`(
-            echo '<xsl:stylesheet'
-            echo ' xmlns:cache="http://projectmallard.org/cache/1.0/"'
-            echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
-            echo ' version="1.0">'
-            echo '<xsl:output method="text"/>'
-            echo '<xsl:template match="/*">'
-            echo '<xsl:choose>'
-            echo '<xsl:when test="string(@version) != '"''"'">'
-            echo '<xsl:value-of select="@version"/>'
-            echo '</xsl:when>'
-            echo '<xsl:when test="/cache:cache">'
-            echo '<xsl:text>cache/1.0 1.0</xsl:text>'
-            echo '</xsl:when>'
-            echo '<xsl:otherwise>'
-            echo '<xsl:text>1.0</xsl:text>'
-            echo '</xsl:otherwise>'
-            echo '</xsl:choose>'
-            echo '</xsl:template>'
-            echo '</xsl:stylesheet>'
-            ) | xsltproc - "$1"`
-    check_rng_file=`urlencode "$check_rng_file" /`.rng
-    if [ ! -f "$check_rng_dir/$check_rng_file" ]; then
-        # If we've already made an RNG file for this version string, don't
-        # do it again. We've urlencoded the file name + slashes, because
-        # version strings often contain slashes. But xsltproc treats the
-        # -o option as a URL and urldecodes, so doubly urlencode, because
-        # we want the urlencoded string to be the on-disk name.
-        xsltproc -o "$check_rng_dir/"`urlencode "$check_rng_file"` \
-            --param rng.strict "$check_strict" \
-            --stringparam rng.strict.allow "$check_strict_allow" \
-            "$xsl_mal_rng" "$1"
-    fi
-    if [ "x$check_jing" = "x1" ]; then
-        jing -i "$check_rng_dir/$check_rng_file" "$1" > "$check_out_file" 2>&1
-    else
-        xmllint --noout --xinclude --noent --relaxng "$check_rng_dir/$check_rng_file" "$1" > "$check_out_file" 2>&1
-    fi
-    ret="$?"
-    cat "$check_out_file" | grep -v 'validates$'
-    rm "$check_out_file"
-    return $ret;
-}
-
-yelp_validate () {
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_validate
-        exit 1
-    fi
-    check_strict="false()"
-    check_strict_allow=""
-    while [ "$#" != "0" ]; do
-        case "$1" in
-            "-s")
-                check_site="1"
-                check_site_root=$(pwd)
-                shift
-                ;;
-            "--strict")
-                check_strict="true()"
-                shift
-                ;;
-            "--allow")
-                shift
-                check_strict_allow="$check_strict_allow $1"
-                shift
-                ;;
-            "--jing")
-                check_jing="1"
-                shift
-                ;;
-            *)
-                break
-                ;;
-        esac
-    done
-    if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
-        yelp_usage_validate
-        exit 1
-    fi
-    check_db=yelp_validate_db
-    check_page=yelp_validate_page
-    yelp_check_iter_args "$@"
-    if [ "x$check_rng_dir" != "x" ]; then
-        rm "$check_rng_dir"/*.rng
-        rmdir "$check_rng_dir"
-    fi
-    exit $yelp_check_retval
-}
-
-cmd="$1"
-shift
-case "x$cmd" in
-    "xcomments")
-        yelp_comments "$@"
-        ;;
-    "xhrefs")
-        yelp_hrefs "$@"
-        ;;
-    "xids")
-        yelp_ids "$@"
-        ;;
-    "xlinks")
-        yelp_links "$@"
-        ;;
-    "xorphans")
-        yelp_orphans "$@"
-        ;;
-    "xlicense")
-        yelp_license "$@"
-        ;;
-    "xmedia")
-        yelp_media "$@"
-        ;;
-    "xstatus")
-        yelp_status "$@"
-        ;;
-    "xstyle")
-        yelp_style "$@"
-        ;;
-    "xvalidate")
-        yelp_validate "$@"
-        ;;
-    *)
-        yelp_usage
-        ;;
-esac
+                if revok:
+                    if bestrev is None:
+                        bestrev = rev
+                        continue
+                    bestdate = bestrev.get('date')
+                    thisdate = rev.get('date')
+                    if bestdate is None:
+                        bestrev = rev
+                    elif thisdate is None:
+                        pass
+                    elif thisdate >= bestdate:
+                        bestrev = rev
+            if bestrev is not None:
+                status = bestrev.get('status') or 'none'
+                date = bestrev.get('date') or None
+            else:
+                status = 'none'
+                date = None
+            older = self.get_option_str('older')
+            if older is not None:
+                if date is None or date >= older:
+                    continue
+            newer = self.get_option_str('newer')
+            if newer is not None:
+                if date is None or date <= newer:
+                    continue
+            only = self.get_option_list('only')
+            if only is not None:
+                if status not in only:
+                    continue
+            cept = self.get_option_list('except')
+            if cept is not None:
+                if status in cept:
+                    continue
+            if self.get_option_bool('totals'):
+                totals.setdefault(status, 0)
+                totals[status] += 1
+            else:
+                print(infile.sitedir + pageid + ': ' + status)
+
+        if self.get_option_bool('totals'):
+            for st in sorted(totals):
+                print(st + ': ' + str(totals[st]))
+
+        return 0
+
+
+class StyleChecker (Checker):
+    """Report command listing the style attribute of each Mallard page."""
+    name = 'style'
+    desc = 'Report the style attribute of Mallard pages'
+    blurb = ('Report the page style attribute of the Mallard page files\n' +
+             'FILES. Each matching page is reported along with its style.')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('only',   None, 'STYLES', 'Only show pages whose style is in STYLES'),
+        ('except', None, 'STYLES', 'Exclude pages whose style is in STYLES'),
+        ('totals', None, None, 'Show total counts for each style')
+    ]
+    postblurb = 'STYLES may be comma- and/or space-separated lists, or specified\nmultiple times.'
+
+    def main(self, args):
+        """Print each page's style, or per-style totals with --totals.
+
+        Returns 1 if the arguments could not be parsed, otherwise 0.
+        """
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        totals = {}
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            # Fall back to the file name so a page without an id can still be printed
+            thisid = xml.getroot().get('id') or infile.filename
+            style = xml.getroot().get('style')
+            if style is None:
+                style = 'none'
+            # A page may carry several whitespace-separated styles
+            styles = style.split()
+            # We'll set style to None if it doesn't meet the criteria
+            only = self.get_option_list('only')
+            if only is not None:
+                if len(only) == 0:
+                    # We treat a blank --only as requesting pages with no style
+                    if style != 'none':
+                        style = None
+                elif not any(st in only for st in styles):
+                    style = None
+            cept = self.get_option_list('except')
+            if cept is not None:
+                # --except drops a page if any one of its styles is listed
+                if any(st in cept for st in styles):
+                    style = None
+            if self.get_option_bool('totals'):
+                if style is not None:
+                    for st in styles:
+                        totals.setdefault(st, 0)
+                        totals[st] += 1
+            else:
+                if style is not None:
+                    print(infile.sitedir + thisid + ': ' + style)
+
+        if self.get_option_bool('totals'):
+            for st in sorted(totals):
+                print(st + ': ' + str(totals[st]))
+
+        return 0
+
+
+class CustomChecker(Checker):
+    """User-defined check driven by a [check:NAME] section of the config file."""
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def __init__(self, name, yelpcheck):
+        super().__init__(yelpcheck)
+        self.name = name
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+
+        sect = 'check:' + self.name
+        if self.config is None or (sect not in self.config.sections()):
+            print('Unrecognized command: ' + self.name, file=sys.stderr)
+            return 1
+        self.blurb = self.config.get(sect, 'blurb', fallback=None)
+        if self.blurb is not None:
+            self.blurb = '\n'.join(textwrap.wrap(self.blurb))
+
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        assertexpr = self.config.get(sect, 'assert', fallback=None)
+        if assertexpr is not None:
+            return self.run_assert(assertexpr)
+
+        print('No action found for command: ' + self.name, file=sys.stderr)
+        return 1
+
+    def run_assert(self, assertexpr):
+        """Test the XPath assertexpr on each selected node; return 1 if any fails, else 0."""
+        sect = 'check:' + self.name
+        selectexpr = self.config.get(sect, 'select', fallback='/')
+        message = self.config.get(sect, 'message', fallback='Assertion failed')
+        self.xinclude = self.config.get(sect, 'xinclude', fallback='true') != 'false'
+
+        has_ns = self.config.has_section('namespaces')
+        namespaces = dict(self.config.items('namespaces')) if has_ns else {}
+
+        retcode = 0
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            thisid = xml.getroot().get('id') or infile.filename
+            # FIXME check these expressions and give better errors
+            for root in xml.xpath(selectexpr, namespaces=namespaces):
+                if not bool(root.xpath(assertexpr, namespaces=namespaces)):
+                    print(infile.sitedir + thisid + ': ' + message)
+                    retcode = 1
+        return retcode
+
+
+class YelpCheck:
+    """Command-line front end that dispatches to a Checker by command name."""
+
+    def __init__(self):
+        pass
+
+    def main(self):
+        """Run the command named by sys.argv[1] and return its exit status."""
+        if len(sys.argv) < 2:
+            self.print_usage()
+            return 1
+
+        checker = None
+        for cls in Checker.__subclasses__():
+            if sys.argv[1] == cls.name:
+                checker = cls(self)
+
+        if checker is None:
+            # Not a built-in command name: hand it to the config-driven checker
+            checker = CustomChecker(sys.argv[1], self)
+
+        return checker.main(sys.argv[2:])
+
+    def print_usage(self):
+        """Print built-in commands, then custom ones found in ./.yelp-tools.cfg."""
+        print('Usage: yelp-check <COMMAND> [OPTIONS] [FILES]')
+        namelen = 2
+        checks = []
+        reports = []
+        others = []
+        for cls in sorted(Checker.__subclasses__(), key=(lambda cls: cls.name or '')):
+            if cls is CustomChecker:
+                continue
+            namelen = max(namelen, len(cls.name) + 2)
+            if cls in (HrefsChecker, IdsChecker, LinksChecker,
+                       MediaChecker, OrphansChecker, ValidateChecker):
+                checks.append(cls)
+            elif cls in (CommentsChecker, LicenseChecker, StatusChecker,
+                         StyleChecker):
+                reports.append(cls)
+            else:
+                others.append(cls)
+        for title, group in (('Check', checks), ('Report', reports), ('Other', others)):
+            if group:
+                print('\n' + title + ' commands:')
+                for cls in group:
+                    print('  ' + cls.name.ljust(namelen) + cls.desc)
+        config = configparser.ConfigParser()
+        try:
+            config.read('.yelp-tools.cfg')
+        except (configparser.Error, UnicodeError):
+            # Best effort: an unparsable config just means no custom commands are listed
+            return
+        customs = []
+        for sect in config.sections():
+            if sect.startswith('check:'):
+                name = sect[6:]
+                # Skip sections whose name matches a built-in command
+                skip = False
+                for cls in Checker.__subclasses__():
+                    if name == cls.name:
+                        skip = True
+                        break
+                if skip:
+                    continue
+                if config.get(sect, 'assert', fallback=None) is None:
+                    continue
+                desc = config.get(sect, 'desc', fallback='')
+                namelen = max(namelen, len(name) + 2)
+                customs.append((name, desc))
+        if len(customs) > 0:
+            print('\nCustom commands:')
+            for name, desc in customs:
+                print('  ' + name.ljust(namelen) + desc)
+
+
+if __name__ == '__main__':
+    try:
+        sys.exit(YelpCheck().main())
+    except KeyboardInterrupt:
+        sys.exit(1)
diff --git a/tools/yelp-check.py b/tools/yelp-check.py
deleted file mode 100644
index a3eb486..0000000
--- a/tools/yelp-check.py
+++ /dev/null
@@ -1,1245 +0,0 @@
-#!/bin/python3
-#
-# yelp-check
-# Copyright (C) 2011-2020 Shaun McCance <shaunm@gnome.org>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-import configparser
-import lxml.etree
-import os
-import sys
-import urllib.request
-import shutil
-import subprocess
-import tempfile
-import textwrap
-
-# FIXME: don't hardcode this
-DATADIR = '/usr/share/yelp-tools'
-
-XML_ID = '{http://www.w3.org/XML/1998/namespace}id'
-NAMESPACES = {
-    'mal':   'http://projectmallard.org/1.0/',
-    'cache': 'http://projectmallard.org/cache/1.0/',
-    'db':    'http://docbook.org/ns/docbook',
-    'e':     'http://projectmallard.org/experimental/',
-    'ui':    'http://projectmallard.org/ui/1.0/',
-    'uix':   'http://projectmallard.org/experimental/ui/',
-    'xlink': 'http://www.w3.org/1999/xlink'
-    }
-
-def _stringify(el):
-    ret = el.text or ''
-    for ch in el:
-        ret = ret + _stringify(ch)
-    if el.tail is not None:
-        ret = ret + el.tail
-    return ret
-
-def get_format(node):
-    ns = lxml.etree.QName(node).namespace
-    if ns in (NAMESPACES['mal'], NAMESPACES['cache']):
-        return 'mallard'
-    elif ns == NAMESPACES['db']:
-        return 'docbook5'
-    elif ns is None:
-        # For now, just assume no ns means docbook4
-        return 'docbook4'
-    else:
-        return None
-
-class InputFile:
-    def __init__(self, filepath, filename, sitedir=None):
-        self.filepath = filepath
-        self.filename = filename
-        self.absfile = os.path.join(filepath, filename)
-        self.absdir = os.path.dirname(self.absfile)
-        self.sitedir = sitedir or ''
-        self.sitefilename = self.sitedir + self.filename
-
-
-class Checker:
-    name = None
-    desc = None
-    blurb = None
-    formats = []
-    arguments = []
-    postblurb = None
-    xinclude = True
-    config = None
-
-    def __init__(self, yelpcheck):
-        self.yelpcheck = yelpcheck
-        self.options = {}
-        self.fileargs = []
-        self.tmpdir = None
-
-    def __del__(self):
-        if self.tmpdir is not None:
-            shutil.rmtree(self.tmpdir)
-            self.tmpdir = None
-
-    def parse_args(self, args):
-        while len(args) > 0:
-            argdef = None
-            if args[0].startswith('--'):
-                for arg_ in self.arguments:
-                    if args[0] == '--' + arg_[0]:
-                        argdef = arg_
-                        break
-                if argdef is None:
-                    self.print_help()
-                    return 1
-            elif args[0].startswith('-'):
-                for arg_ in self.arguments:
-                    if args[0] == arg_[1]:
-                        argdef = arg_
-                        break
-                if argdef is None:
-                    self.print_help()
-                    return 1
-            if argdef is not None:
-                takesarg = (argdef[2] is not None)
-                if takesarg:
-                    if len(args) < 2:
-                        self.print_help()
-                        return 1
-                    self.options.setdefault(argdef[0], [])
-                    self.options[argdef[0]].append(args[1])
-                    args = args[2:]
-                else:
-                    self.options[argdef[0]] = True
-                    args = args[1:]
-            else:
-                self.fileargs.append(args[0])
-                args = args[1:]
-        cfgfile = None
-        if len(self.fileargs) > 0:
-            cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg')
-            if not os.path.exists(cfgfile):
-                cfgfile = None
-        if cfgfile is None:
-            cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg')
-        if os.path.exists(cfgfile):
-            self.config = configparser.ConfigParser()
-            try:
-                self.config.read(cfgfile)
-            except Exception as e:
-                print(e, file=sys.stderr)
-                sys.exit(1)
-        return 0
-
-    def get_option_bool(self, arg):
-        if arg in self.options:
-            return self.options[arg] == True
-        if self.config is not None:
-            val = self.config.get('check:' + self.name, arg, fallback=None)
-            if val is not None:
-                return (val == 'true')
-            val = self.config.get('check', arg, fallback=None)
-            if val is not None:
-                return (val == 'true')
-            val = self.config.get('default', arg, fallback=None)
-            if val is not None:
-                return (val == 'true')
-        return False
-
-    def get_option_str(self, arg):
-        if arg in self.options:
-            if isinstance(self.options[arg], list):
-                return self.options[arg][-1]
-        if self.config is not None:
-            val = self.config.get('check:' + self.name, arg, fallback=None)
-            if val is not None:
-                return val
-            val = self.config.get('check', arg, fallback=None)
-            if val is not None:
-                return val
-            val = self.config.get('default', arg, fallback=None)
-            if val is not None:
-                return val
-        return None
-
-    def get_option_list(self, arg):
-        if arg in self.options:
-            if isinstance(self.options[arg], list):
-                ret = []
-                for opt in self.options[arg]:
-                    ret.extend(opt.replace(',', ' ').split())
-                return ret
-        if self.config is not None:
-            val = self.config.get('check:' + self.name, arg, fallback=None)
-            if val is not None:
-                return val.replace(',', ' ').split()
-            val = self.config.get('check', arg, fallback=None)
-            if val is not None:
-                return val.replace(',', ' ').split()
-            val = self.config.get('default', arg, fallback=None)
-            if val is not None:
-                return val.replace(',', ' ').split()
-        return None
-
-    def iter_files(self, sitedir=None):
-        issite = self.get_option_bool('site')
-        if len(self.fileargs) == 0:
-            self.fileargs.append('.')
-        for filearg in self.fileargs:
-            if os.path.isdir(filearg):
-                if issite:
-                    for infile in self.iter_site(filearg, '/'):
-                        yield infile
-                else:
-                    for fname in os.listdir(filearg):
-                        if fname.endswith('.page'):
-                            yield InputFile(filearg, fname)
-            else:
-                if issite:
-                    # FIXME: should do some normalization here, I guess.
-                    # It's hard to get this perfect without a defined start dir
-                    yield InputFile(os.getcwd(), filearg, '/' + os.path.dirname(filearg))
-                else:
-                    yield InputFile(os.getcwd(), filearg)
-
-    def iter_site(self, filepath, sitedir):
-        for fname in os.listdir(filepath):
-            newpath = os.path.join(filepath, fname)
-            if os.path.isdir(newpath):
-                # FIXME https://github.com/projectmallard/pintail/issues/36
-                if fname == '__pintail__':
-                    continue
-                for infile in self.iter_site(newpath, sitedir + fname + '/'):
-                    yield infile
-            elif fname.endswith('.page'):
-                yield InputFile(filepath, fname, sitedir)
-
-    def get_xml(self, xmlfile):
-        # FIXME: we can cache these if we add a feature to run multiple
-        # checkers at once
-        tree = lxml.etree.parse(xmlfile.absfile)
-        if self.xinclude:
-            lxml.etree.XInclude()(tree.getroot())
-        return tree
-
-    def create_tmpdir(self):
-        if self.tmpdir is None:
-            self.tmpdir = tempfile.mkdtemp()
-
-    def print_help(self):
-        print('Usage:   yelp-check ' + self.name + ' [OPTIONS] [FILES]')
-        print('Formats: ' + ' '.join(self.formats) + '\n')
-        #FIXME: prettify names of formats
-        if self.blurb is not None:
-            print(self.blurb + '\n')
-        print('Options:')
-        maxarglen = 2
-        args = []
-        for arg in self.arguments:
-            argkey = '--' + arg[0]
-            if arg[1] is not None:
-                argkey = arg[1] + ', ' + argkey
-            if arg[2] is not None:
-                argkey = argkey + ' ' + arg[2]
-            args.append((argkey, arg[3]))
-        for arg in args:
-            maxarglen = max(maxarglen, len(arg[0]) + 1)
-        for arg in args:
-            print('  ' + (arg[0]).ljust(maxarglen) + '  ' + arg[1])
-        if self.postblurb is not None:
-            print(self.postblurb)
-
-    def main(self, args):
-        pass
-
-
-class HrefsChecker (Checker):
-    name = 'hrefs'
-    desc = 'Find broken external links in a document'
-    blurb = ('Find broken href links in FILES in a Mallard document, or\n' +
-             'broken ulink or XLink links in FILES in a DocBook document.')
-    formats = ['docbook4', 'docbook5', 'mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
-        ('allow', None, 'URL', 'Allow URL or list of URLs without checking')
-    ]
-    postblurb = 'URL may be a comma- and/or space-separated list, or specified\nmultiple times.'
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        # safelisting URLs that we use as identifiers
-        hrefs = {
-             'http://creativecommons.org/licenses/by-sa/3.0/': True,
-            'https://creativecommons.org/licenses/by-sa/3.0/': True,
-             'http://creativecommons.org/licenses/by-sa/3.0/us/': True,
-            'https://creativecommons.org/licenses/by-sa/3.0/us/': True
-        }
-        allow = self.get_option_list('allow')
-        if allow is not None:
-            for url in allow:
-                hrefs[url] = True
-        retcode = 0
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            for el in xml.xpath('//*[@href | @xlink:href | self::ulink/@url]',
-                                namespaces=NAMESPACES):
-                href = el.get('href', None)
-                if href is None:
-                    href = el.get('{www.w3.org/1999/xlink}href')
-                if href is None:
-                    href = el.get('url')
-                if href is None:
-                    continue
-                if href.startswith('mailto:'):
-                    continue
-                if href not in hrefs:
-                    try:
-                        req = urllib.request.urlopen(href)
-                        hrefs[href] = (req.status == 200)
-                    except Exception as e:
-                        hrefs[href] = False
-                if not hrefs[href]:
-                    retcode = 1
-                    print(infile.sitefilename + ': ' + href)
-
-        return retcode
-
-
-class IdsChecker (Checker):
-    name = 'ids'
-    desc = 'Find Mallard page IDs that do not match file names'
-    blurb = ('Find pages in a Mallard document whose page ID does not match\n' +
-             'the base file name of the page file.')
-    formats = ['mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
-    ]
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        retcode = 0
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            isok = False
-            pageid = None
-            if infile.filename.endswith('.page'):
-                try:
-                    pageid = xml.getroot().get('id')
-                    isok = (pageid == os.path.basename(infile.filename)[:-5])
-                except:
-                    isok = False
-            if not isok:
-                retcode = 1
-                print(infile.sitefilename + ': ' + (pageid or ''))
-
-        return retcode
-
-
-class LinksChecker (Checker):
-    name = 'links'
-    desc = 'Find broken xref or linkend links in a document'
-    blurb = ('Find broken xref links in FILES in a Mallard document,\n' +
-             'or broken linkend links in FILES in a DocBook document.')
-    formats = ['docbook4', 'docbook5', 'mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
-        ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
-        ('ignore', '-i', None, 'Ignore xrefs where href is present')
-    ]
-
-    def __init__(self, yelpcheck):
-        super().__init__(yelpcheck)
-        self.idstoxrefs = {}
-        self.idstolinkends = {}
-
-    def _accumulate_mal(self, node, pageid, sectid, xrefs, sitedir=None):
-        thisid = node.get('id')
-        if thisid is not None:
-            if node.tag == '{' + NAMESPACES['mal'] + '}page':
-                pageid = thisid
-            else:
-                sectid = thisid
-        curid = pageid
-        ignore = self.get_option_bool('ignore')
-        if curid is not None:
-            if sectid is not None:
-                # id attrs in cache files are already fully formed
-                if '#' in sectid:
-                    curid = sectid
-                else:
-                    curid = curid + '#' + sectid
-            if sitedir is not None:
-                # id attrs in cache files already have sitedir prefixed
-                if curid[0] != '/':
-                    curid = sitedir + curid
-            self.idstoxrefs.setdefault(curid, [])
-            if xrefs:
-                xref = node.get('xref')
-                if xref is not None:
-                    if not (ignore and (node.get('href') is not None)):
-                        self.idstoxrefs[curid].append(xref)
-        for child in node:
-            self._accumulate_mal(child, pageid, sectid, xrefs, sitedir)
-
-    def _accumulate_db(self, node, nodeid):
-        thisid = node.get('id')
-        if thisid is None:
-            thisid = node.get(XML_ID)
-        if thisid is not None:
-            nodeid = thisid
-            self.idstolinkends.setdefault(nodeid, [])
-        if nodeid is not None:
-            linkend = node.get('linkend')
-            if linkend is not None:
-                self.idstolinkends[nodeid].append(linkend)
-        for child in node:
-            self._accumulate_db(child, nodeid)
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        retcode = 0
-
-        cachefile = self.get_option_str('cache')
-        if cachefile is not None:
-            xml = self.get_xml(InputFile(os.getcwd(), cachefile))
-            self._accumulate_mal(xml.getroot(), None, None, False)
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            format = get_format(xml.getroot())
-            if format == 'mallard':
-                self._accumulate_mal(xml.getroot(), None, None, True, infile.sitedir)
-            elif format in ('docbook4', 'docbook5'):
-                # For DocBook, we assume each filearg is its own document, so
-                # we reset the dict each time and only check within the file.
-                # Note that XInclude and SYSTEM includes DO happen first.
-                self.idstolinkends = {}
-                self._accumulate_db(xml.getroot(), None)
-                for curid in self.idstolinkends:
-                    for linkend in self.idstolinkends[curid]:
-                        if linkend not in self.idstolinkends:
-                            print(curid + ': ' + linkend)
-                            retcode = 1
-
-        for curid in self.idstoxrefs:
-            for xref in self.idstoxrefs[curid]:
-                checkref = xref
-                if checkref[0] == '#':
-                    checkref = curid.split('#')[0] + checkref
-                if curid[0] == '/' and checkref[0] != '/':
-                    checkref = curid[:curid.rfind('/')+1] + checkref
-                if checkref not in self.idstoxrefs:
-                    print(curid + ': ' + xref)
-                    retcode = 1
-
-        return retcode
-
-
-class MediaChecker (Checker):
-    name = 'media'
-    desc = 'Find broken references to media files'
-    blurb = ('Find broken references to media files. In Mallard, this\n' +
-             'checks media and thumb elements. In DocBook, this checks\n' +
-             'audiodata, imagedata, and videodata elements.')
-    formats = ['docbook4', 'docbook5', 'mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
-    ]
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        retcode = 0
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            format = get_format(xml.getroot())
-            srcs = []
-            if format == 'mallard':
-                for el in xml.xpath('//mal:media[@src] | //uix:thumb | //ui:thumb | //e:mouseover',
-                                    namespaces=NAMESPACES):
-                    srcs.append(el.get('src'))
-            elif format == 'docbook5':
-                # FIXME: do we care about entityref?
-                for el in xml.xpath('//db:audiodata | //db:imagedata | //db:videodata',
-                                    namespaces=NAMESPACES):
-                    srcs.append(el.get('fileref'))
-            elif format == 'docbook4':
-                for el in xml.xpath('//audiodata | //imagedata | //videodata'):
-                    srcs.append(el.get('fileref'))
-            for src in srcs:
-                fsrc = os.path.join(infile.absdir, src)
-                if not os.path.exists(fsrc):
-                    print(infile.sitefilename + ': ' + src)
-                    retcode = 1
-
-        return retcode
-
-
-class OrphansChecker (Checker):
-    name = 'orphans'
-    desc = 'Find orphaned pages in a Mallard document'
-    blurb = ('Locate orphaned pages among FILES in a Mallard document.\n' +
-             'Orphaned pages are any pages that cannot be reached by\n' +
-             'topic links alone from the index page.')
-    formats = ['mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
-        ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE')
-    ]
-
-    def __init__(self, yelpcheck):
-        super().__init__(yelpcheck)
-        self.guidelinks = {}
-        self.sitesubdirs = set()
-
-    def _collect_links(self, node, sitedir):
-        pageid = node.get('id')
-        if pageid[0] != '/':
-            # id attrs in cache files already have sitedir prefixed
-            pageid = sitedir + pageid
-        else:
-            sitedir = pageid[:pageid.rfind('/')+1]
-        self.guidelinks.setdefault(pageid, set())
-        # For the purposes of finding orphans, we'll just pretend that
-        # all links to or from sections are just to or from pages.
-        for el in node.xpath('//mal:info/mal:link[@type="guide"]',
-                             namespaces=NAMESPACES):
-            xref = el.get('xref')
-            if xref is None or xref == '':
-                continue
-            if xref[0] == '#':
-                continue
-            if '#' in xref:
-                xref = xref[:xref.find('#')]
-            if sitedir is not None and sitedir != '':
-                if xref[0] != '/':
-                    xref = sitedir + xref
-            self.guidelinks[pageid].add(xref)
-        for el in node.xpath('//mal:info/mal:link[@type="topic"]',
-                             namespaces=NAMESPACES):
-            xref = el.get('xref')
-            if xref is None or xref == '':
-                continue
-            if xref[0] == '#':
-                continue
-            if '#' in xref:
-                xref = xref[:xref.find('#')]
-            if sitedir is not None and sitedir != '':
-                if xref[0] != '/':
-                    xref = sitedir + xref
-            self.guidelinks.setdefault(xref, set())
-            self.guidelinks[xref].add(pageid)
-        for el in node.xpath('//mal:links[@type="site-subdirs" or @type="site:subdirs"]',
-                             namespaces=NAMESPACES):
-            self.sitesubdirs.add(pageid)
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        retcode = 0
-
-        cachefile = self.get_option_str('cache')
-        if cachefile is not None:
-            xml = self.get_xml(InputFile(os.getcwd(), cachefile))
-            for page in xml.getroot():
-                if page.tag == '{' + NAMESPACES['mal'] + '}page':
-                    pageid = page.get('id')
-                    if pageid is None or pageid == '':
-                        continue
-                    self._collect_links(page, page.get('{http://projectmallard.org/site/1.0/}dir', ''))
-
-        pageids = set()
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            pageid = xml.getroot().get('id')
-            if pageid is None:
-                continue
-            pageids.add(infile.sitedir + pageid)
-            self._collect_links(xml.getroot(), infile.sitedir)
-
-        siteupdirs = {}
-        for pageid in self.sitesubdirs:
-            dirname = pageid[:pageid.rfind('/')+1]
-            for subid in self.guidelinks:
-                if subid.startswith(dirname):
-                    if subid.endswith('/index'):
-                        mid = subid[len(dirname):-6]
-                        if mid != '' and '/' not in mid:
-                            siteupdirs[subid] = pageid
-
-        if self.get_option_bool('site'):
-            okpages = set(['/index'])
-        else:
-            okpages = set(['index'])
-        for pageid in sorted(pageids):
-            if pageid in okpages:
-                isok = True
-            else:
-                isok = False
-                guides = [g for g in self.guidelinks[pageid]]
-                if pageid in siteupdirs:
-                    updir = siteupdirs[pageid]
-                    if updir not in guides:
-                        guides.append(updir)
-                cur = 0
-                while cur < len(guides):
-                    if guides[cur] in okpages:
-                        isok = True
-                        break
-                    if guides[cur] in self.guidelinks:
-                        for guide in self.guidelinks[guides[cur]]:
-                            if guide not in guides:
-                                guides.append(guide)
-                    cur += 1
-            if isok:
-                okpages.add(pageid)
-            else:
-                print(pageid)
-                retcode = 1
-
-        return retcode
-
-
-class ValidateChecker (Checker):
-    name = 'validate'
-    desc = 'Validate files against a DTD or RNG'
-    blurb = ('Validate FILES against the appropriate DTD or RNG.\n' +
-             'For Mallard pages, perform automatic RNG merging\n' +
-             'based on the version attribute.')
-    formats = ['docbook4', 'docbook5', 'mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
-        ('strict', None, None, 'Disallow unknown namespaces'),
-        ('allow', None, 'NS', 'Explicitly allow namespace NS in strict mode'),
-        ('jing', None, None, 'Use jing instead of xmllint for RNG validation')
-    ]
-    postblurb = 'NS may be a comma- and/or space-separated list, or specified\nmultiple times.'
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        retcode = 0
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            format = get_format(xml.getroot())
-            command = None
-            if format == 'mallard':
-                version = xml.getroot().get('version')
-                if version is None or version == '':
-                    tag = xml.getroot().tag
-                    if tag == '{' + NAMESPACES['mal'] + '}stack':
-                        # 1.2 isn't final yet as of 2020-01-09. Stacks will
-                        # likely be in 1.2, so we can assume at least that.
-                        version = '1.2'
-                    elif tag == '{' + NAMESPACES['cache'] + '}cache':
-                        version = 'cache/1.0'
-                    else:
-                        version = '1.0'
-                self.create_tmpdir()
-                rng = os.path.join(self.tmpdir,
-                                   version.replace('/', '__').replace(' ', '__'))
-                if not os.path.exists(rng):
-                    strict = 'true()' if self.get_option_bool('strict') else 'false()'
-                    allow = self.get_option_list('allow')
-                    if allow is None:
-                        allow = ''
-                    else:
-                        allow = ' '.join(allow)
-                    subprocess.call(['xsltproc', '-o', rng,
-                                    '--param', 'rng.strict', strict,
-                                    '--stringparam', 'rng.strict.allow', allow,
-                                    os.path.join(DATADIR, 'xslt', 'mal-rng.xsl'),
-                                    infile.absfile])
-                if self.get_option_bool('jing'):
-                    command = ['jing', '-i', rng, infile.filename]
-                else:
-                    command = ['xmllint', '--noout', '--xinclude', '--noent',
-                               '--relaxng', rng, infile.filename]
-            elif format == 'docbook4':
-                if xml.docinfo.doctype.startswith('<!DOCTYPE'):
-                    command = ['xmllint', '--noout', '--xinclude', '--noent',
-                               '--postvalid', infile.filename]
-                else:
-                    command = ['xmllint', '--noout', '--xinclude', '--noent',
-                               '--dtdvalid',
-                               'http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd',
-                               infile.filename]
-            elif format == 'docbook5':
-                version = xml.getroot().get('version')
-                if version is None or version == '':
-                    version = '5.0'
-                # Canonical URIs are http, but they 301 redirect to https. jing
-                # can handle https fine, but not the redirect. And jing doesn't
-                # look at catalogs. So just always feed jing an https URI.
-                rnghttp = 'http://docbook.org/xml/' + version + '/rng/docbook.rng'
-                rnghttps = 'https://docbook.org/xml/' + version + '/rng/docbook.rng'
-                if self.get_option_bool('jing'):
-                    command = ['jing', '-i', rnghttps, infile.filename]
-                else:
-                    # xmllint, on the other hand, does support catalogs. It also
-                    # doesn't do the redirect, but it wouldn't matter if it did
-                    # because it doesn't do https. So if the schema is available
-                    # locally in the catalog, hand xmllint the http URI so it
-                    # can use the local copy. Otherwise, we have to get curl
-                    # involved to do https.
-                    try:
-                        catfile = subprocess.check_output(['xmlcatalog',
-                                                           '/etc/xml/catalog',
-                                                           rnghttp],
-                                                          stderr=subprocess.DEVNULL,
-                                                          text=True)
-                        for catline in catfile.split('\n'):
-                            if catline.startswith('file://'):
-                                command = ['xmllint', '--noout', '--xinclude',  '--noent',
-                                           '--relaxng', rnghttp, infile.filename]
-                    except:
-                        pass
-                    if command is None:
-                        self.create_tmpdir()
-                        rngfile = os.path.join(self.tmpdir, 'docbook-' + version + '.rng')
-                        if not os.path.exists(rngfile):
-                            urllib.request.urlretrieve(rnghttps, rngfile)
-                        command = ['xmllint', '--noout', '--xinclude',  '--noent',
-                                   '--relaxng', rngfile, infile.filename]
-            if command is not None:
-                try:
-                    subprocess.check_output(command,
-                                            cwd=infile.filepath,
-                                            stderr=subprocess.STDOUT,
-                                            text=True)
-                except subprocess.CalledProcessError as e:
-                    retcode = e.returncode
-                    print(e.output)
-            else:
-                retcode = 1
-
-        return retcode
-
-
-class CommentsChecker (Checker):
-    name = 'comments'
-    desc = 'Print the editorial comments in a document'
-    blurb = ('Print the editorial comments in the files FILES, using the\n' +
-             'comment element in Mallard and the remark element in DocBook.')
-    formats = ['docbook4', 'docbook5', 'mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
-    ]
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            format = get_format(xml.getroot())
-            if format == 'mallard':
-                for el in xml.xpath('//mal:comment', namespaces=NAMESPACES):
-                    thisid = xml.getroot().get('id')
-                    par = el
-                    while par is not None:
-                        if par.tag == '{' + NAMESPACES['mal'] + '}section':
-                            sectid = par.get('id')
-                            if sectid is not None:
-                                thisid = thisid + '#' + sectid
-                                break
-                        par = par.getparent()
-                    print('Page:  ' + infile.sitedir + thisid)
-                    for ch in el.xpath('mal:cite[1]', namespaces=NAMESPACES):
-                        name = _stringify(ch).strip()
-                        href = ch.get('href')
-                        if href is not None and href.startswith('mailto:'):
-                            name = name + ' <' + href[7:] + '>'
-                        print('From:  ' + name)
-                        date = ch.get('date')
-                        if date is not None:
-                            print('Date:  ' + date)
-                    print('')
-                    for ch in el:
-                        if isinstance(ch, lxml.etree._ProcessingInstruction):
-                            continue
-                        elif ch.tag == '{' + NAMESPACES['mal'] + '}cite':
-                            continue
-                        elif ch.tag in ('{' + NAMESPACES['mal'] + '}p',
-                                        '{' + NAMESPACES['mal'] + '}title'):
-                            for s in _stringify(ch).strip().split('\n'):
-                                print('  ' + s.strip())
-                            print('')
-                        else:
-                            name = lxml.etree.QName(ch).localname
-                            print('  <' + name + '>...</' + name + '>\n')
-            elif format in ('docbook4', 'docbook5'):
-                if format == 'docbook4':
-                    dbxpath = '//remark'
-                else:
-                    dbxpath = '//db:remark'
-                for el in xml.xpath(dbxpath, namespaces=NAMESPACES):
-                    thisid = infile.filename
-                    par = el
-                    while par is not None:
-                        sectid = par.get('id')
-                        if sectid is None:
-                            sectid = par.get(XML_ID)
-                        if sectid is not None:
-                            thisid = thisid + '#' + sectid
-                            break
-                        par = par.getparent()
-                    print('Page:  ' + thisid)
-                    flag = el.get('revisionflag')
-                    if flag is not None:
-                        print('Flag:  ' + flag)
-                    print('')
-                    for s in _stringify(el).strip().split('\n'):
-                        print('  ' + s.strip())
-                    print('')
-
-        return 0
-
-
-class LicenseChecker (Checker):
-    name = 'license'
-    desc = 'Report the license of Mallard pages'
-    blurb = ('Report the license of the Mallard page files FILES. Each\n' +
-             'matching page is reporting along with its license, reported\n' +
-             'based on the href attribute of the license element. Common\n' +
-             'licenses use a shortened identifier. Pages with multiple\n' +
-             'licenses have the identifiers separated by spaces. Pages\n' +
-             'with no license element report \'none\'. Licenses with no\n' +
-             'href attribute are reported as \'unknown\'')
-    formats = ['mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
-        ('only', None, 'LICENSES', 'Only show pages whose license is in LICENSES'),
-        ('except', None, 'LICENSES', 'Exclude pages whose license is in LICENSES'),
-        ('totals', None, None, 'Show total counts for each license')
-    ]
-    postblurb = 'LICENSES may be a comma- and/or space-separated list, or specified\nmultiple times.'
-
-    def get_license(self, href):
-        if href is None:
-            return 'unknown'
-        elif (href.startswith('http://creativecommons.org/licenses/') or
-              href.startswith('https://creativecommons.org/licenses/')):
-            return 'cc-' + '-'.join([x for x in href.split('/') if x][3:])
-        elif (href.startswith('http://www.gnu.org/licenses/') or
-              href.startswith('https://www.gnu.org/licenses/')):
-            return href.split('/')[-1].replace('.html', '')
-        else:
-            return 'unknown'
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        totals = {}
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            thisid = xml.getroot().get('id') or infile.filename
-            licenses = []
-            for el in xml.xpath('/mal:page/mal:info/mal:license',
-                                namespaces=NAMESPACES):
-                licenses.append(self.get_license(el.get('href')))
-            if len(licenses) == 0:
-                licenses.append('none')
-
-            only = self.get_option_list('only')
-            if only is not None:
-                skip = True
-                for lic in licenses:
-                    if lic in only:
-                        skip = False
-                if skip:
-                    continue
-            cept = self.get_option_list('except')
-            if cept is not None:
-                skip = False
-                for lic in licenses:
-                    if lic in cept:
-                        skip = True
-                if skip:
-                    continue
-
-            if self.get_option_bool('totals'):
-                for lic in licenses:
-                    totals.setdefault(lic, 0)
-                    totals[lic] += 1
-            else:
-                print(infile.sitedir + thisid + ': ' + ' '.join(licenses))
-
-        if self.get_option_bool('totals'):
-            for lic in sorted(totals):
-                print(lic + ': ' + str(totals[lic]))
-
-        return 0
-
-
-class StatusChecker (Checker):
-    name = 'status'
-    desc = 'Report the status of Mallard pages'
-    blurb = ('Report the status of the Mallard page files FILES. Each\n' +
-             'matching page is reporting along with its status.')
-    formats = ['mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
-        ('version',    None, 'VER', 'Select revisions with the version attribute VER'),
-        ('docversion', None, 'VER', 'Select revisions with the docversion attribute VER'),
-        ('pkgversion', None, 'VER', 'Select revisions with the pkgversion attribute VER'),
-        ('older',  None, 'DATE', 'Only show pages older than DATE'),
-        ('newer',  None, 'DATE', 'Only show pages newer than DATE'),
-        ('only',   None, 'STATUSES', 'Only show pages whose status is in STATUSES'),
-        ('except', None, 'STATUSES', 'Exclude pages whose status is in STATUSES'),
-        ('totals', None, None, 'Show total counts for each status')
-    ]
-    postblurb = 'VER and STATUSES may be comma- and/or space-separated lists, or specified\nmultiple times.'
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        totals = {}
-
-        checks = []
-        ver = self.get_option_list('version')
-        if ver is not None:
-            checks.append(ver)
-        ver = self.get_option_list('docversion')
-        if ver is not None:
-            checks.append(['doc:' + v for v in ver])
-        ver = self.get_option_list('pkgversion')
-        if ver is not None:
-            checks.append(['pkg:' + v for v in ver])
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            pageid = xml.getroot().get('id')
-            bestrev = None
-            for rev in xml.xpath('/mal:page/mal:info/mal:revision', namespaces=NAMESPACES):
-                revversion = (rev.get('version') or '').split()
-                docversion = rev.get('docversion')
-                if docversion is not None:
-                    revversion.append('doc:' + docversion)
-                pkgversion = rev.get('pkgversion')
-                if pkgversion is not None:
-                    revversion.append('pkg:' + pkgversion)
-                revok = True
-                for check in checks:
-                    checkok = False
-                    for v in check:
-                        if v in revversion:
-                            checkok = True
-                            break
-                    if not checkok:
-                        revok = False
-                        break
-                if revok:
-                    if bestrev is None:
-                        bestrev = rev
-                        continue
-                    bestdate = bestrev.get('date')
-                    thisdate = rev.get('date')
-                    if bestdate is None:
-                        bestrev = rev
-                    elif thisdate is None:
-                        pass
-                    elif thisdate >= bestdate:
-                        bestrev = rev
-            if bestrev is not None:
-                status = bestrev.get('status') or 'none'
-                date = bestrev.get('date') or None
-            else:
-                status = 'none'
-                date = None
-            older = self.get_option_str('older')
-            if older is not None:
-                if date is None or date >= older:
-                    continue
-            newer = self.get_option_str('newer')
-            if newer is not None:
-                if date is None or date <= newer:
-                    continue
-            only = self.get_option_list('only')
-            if only is not None:
-                if status not in only:
-                    continue
-            cept = self.get_option_list('except')
-            if cept is not None:
-                if status in cept:
-                    continue
-            if self.get_option_bool('totals'):
-                totals.setdefault(status, 0)
-                totals[status] += 1
-            else:
-                print(infile.sitedir + pageid + ': ' + status)
-
-        if self.get_option_bool('totals'):
-            for st in sorted(totals):
-                print(st + ': ' + str(totals[st]))
-
-        return 0
-
-
-class StyleChecker (Checker):
-    name = 'style'
-    desc = 'Report the style attribute of Mallard pages'
-    blurb = ('Report the page style attribute of the Mallard page files\n' +
-             'FILES. Each matching page is reporting along with its status.')
-    formats = ['mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
-        ('only',   None, 'STYLES', 'Only show pages whose style is in STATUSES'),
-        ('except', None, 'STYLES', 'Exclude pages whose style is in STATUSES'),
-        ('totals', None, None, 'Show total counts for each style')
-    ]
-    postblurb = 'STYLES may be comma- and/or space-separated lists, or specified\nmultiple times.'
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        totals = {}
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            thisid = xml.getroot().get('id')
-            style = xml.getroot().get('style')
-            if style is None:
-                style = 'none'
-            styles = style.split()
-            # We'll set style to None if it doesn't meat the criteria
-            only = self.get_option_list('only')
-            if only is not None:
-                if len(only) == 0:
-                    # We treat a blank --only as requesting pages with no style
-                    if style != 'none':
-                        style = None
-                else:
-                    allow = False
-                    for st in styles:
-                        if st in only:
-                            allow = True
-                            break
-                    if not allow:
-                        style = None
-            cept = self.get_option_list('except')
-            if cept is not None:
-                for st in styles:
-                    if st in cept:
-                        style = None
-                        break
-            if self.get_option_bool('totals'):
-                if style is not None:
-                    for st in styles:
-                        totals.setdefault(st, 0)
-                        totals[st] += 1
-            else:
-                if style is not None:
-                    print(infile.sitedir + thisid + ': ' + style)
-
-        if self.get_option_bool('totals'):
-            for st in sorted(totals):
-                print(st + ': ' + str(totals[st]))
-
-        return 0
-
-
-class CustomChecker(Checker):
-    formats = ['docbook4', 'docbook5', 'mallard']
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
-    ]
-
-    def __init__(self, name, yelpcheck):
-        super().__init__(yelpcheck)
-        self.name = name
-
-    def main(self, args):
-        if self.parse_args(args) != 0:
-            return 1
-
-        sect = 'check:' + self.name
-        if sect not in self.config.sections():
-            print('Unrecognized command: ' + self.name, file=sys.stderr)
-            return 1
-        self.blurb = self.config.get(sect, 'blurb', fallback=None)
-        if self.blurb is not None:
-            self.blurb = '\n'.join(textwrap.wrap(self.blurb))
-
-        if 'help' in self.options:
-            self.print_help()
-            return 0
-
-        assertexpr = self.config.get(sect, 'assert', fallback=None)
-        if assertexpr is not None:
-            return self.run_assert(assertexpr)
-
-        print('No action found for command: ' + self.name, file=sys.stderr)
-        return 1
-
-    def run_assert(self, assertexpr):
-        sect = 'check:' + self.name
-        selectexpr = self.config.get(sect, 'select', fallback='/')
-        message = self.config.get(sect, 'message', fallback='Assertion failed')
-        self.xinclude = self.config.get(sect, 'xinclude', fallback='true') != 'false'
-
-        namespaces = {}
-        if 'namespaces' in self.config.sections():
-            for ns in self.config.options('namespaces'):
-                namespaces[ns] = self.config.get('namespaces', ns)
-
-        for infile in self.iter_files():
-            xml = self.get_xml(infile)
-            thisid = xml.getroot().get('id') or infile.filename
-            for root in xml.xpath(selectexpr, namespaces=namespaces):
-                if not bool(root.xpath(assertexpr, namespaces=namespaces)):
-                    print(infile.sitedir + thisid + ': ' + message)
-        # check if self.config has section check:self.name
-        # check if section has select, assert, message
-
-
-class YelpCheck:
-    def __init__(self):
-        pass
-
-    def main(self):
-        if len(sys.argv) < 2:
-            self.print_usage()
-            return 1
-
-        checker = None
-        for cls in Checker.__subclasses__():
-            if sys.argv[1] == cls.name:
-                checker = cls(self)
-
-        if checker is None:
-            checker = CustomChecker(sys.argv[1], self)
-
-        return checker.main(sys.argv[2:])
-
-    def print_usage(self):
-        print('Usage: yelp-check <COMMAND> [OPTIONS] [FILES]')
-        namelen = 2
-        checks = []
-        reports = []
-        others = []
-        for cls in sorted(Checker.__subclasses__(), key=(lambda cls: cls.name or '')):
-            if cls is CustomChecker:
-                continue
-            namelen = max(namelen, len(cls.name) + 2)
-            if cls in (HrefsChecker, IdsChecker, LinksChecker,
-                       MediaChecker, OrphansChecker, ValidateChecker):
-                checks.append(cls)
-            elif cls in (CommentsChecker, LicenseChecker, StatusChecker,
-                         StyleChecker):
-                reports.append(cls)
-            else:
-                others.append(cls)
-        if len(checks) > 0:
-            print('\nCheck commands:')
-            for cls in checks:
-                print('  ' + cls.name.ljust(namelen) + cls.desc)
-        if len(reports) > 0:
-            print('\nReport commands:')
-            for cls in reports:
-                print('  ' + cls.name.ljust(namelen) + cls.desc)
-        if len(others) > 0:
-            print('\nOther commands:')
-            for cls in others:
-                print('  ' + cls.name.ljust(namelen) + cls.desc)
-        config = configparser.ConfigParser()
-        try:
-            config.read('.yelp-tools.cfg')
-        except:
-            return
-        customs = []
-        for sect in config.sections():
-            if sect.startswith('check:'):
-                name = sect[6:]
-                skip = False
-                for cls in Checker.__subclasses__():
-                    if name == cls.name:
-                        skip = True
-                        break
-                if skip:
-                    continue
-                if config.get(sect, 'assert', fallback=None) == None:
-                    continue
-                desc = config.get(sect, 'desc', fallback='')
-                namelen = max(namelen, len(name) + 2)
-                customs.append((name, desc))
-        if len(customs) > 0:
-            print('\nCustom commands:')
-            for name, desc in customs:
-                print('  ' + name.ljust(namelen) + desc)
-
-
-if __name__ == '__main__':
-    try:
-        sys.exit(YelpCheck().main())
-    except KeyboardInterrupt:
-        sys.exit(1)
diff --git a/tools/yelp-new.in b/tools/yelp-new.in
index 76bd101..cb923d5 100755
--- a/tools/yelp-new.in
+++ b/tools/yelp-new.in
@@ -1,6 +1,7 @@
-#!/bin/sh
+#!/bin/python3
+#
 # yelp-new
-# Copyright (C) 2010 Shaun McCance <shaunm@gnome.org>
+# Copyright (C) 2010-2020 Shaun McCance <shaunm@gnome.org>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -16,147 +17,301 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 
-tmpldir="@YELP_TMPL_DIR@"
-
-yelp_describe_tmpl () {
-    line="  "`basename "$1" | sed -e 's/\.'$2'$//'`
-    desc=`cat "$f" | grep '<\?yelp-tmpl-desc' | sed -e 's/<?yelp-tmpl-desc //' -e 's/?>$//'`
-    if [ "x$desc" != "x" ]; then
-        line="$line - $desc"
-    fi
-    echo "$line"
-}
-
-yelp_get_extension () {
-    echo "$1" | awk -F . '{print $NF}'
-}
-
-yelp_usage() {
-    echo "Usage: yelp-new [OPTIONS] <TEMPLATE> <ID> [TITLE]"
-    echo ""
-    echo "Options:"
-    echo "  --stub  Create a .page.stub file instead of a .page file"
-    echo "  --tmpl  Copy an installed template to a local template"
-    if [ -f *.page.tmpl ]; then
-        echo ""
-        echo "Local Mallard Templates:"
-        for f in *.page.tmpl; do
-            yelp_describe_tmpl "$f" "page.tmpl"
-        done
-    fi
-    if [ -f ${tmpldir}*.page ]; then
-        echo ""
-        echo "Mallard Templates:"
-        for f in ${tmpldir}*.page; do
-            yelp_describe_tmpl "$f" "page"
-        done
-    fi
-    if [ -f *.docbook.tmpl ]; then
-        echo ""
-        echo "Local DocBook Templates:"
-        for f in *.docbook.tmpl; do
-            yelp_describe_tmpl "$f" "xml.tmpl"
-        done
-    fi
-    if [ -f ${tmpldir}*.docbook ]; then
-        echo ""
-        echo "DocBook Templates:"
-        for f in ${tmpldir}*.docbook; do
-            yelp_describe_tmpl "$f" "xml"
-        done
-    fi
-}
-
-if [ $# -lt 2 ]; then
-    yelp_usage
-    exit 1
-fi
-
-# Process options
-spec=""
-while [ $# -gt 0 ]; do
-    case "$1" in
-        --stub)
-            spec=".stub"
-            shift;;
-        --tmpl)
-            spec=".tmpl"
-            shift;;
-        -h | --help)
-            yelp_usage
-            exit 0;;
-        *)
-            break
-    esac
-done
-
-# Locate the template file
-if [ $(yelp_get_extension ${1}) = "tmpl" -a -f "${1}" ]; then
-    infile="${1}"
-    outext="."$(yelp_get_extension $(basename "${1}" ".tmpl"))
-elif [ -f "${1}.page.tmpl" ]; then
-    infile="${1}.page.tmpl"
-    outext=".page"
-elif [ -f "${tmpldir}${1}.page" ]; then
-    infile="${tmpldir}${1}.page"
-    outext=".page"
-elif [ -f "${1}.docbook.tmpl" ]; then
-    infile="${1}.docbook.tmpl"
-    outext=".docbook"
-elif [ -f "${tmpldir}${1}.docbook" ]; then
-    infile="${tmpldir}${1}.docbook"
-    outext=".docbook"
-else
-    echo "Error: No template named ${1} found"
-    exit 1
-fi
-
-# Set up some variables for substitution
-if type git >/dev/null 2>&1; then
-    username=`git config user.name`
-    useremail=`git config user.email`
-fi
-if [ "x$username" = "x" -a "x$useremail" = "x" ]; then
-    if type bzr >/dev/null 2>&1; then
-        username=`bzr whoami | sed -e 's/ <.*//'`
-        useremail=`bzr whoami --email`
-    fi
-fi
-if [ "x$username" = "x" -a "x$useremail" = "x" ]; then
-    username='YOUR NAME'
-    useremail='YOUR EMAIL ADDRESS'
-fi
-pagetitle="$3"
-if [ "x$pagetitle" = "x" ]; then
-    pagetitle="TITLE"
-fi
-
-outid=$(basename "${2}")
-
-if [ "x$spec" != "x" ]; then
-    if [ "."$(yelp_get_extension "${2}") = "$spec" ]; then
-        outfile="${2}"
-    elif [ "."$(yelp_get_extension "${2}") = "$outext" ]; then
-        outfile="${2}${spec}"
-    else
-        outfile="${2}${outext}${spec}"
-    fi
-elif [ "."$(yelp_get_extension ${2}) = "$outext" ]; then
-    outfile="${2}"
-else
-    outfile="${2}${outext}"
-fi
-
-if [ "x$spec" = "x.tmpl" ]; then
-    cp "$infile" "$outfile"
-else
-    cat "$infile" | grep -v '<\?yelp-tmpl-desc' | sed \
-        -e s/@ID@/"$outid"/ \
-        -e s/@DATE@/`date +%Y-%m-%d`/ \
-        -e s/@YEAR@/`date +%Y`/ \
-        -e s/@NAME@/"$username"/ \
-        -e s/@EMAIL@/"$useremail"/ \
-        -e s/@TITLE@/"$pagetitle"/ \
-        > "$outfile"
-fi
+import configparser
+import datetime
+import os
+import subprocess
+import sys
+
+
+DATADIR = '@DATADIR@'
+
+class YelpNew:
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('stub', None, None, 'Create a stub file with .stub appended'),
+        ('tmpl', None, None, 'Copy an installed template to a local template'),
+        ('version', '-v', 'VERS', 'Specify the version number to substitute')
+    ]
+
+    def __init__(self):
+        self.options, self.fileargs = {}, []
+        if self.parse_args(sys.argv[1:]):  # usage was printed; do not run
+            sys.exit(1)
+        self.config = configparser.ConfigParser()
+        try:
+            self.config.read('.yelp-tools.cfg')
+        except (configparser.Error, OSError, UnicodeDecodeError):
+            self.config = None  # unreadable config: fall back to defaults
+
+
+    def parse_args(self, args):
+        """Split command-line words into self.options and self.fileargs.
+
+        A word starting with '-' must match a long ('--name') or short
+        entry in self.arguments; an entry with a value name consumes the
+        next word. All other words are appended to self.fileargs. Prints
+        usage and returns 1 on an unknown option or a missing value.
+        """
+        pos = 0
+        while pos < len(args):
+            word = args[pos]
+            spec = None
+            if word.startswith('-'):
+                islong = word.startswith('--')
+                for cand in self.arguments:
+                    if word == ('--' + cand[0] if islong else cand[1]):
+                        spec = cand
+                        break
+                if spec is None:
+                    self.print_usage()
+                    return 1
+            if spec is None:
+                self.fileargs.append(word)
+                pos += 1
+            elif spec[2] is None:
+                self.options[spec[0]] = True
+                pos += 1
+            else:
+                if pos + 1 >= len(args):
+                    self.print_usage()
+                    return 1
+                # Values accumulate in a list, one per occurrence.
+                self.options.setdefault(spec[0], []).append(args[pos + 1])
+                pos += 2
+
+
+    def get_option_bool(self, arg):
+        """CLI flag, else [new]/[default] config value == 'true', else False."""
+        if arg in self.options:
+            return self.options[arg] == True
+        if self.config is None:
+            return False
+        for sect in ('new', 'default'):
+            val = self.config.get(sect, arg, fallback=None)
+            if val is not None:
+                return (val == 'true')
+        return False
+
+
+    def get_option_str(self, arg):
+        """Last CLI value, else [new]/[default] config value, else None."""
+        given = self.options.get(arg)
+        if isinstance(given, list):
+            return given[-1]
+        if self.config is None:
+            return None
+        for sect in ('new', 'default'):
+            val = self.config.get(sect, arg, fallback=None)
+            if val is not None:
+                return val
+        return None
+
+
+    def get_replacements(self, pageid):
+        """Return the dict of {{VAR}} substitutions for a new page.
+
+        Keys are ID, TITLE, DATE, YEAR, NAME, EMAIL and VERSION. NAME and
+        EMAIL come from `bzr whoami` when a .bzr directory is found before
+        a .git one while walking up from the working directory, otherwise
+        from `git config`; placeholders are used for whatever is missing.
+        """
+        def _capture(cmd):
+            # Stripped stdout of cmd, or None if it cannot run or fails.
+            try:
+                return subprocess.run(cmd, check=True, capture_output=True,
+                                      encoding='utf8').stdout.strip()
+            except (OSError, subprocess.SubprocessError, UnicodeError):
+                return None
+
+        repl = {'ID' : pageid}
+        if len(self.fileargs) > 2:
+            repl['TITLE'] = ' '.join(self.fileargs[2:])
+        else:
+            repl['TITLE'] = 'TITLE'
+        today = datetime.datetime.now()
+        repl['DATE'] = today.strftime('%Y-%m-%d')
+        repl['YEAR'] = today.strftime('%Y')
+
+        isbzr = False
+        cwd = os.getcwd()
+        while cwd:
+            if os.path.exists(os.path.join(cwd, '.git')):
+                break
+            if os.path.exists(os.path.join(cwd, '.bzr')):
+                isbzr = True
+                break
+            newcwd = os.path.dirname(cwd)
+            if newcwd == cwd:
+                break
+            cwd = newcwd
+        username = useremail = None
+        if isbzr:
+            name, sep, rest = (_capture(['bzr', 'whoami']) or '').partition('<')
+            if sep:
+                username = name.strip()
+                useremail = rest.split('>')[0].strip()
+        if username is None:
+            # Separate lookups: a missing user.email must not discard user.name.
+            username = _capture(['git', 'config', 'user.name'])
+            useremail = _capture(['git', 'config', 'user.email'])
+        repl['NAME'] = username or 'YOUR NAME'
+        repl['EMAIL'] = useremail or 'YOUR EMAIL ADDRESS'
+        repl['VERSION'] = self.get_option_str('version') or 'VERSION.NUMBER'
+        return repl
+
+
+    def main(self):
+        """Create the requested page, stub or local template file.
+
+        TEMPLATE is looked up as ./<name>.tmpl and then in the installed
+        templates directory. The output <ID><ext> gets {{VAR}} markers
+        substituted and {{INCLUDE file}} markers expanded; with --tmpl the
+        variables are left in place. Returns 0 after --help, 1 on a usage
+        error and None on success; exits with status 1 if the template is
+        missing or the output file already exists.
+        """
+        if 'help' in self.options or len(self.fileargs) < 2:
+            self.print_usage()
+            return 0 if 'help' in self.options else 1
+
+        tmpl = self.fileargs[0]
+        ext = '.duck' if tmpl.endswith('.duck') else '.page'
+        if not tmpl.endswith(ext):
+            # Unknown or no extension: Mallard page (ext used to stay unbound).
+            tmpl = tmpl + '.page'
+        if self.get_option_bool('stub'):
+            ext = ext + '.stub'
+        tmplfile = os.path.join(os.getcwd(), tmpl + '.tmpl')
+        if not os.path.exists(tmplfile):
+            tmplfile = os.path.join(DATADIR, 'templates', tmpl)
+            if not os.path.exists(tmplfile):
+                print('No template found named ' + tmpl, file=sys.stderr)
+                sys.exit(1)
+        pageid = self.fileargs[1]
+        istmpl = self.get_option_bool('tmpl')
+        if istmpl:
+            ext = ext + '.tmpl'
+            repl = {}
+        else:
+            repl = self.get_replacements(pageid)
+        def _writeout(outfile, infilename, depth=0):
+            if depth > 10:
+                print('Recursion limit reached for template includes', file=sys.stderr)
+                sys.exit(1)
+            with open(infilename) as infile:
+                for line in infile:
+                    if (not istmpl) and line.startswith(
+                            ('<?yelp-tmpl-desc', '[-] yelp-tmpl-desc')):
+                        continue
+                    while '{{' in line:
+                        before, _, rest = line.partition('{{')
+                        var, closed, rest = rest.partition('}}')
+                        if not closed:
+                            break  # unterminated marker: copy it verbatim
+                        outfile.write(before)
+                        if var.startswith('INCLUDE '):
+                            incl = os.path.join(os.path.dirname(infilename), var[8:].strip())
+                            _writeout(outfile, incl, depth=depth+1)
+                            if rest == '\n':
+                                rest = ''  # include on its own line: drop newline
+                        elif istmpl:
+                            outfile.write('{{' + var + '}}')
+                        else:
+                            outfile.write(repl.get(var, '{{' + var + '}}'))
+                        line = rest
+                    outfile.write(line)
+
+        if os.path.exists(pageid + ext):
+            print('Output file ' + pageid + ext + ' already exists', file=sys.stderr)
+            sys.exit(1)
+        with open(pageid + ext, 'w') as outfile:
+            _writeout(outfile, tmplfile)
+
+
+    def print_usage(self):
+        """Print the usage text, the options and every available template.
+
+        Local templates are *.page.tmpl and *.duck.tmpl files in the working
+        directory; installed ones are *.page and *.duck files in
+        DATADIR/templates that no local template shadows. Each name is
+        listed with the text of its yelp-tmpl-desc line, if it has one.
+        A directory or file that cannot be read is skipped, not fatal.
+        """
+        print('Usage: yelp-new [OPTIONS] <TEMPLATE> <ID> [TITLE]\n')
+        print('Create a new file from an installed or local template file,\n' +
+              'or create a new local template. TEMPLATE must be the name of\n' +
+              'an installed or local template. ID is a page ID (and base\n' +
+              'filename) for the new page. The optional TITLE argument\n'
+              'provides the page title\n')
+        print('Options:')
+        args = []
+        for arg in self.arguments:
+            argkey = '--' + arg[0]
+            if arg[1] is not None:
+                argkey = arg[1] + ', ' + argkey
+            if arg[2] is not None:
+                argkey = argkey + ' ' + arg[2]
+            args.append((argkey, arg[3]))
+        maxarglen = max([2] + [len(key) + 1 for key, _ in args])
+        for key, desc in args:
+            print('  ' + key.ljust(maxarglen) + '  ' + desc)
+
+        descs = {}
+
+        def _getdesc(fpath):
+            # Text of the template's yelp-tmpl-desc line, or '' without one.
+            try:
+                with open(fpath) as tmplfile:
+                    for line in tmplfile:
+                        if line.startswith('<?yelp-tmpl-desc '):
+                            s = line[16:].strip()
+                            if s.endswith('?>'):
+                                s = s[:-2]
+                            return s
+                        if line.startswith('[-] yelp-tmpl-desc'):
+                            return line[18:].strip()
+            except (OSError, UnicodeError):
+                pass  # unreadable template: still listed, just undescribed
+            return ''
+
+        def _scan(dirname, suffix, skip=()):
+            # Collect template names from dirname, keyed by '.page'/'.duck'.
+            # suffix ('.tmpl' or '') is dropped from each file name; names
+            # in skip are ignored. Descriptions are recorded in descs.
+            found = {'.page': [], '.duck': []}
+            try:
+                fnames = sorted(os.listdir(dirname))
+            except OSError:
+                return found  # e.g. nothing installed: list no templates
+            for fname in fnames:
+                name = fname[:len(fname) - len(suffix)]
+                kind = name[-5:]
+                if fname.endswith(suffix) and kind in found and name not in skip:
+                    found[kind].append(name)
+                    descs[name] = _getdesc(os.path.join(dirname, fname))
+            return found
+
+        local = _scan(os.getcwd(), '.tmpl')
+        installed = _scan(os.path.join(DATADIR, 'templates'), '',
+                          skip=local['.page'] + local['.duck'])
+        groups = [
+            ('Local Mallard', local['.page']),
+            ('Local Ducktype', local['.duck']),
+            ('Installed Mallard', installed['.page']),
+            ('Installed Ducktype', installed['.duck']),
+        ]
+        # One column width for all groups keeps the descriptions aligned.
+        maxlen = max([0] + [len(n) for _, names in groups for n in names])
+        for title, names in groups:
+            if names:
+                print('\n' + title + ' Templates:')
+                for name in names:
+                    print('  ' + name.ljust(maxlen) + '  ' + descs.get(name, ''))
+
 
+if __name__ == '__main__':
+    try:
+        sys.exit(YelpNew().main())  # main()'s return value is the exit status
+    except KeyboardInterrupt:
+        sys.exit(1)  # interrupted (Ctrl-C): fail without a traceback
diff --git a/tools/yelp-new.py b/tools/yelp-new.py
deleted file mode 100644
index 5b8f6f0..0000000
--- a/tools/yelp-new.py
+++ /dev/null
@@ -1,317 +0,0 @@
-#!/bin/python3
-#
-# yelp-new
-# Copyright (C) 2010-2020 Shaun McCance <shaunm@gnome.org>
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-import configparser
-import datetime
-import os
-import subprocess
-import sys
-
-# FIXME: don't hardcode this
-DATADIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'templates', 'py')
-
-class YelpNew:
-    arguments = [
-        ('help', '-h', None, 'Show this help and exit'),
-        ('stub', None, None, 'Create a stub file with .stub appended'),
-        ('tmpl', None, None, 'Copy an installed template to a local template'),
-        ('version', '-v', 'VERS', 'Specify the version number to substitute')
-    ]
-
-    def __init__(self):
-        self.options = {}
-        self.fileargs = []
-        self.parse_args(sys.argv[1:])
-        self.config = configparser.ConfigParser()
-        try:
-            self.config.read('.yelp-tools.cfg')
-        except:
-            self.config = None
-
-
-    def parse_args(self, args):
-        while len(args) > 0:
-            argdef = None
-            if args[0].startswith('--'):
-                for arg_ in self.arguments:
-                    if args[0] == '--' + arg_[0]:
-                        argdef = arg_
-                        break
-                if argdef is None:
-                    self.print_usage()
-                    return 1
-            elif args[0].startswith('-'):
-                for arg_ in self.arguments:
-                    if args[0] == arg_[1]:
-                        argdef = arg_
-                        break
-                if argdef is None:
-                    self.print_usage()
-                    return 1
-            if argdef is not None:
-                takesarg = (argdef[2] is not None)
-                if takesarg:
-                    if len(args) < 2:
-                        self.print_usage()
-                        return 1
-                    self.options.setdefault(argdef[0], [])
-                    self.options[argdef[0]].append(args[1])
-                    args = args[2:]
-                else:
-                    self.options[argdef[0]] = True
-                    args = args[1:]
-            else:
-                self.fileargs.append(args[0])
-                args = args[1:]
-
-
-    def get_option_bool(self, arg):
-        if arg in self.options:
-            return self.options[arg] == True
-        if self.config is not None:
-            val = self.config.get('new', arg, fallback=None)
-            if val is not None:
-                return (val == 'true')
-            val = self.config.get('default', arg, fallback=None)
-            if val is not None:
-                return (val == 'true')
-        return False
-
-
-    def get_option_str(self, arg):
-        if arg in self.options:
-            if isinstance(self.options[arg], list):
-                return self.options[arg][-1]
-        if self.config is not None:
-            val = self.config.get('new', arg, fallback=None)
-            if val is not None:
-                return val
-            val = self.config.get('default', arg, fallback=None)
-            if val is not None:
-                return val
-        return None
-
-
-    def get_replacements(self, pageid):
-        repl = {'ID' : pageid}
-        if len(self.fileargs) > 2:
-            repl['TITLE'] = ' '.join(self.fileargs[2:])
-        else:
-            repl['TITLE'] = 'TITLE'
-        today = datetime.datetime.now()
-        repl['DATE'] = today.strftime('%Y-%m-%d')
-        repl['YEAR'] = today.strftime('%Y')
-
-        username = None
-        useremail = None
-        isgit = False
-        isbzr = False
-        cwd = os.getcwd()
-        while cwd:
-            if os.path.exists(os.path.join(cwd, '.git')):
-                isgit = True
-                break
-            if os.path.exists(os.path.join(cwd, '.bzr')):
-                isbzr = True
-                break
-            newcwd = os.path.dirname(cwd)
-            if newcwd == cwd:
-                break
-            cwd = newcwd
-        if isbzr:
-            try:
-                who = subprocess.run(['bzr', 'whoami'], check=True,
-                                     capture_output=True, encoding='utf8')
-                username, useremail = who.stdout.split('<')
-                username = username.strip()
-                useremail = useremail.split('>')[0].strip()
-            except:
-                username = None
-                useremail = None
-        if username is None:
-            try:
-                who = subprocess.run(['git', 'config', 'user.name'], check=True,
-                                     capture_output=True, encoding='utf8')
-                username = who.stdout.strip()
-                who = subprocess.run(['git', 'config', 'user.email'], check=True,
-                                     capture_output=True, encoding='utf8')
-                useremail = who.stdout.strip()
-            except:
-                username = None
-                useremail = None
-        repl['NAME'] = username or 'YOUR NAME'
-        repl['EMAIL'] = useremail or 'YOUR EMAIL ADDRESS'
-        repl['VERSION'] = self.get_option_str('version') or 'VERSION.NUMBER'
-        return repl
-
-
-    def main(self):
-        if len(self.fileargs) < 2:
-            self.print_usage()
-            return 1
-
-        tmpl = self.fileargs[0]
-        if '.' not in tmpl:
-            tmpl = tmpl + '.page'
-            ext = '.page'
-        elif tmpl.endswith('.page'):
-            ext = '.page'
-        elif tmpl.endswith('.duck'):
-            ext = '.duck'
-        if self.get_option_bool('stub'):
-            ext = ext + '.stub'
-        tmplfile = os.path.join(os.getcwd(), tmpl + '.tmpl')
-        if not os.path.exists(tmplfile):
-            tmplfile = os.path.join(DATADIR, 'templates', tmpl)
-            if not os.path.exists(tmplfile):
-                print('No template found named ' + tmpl, file=sys.stderr)
-                sys.exit(1)
-        pageid = self.fileargs[1]
-        istmpl = self.get_option_bool('tmpl')
-        if istmpl:
-            ext = ext + '.tmpl'
-            repl = {}
-        else:
-            repl = self.get_replacements(pageid)
-        def _writeout(outfile, infilename, depth=0):
-            if depth > 10:
-                # We could do this smarter by keeping a stack of infilenames, but why?
-                print('Recursion limit reached for template includes', file=sys.stderr)
-                sys.exit(1)
-            for line in open(infilename):
-                if (not istmpl) and line.startswith('<?yelp-tmpl-desc'):
-                    continue
-                if (not istmpl) and line.startswith('[-] yelp-tmpl-desc'):
-                    continue
-                while line is not None and '{{' in line:
-                    before, after = line.split('{{', maxsplit=1)
-                    if '}}' in after:
-                        var, after = after.split('}}', maxsplit=1)
-                        outfile.write(before)
-                        isinclude = var.startswith('INCLUDE ')
-                        if isinclude:
-                            newfile = os.path.join(os.path.dirname(infilename), var[8:].strip())
-                            _writeout(outfile, newfile, depth=depth+1)
-                        elif istmpl:
-                            outfile.write('{{' + var + '}}')
-                        else:
-                            outfile.write(repl.get(var, '{{' + var + '}}'))
-                        if isinclude and after == '\n':
-                            line = None
-                        else:
-                            line = after
-                    else:
-                        outfile.write(line)
-                        line = None
-                if line is not None:
-                    outfile.write(line)
-
-        if os.path.exists(pageid + ext):
-            print('Output file ' + pageid + ext + ' already exists', file=sys.stderr)
-            sys.exit(1)
-        with open(pageid + ext, 'w') as outfile:
-            _writeout(outfile, tmplfile)
-
-
-    def print_usage(self):
-        print('Usage: yelp-new [OPTIONS] <TEMPLATE> <ID> [TITLE]\n')
-        print('Create a new file from an installed or local template file,\n' +
-              'or create a new local template. TEMPLATE must be the name of\n' +
-              'an installed or local template. ID is a page ID (and base\n' +
-              'filename) for the new page. The optional TITLE argument\n'
-              'provides the page title\n')
-        print('Options:')
-        maxarglen = 2
-        args = []
-        for arg in self.arguments:
-            argkey = '--' + arg[0]
-            if arg[1] is not None:
-                argkey = arg[1] + ', ' + argkey
-            if arg[2] is not None:
-                argkey = argkey + ' ' + arg[2]
-            args.append((argkey, arg[3]))
-        for arg in args:
-            maxarglen = max(maxarglen, len(arg[0]) + 1)
-        for arg in args:
-            print('  ' + (arg[0]).ljust(maxarglen) + '  ' + arg[1])
-        localpages = []
-        localducks = []
-        installedpages = []
-        installedducks = []
-        descs = {}
-        maxlen = 0
-        def _getdesc(fpath):
-            for line in open(fpath):
-                if line.startswith('<?yelp-tmpl-desc '):
-                    s = line[16:].strip()
-                    if s.endswith('?>'):
-                        s = s[:-2]
-                    return s
-                if line.startswith('[-] yelp-tmpl-desc'):
-                    return line[18:].strip()
-            return ''
-        for fname in os.listdir(os.getcwd()):
-            if fname.endswith('.page.tmpl'):
-                fname = fname[:-5]
-                maxlen = max(maxlen, len(fname))
-                localpages.append(fname)
-            elif fname.endswith('.duck.tmpl'):
-                fname = fname[:-5]
-                maxlen = max(maxlen, len(fname))
-                localducks.append(fname)
-            else:
-                continue
-            descs[fname] = _getdesc(os.path.join(os.getcwd(), fname + '.tmpl'))
-        for fname in os.listdir(os.path.join(DATADIR, 'templates')):
-            if fname.endswith('.page'):
-                if fname in localpages:
-                    continue
-                maxlen = max(maxlen, len(fname))
-                installedpages.append(fname)
-            elif fname.endswith('.duck'):
-                if fname in localducks:
-                    continue
-                maxlen = max(maxlen, len(fname))
-                installedducks.append(fname)
-            else:
-                continue
-            descs[fname] = _getdesc(os.path.join(DATADIR, 'templates', fname))
-        if len(localpages) > 0:
-            print('\nLocal Mallard Templates:')
-            for page in localpages:
-                print('  ' + page.ljust(maxlen) + '  ' + descs.get(page, ''))
-        if len(localducks) > 0:
-            print('\nLocal Ducktype Templates:')
-            for duck in localducks:
-                print('  ' + duck.ljust(maxlen) + '  ' + descs.get(duck, ''))
-        if len(installedpages) > 0:
-            print('\nInstalled Mallard Templates:')
-            for page in installedpages:
-                print('  ' + page.ljust(maxlen) + '  ' + descs.get(page, ''))
-        if len(installedducks) > 0:
-            print('\nInstalled Ducktype Templates:')
-            for duck in installedducks:
-                print('  ' + duck.ljust(maxlen) + '  ' + descs.get(duck, ''))
-
-
-if __name__ == '__main__':
-    try:
-        sys.exit(YelpNew().main())
-    except KeyboardInterrupt:
-        sys.exit(1)