diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/meson.build | 20 | ||||
-rwxr-xr-x | tools/yelp-build.in | 1403 | ||||
-rw-r--r-- | tools/yelp-build.py | 834 | ||||
-rwxr-xr-x | tools/yelp-check.in | 2438 | ||||
-rw-r--r-- | tools/yelp-check.py | 1245 | ||||
-rwxr-xr-x | tools/yelp-new.in | 445 | ||||
-rw-r--r-- | tools/yelp-new.py | 317 |
7 files changed, 2342 insertions, 4360 deletions
diff --git a/tools/meson.build b/tools/meson.build index b6b3956..35187ca 100644 --- a/tools/meson.build +++ b/tools/meson.build @@ -1,10 +1,7 @@ yelp_tools_in = configuration_data() yelp_tools_in.set('DATADIR', pkgdir) -yelp_tools_in.set('YELP_XSLT_DIR', yelp_xslt_dir) -yelp_tools_in.set('YELP_TMPL_DIR', join_paths(yelp_templates_dir, '')) # Hack for trailing '/' yelp_tools_in.set('YELP_JS_DIR', yelp_js_dir) -yelp_tools_in.set('YELP_ICON_DIR', yelp_icons_dir) yelp_tools_in.set('XSL_DB2HTML', yelp_db2html_path) yelp_tools_in.set('XSL_DB2XHTML', yelp_db2xhtml_path) @@ -19,21 +16,6 @@ yelp_tools_in.set('XSL_MAL_CACHE', join_paths( ) ) -yelp_tools_in.set('XSL_MAL_LINK', join_paths( - yelp_xslt_dir, - 'mallard', - 'common', - 'mal-link.xsl', - ) -) - -yelp_tools_in.set('ID', '@ID@') -yelp_tools_in.set('DATE', '@DATE@') -yelp_tools_in.set('YEAR', '@YEAR@') -yelp_tools_in.set('NAME', '@NAME@') -yelp_tools_in.set('EMAIL', '@EMAIL@') -yelp_tools_in.set('TITLE', '@TITLE@') - configure_file( input: 'yelp-build.in', output: 'yelp-build', @@ -65,4 +47,4 @@ if get_option('yelpm4') == true autoconfdatadir, ) ) -endif
\ No newline at end of file +endif diff --git a/tools/yelp-build.in b/tools/yelp-build.in index f3e21d8..d53db52 100755 --- a/tools/yelp-build.in +++ b/tools/yelp-build.in @@ -1,8 +1,7 @@ -#!/bin/sh -# -*- indent-tabs-mode: nil -*- +#!/bin/python3 # # yelp-build -# Copyright (C) 2010-2015 Shaun McCance <shaunm@gnome.org> +# Copyright (C) 2010-2020 Shaun McCance <shaunm@gnome.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,590 +17,816 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -xsl_mal_cache='@XSL_MAL_CACHE@' -xsl_db2html='@XSL_DB2HTML@' -xsl_db2xhtml='@XSL_DB2XHTML@' -xsl_mal2html='@XSL_MAL2HTML@' -xsl_mal2xhtml='@XSL_MAL2XHTML@' -yelp_icon_dir='@YELP_ICON_DIR@' -yelp_js_dir='@YELP_JS_DIR@' -xsl_mal_epub='@DATADIR@/xslt/mal-epub.xsl' -xsl_mal_opf='@DATADIR@/xslt/mal-opf.xsl' -xsl_mal_ncx='@DATADIR@/xslt/mal-ncx.xsl' -xsl_mal_files='@DATADIR@/xslt/mal-files.xsl' - -mkdir_p () { - if [ ! 
-d "$1" ]; then - mkdir_p `dirname "$1"` - mkdir "$1" - fi -} - -urlencode () { - LC_ALL=C awk ' -BEGIN { - for (i = 1; i <= 255; i++) chars[sprintf("%c", i)] = i; -} -{ - ret = ""; - for (i = 1; i <= length($0); i++) { - c = substr($0, i, 1); - if (c ~ /[\/a-zA-Z0-9._-]/) - ret = ret c; - else - ret = ret sprintf("%%%X%X", int(chars[c] / 16), chars[c] % 16); - } - print ret; -}' -} - -urldecode () { - LC_ALL=C awk ' -BEGIN { - for(i = 0; i < 10; i++) hex[i] = i; - hex["A"] = hex["a"] = 10; - hex["B"] = hex["b"] = 11; - hex["C"] = hex["c"] = 12; - hex["D"] = hex["d"] = 13; - hex["E"] = hex["e"] = 14; - hex["F"] = hex["f"] = 15; -} -{ - ret = ""; - for (i = 1; i <= length($0); i++) { - c = substr($0, i, 1); - if (c == "+") { - ret = ret " "; - } - else if (c == "%") { - c = sprintf("%c", hex[substr($0, i + 1, 1)] * 16 + hex[substr($0, i + 2, 1)]); - ret = ret c; - i += 2; - } - else { - ret = ret c; - } - } - print ret; -}' -} - -yelp_usage () { - ( - echo "Usage: yelp-build <COMMAND> [OPTIONS] [FILES]" - echo "" - echo "Commands:" - echo " cache Create a Mallard cache file" - echo " epub Create an EPUB file for Mallard" - echo " html Convert input files to HTML" - echo " xhtml Convert input files to XHTML" - ) 1>&2 -} -yelp_usage_cache () { - ( - echo "Usage: yelp-build cache <FILES>" - echo "" - echo " Create a Mallard cache file from the page files FILES." - echo " If FILES contains directories, all .page files in those" - echo " directories will be used." - echo "" - echo "Options:" - echo " -o OUT Output cache to the file OUT" - echo " -s Treat pages as belonging to a Mallard site" - ) 1>&2 -} -yelp_usage_epub () { - ( - echo "Usage: yelp-build epub [OPTIONS] <FILES>" - echo "" - echo " Create and EPUB file from the Mallard page files FILES." 
- echo "" - echo "Options:" - echo " -c CACHE Use the existing Mallard cache CACHE" - echo " -o OUT Output en EPUB file named OUT" - echo " -x CUSTOM Import the custom XSLT file CUSTOM" - echo " -p PATHS Extra paths to search for files" - echo " -i Ignore missing media files" - ) 1>&2 -} -yelp_usage_html () { - ( - echo "Usage: yelp-build <html|xhtml> [OPTIONS] <FILES>" - echo "" - echo " Create HTML or XHTML from the input files FILES." - echo " FILES can be DocBook files, Mallard page files," - echo " or directories containing Mallard page files." - echo "" - echo "Options:" - echo " -c CACHE Use the existing Mallard cache CACHE" - echo " -o OUT Output files in the directory OUT" - echo " -x CUSTOM Import the custom XSLT file CUSTOM" - echo " -p PATHS Extra paths to search for files" - echo " -i Ignore missing media files" - ) 1>&2 -} - -if [ $# = 0 ]; then - yelp_usage - exit 1 -fi - -yelp_paths_normalize () { - for path in $(echo "$1" | sed -e 's/[: ]/\n/g'); do - (cd "$path" && pwd) - done | tr '\n' ' ' -} - -yelp_cache_in_page () { - fbase=$(basename "$1") - ext=$(echo "$fbase" | sed -e 's/.*\.//') - fdir=$( (cd $(dirname "$1") && pwd) ) - sdir=${fdir##${cache_site_root}}/ - url=file://$(echo "$fdir/$fbase" | urlencode) - if [ "x$cache_site" = "x1" ]; then - siteattr=' site:dir="'"$sdir"'"' - fi - if [ "x$ext" = "xstack" ]; then - echo '<stack cache:href="'"$url"'"'"$siteattr"'/>' - else - echo '<page cache:href="'"$url"'"'"$siteattr"'/>' - fi -} - -yelp_cache_in_site () { - for dir in "$1"/*; do - if [ -d "$dir" ]; then - if [ $(basename "$dir") != "__pintail__" ]; then - yelp_cache_in_site "$dir" - fi - fi - done - for page in "$1"/*.page "$1"/*.stack; do - if [ -e "$page" ]; then - yelp_cache_in_page "$page" - fi - done -} - -yelp_cache_in () { - echo '<cache:cache xmlns:cache="http://projectmallard.org/cache/1.0/"' - if [ "x$cache_site" = "x1" ]; then - echo ' xmlns:site="http://projectmallard.org/site/1.0/"' - fi - echo ' 
xmlns="http://projectmallard.org/1.0/">' - for page in "$@"; do - if [ -d "$page" ]; then - if [ "x$cache_site" = "x1" ]; then - yelp_cache_in_site "$page" - else - for sub in "$page"/*.page "$page"/*.stack; do - if [ -e "$sub" ]; then - yelp_cache_in_page "$sub" - fi - done - fi - else - yelp_cache_in_page "$page" - fi - done - echo '</cache:cache>' -} - -yelp_cache () { - cache_out="index.cache" - while [ "$#" != "0" ]; do - case "$1" in - "-o") - shift - cache_out="$1" - shift - ;; - "-s") - cache_site="1" - cache_site_root=$(pwd) - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_cache - exit 1 - fi - yelp_cache_in "$@" | xsltproc --path "$html_paths" \ - --xinclude -o "$cache_out" \ - "$xsl_mal_cache" - -} - -yelp_html_xsl_common () { - if [ "x$html_internal_datadir" != "x" ]; then - echo '<xsl:variable name="yelp.internal.datadir" select="'"'$html_internal_datadir'"'"/>' - echo '<xsl:param name="html.css.root" select="$yelp.internal.datadir"/>' - echo '<xsl:param name="html.js.root" select="$yelp.internal.datadir"/>' - else - echo '<xsl:variable name="yelp.internal.datadir" select="'"''"'"/>' - fi - if [ "x$html_internal_xsl" != "x" ]; then - echo '<xsl:include href="file://'`echo "$html_internal_xsl" | urlencode`'"/>' - fi - if [ "x$html_custom" != "x" ]; then - echo '<xsl:include href="file://'`echo "$html_custom" | urlencode`'"/>' - fi - echo '<xsl:template name="html.css">' - echo ' <xsl:param name="node" select="."/>' - echo ' <xsl:variable name="yelp.locale">' - echo ' <xsl:choose>' - echo ' <xsl:when test="$node/@xml:lang != '"''"'">' - echo ' <xsl:value-of select="$node/@xml:lang"/>' - echo ' </xsl:when>' - echo ' <xsl:when test="$node/@lang != '"''"'">' - echo ' <xsl:value-of select="$node/@lang"/>' - echo ' </xsl:when>' - echo ' <xsl:otherwise>' - echo ' <xsl:text>C</xsl:text>' - echo ' </xsl:otherwise>' - echo ' </xsl:choose>' - echo ' </xsl:variable>' - echo ' <exsl:document 
href="{$yelp.internal.datadir}{$yelp.locale}.css" method="text">' - echo ' <xsl:call-template name="html.css.content">' - echo ' <xsl:with-param name="node" select="$node"/>' - echo ' <xsl:with-param name="direction">' - echo ' <xsl:call-template name="l10n.direction">' - echo ' <xsl:with-param name="lang" select="$yelp.locale"/>' - echo ' </xsl:call-template>' - echo ' </xsl:with-param>' - echo ' </xsl:call-template>' - echo ' </exsl:document>' - echo ' <link rel="stylesheet" type="text/css" href="{$html.css.root}{$yelp.locale}.css"/>' - echo '</xsl:template>' - echo '<xsl:template name="html.js.script">' - echo ' <xsl:param name="node" select="."/>' - echo ' <exsl:document href="{$yelp.internal.datadir}yelp.js" method="text">' - echo ' <xsl:call-template name="html.js.content">' - echo ' <xsl:with-param name="node" select="$node"/>' - echo ' </xsl:call-template>' - echo ' </exsl:document>' - echo ' <script type="text/javascript" src="{$html.js.root}yelp.js"/>' - echo '</xsl:template>' -} -yelp_html_db2html () { - for xml in "$@"; do - xmldir=`dirname "$xml"` - xmldir=`(cd "$xmldir" && pwd)` - # Output HTML - ( - echo '<xsl:stylesheet' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' xmlns:exsl="http://exslt.org/common"' - echo ' xmlns="http://www.w3.org/1999/xhtml"' - echo ' extension-element-prefixes="exsl"' - echo ' version="1.0">' - if [ "x$is_xhtml" = "x1" ]; then - xsl='file://'`echo "$xsl_db2xhtml" | urlencode` - else - xsl='file://'`echo "$xsl_db2html" | urlencode` - fi - echo '<xsl:import href="'"$xsl"'"/>' - yelp_html_xsl_common - echo '</xsl:stylesheet>' - ) | xsltproc --path "$html_paths" --xinclude -o "$html_out"/ - "$xml" - # Copy media - if [ "x$xmldir" != "x$html_out" ]; then - ( - echo '<xsl:stylesheet' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' xmlns:db="http://docbook.org/ns/docbook"' - echo ' exclude-result-prefixes="db"' - echo ' version="1.0">' - echo '<xsl:output method="text"/>' - echo 
'<xsl:template match="/">' - echo ' <xsl:for-each select="' - echo ' //audiodata | //imagedata | //videodata |' - echo ' //db:audiodata | //db:imagedata | //db:videodata">' - echo ' <xsl:value-of select="concat(@fileref, '"'
'"')"/>' - echo ' </xsl:for-each>' - echo '</xsl:template>' - echo '</xsl:stylesheet>' - ) | xsltproc --path "$html_paths" --xinclude - "$xml" | while read media; do - mfile=`echo "$media" | urldecode` - minput="$xmldir/$mfile" - moutput="$html_out/$mfile" - mkdir_p `dirname "$moutput"` - if [ ! -f "$minput" -a "x$html_paths" != "x" ]; then - minput_rel=${minput#"$(pwd)/"} - if [ "x$minput_rel" != "x$minput_src" ]; then - for path in $html_paths; do - if [ -f "$path/$minput_rel" ]; then - minput="$path/$minput_rel" - break - fi - done - fi - fi - if [ -f "$minput" -o "x$html_ignore_media" != "x1" ]; then - cp "$minput" "$moutput" - fi - done - fi - # Copy JavaScript - cp "${yelp_js_dir}/highlight.pack.js" "$html_out/$html_internal_datadir" - done -} - -yelp_html_mal2html () { - if [ "x$html_cache_file" != "x" ]; then - html_cache_file=`(cd $(dirname "$html_cache_file") && pwd)`/`basename "$html_cache_file"` - else - html_cache_file_is_tmp="yes" - html_cache_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - yelp_cache -o "$html_cache_file" "$@" - fi - html_tmp_infile=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - yelp_cache_in "$@" > "$html_tmp_infile" - # Output HTML - ( - echo '<xsl:stylesheet' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' xmlns:mal="http://projectmallard.org/1.0/"' - echo ' xmlns:cache="http://projectmallard.org/cache/1.0/"' - echo ' xmlns:exsl="http://exslt.org/common"' - echo ' xmlns="http://www.w3.org/1999/xhtml"' - echo ' exclude-result-prefixes="mal cache"' - echo ' extension-element-prefixes="exsl"' - echo ' version="1.0">' - if [ "x$is_xhtml" = "x1" ]; then - xsl='file://'`echo "$xsl_mal2xhtml" | urlencode` - else - xsl='file://'`echo "$xsl_mal2html" | urlencode` - fi - echo '<xsl:import href="'"$xsl"'"/>' - echo '<xsl:include href="'"$xsl_mal_files"'"/>' - echo '<xsl:output method="text"/>' - yelp_html_xsl_common - html_cache_url='file://'`echo "$html_cache_file" | urlencode` - echo '<xsl:param name="mal.cache.file" 
select="'"'$html_cache_url'"'"/>' - echo '<xsl:template match="/">' - echo '<xsl:for-each select="cache:cache/mal:page | cache:cache/mal:stack">' - echo '<xsl:variable name="href" select="@cache:href"/>' - echo '<xsl:for-each select="document(@cache:href)">' - echo '<xsl:for-each select="mal:page | mal:stack/mal:page">' - echo '<xsl:call-template name="html.output"/>' - echo '<xsl:call-template name="mal.files.copy">' - echo ' <xsl:with-param name="href" select="substring-after($href, '\''file://'\'')"/>' - echo '</xsl:call-template>' - echo '</xsl:for-each>' - echo '</xsl:for-each>' - echo '</xsl:for-each>' - echo '</xsl:template>' - echo '</xsl:stylesheet>' - ) | (cd "$html_out" && xsltproc $html_profile \ - --path "$html_paths" --xinclude \ - - "$html_tmp_infile") | sort | uniq | \ - while read line; do - # Copy media from paths output by HTML transform - line_src=$(echo "$line" | cut -d' ' -f1 | urldecode) - line_dest="$html_out/"$(echo "$line" | cut -d' ' -f2) - if [ ! -f "$line_src" -a "x$html_paths" != "x" ]; then - line_src_rel=${line_src#"$(pwd)/"} - if [ "x$line_src_rel" != "x$line_src" ]; then - for path in $html_paths; do - if [ -f "$path/$line_src_rel" ]; then - line_src="$path/$line_src_rel" +import configparser +import os +import sys +import shutil +import subprocess +import tempfile +import urllib.parse +import uuid + +import lxml.etree +import lxml.ElementInclude + + +XSL_DB2HTML = '@XSL_DB2HTML@' +XSL_DB2XHTML = '@XSL_DB2XHTML@' +XSL_MALCACHE = '@XSL_MAL_CACHE@' +XSL_MAL2HTML = '@XSL_MAL2HTML@' +XSL_MAL2XHTML = '@XSL_MAL2XHTML@' +XSL_MAL_OPF='@DATADIR@/xslt/mal-opf.xsl' +XSL_MAL_NCX='@DATADIR@/xslt/mal-ncx.xsl' +YELP_JS_DIR = '@YELP_JS_DIR@' + +XSLCOMMON = (''' +<xsl:variable name="yelp.internal.datadir" select="'{intdatadir}'"/> +<xsl:param name="html.css.root" select="$yelp.internal.datadir"/> +<xsl:param name="html.js.root" select="$yelp.internal.datadir"/> +{includes} +<xsl:template name="html.css"> + <xsl:param name="node" select="."/> + 
<xsl:variable name="yelp.locale"> + <xsl:choose> + <xsl:when test="$node/@xml:lang != ''"> + <xsl:value-of select="$node/@xml:lang"/> + </xsl:when> + <xsl:when test="$node/@lang != ''"> + <xsl:value-of select="$node/@lang"/> + </xsl:when> + <xsl:otherwise> + <xsl:text>C</xsl:text> + </xsl:otherwise> + </xsl:choose> + </xsl:variable> + <exsl:document href="{{$yelp.internal.datadir}}{{$yelp.locale}}.css" method="text"> + <xsl:call-template name="html.css.content"> + <xsl:with-param name="node" select="$node"/> + <xsl:with-param name="direction"> + <xsl:call-template name="l10n.direction"> + <xsl:with-param name="lang" select="$yelp.locale"/> + </xsl:call-template> + </xsl:with-param> + </xsl:call-template> + </exsl:document> + <link rel="stylesheet" type="text/css" href="{{$html.css.root}}{{$yelp.locale}}.css"/> +</xsl:template> +<xsl:template name="html.js.script"> + <xsl:param name="node" select="."/> + <exsl:document href="{{$yelp.internal.datadir}}yelp.js" method="text"> + <xsl:call-template name="html.js.content"> + <xsl:with-param name="node" select="$node"/> + </xsl:call-template> + </exsl:document> + <script type="text/javascript" src="{{$html.js.root}}yelp.js"/> +</xsl:template> +''') + +DB2HTML = (''' +<xsl:stylesheet + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:exsl="http://exslt.org/common" + xmlns="http://www.w3.org/1999/xhtml" + extension-element-prefixes="exsl" + version="1.0"> +<xsl:import href="file://{xslfile}"/> +''' ++ XSLCOMMON + +''' +</xsl:stylesheet> +''') + + +MAL2HTML = (''' +<xsl:stylesheet + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:mal="http://projectmallard.org/1.0/" + xmlns:cache="http://projectmallard.org/cache/1.0/" + xmlns:exsl="http://exslt.org/common" + xmlns="http://www.w3.org/1999/xhtml" + exclude-result-prefixes="mal cache" + extension-element-prefixes="exsl" + version="1.0"> +<xsl:import href="file://{xslfile}"/> +<xsl:param name="mal.cache.file" select="'file://{cachefile}'"/> +''' ++ XSLCOMMON + 
+''' +<xsl:template match="/"> + <xsl:for-each select="cache:cache/mal:page | cache:cache/mal:stack"> + <xsl:variable name="href" select="@cache:href"/> + <xsl:for-each select="document(@cache:href)"> + <xsl:for-each select="mal:page | mal:stack/mal:page"> + <xsl:call-template name="html.output"/> + </xsl:for-each> + </xsl:for-each> + </xsl:for-each> +</xsl:template> +</xsl:stylesheet> +''') + + + +class InputFile: + def __init__(self, filepath, filename, sitedir=None): + self.filepath = filepath + self.filename = filename + self.absfile = os.path.join(filepath, filename) + self.absdir = os.path.dirname(self.absfile) + self.sitedir = sitedir or '' + self.sitefilename = self.sitedir + self.filename + + +class PathResolver(lxml.etree.Resolver): + def __init__(self, srcdir, path): + if srcdir.endswith('/'): + self.srcdir = srcdir + else: + self.srcdir = srcdir + '/' + self.path = path + + def resolve(self, uri, id, context): + if os.path.exists(uri): + return self.resolve_filename(uri, context) + if uri.startswith(self.srcdir): + ref = uri[len(self.srcdir):] + else: + ref = uri + for p in self.path: + tryfile = os.path.join(p, ref) + if os.path.exists(tryfile): + return self.resolve_filename(tryfile, context) + return None + + +class Builder: + name = None + desc = None + blurb = None + formats = [] + arguments = [] + postblurb = None + config = None + + def __init__(self, yelpbuild): + self.yelpbuild = yelpbuild + self.options = {} + self.fileargs = [] + self.tmpdir = None + + def __del__(self): + if self.tmpdir is not None: + shutil.rmtree(self.tmpdir) + self.tmpdir = None + + def parse_args(self, args): + while len(args) > 0: + argdef = None + if args[0].startswith('--'): + for arg_ in self.arguments: + if args[0] == '--' + arg_[0]: + argdef = arg_ + break + if argdef is None: + self.print_help() + return 1 + elif args[0].startswith('-'): + for arg_ in self.arguments: + if args[0] == arg_[1]: + argdef = arg_ + break + if argdef is None: + self.print_help() + return 
1 + if argdef is not None: + takesarg = (argdef[2] is not None) + if takesarg: + if len(args) < 2: + self.print_help() + return 1 + self.options.setdefault(argdef[0], []) + self.options[argdef[0]].append(args[1]) + args = args[2:] + else: + self.options[argdef[0]] = True + args = args[1:] + else: + self.fileargs.append(args[0]) + args = args[1:] + cfgfile = None + if len(self.fileargs) > 0: + cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg') + if not os.path.exists(cfgfile): + cfgfile = None + if cfgfile is None: + cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg') + if os.path.exists(cfgfile): + self.config = configparser.ConfigParser() + try: + self.config.read(cfgfile) + except Exception as e: + print(e, file=sys.stderr) + sys.exit(1) + return 0 + + def get_option_bool(self, arg): + if arg in self.options: + return self.options[arg] == True + if self.config is not None: + val = self.config.get('build:' + self.name, arg, fallback=None) + if val is not None: + return (val == 'true') + val = self.config.get('build', arg, fallback=None) + if val is not None: + return (val == 'true') + val = self.config.get('default', arg, fallback=None) + if val is not None: + return (val == 'true') + return False + + def get_option_str(self, arg): + if arg in self.options: + if isinstance(self.options[arg], list): + return self.options[arg][-1] + if self.config is not None: + val = self.config.get('build:' + self.name, arg, fallback=None) + if val is not None: + return val + val = self.config.get('build', arg, fallback=None) + if val is not None: + return val + val = self.config.get('default', arg, fallback=None) + if val is not None: + return val + return None + + def get_option_list(self, arg): + if arg in self.options: + if isinstance(self.options[arg], list): + ret = [] + for opt in self.options[arg]: + ret.extend(opt.replace(',', ' ').split()) + return ret + if self.config is not None: + val = self.config.get('build:' + self.name, arg, 
fallback=None) + if val is not None: + return val.replace(',', ' ').split() + val = self.config.get('build', arg, fallback=None) + if val is not None: + return val.replace(',', ' ').split() + val = self.config.get('default', arg, fallback=None) + if val is not None: + return val.replace(',', ' ').split() + return None + + def get_xml(self, infile, path): + parser = lxml.etree.XMLParser() + parser.resolvers.add(PathResolver(os.path.realpath(infile.absdir), path)) + tree = lxml.etree.parse(infile.absfile, parser=parser) + def pathloader(href, parse, encoding=None): + usefile = os.path.join(infile.absdir, href) + if not os.path.exists(href): + usefile = None + if usefile is None: + absdir = infile.absdir + if not absdir.endswith('/'): + absdir = absdir + '/' + ref = href + if ref.startswith(absdir): + ref = ref[len(absdir):] + for p in path: + tryfile = os.path.join(p, ref) + if os.path.exists(tryfile): + usefile = tryfile + break + if usefile is not None: + if parse == 'xml': + return lxml.etree.parse(usefile, parser=parser).getroot() + elif parse == 'text': + return open(usefile).read() + return None + lxml.ElementInclude.include(tree, loader=pathloader) + return tree + + def iter_files(self, sitedir=None): + issite = self.get_option_bool('site') + if len(self.fileargs) == 0: + self.fileargs.append('.') + for filearg in self.fileargs: + if os.path.isdir(filearg): + if issite: + for infile in self.iter_site(filearg, '/'): + yield infile + else: + for fname in os.listdir(filearg): + if fname.endswith('.page'): + yield InputFile(filearg, fname) + else: + if issite: + # FIXME: should do some normalization here, I guess. 
+ # It's hard to get this perfect without a defined start dir + yield InputFile(os.getcwd(), filearg, '/' + os.path.dirname(filearg)) + else: + yield InputFile(os.getcwd(), filearg) + + def iter_site(self, filepath, sitedir): + for fname in os.listdir(filepath): + newpath = os.path.join(filepath, fname) + if os.path.isdir(newpath): + # FIXME https://github.com/projectmallard/pintail/issues/36 + if fname == '__pintail__': + continue + for infile in self.iter_site(newpath, sitedir + fname + '/'): + yield infile + elif fname.endswith('.page'): + yield InputFile(filepath, fname, sitedir) + + def create_tmpdir(self): + if self.tmpdir is None: + self.tmpdir = tempfile.mkdtemp() + + def print_help(self): + print('Usage: yelp-build ' + self.name + ' [OPTIONS] [FILES]') + print('Formats: ' + ' '.join(self.formats) + '\n') + #FIXME: prettify names of formats + if self.blurb is not None: + print(self.blurb + '\n') + print('Options:') + maxarglen = 2 + args = [] + for arg in self.arguments: + argkey = '--' + arg[0] + if arg[1] is not None: + argkey = arg[1] + ', ' + argkey + if arg[2] is not None: + argkey = argkey + ' ' + arg[2] + args.append((argkey, arg[3])) + for arg in args: + maxarglen = max(maxarglen, len(arg[0]) + 1) + for arg in args: + print(' ' + (arg[0]).ljust(maxarglen) + ' ' + arg[1]) + if self.postblurb is not None: + print(self.postblurb) + + def main(self, args): + pass + + +class CacheBuilder (Builder): + name = 'cache' + desc = 'Convert a Mallard cache file' + blurb = ('Create a Mallard cache file from the page files FILES.\n' + + 'If FILES contains directories, all .page files in those\n' + + 'directories will be used.') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('output', '-o', 'OUT', 'Output files in the directory OUT'), + ('path', '-p', 'PATH', 'Extra directories to search for files'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site') + ] + + def build_cache_in(self, filename): + with 
open(filename, 'w') as cachein: + print('<cache:cache xmlns:cache="http://projectmallard.org/cache/1.0/"' + + ' xmlns:site="http://projectmallard.org/site/1.0/"' + ' xmlns="http://projectmallard.org/1.0/">', + file=cachein) + for infile in self.iter_files(): + if infile.filename.endswith('.page'): + page = '<page' + elif infile.filename.endswith('.stack'): + page = '<stack' + else: + continue + page += ' cache:href="file://' + urllib.parse.quote(os.path.realpath(infile.absfile)) + '"' + if self.get_option_bool('site'): + page += ' site:dir="' + infile.sitedir + '"' + page += '/>' + print(page, file=cachein) + print('</cache:cache>', file=cachein) + + def main(self, args, output=None, path=None): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + retcode = 0 + self.create_tmpdir() + cacheinfile = os.path.join(self.tmpdir, 'index.cache.in') + self.build_cache_in(cacheinfile) + if output is None: + output = self.get_option_str('output') + if output is None: + output = 'index.cache' + if path is None: + path = self.get_option_list('path') + if path is None: + path = ':' + else: + path = ':'.join(path) + retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output, + '--path', path, + XSL_MALCACHE, cacheinfile]) + return retcode + + +class XhtmlBuilder (Builder): + name = 'xhtml' + desc = 'Convert input files to XHTML' + blurb = ('Create XHTML output from the input files FILES.\n' + + 'FILES can be DocBook files, Mallard page files,\n' + + 'or directories containing Mallard page files.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), + ('output', '-o', 'OUT', 'Output files in the directory OUT'), + ('xsl', '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'), + ('path', '-p', 'PATH', 'Extra directories to search for files'), + ('ignore', '-i', None, 'Ignore missing media 
files') + ] + + def __init__(self, yelpbuild, xhtml=True, epub=False): + super().__init__(yelpbuild) + self.mal2html = None + self.db2html = None + self.xhtml = xhtml + self.epub = epub + if self.epub: + self.intdatadir = 'yelp' + else: + self.intdatadir = '' + self.cacheinfile = None + + + def build_mallard_all(self, cache=None, output=None, xsl=None, path=None): + if self.mal2html is not None: + # We build all the pages on the first call, because it's faster + return 0 + if path is None: + path = self.get_option_list('path') + self.create_tmpdir() + if cache is None: + cachefile = self.get_option_str('cache') + else: + cachefile = cache + cachebuilder = CacheBuilder(self.yelpbuild) + if cachefile is None: + cachefile = os.path.join(self.tmpdir, 'index.cache') + retcode = cachebuilder.main(self.fileargs, output=cachefile, path=path) + if retcode != 0: + return retcode + self.cacheinfile = cachefile + else: + cachefile = os.path.realpath(cachefile) + self.cacheinfile = os.path.join(self.tmpdir, 'index.cache.in') + cachebuilder.parse_args(self.fileargs) + cachebuilder.build_cache_in(self.cacheinfile) + self.mal2html = os.path.join(self.tmpdir, 'mal2html.xsl') + with open(self.mal2html, 'w') as xslout: + if self.xhtml: + xslfile = XSL_MAL2XHTML + else: + xslfile = XSL_MAL2HTML + includes = '' + if xsl is None: + customxsl = self.get_option_str('xsl') + else: + customxsl = xsl + if customxsl is not None: + customxsl = urllib.parse.quote(os.path.realpath(customxsl)) + includes += '<xsl:include href="file://' + customxsl + '"/>' + if self.epub: + includes += '''<xsl:param name="mal.if.target" select="'target:epub target:html target:xhtml'"/>''' + includes += '''<xsl:template mode="html.header.mode" match="mal:page"/>''' + includes += '''<xsl:template mode="html.footer.mode" match="mal:page"/>''' + xslout.write(MAL2HTML.format(xslfile=xslfile, + cachefile=cachefile, + includes=includes, + intdatadir=self.intdatadir)) + if output is None: + output = 
self.get_option_str('output') + if output is None: + output = os.getcwd() + else: + if not os.path.isdir(output): + print('Output must be a directory', file=sys.stderr) + return 1 + if not output.endswith('/'): + # xsltproc is picky about this + output = output + '/' + if path is None: + pathstr = ':' + else: + pathstr = ':'.join(path) + retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output, + '--path', pathstr, + '--stringparam', 'mal.cache.file', cachefile, + self.mal2html, self.cacheinfile]) + return retcode + + + def build_docbook(self, infile, output=None, xsl=None, path=None): + if self.db2html is None: + self.create_tmpdir() + self.db2html = os.path.join(self.tmpdir, 'db2html.xsl') + with open(self.db2html, 'w') as xslout: + if self.xhtml: + xslfile = XSL_DB2XHTML + else: + xslfile = XSL_DB2HTML + includes = '' + if xsl is not None: + customxsl = xsl + else: + customxsl = self.get_option_str('xsl') + if customxsl is not None: + customxsl = urllib.parse.quote(os.path.realpath(customxsl)) + includes += '<xsl:include href="file://' + customxsl + '"/>' + xslout.write(DB2HTML.format(xslfile=xslfile, + includes=includes, + intdatadir=self.intdatadir)) + if output is None: + output = self.get_option_str('output') + if output is None: + output = os.getcwd() + else: + if not os.path.isdir(output): + print('Output must be a directory', file=sys.stderr) + return 1 + if path is None: + path = self.get_option_list('path') + if path is None: + pathstr = ':' + else: + pathstr = ':'.join(path) + retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output, + '--path', pathstr, + self.db2html, infile.absfile]) + return retcode + + + def main(self, args, cache=None, output=None, xsl=None, path=None, ignore=None): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + if path is None: + pathopt = self.get_option_list('path') + else: + pathopt = path + path = [] + if pathopt is not None: + for p in pathopt: + 
path.extend(p.split(':')) + if output is None: + output = self.get_option_str('output') + srcs = {} + for infile in self.iter_files(): + if infile.filename.endswith('.page') or infile.filename.endswith('.stack'): + retcode = self.build_mallard_all(cache=cache, output=output, xsl=xsl, path=path) + if retcode != 0: + return retcode + if output is not None: + tree = self.get_xml(infile, path) + if tree is None: + return 1 + for el in tree.xpath('//*[@src]'): + src = el.get('src') + srcs.setdefault(src, []) + orig = os.path.join(os.path.realpath(infile.absdir), src) + if orig not in srcs[src]: + srcs[src].append(orig) + elif infile.filename.endswith('.docbook') or infile.filename.endswith('.xml'): + retcode = self.build_docbook(infile, output=output, xsl=xsl, path=path) + if retcode != 0: + return retcode + if output is not None: + tree = self.get_xml(infile, path) + if tree is None: + return 1 + for el in tree.xpath('//*[@fileref]'): + src = el.get('fileref') + srcs.setdefault(src, []) + orig = os.path.join(os.path.realpath(infile.absdir), src) + if orig not in srcs[src]: + srcs[src].append(orig) + else: + print('Error: No builder for ' + infile.filename) + return 1 + + if ignore is None: + ignore = self.get_option_bool('ignore') + tocopy = {} + for src in srcs: + useorig = None + for orig in srcs[src]: + if os.path.exists(orig): + if useorig is None: + useorig = orig + else: + print('Warning: Multiple sources for ' + src + '. 
Using first.', + file=sys.stderr) + if useorig is None: + for p in path: + tryorig = os.path.join(p, src) + if os.path.exists(tryorig): + useorig = tryorig break - fi - done - fi - fi - line_dest=`echo "$line_dest" | urldecode` - if [ "$line_src" != "$line_dest" ]; then - line_dir=`dirname "$line_dest"` - mkdir_p "$line_dir" - if [ -f "$line_src" -o "x$html_ignore_media" != "x1" ]; then - cp "$line_src" "$line_dest" - fi - fi - done - # Copy JavaScript - cp "${yelp_js_dir}/highlight.pack.js" "$html_out/$html_internal_datadir" - # Clean up - rm "$html_tmp_infile" - if [ "x$html_cache_file_is_tmp" = "xyes" ]; then - rm "$html_cache_file" - fi -} - -yelp_html () { - while [ "$#" != "0" ]; do - case "$1" in - "-c") - shift - html_cache_file="$1" - shift - ;; - "-o") - shift - html_out="$1" - shift - ;; - "-x") - shift - html_custom="$1" - shift - ;; - "-p") - shift - html_paths=$(yelp_paths_normalize "$1") - shift - ;; - "-i") - shift - html_ignore_media="1" - ;; - "--profile") - html_profile="--profile" - shift - ;; - *) - break - ;; - esac - done - if [ "x$html_out" = "x" ]; then - html_out="." - elif [ ! -d "$html_out" ]; then - echo "Error: output must be a directory." 
1>&2 - exit 1 - fi - html_out=`(cd "$html_out" && pwd)` - if [ "x$html_custom" != "x" ]; then - html_custom_dir=`dirname "$html_custom"` - html_custom_dir=`(cd "$html_custom_dir" && pwd)` - html_custom="$html_custom_dir"/`basename "$html_custom"` - fi - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_html - exit 1 - fi - ext=`echo "$1" | sed -e 's/.*\.//'` - if [ "x$ext" = "xxml" -o "x$ext" = "xdocbook" ]; then - yelp_html_db2html "$@" - else - yelp_html_mal2html "$@" - fi -} - -yelp_epub () { - while [ "$#" != "0" ]; do - case "$1" in - "-c") - shift - epub_cache_file="$1" - shift - ;; - "-o") - shift - epub_out="$1" - shift - ;; - "-x") - shift - html_custom="$1" - shift - ;; - "-p") - shift - html_paths=$(yelp_paths_normalize "$1") - shift - ;; - "-i") - shift - html_ignore_media="1" - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_epub - exit 1 - fi - if [ "x$epub_cache_file" != "x" ]; then - epub_cache_file=`(cd $(dirname "$epub_cache_file") && pwd)`/`basename "$epub_cache_file"` - else - epub_cache_file_is_tmp="yes" - epub_cache_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - yelp_cache -o "$epub_cache_file" "$@" - fi - html_cache_file="$epub_cache_file" - epub_data_out=`mktemp -d "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - html_out="$epub_data_out/OPS" - mkdir "$html_out" - mkdir "$html_out/yelp" - html_internal_datadir="yelp/" - html_internal_xsl="$xsl_mal_epub" - yelp_html_mal2html "$@" - - epub_id=`uuidgen` - epub_data=`(cd "$html_out" && ls yelp/*.*)` - xsltproc \ - --path "$html_paths" \ - -o "$html_out/opf.opf" \ - --stringparam opf.id "$epub_id" \ - --stringparam opf.data "$epub_data" \ - "$xsl_mal_opf" "$epub_cache_file" - xsltproc \ - --path "$html_paths" \ - -o "$html_out/ncx.ncx" \ - --stringparam ncx.id "$epub_id" \ - "$xsl_mal_ncx" "$epub_cache_file" - - echo "application/epub+zip" > "$epub_data_out/mimetype" - - mkdir "$epub_data_out/META-INF" - ( - echo "<?xml 
version='1.0' encoding='UTF-8'?>" - echo "<container version='1.0' xmlns='urn:oasis:names:tc:opendocument:xmlns:container'>" - echo "<rootfiles>" - echo "<rootfile full-path='OPS/opf.opf' media-type='application/oebps-package+xml'/>" - echo "</rootfiles>" - echo "</container>" - ) > "$epub_data_out/META-INF/container.xml" - - if [ "x$epub_out" = "x" ]; then - epub_out=`pwd`/index.epub - else - epub_out=`(cd $(dirname "$epub_out") && pwd)`/`basename "$epub_out"` - fi - (cd "$epub_data_out" && zip -q -r "$epub_out" mimetype META-INF OPS) - - if [ "x$epub_cache_file_is_tmp" = "xyes" ]; then - rm "$epub_cache_file" - fi - rm -rf "$html_out" -} - -cmd="$1" -shift -case "x$cmd" in - "xcache") - yelp_cache "$@" - ;; - "xepub") - is_xhtml=1 - yelp_epub "$@" - ;; - "xhtml") - is_xhtml=0 - yelp_html "$@" - ;; - "xxhtml") - is_xhtml=1 - yelp_html "$@" - ;; - *) - yelp_usage - ;; -esac + if useorig is None: + if ignore: + print('Warning: No source found for ' + src, file=sys.stderr) + else: + print('Error: No source found for ' + src, file=sys.stderr) + return 1 + if useorig is not None: + destfile = os.path.join(output, src) + destdir = os.path.dirname(destfile) + os.makedirs(destdir, exist_ok=True) + shutil.copyfile(useorig, destfile) + + if output is None: + shutil.copyfile(os.path.join(YELP_JS_DIR, 'highlight.pack.js'), + os.path.join(self.intdatadir, 'highlight.pack.js')) + else: + shutil.copyfile(os.path.join(YELP_JS_DIR, 'highlight.pack.js'), + os.path.join(output, self.intdatadir, 'highlight.pack.js')) + + return 0 + + +class HtmlBuilder (Builder): + name = 'html' + desc = 'Convert input files to HTML' + blurb = ('Create HTML output from the input files FILES.\n' + + 'FILES can be DocBook files, Mallard page files,\n' + + 'or directories containing Mallard page files.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), + ('output', '-o', 
'OUT', 'Output files in the directory OUT'), + ('xsl', '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'), + ('path', '-p', 'PATH', 'Extra directories to search for files'), + ('ignore', '-i', None, 'Ignore missing media files') + ] + + def __init__(self, yelpbuild): + super().__init__(yelpbuild) + self.xhtmlbuilder = XhtmlBuilder(yelpbuild, xhtml=False) + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + return self.xhtmlbuilder.main(args) + + +class EpubBuilder (Builder): + name = 'epub' + desc = 'Create an EPUB file for Mallard' + blurb = ('Create an EPUB file from the Mallard page files FILES') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), + ('output', '-o', 'OUT', 'Output files in the directory OUT'), + ('xsl', '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'), + ('path', '-p', 'PATH', 'Extra directories to search for files'), + ('ignore', '-i', None, 'Ignore missing media files'), + ('nozip', None, None, 'Do not zip the output directory') + ] + + def __init__(self, yelpbuild): + super().__init__(yelpbuild) + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + output = self.get_option_str('output') + nozip = self.get_option_bool('nozip') + if nozip: + if output is None: + output = 'EPUB' + if os.path.isfile(output): + print('Error: Output must be a directory', file=sys.stderr) + sys.exit(1) + epubdir = output + else: + self.create_tmpdir() + if output is None: + output = 'index.epub' + if os.path.isdir(output): + print('Error: Output must be a file', file=sys.stderr) + sys.exit(1) + epubdir = os.path.join(self.tmpdir, 'EPUB') + os.makedirs(epubdir, exist_ok=True) + os.makedirs(os.path.join(epubdir, 'OPS', 'yelp'), exist_ok=True) + + xhtmlbuilder = XhtmlBuilder(self.yelpbuild, 
epub=True) + retcode = xhtmlbuilder.main(self.fileargs, + cache=self.get_option_str('cache'), + output=os.path.join(epubdir, 'OPS'), + xsl=self.get_option_str('xsl'), + path=self.get_option_list('path'), + ignore=self.get_option_bool('ignore')) + if retcode != 0: + return retcode + + with open(os.path.join(epubdir, 'mimetype'), 'w') as fd: + fd.write('application/epub+zip\n') + + os.makedirs(os.path.join(epubdir, 'META-INF'), exist_ok=True) + + with open(os.path.join(epubdir, 'META-INF', 'container.xml'), 'w') as fd: + fd.write('<?xml version="1.0" encoding="UTF-8"?>') + fd.write('<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">') + fd.write('<rootfiles>') + fd.write('<rootfile full-path="OPS/opf.opf" media-type="application/oebps-package+xml"/>') + fd.write('</rootfiles>') + fd.write('</container>\n') + + path = self.get_option_list('path') + if path is None: + pathstr = ':' + else: + pathstr = ':'.join(path) + epubid = str(uuid.uuid4()) + opfdata = '' + for fname in os.listdir(os.path.join(epubdir, 'OPS', 'yelp')): + opfdata += ' OPS/yelp/' + urllib.parse.quote(fname) + retcode = subprocess.call(['xsltproc', '--xinclude', + '-o', os.path.join(epubdir, 'OPS', 'opf.opf'), + '--path', pathstr, + '--stringparam', 'opf.id', epubid, + '--stringparam', 'opf.data', opfdata, + XSL_MAL_OPF, xhtmlbuilder.cacheinfile]) + if retcode != 0: + return retcode + retcode = subprocess.call(['xsltproc', '--xinclude', + '-o', os.path.join(epubdir, 'OPS', 'ncx.ncx'), + '--path', pathstr, + '--stringparam', 'ncx.id', epubid, + XSL_MAL_NCX, xhtmlbuilder.cacheinfile]) + if retcode != 0: + return retcode + + if not nozip: + retcode = subprocess.call(['zip', '-q', '-r', os.path.realpath(output), + 'mimetype', 'META-INF', 'OPS'], + cwd=os.path.realpath(epubdir)) + if retcode != 0: + return retcode + return 0 + + +class YelpBuild: + def __init__(self): + pass + + def main(self): + if len(sys.argv) < 2: + self.print_usage() + return 1 + + builder = None + for 
cls in Builder.__subclasses__(): + if sys.argv[1] == cls.name: + builder = cls(self) + + if builder is None: + print('Unrecognized command: ' + sys.argv[1], file=sys.stderr) + return 1 + + return builder.main(sys.argv[2:]) + + def print_usage(self): + print('Usage: yelp-builder <COMMAND> [OPTIONS] [FILES]') + namelen = 2 + builders = [] + for cls in sorted(Builder.__subclasses__(), key=(lambda cls: cls.name or '')): + namelen = max(namelen, len(cls.name) + 2) + builders.append(cls) + + print('\nCommands:') + for cls in builders: + print(' ' + cls.name.ljust(namelen) + cls.desc) + + +if __name__ == '__main__': + try: + sys.exit(YelpBuild().main()) + except KeyboardInterrupt: + sys.exit(1) diff --git a/tools/yelp-build.py b/tools/yelp-build.py deleted file mode 100644 index 1c52335..0000000 --- a/tools/yelp-build.py +++ /dev/null @@ -1,834 +0,0 @@ -#!/bin/python3 -# -# yelp-build -# Copyright (C) 2010-2020 Shaun McCance <shaunm@gnome.org> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -import configparser -import os -import sys -import shutil -import subprocess -import tempfile -import urllib.parse -import uuid - -import lxml.etree -import lxml.ElementInclude - - -# FIXME: don't hardcode these -XSL_DB2HTML = '/usr/share/yelp-xsl/xslt/docbook/html/db2html.xsl' -XSL_DB2XHTML = '/usr/share/yelp-xsl/xslt/docbook/html/db2xhtml.xsl' -XSL_MALCACHE = '/usr/share/yelp-xsl/xslt/mallard/cache/mal-cache.xsl' -XSL_MAL2HTML = '/usr/share/yelp-xsl/xslt/mallard/html/mal2html.xsl' -XSL_MAL2XHTML = '/usr/share/yelp-xsl/xslt/mallard/html/mal2xhtml.xsl' -XSL_MAL_OPF='/usr/share/yelp-tools/xslt/mal-opf.xsl' -XSL_MAL_NCX='/usr/share/yelp-tools/xslt/mal-ncx.xsl' -DATADIR = '/usr/share/yelp-tools' -YELP_JSDIR = '/usr/share/yelp-xsl/js' - -XSLCOMMON = (''' -<xsl:variable name="yelp.internal.datadir" select="'{intdatadir}'"/> -<xsl:param name="html.css.root" select="$yelp.internal.datadir"/> -<xsl:param name="html.js.root" select="$yelp.internal.datadir"/> -{includes} -<xsl:template name="html.css"> - <xsl:param name="node" select="."/> - <xsl:variable name="yelp.locale"> - <xsl:choose> - <xsl:when test="$node/@xml:lang != ''"> - <xsl:value-of select="$node/@xml:lang"/> - </xsl:when> - <xsl:when test="$node/@lang != ''"> - <xsl:value-of select="$node/@lang"/> - </xsl:when> - <xsl:otherwise> - <xsl:text>C</xsl:text> - </xsl:otherwise> - </xsl:choose> - </xsl:variable> - <exsl:document href="{{$yelp.internal.datadir}}{{$yelp.locale}}.css" method="text"> - <xsl:call-template name="html.css.content"> - <xsl:with-param name="node" select="$node"/> - <xsl:with-param name="direction"> - <xsl:call-template name="l10n.direction"> - <xsl:with-param name="lang" select="$yelp.locale"/> - </xsl:call-template> - </xsl:with-param> - </xsl:call-template> - </exsl:document> - <link rel="stylesheet" type="text/css" href="{{$html.css.root}}{{$yelp.locale}}.css"/> -</xsl:template> -<xsl:template name="html.js.script"> - <xsl:param name="node" select="."/> - <exsl:document 
href="{{$yelp.internal.datadir}}yelp.js" method="text"> - <xsl:call-template name="html.js.content"> - <xsl:with-param name="node" select="$node"/> - </xsl:call-template> - </exsl:document> - <script type="text/javascript" src="{{$html.js.root}}yelp.js"/> -</xsl:template> -''') - -DB2HTML = (''' -<xsl:stylesheet - xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - xmlns:exsl="http://exslt.org/common" - xmlns="http://www.w3.org/1999/xhtml" - extension-element-prefixes="exsl" - version="1.0"> -<xsl:import href="file://{xslfile}"/> -''' -+ XSLCOMMON + -''' -</xsl:stylesheet> -''') - - -MAL2HTML = (''' -<xsl:stylesheet - xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - xmlns:mal="http://projectmallard.org/1.0/" - xmlns:cache="http://projectmallard.org/cache/1.0/" - xmlns:exsl="http://exslt.org/common" - xmlns="http://www.w3.org/1999/xhtml" - exclude-result-prefixes="mal cache" - extension-element-prefixes="exsl" - version="1.0"> -<xsl:import href="file://{xslfile}"/> -<xsl:param name="mal.cache.file" select="'file://{cachefile}'"/> -''' -+ XSLCOMMON + -''' -<xsl:template match="/"> - <xsl:for-each select="cache:cache/mal:page | cache:cache/mal:stack"> - <xsl:variable name="href" select="@cache:href"/> - <xsl:for-each select="document(@cache:href)"> - <xsl:for-each select="mal:page | mal:stack/mal:page"> - <xsl:call-template name="html.output"/> - </xsl:for-each> - </xsl:for-each> - </xsl:for-each> -</xsl:template> -</xsl:stylesheet> -''') - - - -class InputFile: - def __init__(self, filepath, filename, sitedir=None): - self.filepath = filepath - self.filename = filename - self.absfile = os.path.join(filepath, filename) - self.absdir = os.path.dirname(self.absfile) - self.sitedir = sitedir or '' - self.sitefilename = self.sitedir + self.filename - - -class PathResolver(lxml.etree.Resolver): - def __init__(self, srcdir, path): - if srcdir.endswith('/'): - self.srcdir = srcdir - else: - self.srcdir = srcdir + '/' - self.path = path - - def resolve(self, uri, id, 
context): - if os.path.exists(uri): - return self.resolve_filename(uri, context) - if uri.startswith(self.srcdir): - ref = uri[len(self.srcdir):] - else: - ref = uri - for p in self.path: - tryfile = os.path.join(p, ref) - if os.path.exists(tryfile): - return self.resolve_filename(tryfile, context) - return None - - -class Builder: - name = None - desc = None - blurb = None - formats = [] - arguments = [] - postblurb = None - config = None - - def __init__(self, yelpbuild): - self.yelpbuild = yelpbuild - self.options = {} - self.fileargs = [] - self.tmpdir = None - - def __del__(self): - if self.tmpdir is not None: - shutil.rmtree(self.tmpdir) - self.tmpdir = None - - def parse_args(self, args): - while len(args) > 0: - argdef = None - if args[0].startswith('--'): - for arg_ in self.arguments: - if args[0] == '--' + arg_[0]: - argdef = arg_ - break - if argdef is None: - self.print_help() - return 1 - elif args[0].startswith('-'): - for arg_ in self.arguments: - if args[0] == arg_[1]: - argdef = arg_ - break - if argdef is None: - self.print_help() - return 1 - if argdef is not None: - takesarg = (argdef[2] is not None) - if takesarg: - if len(args) < 2: - self.print_help() - return 1 - self.options.setdefault(argdef[0], []) - self.options[argdef[0]].append(args[1]) - args = args[2:] - else: - self.options[argdef[0]] = True - args = args[1:] - else: - self.fileargs.append(args[0]) - args = args[1:] - cfgfile = None - if len(self.fileargs) > 0: - cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg') - if not os.path.exists(cfgfile): - cfgfile = None - if cfgfile is None: - cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg') - if os.path.exists(cfgfile): - self.config = configparser.ConfigParser() - try: - self.config.read(cfgfile) - except Exception as e: - print(e, file=sys.stderr) - sys.exit(1) - return 0 - - def get_option_bool(self, arg): - if arg in self.options: - return self.options[arg] == True - if self.config is not None: - val = 
self.config.get('build:' + self.name, arg, fallback=None) - if val is not None: - return (val == 'true') - val = self.config.get('build', arg, fallback=None) - if val is not None: - return (val == 'true') - val = self.config.get('default', arg, fallback=None) - if val is not None: - return (val == 'true') - return False - - def get_option_str(self, arg): - if arg in self.options: - if isinstance(self.options[arg], list): - return self.options[arg][-1] - if self.config is not None: - val = self.config.get('build:' + self.name, arg, fallback=None) - if val is not None: - return val - val = self.config.get('build', arg, fallback=None) - if val is not None: - return val - val = self.config.get('default', arg, fallback=None) - if val is not None: - return val - return None - - def get_option_list(self, arg): - if arg in self.options: - if isinstance(self.options[arg], list): - ret = [] - for opt in self.options[arg]: - ret.extend(opt.replace(',', ' ').split()) - return ret - if self.config is not None: - val = self.config.get('build:' + self.name, arg, fallback=None) - if val is not None: - return val.replace(',', ' ').split() - val = self.config.get('build', arg, fallback=None) - if val is not None: - return val.replace(',', ' ').split() - val = self.config.get('default', arg, fallback=None) - if val is not None: - return val.replace(',', ' ').split() - return None - - def get_xml(self, infile, path): - parser = lxml.etree.XMLParser() - parser.resolvers.add(PathResolver(os.path.realpath(infile.absdir), path)) - tree = lxml.etree.parse(infile.absfile, parser=parser) - def pathloader(href, parse, encoding=None): - usefile = os.path.join(infile.absdir, href) - if not os.path.exists(href): - usefile = None - if usefile is None: - absdir = infile.absdir - if not absdir.endswith('/'): - absdir = absdir + '/' - ref = href - if ref.startswith(absdir): - ref = ref[len(absdir):] - for p in path: - tryfile = os.path.join(p, ref) - if os.path.exists(tryfile): - usefile = tryfile - 
break - if usefile is not None: - if parse == 'xml': - return lxml.etree.parse(usefile, parser=parser).getroot() - elif parse == 'text': - return open(usefile).read() - return None - lxml.ElementInclude.include(tree, loader=pathloader) - return tree - - def iter_files(self, sitedir=None): - issite = self.get_option_bool('site') - if len(self.fileargs) == 0: - self.fileargs.append('.') - for filearg in self.fileargs: - if os.path.isdir(filearg): - if issite: - for infile in self.iter_site(filearg, '/'): - yield infile - else: - for fname in os.listdir(filearg): - if fname.endswith('.page'): - yield InputFile(filearg, fname) - else: - if issite: - # FIXME: should do some normalization here, I guess. - # It's hard to get this perfect without a defined start dir - yield InputFile(os.getcwd(), filearg, '/' + os.path.dirname(filearg)) - else: - yield InputFile(os.getcwd(), filearg) - - def iter_site(self, filepath, sitedir): - for fname in os.listdir(filepath): - newpath = os.path.join(filepath, fname) - if os.path.isdir(newpath): - # FIXME https://github.com/projectmallard/pintail/issues/36 - if fname == '__pintail__': - continue - for infile in self.iter_site(newpath, sitedir + fname + '/'): - yield infile - elif fname.endswith('.page'): - yield InputFile(filepath, fname, sitedir) - - def create_tmpdir(self): - if self.tmpdir is None: - self.tmpdir = tempfile.mkdtemp() - - def print_help(self): - print('Usage: yelp-build ' + self.name + ' [OPTIONS] [FILES]') - print('Formats: ' + ' '.join(self.formats) + '\n') - #FIXME: prettify names of formats - if self.blurb is not None: - print(self.blurb + '\n') - print('Options:') - maxarglen = 2 - args = [] - for arg in self.arguments: - argkey = '--' + arg[0] - if arg[1] is not None: - argkey = arg[1] + ', ' + argkey - if arg[2] is not None: - argkey = argkey + ' ' + arg[2] - args.append((argkey, arg[3])) - for arg in args: - maxarglen = max(maxarglen, len(arg[0]) + 1) - for arg in args: - print(' ' + (arg[0]).ljust(maxarglen) 
+ ' ' + arg[1]) - if self.postblurb is not None: - print(self.postblurb) - - def main(self, args): - pass - - -class CacheBuilder (Builder): - name = 'cache' - desc = 'Convert a Mallard cache file' - blurb = ('Create a Mallard cache file from the page files FILES.\n' + - 'If FILES contains directories, all .page files in those\n' + - 'directories will be used.') - formats = ['mallard'] - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('output', '-o', 'OUT', 'Output files in the directory OUT'), - ('path', '-p', 'PATH', 'Extra directories to search for files'), - ('site', '-s', None, 'Treat pages as belonging to a Mallard site') - ] - - def build_cache_in(self, filename): - with open(filename, 'w') as cachein: - print('<cache:cache xmlns:cache="http://projectmallard.org/cache/1.0/"' + - ' xmlns:site="http://projectmallard.org/site/1.0/"' - ' xmlns="http://projectmallard.org/1.0/">', - file=cachein) - for infile in self.iter_files(): - if infile.filename.endswith('.page'): - page = '<page' - elif infile.filename.endswith('.stack'): - page = '<stack' - else: - continue - page += ' cache:href="file://' + urllib.parse.quote(os.path.realpath(infile.absfile)) + '"' - if self.get_option_bool('site'): - page += ' site:dir="' + infile.sitedir + '"' - page += '/>' - print(page, file=cachein) - print('</cache:cache>', file=cachein) - - def main(self, args, output=None, path=None): - if self.parse_args(args) != 0: - return 1 - if 'help' in self.options: - self.print_help() - return 0 - - retcode = 0 - self.create_tmpdir() - cacheinfile = os.path.join(self.tmpdir, 'index.cache.in') - self.build_cache_in(cacheinfile) - if output is None: - output = self.get_option_str('output') - if output is None: - output = 'index.cache' - if path is None: - path = self.get_option_list('path') - if path is None: - path = ':' - else: - path = ':'.join(path) - retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output, - '--path', path, - XSL_MALCACHE, cacheinfile]) - 
return retcode - - -class XhtmlBuilder (Builder): - name = 'xhtml' - desc = 'Convert input files to XHTML' - blurb = ('Create XHTML output from the input files FILES.\n' + - 'FILES can be DocBook files, Mallard page files,\n' + - 'or directories containing Mallard page files.') - formats = ['docbook4', 'docbook5', 'mallard'] - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), - ('output', '-o', 'OUT', 'Output files in the directory OUT'), - ('xsl', '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'), - ('path', '-p', 'PATH', 'Extra directories to search for files'), - ('ignore', '-i', None, 'Ignore missing media files') - ] - - def __init__(self, yelpbuild, xhtml=True, epub=False): - super().__init__(yelpbuild) - self.mal2html = None - self.db2html = None - self.xhtml = xhtml - self.epub = epub - if self.epub: - self.intdatadir = 'yelp' - else: - self.intdatadir = '' - self.cacheinfile = None - - - def build_mallard_all(self, cache=None, output=None, xsl=None, path=None): - if self.mal2html is not None: - # We build all the pages on the first call, because it's faster - return 0 - if path is None: - path = self.get_option_list('path') - self.create_tmpdir() - if cache is None: - cachefile = self.get_option_str('cache') - else: - cachefile = cache - cachebuilder = CacheBuilder(self.yelpbuild) - if cachefile is None: - cachefile = os.path.join(self.tmpdir, 'index.cache') - retcode = cachebuilder.main(self.fileargs, output=cachefile, path=path) - if retcode != 0: - return retcode - self.cacheinfile = cachefile - else: - cachefile = os.path.realpath(cachefile) - self.cacheinfile = os.path.join(self.tmpdir, 'index.cache.in') - cachebuilder.parse_args(self.fileargs) - cachebuilder.build_cache_in(self.cacheinfile) - self.mal2html = os.path.join(self.tmpdir, 'mal2html.xsl') - with open(self.mal2html, 'w') as xslout: - if self.xhtml: - xslfile = XSL_MAL2XHTML - else: - xslfile = 
XSL_MAL2HTML - includes = '' - if xsl is None: - customxsl = self.get_option_str('xsl') - else: - customxsl = xsl - if customxsl is not None: - customxsl = urllib.parse.quote(os.path.realpath(customxsl)) - includes += '<xsl:include href="file://' + customxsl + '"/>' - if self.epub: - includes += '''<xsl:param name="mal.if.target" select="'target:epub target:html target:xhtml'"/>''' - includes += '''<xsl:template mode="html.header.mode" match="mal:page"/>''' - includes += '''<xsl:template mode="html.footer.mode" match="mal:page"/>''' - xslout.write(MAL2HTML.format(xslfile=xslfile, - cachefile=cachefile, - includes=includes, - intdatadir=self.intdatadir)) - if output is None: - output = self.get_option_str('output') - if output is None: - output = os.getcwd() - else: - if not os.path.isdir(output): - print('Output must be a directory', file=sys.stderr) - return 1 - if not output.endswith('/'): - # xsltproc is picky about this - output = output + '/' - if path is None: - pathstr = ':' - else: - pathstr = ':'.join(path) - retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output, - '--path', pathstr, - '--stringparam', 'mal.cache.file', cachefile, - self.mal2html, self.cacheinfile]) - return retcode - - - def build_docbook(self, infile, output=None, xsl=None, path=None): - if self.db2html is None: - self.create_tmpdir() - self.db2html = os.path.join(self.tmpdir, 'db2html.xsl') - with open(self.db2html, 'w') as xslout: - if self.xhtml: - xslfile = XSL_DB2XHTML - else: - xslfile = XSL_DB2HTML - includes = '' - if xsl is not None: - customxsl = xsl - else: - customxsl = self.get_option_str('xsl') - if customxsl is not None: - customxsl = urllib.parse.quote(os.path.realpath(customxsl)) - includes += '<xsl:include href="file://' + customxsl + '"/>' - xslout.write(DB2HTML.format(xslfile=xslfile, - includes=includes, - intdatadir=self.intdatadir)) - if output is None: - output = self.get_option_str('output') - if output is None: - output = os.getcwd() - else: - if not 
os.path.isdir(output): - print('Output must be a directory', file=sys.stderr) - return 1 - if path is None: - path = self.get_option_list('path') - if path is None: - pathstr = ':' - else: - pathstr = ':'.join(path) - retcode = subprocess.call(['xsltproc', '--xinclude', '-o', output, - '--path', pathstr, - self.db2html, infile.absfile]) - return retcode - - - def main(self, args, cache=None, output=None, xsl=None, path=None, ignore=None): - if self.parse_args(args) != 0: - return 1 - if 'help' in self.options: - self.print_help() - return 0 - - if path is None: - pathopt = self.get_option_list('path') - else: - pathopt = path - path = [] - if pathopt is not None: - for p in pathopt: - path.extend(p.split(':')) - if output is None: - output = self.get_option_str('output') - srcs = {} - for infile in self.iter_files(): - if infile.filename.endswith('.page') or infile.filename.endswith('.stack'): - retcode = self.build_mallard_all(cache=cache, output=output, xsl=xsl, path=path) - if retcode != 0: - return retcode - if output is not None: - tree = self.get_xml(infile, path) - if tree is None: - return 1 - for el in tree.xpath('//*[@src]'): - src = el.get('src') - srcs.setdefault(src, []) - orig = os.path.join(os.path.realpath(infile.absdir), src) - if orig not in srcs[src]: - srcs[src].append(orig) - elif infile.filename.endswith('.docbook') or infile.filename.endswith('.xml'): - retcode = self.build_docbook(infile, output=output, xsl=xsl, path=path) - if retcode != 0: - return retcode - if output is not None: - tree = self.get_xml(infile, path) - if tree is None: - return 1 - for el in tree.xpath('//*[@fileref]'): - src = el.get('fileref') - srcs.setdefault(src, []) - orig = os.path.join(os.path.realpath(infile.absdir), src) - if orig not in srcs[src]: - srcs[src].append(orig) - else: - print('Error: No builder for ' + infile.filename) - return 1 - - if ignore is None: - ignore = self.get_option_bool('ignore') - tocopy = {} - for src in srcs: - useorig = None - for 
orig in srcs[src]: - if os.path.exists(orig): - if useorig is None: - useorig = orig - else: - print('Warning: Multiple sources for ' + src + '. Using first.', - file=sys.stderr) - if useorig is None: - for p in path: - tryorig = os.path.join(p, src) - if os.path.exists(tryorig): - useorig = tryorig - break - if useorig is None: - if ignore: - print('Warning: No source found for ' + src, file=sys.stderr) - else: - print('Error: No source found for ' + src, file=sys.stderr) - return 1 - if useorig is not None: - destfile = os.path.join(output, src) - destdir = os.path.dirname(destfile) - os.makedirs(destdir, exist_ok=True) - shutil.copyfile(useorig, destfile) - - if output is None: - shutil.copyfile(os.path.join(YELP_JSDIR, 'highlight.pack.js'), - os.path.join(self.intdatadir, 'highlight.pack.js')) - else: - shutil.copyfile(os.path.join(YELP_JSDIR, 'highlight.pack.js'), - os.path.join(output, self.intdatadir, 'highlight.pack.js')) - - return 0 - - -class HtmlBuilder (Builder): - name = 'html' - desc = 'Convert input files to HTML' - blurb = ('Create HTML output from the input files FILES.\n' + - 'FILES can be DocBook files, Mallard page files,\n' + - 'or directories containing Mallard page files.') - formats = ['docbook4', 'docbook5', 'mallard'] - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), - ('output', '-o', 'OUT', 'Output files in the directory OUT'), - ('xsl', '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'), - ('path', '-p', 'PATH', 'Extra directories to search for files'), - ('ignore', '-i', None, 'Ignore missing media files') - ] - - def __init__(self, yelpbuild): - super().__init__(yelpbuild) - self.xhtmlbuilder = XhtmlBuilder(yelpbuild, xhtml=False) - - def main(self, args): - if self.parse_args(args) != 0: - return 1 - if 'help' in self.options: - self.print_help() - return 0 - - return self.xhtmlbuilder.main(args) - - -class EpubBuilder (Builder): - name = 'epub' 
- desc = 'Create an EPUB file for Mallard' - blurb = ('Create an EPUB file from the Mallard page files FILES') - formats = ['mallard'] - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), - ('output', '-o', 'OUT', 'Output files in the directory OUT'), - ('xsl', '-x', 'CUSTOM', 'Import the custom XSLT file CUSTOM'), - ('path', '-p', 'PATH', 'Extra directories to search for files'), - ('ignore', '-i', None, 'Ignore missing media files'), - ('nozip', None, None, 'Do not zip the output directory') - ] - - def __init__(self, yelpbuild): - super().__init__(yelpbuild) - - def main(self, args): - if self.parse_args(args) != 0: - return 1 - if 'help' in self.options: - self.print_help() - return 0 - - output = self.get_option_str('output') - nozip = self.get_option_bool('nozip') - if nozip: - if output is None: - output = 'EPUB' - if os.path.isfile(output): - print('Error: Output must be a directory', file=sys.stderr) - sys.exit(1) - epubdir = output - else: - self.create_tmpdir() - if output is None: - output = 'index.epub' - if os.path.isdir(output): - print('Error: Output must be a file', file=sys.stderr) - sys.exit(1) - epubdir = os.path.join(self.tmpdir, 'EPUB') - os.makedirs(epubdir, exist_ok=True) - os.makedirs(os.path.join(epubdir, 'OPS', 'yelp'), exist_ok=True) - - xhtmlbuilder = XhtmlBuilder(self.yelpbuild, epub=True) - retcode = xhtmlbuilder.main(self.fileargs, - cache=self.get_option_str('cache'), - output=os.path.join(epubdir, 'OPS'), - xsl=self.get_option_str('xsl'), - path=self.get_option_list('path'), - ignore=self.get_option_bool('ignore')) - if retcode != 0: - return retcode - - with open(os.path.join(epubdir, 'mimetype'), 'w') as fd: - fd.write('application/epub+zip\n') - - os.makedirs(os.path.join(epubdir, 'META-INF'), exist_ok=True) - - with open(os.path.join(epubdir, 'META-INF', 'container.xml'), 'w') as fd: - fd.write('<?xml version="1.0" encoding="UTF-8"?>') - 
fd.write('<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">') - fd.write('<rootfiles>') - fd.write('<rootfile full-path="OPS/opf.opf" media-type="application/oebps-package+xml"/>') - fd.write('</rootfiles>') - fd.write('</container>\n') - - path = self.get_option_list('path') - if path is None: - pathstr = ':' - else: - pathstr = ':'.join(path) - epubid = str(uuid.uuid4()) - opfdata = '' - for fname in os.listdir(os.path.join(epubdir, 'OPS', 'yelp')): - opfdata += ' OPS/yelp/' + urllib.parse.quote(fname) - retcode = subprocess.call(['xsltproc', '--xinclude', - '-o', os.path.join(epubdir, 'OPS', 'opf.opf'), - '--path', pathstr, - '--stringparam', 'opf.id', epubid, - '--stringparam', 'opf.data', opfdata, - XSL_MAL_OPF, xhtmlbuilder.cacheinfile]) - if retcode != 0: - return retcode - retcode = subprocess.call(['xsltproc', '--xinclude', - '-o', os.path.join(epubdir, 'OPS', 'ncx.ncx'), - '--path', pathstr, - '--stringparam', 'ncx.id', epubid, - XSL_MAL_NCX, xhtmlbuilder.cacheinfile]) - if retcode != 0: - return retcode - - if not nozip: - retcode = subprocess.call(['zip', '-q', '-r', os.path.realpath(output), - 'mimetype', 'META-INF', 'OPS'], - cwd=os.path.realpath(epubdir)) - if retcode != 0: - return retcode - return 0 - - -class YelpBuild: - def __init__(self): - pass - - def main(self): - if len(sys.argv) < 2: - self.print_usage() - return 1 - - builder = None - for cls in Builder.__subclasses__(): - if sys.argv[1] == cls.name: - builder = cls(self) - - if builder is None: - print('Unrecognized command: ' + sys.argv[1], file=sys.stderr) - return 1 - - return builder.main(sys.argv[2:]) - - def print_usage(self): - print('Usage: yelp-builder <COMMAND> [OPTIONS] [FILES]') - namelen = 2 - builders = [] - for cls in sorted(Builder.__subclasses__(), key=(lambda cls: cls.name or '')): - namelen = max(namelen, len(cls.name) + 2) - builders.append(cls) - - print('\nCommands:') - for cls in builders: - print(' ' + cls.name.ljust(namelen) + 
cls.desc) - - -if __name__ == '__main__': - try: - sys.exit(YelpBuild().main()) - except KeyboardInterrupt: - sys.exit(1) diff --git a/tools/yelp-check.in b/tools/yelp-check.in index d46e004..2578800 100755 --- a/tools/yelp-check.in +++ b/tools/yelp-check.in @@ -1,8 +1,7 @@ -#!/bin/sh -# -*- indent-tabs-mode: nil -*- +#!/bin/python3 # # yelp-check -# Copyright (C) 2011-2015 Shaun McCance <shaunm@gnome.org> +# Copyright (C) 2011-2020 Shaun McCance <shaunm@gnome.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,1214 +17,1231 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -xsl_mal_link='@XSL_MAL_LINK@' -xsl_mal_license='@DATADIR@/xslt/mal-license.xsl' -xsl_mal_status='@DATADIR@/xslt/mal-status.xsl' -xsl_mal_rng='@DATADIR@/xslt/mal-rng.xsl' -xsl_comments='@DATADIR@/xslt/comments.xsl' -xsl_media='@DATADIR@/xslt/media.xsl' - -yelp_check_retval="0" - -urlencode () { - # We usually don't want to urlencode slashes, because we're - # usually converting file paths to URIs. But we do want to - # urlencode slases for names of RNG files in validate_page. 
- if [ "x$2" = "x/" ]; then - urlencode_slash='' - else - urlencode_slash='\/' - fi - echo "$1" | LC_ALL=C awk ' -BEGIN { - for (i = 1; i <= 255; i++) chars[sprintf("%c", i)] = i; -} -{ - ret = ""; - for (i = 1; i <= length($0); i++) { - c = substr($0, i, 1); - if (c ~ /['$urlencode_slash'a-zA-Z0-9._-]/) - ret = ret c; - else - ret = ret sprintf("%%%X%X", int(chars[c] / 16), chars[c] % 16); - } - print ret; -}' -} - -urldecode () { - echo "$1" | LC_ALL=C awk ' -BEGIN { - for(i = 0; i < 10; i++) hex[i] = i; - hex["A"] = hex["a"] = 10; - hex["B"] = hex["b"] = 11; - hex["C"] = hex["c"] = 12; - hex["D"] = hex["d"] = 13; - hex["E"] = hex["e"] = 14; - hex["F"] = hex["f"] = 15; -} -{ - ret = ""; - for (i = 1; i <= length($0); i++) { - c = substr($0, i, 1); - if (c == "+") { - ret = ret " "; - } - else if (c == "%") { - c = sprintf("%c", hex[substr($0, i + 1, 1)] * 16 + hex[substr($0, i + 2, 1)]); - ret = ret c; - i += 2; - } - else { - ret = ret c; +import configparser +import lxml.etree +import os +import sys +import urllib.request +import shutil +import subprocess +import tempfile +import textwrap + + +DATADIR = '@DATADIR@' + +XML_ID = '{http://www.w3.org/XML/1998/namespace}id' +NAMESPACES = { + 'mal': 'http://projectmallard.org/1.0/', + 'cache': 'http://projectmallard.org/cache/1.0/', + 'db': 'http://docbook.org/ns/docbook', + 'e': 'http://projectmallard.org/experimental/', + 'ui': 'http://projectmallard.org/ui/1.0/', + 'uix': 'http://projectmallard.org/experimental/ui/', + 'xlink': 'http://www.w3.org/1999/xlink' } - } - print ret; -}' -} - -docbook_version=' -<xsl:stylesheet - xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - xmlns:db="http://docbook.org/ns/docbook" - version="1.0"> -<xsl:output method="text"/> -<xsl:template match="/"> - <xsl:choose> - <xsl:when test="/db:*/@version"> - <xsl:value-of select="/db:*/@version"/> - </xsl:when> - <xsl:when test="/db:*"> - <xsl:text>5.0</xsl:text> - </xsl:when> - <xsl:otherwise> - <xsl:text>4</xsl:text> - </xsl:otherwise> 
- </xsl:choose> -</xsl:template> -</xsl:stylesheet> -' - -mallard_style=' -<xsl:stylesheet - xmlns:xsl="http://www.w3.org/1999/XSL/Transform" - xmlns:mal="http://projectmallard.org/1.0/" - version="1.0"> -<xsl:output method="text"/> -<xsl:template match="/mal:page"><xsl:value-of select="@style"/></xsl:template> -</xsl:stylesheet>' - -yelp_usage() { - ( - echo "Usage: yelp-check <COMMAND> [OPTIONS] [FILES]" - echo "" - echo "Commands:" - echo " comments Print the editorial comments in a document" - echo " hrefs Find broken external links in a document" - echo " ids Find Mallard page IDs that do not match file names" - echo " license Report the license of Mallard pages" - echo " links Find broken xref or linkend links in a document" - echo " media Find broken references to media files" - echo " orphans Find orphaned pages in a Mallard document" - echo " status Report the status of Mallard pages" - echo " style Report the style attribute of Mallard pages" - echo " validate Validate files against a DTD or RNG" - ) 1>&2 -} -yelp_usage_hrefs () { - ( - echo "Usage: yelp-check hrefs <FILES>" - echo "" - echo " Find broken href links in FILES in a Mallard document, or" - echo " broken ulink or XLink links in FILES in a DocBook document." - echo "" - echo "Options:" - echo " -s Treat pages as belonging to a Mallard site" - ) 1>&2 -} -yelp_usage_ids () { - ( - echo "Usage: yelp-check ids <FILES>" - echo "" - echo " Find pages in a Mallard document whose page ID does not match" - echo " the base file name of the page file." - echo "" - echo "Options:" - echo " -s Treat pages as belonging to a Mallard site" - ) 1>&2 -} -yelp_usage_links () { - ( - echo "Usage: yelp-check links <FILES>" - echo "" - echo " Find broken xref links in FILES in a Mallard document," - echo " or broken linkend links in FILES in a DocBook document." 
- echo "" - echo "Options:" - echo " -c CACHE Use the existing Mallard cache CACHE" - echo " -s Treat pages as belonging to a Mallard site" - echo " -i Ignore xrefs where href is present" - ) 1>&2 -} -yelp_usage_media () { - ( - echo "Usage: yelp-check media <FILES>" - echo "" - echo " Find broken references to media files. In Mallard, this" - echo " checks media and thumb elements. In DocBook, this checks" - echo " audiodata, imagedata, and videodata elements." - echo "" - echo "Options:" - echo " -s Treat pages as belonging to a Mallard site" - ) 1>&2 -} -yelp_usage_orphans () { - ( - echo "Usage: yelp-check orphans <FILES>" - echo "" - echo " Locate orphaned pages among FILES in a Mallard document." - echo " Orphaned pages are any pages that cannot be reached by" - echo " topic links alone from the index page." - echo "" - echo "Options:" - echo " -c CACHE Use the existing Mallard cache CACHE" - echo " -s Treat pages as belonging to a Mallard site" - ) 1>&2 -} -yelp_usage_comments () { - ( - echo "Usage: yelp-check comments <FILES>" - echo "" - echo " Print the editorial comments in the files FILES, using the" - echo " comment element in Mallard and the remark element in DocBook." - echo "" - echo "Options:" - echo " -s Treat pages as belonging to a Mallard site" - ) 1>&2 -} -yelp_usage_license () { - ( - echo "Usage: yelp-check license <FILES>" - echo "" - echo " Report the license of the Mallard page files FILES. Each" - echo " matching page is reporting along with its license, reported" - echo " based on the href attribute of the license element. Common" - echo " licenses use a shortened identifier. Pages with multiple" - echo " licenses have the identifiers separated by spaces. Pages" - echo " with no license element report 'none'. Licenses with no" - echo " href attribute are reported as 'unknown'." 
- echo "" - echo "Options:" - echo " -s Treat pages as belonging to a Mallard site" - echo " --only LICENSES Only show pages whose license is in LICENSES" - echo " --except LICENSES Exclude pages whose license is in LICENSES" - echo " --totals Show total counts for each license" - echo "LICENSES may be a comma- and/or space-separated list." - ) 1>&2 -} -yelp_usage_style () { - ( - echo "Usage: yelp-check style <FILES>" - echo "" - echo " Report the page style attribute of the Mallard page files" - echo " FILES. Each matching page is reporting along with its status." - echo "" - echo "Options:" - echo " -s Treat pages as belonging to a Mallard site" - echo " --only STYLES Only show pages whose style is in STATUSES" - echo " --except STYLES Exclude pages whose style is in STATUSES" - echo " --totals Show total counts for each style" - echo "STYLES may be comma- and/or space-separated lists." - ) 1>&2 -} -yelp_usage_status () { - ( - echo "Usage: yelp-check status <FILES>" - echo "" - echo " Report the status of the Mallard page files FILES. Each" - echo " matching page is reporting along with its status." - echo "" - echo "Options:" - echo " -s Treat pages as belonging to a Mallard site" - echo " --version VER Select revisions with the version attribute VER" - echo " --docversion VER Select revisions with the docversion attribute VER" - echo " --pkgversion VER Select revisions with the pkgversion attribute VER" - echo " --older DATE Only show pages older than DATE" - echo " --newer DATE Only show pages newer than DATE" - echo " --only STATUSES Only show pages whose status is in STATUSES" - echo " --except STATUSES Exclude pages whose status is in STATUSES" - echo " --totals Show total counts for each status" - echo "VER and STATUSES may be comma- and/or space-separated lists." - ) 1>&2 -} -yelp_usage_validate () { - ( - echo "Usage: yelp-check validate <FILES>" - echo "" - echo " Validate FILES against the appropriate DTD or RNG." 
- echo " For Mallard pages, perform automatic RNG merging" - echo " based on the version attribute." - echo "" - echo "Options:" - echo " -s Treat pages as belonging to a Mallard site" - echo " --strict Disallow unknown namespaces" - echo " --allow NS Explicitly allow namespace NS in strict mode" - echo " --jing Use jing instead of xmllint for RNG validation" - ) 1>&2 -} - -if [ $# = 0 ]; then - yelp_usage - exit 1 -fi - -yelp_check_iter_site () { - for dir in "$1"/*; do - if [ -d "$dir" ]; then - if [ $(basename "$dir") != "__pintail__" ]; then - yelp_check_iter_site "$dir" - fi - fi - done - for page in "$1"/*.page; do - if [ -e "$page" ]; then - $check_page "$page" || yelp_check_retval="$?" - fi - done -} - -yelp_check_iter_args () { - for arg in "$@"; do - ext=$(echo "$arg" | sed -e 's/.*\.//') - if [ -d "$arg" ]; then - if [ "x$check_site" = "x1" ]; then - yelp_check_iter_site "$arg" - else - for page in "${arg%%/}"/*.page; do - if [ -e "$page" ]; then - $check_page "$page" - fi - done - fi - elif [ "x$ext" = "xpage" -o "x$ext" = "xstub" -o "x$ext" = "xcache" ]; then - $check_page "$arg" || yelp_check_retval="$?" - elif [ "x$check_db" != "x" -a \( "x$ext" = "xdocbook" -o "x$ext" = "xxml" \) ]; then - $check_db "$arg" || yelp_check_retval="$?" 
- else - echo "Unrecognized page $arg" 1>&2 - exit 1 - fi - done - return $yelp_check_retval -} - -yelp_hrefs_page () { - base=$(dirname "$1") - if [ "x$check_site" = "x1" ]; then - sdir=$(cd $(dirname "$1") && pwd) - sdir=${sdir##${check_site_root}}/ - fi - ( - echo '<xsl:stylesheet' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' xmlns:mal="http://projectmallard.org/1.0/"' - echo ' xmlns:db="http://docbook.org/ns/docbook"' - echo ' xmlns:xlink="www.w3.org/1999/xlink"' - echo ' version="1.0">' - echo '<xsl:output method="text"/>' - echo '<xsl:template match="/mal:page">' - echo ' <xsl:for-each select="//*[@href]">' - echo ' <xsl:if test="not(starts-with(@href, '\''mailto:'\''))">' - echo ' <xsl:value-of select="/mal:page/@id"/>' - echo ' <xsl:text> </xsl:text>' - echo ' <xsl:value-of select="@href"/>' - echo ' <xsl:text>
</xsl:text>' - echo ' </xsl:if>' - echo ' </xsl:for-each>' - echo '</xsl:template>' - echo '<xsl:template match="/*[namespace-uri(.) = '\'\''] | /db:*">' - echo ' <xsl:for-each select="//ulink/@url | //*/xlink:href">' - echo ' <xsl:if test="not(starts-with(string(.), '\''mailto:'\''))">' - echo ' <xsl:value-of select="(ancestor-or-self::*/@id | ancestor-or-self::*/@xml:id)[last()]"/>' - echo ' <xsl:text> </xsl:text>' - echo ' <xsl:value-of select="string(.)"/>' - echo ' <xsl:text>
</xsl:text>' - echo ' </xsl:if>' - echo ' </xsl:for-each>' - echo '</xsl:template>' - echo '</xsl:stylesheet>' - ) | xsltproc --xinclude - "$1" | sort | uniq | \ - while read id url; do - colon=`echo "$url" | cut -d: -f1` - if [ "x$colon" = "x$url" ]; then - test -f "$base/"$(urldecode "$url") || echo "$sdir$id: $url" - else - status=$(cat "$check_href_cache" | while read trystatus tryurl; do - if [ "x$tryurl" = "x$url" ]; then echo "$trystatus"; break; fi - done) - if [ "x$status" = "x1" ]; then - true - elif [ "x$status" = "x0" ]; then - echo "$sdir$id: $url" - else - (curl -s -I -L "$url" | \ - grep '^HTTP/' | tail -n 1 | head -n 1 | \ - grep -q 'HTTP/.\.. 200 .*') \ - && (echo "1 $url" >> "$check_href_cache") \ - || (echo "0 $url" >> "$check_href_cache"; echo "$sdir$id: $url") - fi - fi - done -} - -yelp_hrefs () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_hrefs - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_hrefs - exit 1 - fi - check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - check_href_cache=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - echo > "$check_href_cache" - check_db=yelp_hrefs_page - check_page=yelp_hrefs_page - yelp_check_iter_args "$@" > "$check_out_file" - yelp_check_retval=$(wc -l < "$check_out_file") - if test "x$yelp_check_retval" != "x0"; then - yelp_check_retval=1 - fi - cat "$check_out_file" - rm "$check_out_file" - rm "$check_href_cache" - exit $yelp_check_retval -} - -yelp_ids_page () { - pageid=$( - ( - echo '<xsl:stylesheet' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' xmlns:mal="http://projectmallard.org/1.0/"' - echo ' version="1.0">' - echo '<xsl:output method="text"/>' - echo '<xsl:template match="/mal:page">' - echo '<xsl:value-of select="@id"/>' - echo '</xsl:template>' - echo 
'</xsl:stylesheet>' - ) | xsltproc --xinclude - "$1") - dname=$(dirname "$1") - bname=$(basename "$1") - if [ "x$pageid.page" != "x$bname" ]; then - if [ "x$check_site" = "x1" ]; then - sdir=$(cd $(dirname "$1") && pwd) - sdir=${sdir##${check_site_root}}/ - echo $sdir$(basename "$1")": $pageid" - elif [ "x$dname" = 'x.' ]; then - echo "$bname: $pageid" - else - echo "$1: $pageid" - fi - yelp_check_retval=1 - fi -} - -yelp_ids () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_ids - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_ids - exit 1 - fi - check_db= - check_page=yelp_ids_page - yelp_check_iter_args "$@" - exit $yelp_check_retval -} - -yelp_links_db () { - ( - echo '<xsl:stylesheet' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' xmlns:db="http://docbook.org/ns/docbook"' - echo ' xmlns:exsl="http://exslt.org/common"' - echo ' extension-element-prefixes="exsl"' - echo ' version="1.0">' - echo '<xsl:output method="text"/>' - echo '<xsl:key name="idkey" match="*[@id or @xml:id]" use="@id | @xml:id"/>' - echo '<xsl:template match="/">' - echo ' <xsl:for-each select="//*[@linkend]">' - echo ' <xsl:if test="not(key('"'idkey'"', @linkend))">' - echo ' <xsl:value-of select="(ancestor-or-self::*/@id | ancestor-or-self::*/@xml:id)[last()]"/>' - echo ' <xsl:text>: </xsl:text>' - echo ' <xsl:value-of select="@linkend"/>' - echo ' <xsl:text>
</xsl:text>' - echo ' </xsl:if>' - echo ' </xsl:for-each>' - echo '</xsl:template>' - echo '</xsl:stylesheet>' - ) | xsltproc --xinclude - "$1" -} - -yelp_links_page () { - if [ "x$check_site" = "x1" ]; then - sdir=$(cd $(dirname "$1") && pwd) - sdir=${sdir##${check_site_root}}/ - fi - ( - echo '<xsl:stylesheet' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' xmlns:mal="http://projectmallard.org/1.0/"' - echo ' xmlns:site="http://projectmallard.org/site/1.0/"' - echo ' xmlns:exsl="http://exslt.org/common"' - echo ' extension-element-prefixes="exsl"' - echo ' version="1.0">' - xsl='file://'`urlencode "$xsl_mal_link"` - echo '<xsl:import href="'"$xsl"'"/>' - check_cache_url='file://'`urlencode "$check_cache_file"` - echo '<xsl:param name="mal.cache.file" select="'"'$check_cache_url'"'"/>' - echo '<xsl:variable name="site.dir" select="'"'$sdir'"'"/>' - echo '<xsl:output method="text"/>' - echo '<xsl:key name="__site.cache.key" match="mal:page | mal:section"' - echo ' use="concat(ancestor-or-self::mal:page/@site:dir, @id)"/>' - echo '<xsl:template match="/mal:page">' - echo ' <xsl:variable name="page" select="@id"/>' - if [ "x$check_links_ignore" = "x1" ]; then - echo ' <xsl:for-each select="//*[@xref][not(@href)]">' - else - echo ' <xsl:for-each select="//*[@xref]">' - fi - echo ' <xsl:variable name="xref" select="@xref"/>' - echo ' <xsl:variable name="linkid">' - echo ' <xsl:call-template name="mal.link.xref.linkid"/>' - echo ' </xsl:variable>' - echo ' <xsl:for-each select="$mal.cache">' - echo ' <xsl:if test="count(key('"'mal.cache.key'"', $linkid) | ' - echo ' key('"'__site.cache.key'"', $linkid)) = 0">' - echo ' <xsl:value-of select="$site.dir"/>' - echo ' <xsl:value-of select="$page"/>' - echo ' <xsl:text>: </xsl:text>' - echo ' <xsl:value-of select="$xref"/>' - echo ' <xsl:text>
</xsl:text>' - echo ' </xsl:if>' - echo ' </xsl:for-each>' - echo ' </xsl:for-each>' - echo '</xsl:template>' - echo '</xsl:stylesheet>' - ) | xsltproc --xinclude - "$1" -} - -yelp_links () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_links - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-c") - shift - check_cache_file="$1" - shift - ;; - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - "-i") - shift - check_links_ignore="1" - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_links - exit 1 - fi - if [ "x$check_cache_file" != "x" ]; then - check_cache_dir=$(dirname "$check_cache_file") - check_cache_dir=$(cd "$check_cache_dir" && pwd) - check_cache_file="$check_cache_dir/"$(basename "$check_cache_file") - elif [ -d "$1" ]; then - check_cache_file=1 - else - case "$1" in - *.page | *.stub | *.cache) - check_cache_file=1 - ;; - *) - break - ;; - esac - fi - if [ "x$check_cache_file" = "x1" ]; then - check_cache_file_is_tmp="yes" - check_cache_file=$(mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX) - if [ "x$check_site" = "x1" ]; then - yelp-build cache -s -o "$check_cache_file" "$@" - else - yelp-build cache -o "$check_cache_file" "$@" - fi - fi - - check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - check_db=yelp_links_db - check_page=yelp_links_page - yelp_check_iter_args "$@" > "$check_out_file" - yelp_check_retval=$(wc -l < "$check_out_file") - if test "x$yelp_check_retval" != "x0"; then - yelp_check_retval=1 - fi - cat "$check_out_file" - rm "$check_out_file" - if [ "x$check_cache_file_is_tmp" = "xyes" ]; then - rm "$check_cache_file" - fi - exit $yelp_check_retval -} - -yelp_media_page () { - ext=$(echo "$1" | sed -e 's/.*\.//') - bname=$(basename "$1" ".$ext") - dname=$(dirname "$1") - if [ "x$dname" = "x." 
]; then - dname="" - else - dname="$dname"/ - fi; - if [ "x$check_site" = "x1" ]; then - sdir=$(cd "$dname" && pwd) - sdir=${sdir##${check_site_root}}/ - else - sdir="$dname" - fi - xsltproc "$xsl_media" "$1" | \ - sort | uniq | \ - while read line; do - src=$(urldecode "$line") - if [ ! -f "$dname$src" ]; then - echo "$sdir$bname: $line" - fi - done -} - -yelp_media () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_media - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_media - exit 1 - fi - check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - check_db=yelp_media_page - check_page=yelp_media_page - yelp_check_iter_args "$@" > "$check_out_file" - yelp_check_retval=$(wc -l < "$check_out_file") - if test "x$yelp_check_retval" != "x0"; then - yelp_check_retval=1 - fi - cat "$check_out_file" - rm "$check_out_file" - exit $yelp_check_retval -} - -yelp_orphans_page () { - if [ "x$check_site" = "x1" ]; then - sdir=$(cd $(dirname "$1") && pwd) - sdir=${sdir##${check_site_root}}/ - fi - ( - echo '<xsl:stylesheet' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' xmlns:mal="http://projectmallard.org/1.0/"' - echo ' xmlns:exsl="http://exslt.org/common"' - echo ' extension-element-prefixes="exsl"' - echo ' version="1.0">' - xsl='file://'`urlencode "$xsl_mal_link"` - echo '<xsl:import href="'"$xsl"'"/>' - check_cache_url='file://'`urlencode "$check_cache_file"` - echo '<xsl:param name="mal.cache.file" select="'"'$check_cache_url'"'"/>' - echo '<xsl:variable name="site.dir" select="'"'$sdir'"'"/>' - echo '<xsl:output method="text"/>' - echo '<xsl:template match="/mal:page">' - echo ' <xsl:variable name="trails">' - echo ' <xsl:call-template name="mal.link.linktrails"/>' - echo ' </xsl:variable>' - echo ' <xsl:if test="@id != '"'index'"' and 
count(exsl:node-set($trails)/*) = 0">' - echo ' <xsl:value-of select="$site.dir"/>' - echo ' <xsl:value-of select="@id"/>' - echo ' <xsl:text>
</xsl:text>' - echo ' </xsl:if>' - echo '</xsl:template>' - echo '</xsl:stylesheet>' - ) | xsltproc --xinclude - "$1" -} - -yelp_orphans () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_orphans - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - "-c") - shift - check_cache_file="$1" - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_orphans - exit 1 - fi - if [ "x$check_cache_file" != "x" ]; then - check_cache_dir=$(dirname "$check_cache_file") - check_cache_dir=$(cd "$check_cache_dir" && pwd) - check_cache_file="$check_cache_dir/"$(basename "$check_cache_file") - elif [ -d "$1" ]; then - check_cache_file=1 - else - case "$1" in - *.page | *.stub | *.cache) - check_cache_file=1 - ;; - *) - break - ;; - esac - fi - if [ "x$check_cache_file" = "x1" ]; then - check_cache_file_is_tmp="yes" - check_cache_file=$(mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX) - if [ "x$check_site" = "x1" ]; then - yelp-build cache -s -o "$check_cache_file" "$@" - else - yelp-build cache -o "$check_cache_file" "$@" - fi - fi - - check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - check_db= - check_page=yelp_orphans_page - yelp_check_iter_args "$@" > "$check_out_file" - yelp_check_retval=$(wc -l < "$check_out_file") - if test "x$yelp_check_retval" != "x0"; then - yelp_check_retval=1 - fi - cat "$check_out_file" - rm "$check_out_file" - if [ "x$check_cache_file_is_tmp" = "xyes" ]; then - rm "$check_cache_file" - fi - exit $yelp_check_retval -} - -yelp_comments_page () { - ext=$(echo "$1" | sed -e 's/.*\.//') - bname=$(basename "$1" ".$ext") - if [ "x$check_site" = "x1" ]; then - sdir=$(cd $(dirname "$1") && pwd) - sdir=${sdir##${check_site_root}}/ - fi - xsltproc --stringparam basename "$bname" \ - --stringparam site.dir "$sdir" \ - "$xsl_comments" "$1" -} - -yelp_comments () { - if [ "$#" = "0" -o "x$1" = "x--help" -o 
"x$1" = "x-h" ]; then - yelp_usage_comments - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_comments - exit 1 - fi - check_db=yelp_comments_page - check_page=yelp_comments_page - yelp_check_iter_args "$@" - exit $yelp_check_retval -} - -yelp_license_page () { - if [ "x$check_site" = "x1" ]; then - sdir=$(cd $(dirname "$1") && pwd) - sdir=${sdir##${check_site_root}}/ - fi - xsltproc --xinclude \ - --stringparam only "$check_only" \ - --stringparam except "$check_except" \ - --stringparam totals "$check_totals" \ - --stringparam site.dir "$sdir" \ - "$xsl_mal_license" "$1" -} - -yelp_license () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_license - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - "--only") - shift - check_only="$1" - shift - ;; - "--except") - shift - check_except="$1" - shift - ;; - "--totals") - check_totals="1" - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_license - exit 1 - fi - check_db= - check_page=yelp_license_page - if [ "x$check_totals" = "x1" ]; then - yelp_check_iter_args "$@" | \ - sort | uniq -c | sed -e 's/^ *//' | awk '{print $2 ": " $1}' - else - yelp_check_iter_args "$@" | sort - fi -} - -yelp_style_page () { - ext=$(echo "$1" | sed -e 's/.*\.//') - bname=$(basename "$1" ".$ext") - if [ "x$check_site" = "x1" ]; then - sdir=$(cd $(dirname "$1") && pwd) - sdir=${sdir##${check_site_root}}/ - fi - style=$(echo "$mallard_style" | xsltproc - "$1") - output=1 - if [ "x$check_only_defined" = "x1" ]; then - output=0 - if [ "x$check_only" = "x" ]; then - # We treat an empty --only '' as requesting pages with no style - if [ "x$style" = "x" ]; then output=1; fi - else - for pstyle in 
"$style"; do - for sstyle in $(echo "$check_only" | sed -e 's/,/ /g'); do - if [ "$pstyle" = "$sstyle" ]; then - output=1 + +def _stringify(el): + ret = el.text or '' + for ch in el: + ret = ret + _stringify(ch) + if el.tail is not None: + ret = ret + el.tail + return ret + +def get_format(node): + ns = lxml.etree.QName(node).namespace + if ns in (NAMESPACES['mal'], NAMESPACES['cache']): + return 'mallard' + elif ns == NAMESPACES['db']: + return 'docbook5' + elif ns is None: + # For now, just assume no ns means docbook4 + return 'docbook4' + else: + return None + +class InputFile: + def __init__(self, filepath, filename, sitedir=None): + self.filepath = filepath + self.filename = filename + self.absfile = os.path.join(filepath, filename) + self.absdir = os.path.dirname(self.absfile) + self.sitedir = sitedir or '' + self.sitefilename = self.sitedir + self.filename + + +class Checker: + name = None + desc = None + blurb = None + formats = [] + arguments = [] + postblurb = None + xinclude = True + config = None + + def __init__(self, yelpcheck): + self.yelpcheck = yelpcheck + self.options = {} + self.fileargs = [] + self.tmpdir = None + + def __del__(self): + if self.tmpdir is not None: + shutil.rmtree(self.tmpdir) + self.tmpdir = None + + def parse_args(self, args): + while len(args) > 0: + argdef = None + if args[0].startswith('--'): + for arg_ in self.arguments: + if args[0] == '--' + arg_[0]: + argdef = arg_ + break + if argdef is None: + self.print_help() + return 1 + elif args[0].startswith('-'): + for arg_ in self.arguments: + if args[0] == arg_[1]: + argdef = arg_ + break + if argdef is None: + self.print_help() + return 1 + if argdef is not None: + takesarg = (argdef[2] is not None) + if takesarg: + if len(args) < 2: + self.print_help() + return 1 + self.options.setdefault(argdef[0], []) + self.options[argdef[0]].append(args[1]) + args = args[2:] + else: + self.options[argdef[0]] = True + args = args[1:] + else: + self.fileargs.append(args[0]) + args = 
args[1:] + cfgfile = None + if len(self.fileargs) > 0: + cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg') + if not os.path.exists(cfgfile): + cfgfile = None + if cfgfile is None: + cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg') + if os.path.exists(cfgfile): + self.config = configparser.ConfigParser() + try: + self.config.read(cfgfile) + except Exception as e: + print(e, file=sys.stderr) + sys.exit(1) + return 0 + + def get_option_bool(self, arg): + if arg in self.options: + return self.options[arg] == True + if self.config is not None: + val = self.config.get('check:' + self.name, arg, fallback=None) + if val is not None: + return (val == 'true') + val = self.config.get('check', arg, fallback=None) + if val is not None: + return (val == 'true') + val = self.config.get('default', arg, fallback=None) + if val is not None: + return (val == 'true') + return False + + def get_option_str(self, arg): + if arg in self.options: + if isinstance(self.options[arg], list): + return self.options[arg][-1] + if self.config is not None: + val = self.config.get('check:' + self.name, arg, fallback=None) + if val is not None: + return val + val = self.config.get('check', arg, fallback=None) + if val is not None: + return val + val = self.config.get('default', arg, fallback=None) + if val is not None: + return val + return None + + def get_option_list(self, arg): + if arg in self.options: + if isinstance(self.options[arg], list): + ret = [] + for opt in self.options[arg]: + ret.extend(opt.replace(',', ' ').split()) + return ret + if self.config is not None: + val = self.config.get('check:' + self.name, arg, fallback=None) + if val is not None: + return val.replace(',', ' ').split() + val = self.config.get('check', arg, fallback=None) + if val is not None: + return val.replace(',', ' ').split() + val = self.config.get('default', arg, fallback=None) + if val is not None: + return val.replace(',', ' ').split() + return None + + def iter_files(self, 
sitedir=None): + issite = self.get_option_bool('site') + if len(self.fileargs) == 0: + self.fileargs.append('.') + for filearg in self.fileargs: + if os.path.isdir(filearg): + if issite: + for infile in self.iter_site(filearg, '/'): + yield infile + else: + for fname in os.listdir(filearg): + if fname.endswith('.page'): + yield InputFile(filearg, fname) + else: + if issite: + # FIXME: should do some normalization here, I guess. + # It's hard to get this perfect without a defined start dir + yield InputFile(os.getcwd(), filearg, '/' + os.path.dirname(filearg)) + else: + yield InputFile(os.getcwd(), filearg) + + def iter_site(self, filepath, sitedir): + for fname in os.listdir(filepath): + newpath = os.path.join(filepath, fname) + if os.path.isdir(newpath): + # FIXME https://github.com/projectmallard/pintail/issues/36 + if fname == '__pintail__': + continue + for infile in self.iter_site(newpath, sitedir + fname + '/'): + yield infile + elif fname.endswith('.page'): + yield InputFile(filepath, fname, sitedir) + + def get_xml(self, xmlfile): + # FIXME: we can cache these if we add a feature to run multiple + # checkers at once + tree = lxml.etree.parse(xmlfile.absfile) + if self.xinclude: + lxml.etree.XInclude()(tree.getroot()) + return tree + + def create_tmpdir(self): + if self.tmpdir is None: + self.tmpdir = tempfile.mkdtemp() + + def print_help(self): + print('Usage: yelp-check ' + self.name + ' [OPTIONS] [FILES]') + print('Formats: ' + ' '.join(self.formats) + '\n') + #FIXME: prettify names of formats + if self.blurb is not None: + print(self.blurb + '\n') + print('Options:') + maxarglen = 2 + args = [] + for arg in self.arguments: + argkey = '--' + arg[0] + if arg[1] is not None: + argkey = arg[1] + ', ' + argkey + if arg[2] is not None: + argkey = argkey + ' ' + arg[2] + args.append((argkey, arg[3])) + for arg in args: + maxarglen = max(maxarglen, len(arg[0]) + 1) + for arg in args: + print(' ' + (arg[0]).ljust(maxarglen) + ' ' + arg[1]) + if self.postblurb is 
not None: + print(self.postblurb) + + def main(self, args): + pass + + +class HrefsChecker (Checker): + name = 'hrefs' + desc = 'Find broken external links in a document' + blurb = ('Find broken href links in FILES in a Mallard document, or\n' + + 'broken ulink or XLink links in FILES in a DocBook document.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('allow', None, 'URL', 'Allow URL or list of URLs without checking') + ] + postblurb = 'URL may be a comma- and/or space-separated list, or specified\nmultiple times.' + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + # safelisting URLs that we use as identifiers + hrefs = { + 'http://creativecommons.org/licenses/by-sa/3.0/': True, + 'https://creativecommons.org/licenses/by-sa/3.0/': True, + 'http://creativecommons.org/licenses/by-sa/3.0/us/': True, + 'https://creativecommons.org/licenses/by-sa/3.0/us/': True + } + allow = self.get_option_list('allow') + if allow is not None: + for url in allow: + hrefs[url] = True + retcode = 0 + + for infile in self.iter_files(): + xml = self.get_xml(infile) + for el in xml.xpath('//*[@href | @xlink:href | self::ulink/@url]', + namespaces=NAMESPACES): + href = el.get('href', None) + if href is None: + href = el.get('{www.w3.org/1999/xlink}href') + if href is None: + href = el.get('url') + if href is None: + continue + if href.startswith('mailto:'): + continue + if href not in hrefs: + try: + req = urllib.request.urlopen(href) + hrefs[href] = (req.status == 200) + except Exception as e: + hrefs[href] = False + if not hrefs[href]: + retcode = 1 + print(infile.sitefilename + ': ' + href) + + return retcode + + +class IdsChecker (Checker): + name = 'ids' + desc = 'Find Mallard page IDs that do not match file names' + blurb = ('Find pages in a Mallard document 
whose page ID does not match\n' + + 'the base file name of the page file.') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site') + ] + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + retcode = 0 + + for infile in self.iter_files(): + xml = self.get_xml(infile) + isok = False + pageid = None + if infile.filename.endswith('.page'): + try: + pageid = xml.getroot().get('id') + isok = (pageid == os.path.basename(infile.filename)[:-5]) + except: + isok = False + if not isok: + retcode = 1 + print(infile.sitefilename + ': ' + (pageid or '')) + + return retcode + + +class LinksChecker (Checker): + name = 'links' + desc = 'Find broken xref or linkend links in a document' + blurb = ('Find broken xref links in FILES in a Mallard document,\n' + + 'or broken linkend links in FILES in a DocBook document.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), + ('ignore', '-i', None, 'Ignore xrefs where href is present') + ] + + def __init__(self, yelpcheck): + super().__init__(yelpcheck) + self.idstoxrefs = {} + self.idstolinkends = {} + + def _accumulate_mal(self, node, pageid, sectid, xrefs, sitedir=None): + thisid = node.get('id') + if thisid is not None: + if node.tag == '{' + NAMESPACES['mal'] + '}page': + pageid = thisid + else: + sectid = thisid + curid = pageid + ignore = self.get_option_bool('ignore') + if curid is not None: + if sectid is not None: + # id attrs in cache files are already fully formed + if '#' in sectid: + curid = sectid + else: + curid = curid + '#' + sectid + if sitedir is not None: + # id attrs in cache files already have sitedir prefixed + if curid[0] != '/': + 
curid = sitedir + curid + self.idstoxrefs.setdefault(curid, []) + if xrefs: + xref = node.get('xref') + if xref is not None: + if not (ignore and (node.get('href') is not None)): + self.idstoxrefs[curid].append(xref) + for child in node: + self._accumulate_mal(child, pageid, sectid, xrefs, sitedir) + + def _accumulate_db(self, node, nodeid): + thisid = node.get('id') + if thisid is None: + thisid = node.get(XML_ID) + if thisid is not None: + nodeid = thisid + self.idstolinkends.setdefault(nodeid, []) + if nodeid is not None: + linkend = node.get('linkend') + if linkend is not None: + self.idstolinkends[nodeid].append(linkend) + for child in node: + self._accumulate_db(child, nodeid) + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + retcode = 0 + + cachefile = self.get_option_str('cache') + if cachefile is not None: + xml = self.get_xml(InputFile(os.getcwd(), cachefile)) + self._accumulate_mal(xml.getroot(), None, None, False) + + for infile in self.iter_files(): + xml = self.get_xml(infile) + format = get_format(xml.getroot()) + if format == 'mallard': + self._accumulate_mal(xml.getroot(), None, None, True, infile.sitedir) + elif format in ('docbook4', 'docbook5'): + # For DocBook, we assume each filearg is its own document, so + # we reset the dict each time and only check within the file. + # Note that XInclude and SYSTEM includes DO happen first. 
+ self.idstolinkends = {} + self._accumulate_db(xml.getroot(), None) + for curid in self.idstolinkends: + for linkend in self.idstolinkends[curid]: + if linkend not in self.idstolinkends: + print(curid + ': ' + linkend) + retcode = 1 + + for curid in self.idstoxrefs: + for xref in self.idstoxrefs[curid]: + checkref = xref + if checkref[0] == '#': + checkref = curid.split('#')[0] + checkref + if curid[0] == '/' and checkref[0] != '/': + checkref = curid[:curid.rfind('/')+1] + checkref + if checkref not in self.idstoxrefs: + print(curid + ': ' + xref) + retcode = 1 + + return retcode + + +class MediaChecker (Checker): + name = 'media' + desc = 'Find broken references to media files' + blurb = ('Find broken references to media files. In Mallard, this\n' + + 'checks media and thumb elements. In DocBook, this checks\n' + + 'audiodata, imagedata, and videodata elements.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site') + ] + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + retcode = 0 + + for infile in self.iter_files(): + xml = self.get_xml(infile) + format = get_format(xml.getroot()) + srcs = [] + if format == 'mallard': + for el in xml.xpath('//mal:media[@src] | //uix:thumb | //ui:thumb | //e:mouseover', + namespaces=NAMESPACES): + srcs.append(el.get('src')) + elif format == 'docbook5': + # FIXME: do we care about entityref? 
+ for el in xml.xpath('//db:audiodata | //db:imagedata | //db:videodata', + namespaces=NAMESPACES): + srcs.append(el.get('fileref')) + elif format == 'docbook4': + for el in xml.xpath('//audiodata | //imagedata | //videodata'): + srcs.append(el.get('fileref')) + for src in srcs: + fsrc = os.path.join(infile.absdir, src) + if not os.path.exists(fsrc): + print(infile.sitefilename + ': ' + src) + retcode = 1 + + return retcode + + +class OrphansChecker (Checker): + name = 'orphans' + desc = 'Find orphaned pages in a Mallard document' + blurb = ('Locate orphaned pages among FILES in a Mallard document.\n' + + 'Orphaned pages are any pages that cannot be reached by\n' + + 'topic links alone from the index page.') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE') + ] + + def __init__(self, yelpcheck): + super().__init__(yelpcheck) + self.guidelinks = {} + self.sitesubdirs = set() + + def _collect_links(self, node, sitedir): + pageid = node.get('id') + if pageid[0] != '/': + # id attrs in cache files already have sitedir prefixed + pageid = sitedir + pageid + else: + sitedir = pageid[:pageid.rfind('/')+1] + self.guidelinks.setdefault(pageid, set()) + # For the purposes of finding orphans, we'll just pretend that + # all links to or from sections are just to or from pages. 
+ for el in node.xpath('//mal:info/mal:link[@type="guide"]', + namespaces=NAMESPACES): + xref = el.get('xref') + if xref is None or xref == '': + continue + if xref[0] == '#': + continue + if '#' in xref: + xref = xref[:xref.find('#')] + if sitedir is not None and sitedir != '': + if xref[0] != '/': + xref = sitedir + xref + self.guidelinks[pageid].add(xref) + for el in node.xpath('//mal:info/mal:link[@type="topic"]', + namespaces=NAMESPACES): + xref = el.get('xref') + if xref is None or xref == '': + continue + if xref[0] == '#': + continue + if '#' in xref: + xref = xref[:xref.find('#')] + if sitedir is not None and sitedir != '': + if xref[0] != '/': + xref = sitedir + xref + self.guidelinks.setdefault(xref, set()) + self.guidelinks[xref].add(pageid) + for el in node.xpath('//mal:links[@type="site-subdirs" or @type="site:subdirs"]', + namespaces=NAMESPACES): + self.sitesubdirs.add(pageid) + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + retcode = 0 + + cachefile = self.get_option_str('cache') + if cachefile is not None: + xml = self.get_xml(InputFile(os.getcwd(), cachefile)) + for page in xml.getroot(): + if page.tag == '{' + NAMESPACES['mal'] + '}page': + pageid = page.get('id') + if pageid is None or pageid == '': + continue + self._collect_links(page, page.get('{http://projectmallard.org/site/1.0/}dir', '')) + + pageids = set() + for infile in self.iter_files(): + xml = self.get_xml(infile) + pageid = xml.getroot().get('id') + if pageid is None: + continue + pageids.add(infile.sitedir + pageid) + self._collect_links(xml.getroot(), infile.sitedir) + + siteupdirs = {} + for pageid in self.sitesubdirs: + dirname = pageid[:pageid.rfind('/')+1] + for subid in self.guidelinks: + if subid.startswith(dirname): + if subid.endswith('/index'): + mid = subid[len(dirname):-6] + if mid != '' and '/' not in mid: + siteupdirs[subid] = pageid + + if self.get_option_bool('site'): + okpages = 
set(['/index']) + else: + okpages = set(['index']) + for pageid in sorted(pageids): + if pageid in okpages: + isok = True + else: + isok = False + guides = [g for g in self.guidelinks[pageid]] + if pageid in siteupdirs: + updir = siteupdirs[pageid] + if updir not in guides: + guides.append(updir) + cur = 0 + while cur < len(guides): + if guides[cur] in okpages: + isok = True break - fi - done - if [ "x$output" = "x1" ]; then break; fi - done - fi - fi - if [ "x$check_except_defined" = "x1" ]; then - if [ "x$check_except" = "x" ]; then - # We treat an empty --except '' as excluding pages with no style - if [ "x$style" = "x" ]; then output=0; fi - else - for pstyle in "$style"; do - for sstyle in $(echo "$check_except" | sed -e 's/,/ /g'); do - if [ "$pstyle" = "$sstyle" ]; then - output=0 + if guides[cur] in self.guidelinks: + for guide in self.guidelinks[guides[cur]]: + if guide not in guides: + guides.append(guide) + cur += 1 + if isok: + okpages.add(pageid) + else: + print(pageid) + retcode = 1 + + return retcode + + +class ValidateChecker (Checker): + name = 'validate' + desc = 'Validate files against a DTD or RNG' + blurb = ('Validate FILES against the appropriate DTD or RNG.\n' + + 'For Mallard pages, perform automatic RNG merging\n' + + 'based on the version attribute.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('strict', None, None, 'Disallow unknown namespaces'), + ('allow', None, 'NS', 'Explicitly allow namespace NS in strict mode'), + ('jing', None, None, 'Use jing instead of xmllint for RNG validation') + ] + postblurb = 'NS may be a comma- and/or space-separated list, or specified\nmultiple times.' 
+ + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + retcode = 0 + + for infile in self.iter_files(): + xml = self.get_xml(infile) + format = get_format(xml.getroot()) + command = None + if format == 'mallard': + version = xml.getroot().get('version') + if version is None or version == '': + tag = xml.getroot().tag + if tag == '{' + NAMESPACES['mal'] + '}stack': + # 1.2 isn't final yet as of 2020-01-09. Stacks will + # likely be in 1.2, so we can assume at least that. + version = '1.2' + elif tag == '{' + NAMESPACES['cache'] + '}cache': + version = 'cache/1.0' + else: + version = '1.0' + self.create_tmpdir() + rng = os.path.join(self.tmpdir, + version.replace('/', '__').replace(' ', '__')) + if not os.path.exists(rng): + strict = 'true()' if self.get_option_bool('strict') else 'false()' + allow = self.get_option_list('allow') + if allow is None: + allow = '' + else: + allow = ' '.join(allow) + subprocess.call(['xsltproc', '-o', rng, + '--param', 'rng.strict', strict, + '--stringparam', 'rng.strict.allow', allow, + os.path.join(DATADIR, 'xslt', 'mal-rng.xsl'), + infile.absfile]) + if self.get_option_bool('jing'): + command = ['jing', '-i', rng, infile.filename] + else: + command = ['xmllint', '--noout', '--xinclude', '--noent', + '--relaxng', rng, infile.filename] + elif format == 'docbook4': + if xml.docinfo.doctype.startswith('<!DOCTYPE'): + command = ['xmllint', '--noout', '--xinclude', '--noent', + '--postvalid', infile.filename] + else: + command = ['xmllint', '--noout', '--xinclude', '--noent', + '--dtdvalid', + 'http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd', + infile.filename] + elif format == 'docbook5': + version = xml.getroot().get('version') + if version is None or version == '': + version = '5.0' + # Canonical URIs are http, but they 301 redirect to https. jing + # can handle https fine, but not the redirect. And jing doesn't + # look at catalogs. 
So just always feed jing an https URI. + rnghttp = 'http://docbook.org/xml/' + version + '/rng/docbook.rng' + rnghttps = 'https://docbook.org/xml/' + version + '/rng/docbook.rng' + if self.get_option_bool('jing'): + command = ['jing', '-i', rnghttps, infile.filename] + else: + # xmllint, on the other hand, does support catalogs. It also + # doesn't do the redirect, but it wouldn't matter if it did + # because it doesn't do https. So if the schema is available + # locally in the catalog, hand xmllint the http URI so it + # can use the local copy. Otherwise, we have to get curl + # involved to do https. + try: + catfile = subprocess.check_output(['xmlcatalog', + '/etc/xml/catalog', + rnghttp], + stderr=subprocess.DEVNULL, + text=True) + for catline in catfile.split('\n'): + if catline.startswith('file://'): + command = ['xmllint', '--noout', '--xinclude', '--noent', + '--relaxng', rnghttp, infile.filename] + except: + pass + if command is None: + self.create_tmpdir() + rngfile = os.path.join(self.tmpdir, 'docbook-' + version + '.rng') + if not os.path.exists(rngfile): + urllib.request.urlretrieve(rnghttps, rngfile) + command = ['xmllint', '--noout', '--xinclude', '--noent', + '--relaxng', rngfile, infile.filename] + if command is not None: + try: + subprocess.check_output(command, + cwd=infile.filepath, + stderr=subprocess.STDOUT, + text=True) + except subprocess.CalledProcessError as e: + retcode = e.returncode + print(e.output) + else: + retcode = 1 + + return retcode + + +class CommentsChecker (Checker): + name = 'comments' + desc = 'Print the editorial comments in a document' + blurb = ('Print the editorial comments in the files FILES, using the\n' + + 'comment element in Mallard and the remark element in DocBook.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site') + ] + + def main(self, args): + if self.parse_args(args) != 0: + 
return 1 + if 'help' in self.options: + self.print_help() + return 0 + + for infile in self.iter_files(): + xml = self.get_xml(infile) + format = get_format(xml.getroot()) + if format == 'mallard': + for el in xml.xpath('//mal:comment', namespaces=NAMESPACES): + thisid = xml.getroot().get('id') + par = el + while par is not None: + if par.tag == '{' + NAMESPACES['mal'] + '}section': + sectid = par.get('id') + if sectid is not None: + thisid = thisid + '#' + sectid + break + par = par.getparent() + print('Page: ' + infile.sitedir + thisid) + for ch in el.xpath('mal:cite[1]', namespaces=NAMESPACES): + name = _stringify(ch).strip() + href = ch.get('href') + if href is not None and href.startswith('mailto:'): + name = name + ' <' + href[7:] + '>' + print('From: ' + name) + date = ch.get('date') + if date is not None: + print('Date: ' + date) + print('') + for ch in el: + if isinstance(ch, lxml.etree._ProcessingInstruction): + continue + elif ch.tag == '{' + NAMESPACES['mal'] + '}cite': + continue + elif ch.tag in ('{' + NAMESPACES['mal'] + '}p', + '{' + NAMESPACES['mal'] + '}title'): + for s in _stringify(ch).strip().split('\n'): + print(' ' + s.strip()) + print('') + else: + name = lxml.etree.QName(ch).localname + print(' <' + name + '>...</' + name + '>\n') + elif format in ('docbook4', 'docbook5'): + if format == 'docbook4': + dbxpath = '//remark' + else: + dbxpath = '//db:remark' + for el in xml.xpath(dbxpath, namespaces=NAMESPACES): + thisid = infile.filename + par = el + while par is not None: + sectid = par.get('id') + if sectid is None: + sectid = par.get(XML_ID) + if sectid is not None: + thisid = thisid + '#' + sectid + break + par = par.getparent() + print('Page: ' + thisid) + flag = el.get('revisionflag') + if flag is not None: + print('Flag: ' + flag) + print('') + for s in _stringify(el).strip().split('\n'): + print(' ' + s.strip()) + print('') + + return 0 + + +class LicenseChecker (Checker): + name = 'license' + desc = 'Report the license of Mallard 
pages' + blurb = ('Report the license of the Mallard page files FILES. Each\n' + + 'matching page is reporting along with its license, reported\n' + + 'based on the href attribute of the license element. Common\n' + + 'licenses use a shortened identifier. Pages with multiple\n' + + 'licenses have the identifiers separated by spaces. Pages\n' + + 'with no license element report \'none\'. Licenses with no\n' + + 'href attribute are reported as \'unknown\'') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('only', None, 'LICENSES', 'Only show pages whose license is in LICENSES'), + ('except', None, 'LICENSES', 'Exclude pages whose license is in LICENSES'), + ('totals', None, None, 'Show total counts for each license') + ] + postblurb = 'LICENSES may be a comma- and/or space-separated list, or specified\nmultiple times.' + + def get_license(self, href): + if href is None: + return 'unknown' + elif (href.startswith('http://creativecommons.org/licenses/') or + href.startswith('https://creativecommons.org/licenses/')): + return 'cc-' + '-'.join([x for x in href.split('/') if x][3:]) + elif (href.startswith('http://www.gnu.org/licenses/') or + href.startswith('https://www.gnu.org/licenses/')): + return href.split('/')[-1].replace('.html', '') + else: + return 'unknown' + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + totals = {} + + for infile in self.iter_files(): + xml = self.get_xml(infile) + thisid = xml.getroot().get('id') or infile.filename + licenses = [] + for el in xml.xpath('/mal:page/mal:info/mal:license', + namespaces=NAMESPACES): + licenses.append(self.get_license(el.get('href'))) + if len(licenses) == 0: + licenses.append('none') + + only = self.get_option_list('only') + if only is not None: + skip = True + for lic in licenses: + if lic in only: + skip = False + 
if skip: + continue + cept = self.get_option_list('except') + if cept is not None: + skip = False + for lic in licenses: + if lic in cept: + skip = True + if skip: + continue + + if self.get_option_bool('totals'): + for lic in licenses: + totals.setdefault(lic, 0) + totals[lic] += 1 + else: + print(infile.sitedir + thisid + ': ' + ' '.join(licenses)) + + if self.get_option_bool('totals'): + for lic in sorted(totals): + print(lic + ': ' + str(totals[lic])) + + return 0 + + +class StatusChecker (Checker): + name = 'status' + desc = 'Report the status of Mallard pages' + blurb = ('Report the status of the Mallard page files FILES. Each\n' + + 'matching page is reporting along with its status.') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('version', None, 'VER', 'Select revisions with the version attribute VER'), + ('docversion', None, 'VER', 'Select revisions with the docversion attribute VER'), + ('pkgversion', None, 'VER', 'Select revisions with the pkgversion attribute VER'), + ('older', None, 'DATE', 'Only show pages older than DATE'), + ('newer', None, 'DATE', 'Only show pages newer than DATE'), + ('only', None, 'STATUSES', 'Only show pages whose status is in STATUSES'), + ('except', None, 'STATUSES', 'Exclude pages whose status is in STATUSES'), + ('totals', None, None, 'Show total counts for each status') + ] + postblurb = 'VER and STATUSES may be comma- and/or space-separated lists, or specified\nmultiple times.' 
+ + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + totals = {} + + checks = [] + ver = self.get_option_list('version') + if ver is not None: + checks.append(ver) + ver = self.get_option_list('docversion') + if ver is not None: + checks.append(['doc:' + v for v in ver]) + ver = self.get_option_list('pkgversion') + if ver is not None: + checks.append(['pkg:' + v for v in ver]) + + for infile in self.iter_files(): + xml = self.get_xml(infile) + pageid = xml.getroot().get('id') + bestrev = None + for rev in xml.xpath('/mal:page/mal:info/mal:revision', namespaces=NAMESPACES): + revversion = (rev.get('version') or '').split() + docversion = rev.get('docversion') + if docversion is not None: + revversion.append('doc:' + docversion) + pkgversion = rev.get('pkgversion') + if pkgversion is not None: + revversion.append('pkg:' + pkgversion) + revok = True + for check in checks: + checkok = False + for v in check: + if v in revversion: + checkok = True + break + if not checkok: + revok = False break - fi - done - #if [ "x$output" = "x0" ]; then break; fi - done - fi - fi - if [ "x$output" = "x1" ]; then - echo "$sdir$bname: $style" - fi -} - -yelp_style () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_style - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - "--only") - shift - check_only_defined=1 - check_only="$1" - shift - ;; - "--except") - shift - check_except_defined=1 - check_except="$1" - shift - ;; - "--totals") - check_totals="1" - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_style - exit 1 - fi - check_db= - check_page=yelp_style_page - if [ "x$check_totals" = "x1" ]; then - yelp_check_iter_args "$@" | \ - while read line; do - styles=$(echo "$line" | sed -e 's/^[^:]*://') - if [ "x$styles" = "x" ]; then - 
echo "" - else - for style in $styles; do - echo "$style" - done - fi - done | \ - sort | uniq -c | sed -e 's/^ *//' | awk '{print $2 ": " $1}' - else - yelp_check_iter_args "$@" | sort - fi -} - -yelp_status () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_status - exit 1 - fi - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - "--version") - shift - check_version="$1" - shift - ;; - "--docversion") - shift - check_docversion="$1" - shift - ;; - "--pkgversion") - shift - check_pkgversion="$1" - shift - ;; - "--older") - shift - check_older="$1" - shift - ;; - "--newer") - shift - check_newer="$1" - shift - ;; - "--only") - shift - check_only="$1" - shift - ;; - "--except") - shift - check_except="$1" - shift - ;; - "--totals") - check_totals="1" - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_status - exit 1 - fi - check_cache_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - if [ "x$check_site" = "x1" ]; then - yelp-build cache -s -o "$check_cache_file" "$@" - else - yelp-build cache -o "$check_cache_file" "$@" - fi - xsltproc \ - --stringparam version "$check_version" \ - --stringparam docversion "$check_docversion" \ - --stringparam pkgversion "$check_pkgversion" \ - --stringparam newer "$check_newer" \ - --stringparam older "$check_older" \ - --stringparam only "$check_only" \ - --stringparam except "$check_except" \ - --stringparam totals "$check_totals" \ - "$xsl_mal_status" "$check_cache_file" - rm "$check_cache_file" - return 0 -} - -yelp_validate_db () { - version=$(echo "$docbook_version" | xsltproc - "$1") - major=$(echo "$version" | cut -c1) - if [ "x$major" = "x5" ]; then - check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - # Canonical URIs are http, but they 301 redirect to https. jing can handle - # https fine, but not the redirect. And jing doesn't look at catalogs. 
So - # just always feed jing an https URI. - if [ "x$check_jing" = "x1" ]; then - rng_uri="https://docbook.org/xml/$version/rng/docbook.rng" - jing -i "$rng_uri" "$1" > "$check_out_file" 2>&1 - else - # xmllint, on the other hand, does support catalogs. It also doesn't - # do the redirect, but it wouldn't matter if it did because it doesn't - # do https. So if the schema is available locally in the catalog, hand - # xmllint the http URI so it can use the local copy. Otherwise, we have - # to get curl involved to do https. - rng_uri="http://docbook.org/xml/$version/rng/docbook.rng" - incat=$(xmlcatalog /etc/xml/catalog "$rng_uri" | grep -c '^file:') - if [ "x$incat" != "x0" ]; then - xmllint --noout --xinclude --noent --relaxng "$rng_uri" "$1" > "$check_out_file" 2>&1 - else - rng_uri="https://docbook.org/xml/$version/rng/docbook.rng" - check_rng_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - curl -sL -o "$check_rng_file" "$rng_uri" - xmllint --noout --xinclude --noent --relaxng "$check_rng_file" "$1" > "$check_out_file" 2>&1 - rm "$check_rng_file" - fi - fi - yelp_check_retval="$?" - cat "$check_out_file" | grep -v 'validates$' - rm "$check_out_file" - elif xmllint --nocdata "$1" | grep -q '<!DOCTYPE'; then - xmllint --noout --xinclude --noent --postvalid "$1" || yelp_check_retval="$?" - else - dtd_uri='http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd' - xmllint --noout --xinclude --noent --dtdvalid "$dtd_uri" "$1" || yelp_check_retval="$?" - fi -} - -yelp_validate_page () { - # Using temp files because pipes create subshells, making it really - # hard to return the right exit status in a portable way. 
- if [ "x$check_rng_dir" = "x" ]; then - check_rng_dir=`mktemp -d "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - fi - check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - check_rng_file=`( - echo '<xsl:stylesheet' - echo ' xmlns:cache="http://projectmallard.org/cache/1.0/"' - echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"' - echo ' version="1.0">' - echo '<xsl:output method="text"/>' - echo '<xsl:template match="/*">' - echo '<xsl:choose>' - echo '<xsl:when test="string(@version) != '"''"'">' - echo '<xsl:value-of select="@version"/>' - echo '</xsl:when>' - echo '<xsl:when test="/cache:cache">' - echo '<xsl:text>cache/1.0 1.0</xsl:text>' - echo '</xsl:when>' - echo '<xsl:otherwise>' - echo '<xsl:text>1.0</xsl:text>' - echo '</xsl:otherwise>' - echo '</xsl:choose>' - echo '</xsl:template>' - echo '</xsl:stylesheet>' - ) | xsltproc - "$1"` - check_rng_file=`urlencode "$check_rng_file" /`.rng - if [ ! -f "$check_rng_dir/$check_rng_file" ]; then - # If we've already made an RNG file for this version string, don't - # do it again. We've urlencoded the file name + slashes, because - # version strings often contain slashes. But xsltproc treats the - # -o option as a URL and urldecodes, so doubly urlencode, because - # we want the urlencoded string to be the on-disk name. - xsltproc -o "$check_rng_dir/"`urlencode "$check_rng_file"` \ - --param rng.strict "$check_strict" \ - --stringparam rng.strict.allow "$check_strict_allow" \ - "$xsl_mal_rng" "$1" - fi - if [ "x$check_jing" = "x1" ]; then - jing -i "$check_rng_dir/$check_rng_file" "$1" > "$check_out_file" 2>&1 - else - xmllint --noout --xinclude --noent --relaxng "$check_rng_dir/$check_rng_file" "$1" > "$check_out_file" 2>&1 - fi - ret="$?" 
- cat "$check_out_file" | grep -v 'validates$' - rm "$check_out_file" - return $ret; -} - -yelp_validate () { - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_validate - exit 1 - fi - check_strict="false()" - check_strict_allow="" - while [ "$#" != "0" ]; do - case "$1" in - "-s") - check_site="1" - check_site_root=$(pwd) - shift - ;; - "--strict") - check_strict="true()" - shift - ;; - "--allow") - shift - check_strict_allow="$check_strict_allow $1" - shift - ;; - "--jing") - check_jing="1" - shift - ;; - *) - break - ;; - esac - done - if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then - yelp_usage_validate - exit 1 - fi - check_db=yelp_validate_db - check_page=yelp_validate_page - yelp_check_iter_args "$@" - if [ "x$check_rng_dir" != "x" ]; then - rm "$check_rng_dir"/*.rng - rmdir "$check_rng_dir" - fi - exit $yelp_check_retval -} - -cmd="$1" -shift -case "x$cmd" in - "xcomments") - yelp_comments "$@" - ;; - "xhrefs") - yelp_hrefs "$@" - ;; - "xids") - yelp_ids "$@" - ;; - "xlinks") - yelp_links "$@" - ;; - "xorphans") - yelp_orphans "$@" - ;; - "xlicense") - yelp_license "$@" - ;; - "xmedia") - yelp_media "$@" - ;; - "xstatus") - yelp_status "$@" - ;; - "xstyle") - yelp_style "$@" - ;; - "xvalidate") - yelp_validate "$@" - ;; - *) - yelp_usage - ;; -esac + if revok: + if bestrev is None: + bestrev = rev + continue + bestdate = bestrev.get('date') + thisdate = rev.get('date') + if bestdate is None: + bestrev = rev + elif thisdate is None: + pass + elif thisdate >= bestdate: + bestrev = rev + if bestrev is not None: + status = bestrev.get('status') or 'none' + date = bestrev.get('date') or None + else: + status = 'none' + date = None + older = self.get_option_str('older') + if older is not None: + if date is None or date >= older: + continue + newer = self.get_option_str('newer') + if newer is not None: + if date is None or date <= newer: + continue + only = self.get_option_list('only') + if only is not None: + if status not 
in only: + continue + cept = self.get_option_list('except') + if cept is not None: + if status in cept: + continue + if self.get_option_bool('totals'): + totals.setdefault(status, 0) + totals[status] += 1 + else: + print(infile.sitedir + pageid + ': ' + status) + + if self.get_option_bool('totals'): + for st in sorted(totals): + print(st + ': ' + str(totals[st])) + + return 0 + + +class StyleChecker (Checker): + name = 'style' + desc = 'Report the style attribute of Mallard pages' + blurb = ('Report the page style attribute of the Mallard page files\n' + + 'FILES. Each matching page is reporting along with its status.') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('only', None, 'STYLES', 'Only show pages whose style is in STATUSES'), + ('except', None, 'STYLES', 'Exclude pages whose style is in STATUSES'), + ('totals', None, None, 'Show total counts for each style') + ] + postblurb = 'STYLES may be comma- and/or space-separated lists, or specified\nmultiple times.' 
+ + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + totals = {} + + for infile in self.iter_files(): + xml = self.get_xml(infile) + thisid = xml.getroot().get('id') + style = xml.getroot().get('style') + if style is None: + style = 'none' + styles = style.split() + # We'll set style to None if it doesn't meat the criteria + only = self.get_option_list('only') + if only is not None: + if len(only) == 0: + # We treat a blank --only as requesting pages with no style + if style != 'none': + style = None + else: + allow = False + for st in styles: + if st in only: + allow = True + break + if not allow: + style = None + cept = self.get_option_list('except') + if cept is not None: + for st in styles: + if st in cept: + style = None + break + if self.get_option_bool('totals'): + if style is not None: + for st in styles: + totals.setdefault(st, 0) + totals[st] += 1 + else: + if style is not None: + print(infile.sitedir + thisid + ': ' + style) + + if self.get_option_bool('totals'): + for st in sorted(totals): + print(st + ': ' + str(totals[st])) + + return 0 + + +class CustomChecker(Checker): + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site') + ] + + def __init__(self, name, yelpcheck): + super().__init__(yelpcheck) + self.name = name + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + + sect = 'check:' + self.name + if self.config is None or (sect not in self.config.sections()): + print('Unrecognized command: ' + self.name, file=sys.stderr) + return 1 + self.blurb = self.config.get(sect, 'blurb', fallback=None) + if self.blurb is not None: + self.blurb = '\n'.join(textwrap.wrap(self.blurb)) + + if 'help' in self.options: + self.print_help() + return 0 + + assertexpr = self.config.get(sect, 'assert', fallback=None) + if assertexpr is not None: 
+ return self.run_assert(assertexpr) + + print('No action found for command: ' + self.name, file=sys.stderr) + return 1 + + def run_assert(self, assertexpr): + sect = 'check:' + self.name + selectexpr = self.config.get(sect, 'select', fallback='/') + message = self.config.get(sect, 'message', fallback='Assertion failed') + self.xinclude = self.config.get(sect, 'xinclude', fallback='true') != 'false' + + namespaces = {} + if 'namespaces' in self.config.sections(): + for ns in self.config.options('namespaces'): + namespaces[ns] = self.config.get('namespaces', ns) + + for infile in self.iter_files(): + xml = self.get_xml(infile) + thisid = xml.getroot().get('id') or infile.filename + # FIXME check these expressions and give better errors + for root in xml.xpath(selectexpr, namespaces=namespaces): + if not bool(root.xpath(assertexpr, namespaces=namespaces)): + print(infile.sitedir + thisid + ': ' + message) + # FIXME are these comments outdated? remove? + # check if self.config has section check:self.name + # check if section has select, assert, message + + +class YelpCheck: + def __init__(self): + pass + + def main(self): + if len(sys.argv) < 2: + self.print_usage() + return 1 + + checker = None + for cls in Checker.__subclasses__(): + if sys.argv[1] == cls.name: + checker = cls(self) + + if checker is None: + checker = CustomChecker(sys.argv[1], self) + + return checker.main(sys.argv[2:]) + + def print_usage(self): + print('Usage: yelp-check <COMMAND> [OPTIONS] [FILES]') + namelen = 2 + checks = [] + reports = [] + others = [] + for cls in sorted(Checker.__subclasses__(), key=(lambda cls: cls.name or '')): + if cls is CustomChecker: + continue + namelen = max(namelen, len(cls.name) + 2) + if cls in (HrefsChecker, IdsChecker, LinksChecker, + MediaChecker, OrphansChecker, ValidateChecker): + checks.append(cls) + elif cls in (CommentsChecker, LicenseChecker, StatusChecker, + StyleChecker): + reports.append(cls) + else: + others.append(cls) + if len(checks) > 0: + 
print('\nCheck commands:') + for cls in checks: + print(' ' + cls.name.ljust(namelen) + cls.desc) + if len(reports) > 0: + print('\nReport commands:') + for cls in reports: + print(' ' + cls.name.ljust(namelen) + cls.desc) + if len(others) > 0: + print('\nOther commands:') + for cls in others: + print(' ' + cls.name.ljust(namelen) + cls.desc) + config = configparser.ConfigParser() + try: + config.read('.yelp-tools.cfg') + except: + return + customs = [] + for sect in config.sections(): + if sect.startswith('check:'): + name = sect[6:] + skip = False + for cls in Checker.__subclasses__(): + if name == cls.name: + skip = True + break + if skip: + continue + if config.get(sect, 'assert', fallback=None) == None: + continue + desc = config.get(sect, 'desc', fallback='') + namelen = max(namelen, len(name) + 2) + customs.append((name, desc)) + if len(customs) > 0: + print('\nCustom commands:') + for name, desc in customs: + print(' ' + name.ljust(namelen) + desc) + + +if __name__ == '__main__': + try: + sys.exit(YelpCheck().main()) + except KeyboardInterrupt: + sys.exit(1) diff --git a/tools/yelp-check.py b/tools/yelp-check.py deleted file mode 100644 index a3eb486..0000000 --- a/tools/yelp-check.py +++ /dev/null @@ -1,1245 +0,0 @@ -#!/bin/python3 -# -# yelp-check -# Copyright (C) 2011-2020 Shaun McCance <shaunm@gnome.org> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - -import configparser -import lxml.etree -import os -import sys -import urllib.request -import shutil -import subprocess -import tempfile -import textwrap - -# FIXME: don't hardcode this -DATADIR = '/usr/share/yelp-tools' - -XML_ID = '{http://www.w3.org/XML/1998/namespace}id' -NAMESPACES = { - 'mal': 'http://projectmallard.org/1.0/', - 'cache': 'http://projectmallard.org/cache/1.0/', - 'db': 'http://docbook.org/ns/docbook', - 'e': 'http://projectmallard.org/experimental/', - 'ui': 'http://projectmallard.org/ui/1.0/', - 'uix': 'http://projectmallard.org/experimental/ui/', - 'xlink': 'http://www.w3.org/1999/xlink' - } - -def _stringify(el): - ret = el.text or '' - for ch in el: - ret = ret + _stringify(ch) - if el.tail is not None: - ret = ret + el.tail - return ret - -def get_format(node): - ns = lxml.etree.QName(node).namespace - if ns in (NAMESPACES['mal'], NAMESPACES['cache']): - return 'mallard' - elif ns == NAMESPACES['db']: - return 'docbook5' - elif ns is None: - # For now, just assume no ns means docbook4 - return 'docbook4' - else: - return None - -class InputFile: - def __init__(self, filepath, filename, sitedir=None): - self.filepath = filepath - self.filename = filename - self.absfile = os.path.join(filepath, filename) - self.absdir = os.path.dirname(self.absfile) - self.sitedir = sitedir or '' - self.sitefilename = self.sitedir + self.filename - - -class Checker: - name = None - desc = None - blurb = None - formats = [] - arguments = [] - postblurb = None - xinclude = True - config = None - - def __init__(self, yelpcheck): - self.yelpcheck = yelpcheck - self.options = {} - self.fileargs = [] - self.tmpdir = None - - def __del__(self): - if self.tmpdir is not None: - shutil.rmtree(self.tmpdir) - self.tmpdir = None - - def parse_args(self, 
args): - while len(args) > 0: - argdef = None - if args[0].startswith('--'): - for arg_ in self.arguments: - if args[0] == '--' + arg_[0]: - argdef = arg_ - break - if argdef is None: - self.print_help() - return 1 - elif args[0].startswith('-'): - for arg_ in self.arguments: - if args[0] == arg_[1]: - argdef = arg_ - break - if argdef is None: - self.print_help() - return 1 - if argdef is not None: - takesarg = (argdef[2] is not None) - if takesarg: - if len(args) < 2: - self.print_help() - return 1 - self.options.setdefault(argdef[0], []) - self.options[argdef[0]].append(args[1]) - args = args[2:] - else: - self.options[argdef[0]] = True - args = args[1:] - else: - self.fileargs.append(args[0]) - args = args[1:] - cfgfile = None - if len(self.fileargs) > 0: - cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg') - if not os.path.exists(cfgfile): - cfgfile = None - if cfgfile is None: - cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg') - if os.path.exists(cfgfile): - self.config = configparser.ConfigParser() - try: - self.config.read(cfgfile) - except Exception as e: - print(e, file=sys.stderr) - sys.exit(1) - return 0 - - def get_option_bool(self, arg): - if arg in self.options: - return self.options[arg] == True - if self.config is not None: - val = self.config.get('check:' + self.name, arg, fallback=None) - if val is not None: - return (val == 'true') - val = self.config.get('check', arg, fallback=None) - if val is not None: - return (val == 'true') - val = self.config.get('default', arg, fallback=None) - if val is not None: - return (val == 'true') - return False - - def get_option_str(self, arg): - if arg in self.options: - if isinstance(self.options[arg], list): - return self.options[arg][-1] - if self.config is not None: - val = self.config.get('check:' + self.name, arg, fallback=None) - if val is not None: - return val - val = self.config.get('check', arg, fallback=None) - if val is not None: - return val - val = 
self.config.get('default', arg, fallback=None) - if val is not None: - return val - return None - - def get_option_list(self, arg): - if arg in self.options: - if isinstance(self.options[arg], list): - ret = [] - for opt in self.options[arg]: - ret.extend(opt.replace(',', ' ').split()) - return ret - if self.config is not None: - val = self.config.get('check:' + self.name, arg, fallback=None) - if val is not None: - return val.replace(',', ' ').split() - val = self.config.get('check', arg, fallback=None) - if val is not None: - return val.replace(',', ' ').split() - val = self.config.get('default', arg, fallback=None) - if val is not None: - return val.replace(',', ' ').split() - return None - - def iter_files(self, sitedir=None): - issite = self.get_option_bool('site') - if len(self.fileargs) == 0: - self.fileargs.append('.') - for filearg in self.fileargs: - if os.path.isdir(filearg): - if issite: - for infile in self.iter_site(filearg, '/'): - yield infile - else: - for fname in os.listdir(filearg): - if fname.endswith('.page'): - yield InputFile(filearg, fname) - else: - if issite: - # FIXME: should do some normalization here, I guess. 
class HrefsChecker (Checker):
    """Report external links whose target cannot be fetched."""

    name = 'hrefs'
    desc = 'Find broken external links in a document'
    blurb = ('Find broken href links in FILES in a Mallard document, or\n' +
             'broken ulink or XLink links in FILES in a DocBook document.')
    formats = ['docbook4', 'docbook5', 'mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
        ('allow', None, 'URL', 'Allow URL or list of URLs without checking')
    ]
    postblurb = 'URL may be a comma- and/or space-separated list, or specified\nmultiple times.'

    def main(self, args):
        """Fetch every external link once and print the ones that fail.

        The link is taken from the ``href`` attribute, else the XLink
        ``href`` attribute, else the ``url`` attribute.  ``mailto:`` links
        and URLs given with ``--allow`` are not fetched.  Each failing
        link is printed as ``<file>: <url>``.

        Returns 1 if the arguments are invalid or any link is broken,
        otherwise 0.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        # URL -> whether it is considered reachable.  This doubles as a
        # cache so that each URL is requested at most once.
        # safelisting URLs that we use as identifiers
        hrefs = {
            'http://creativecommons.org/licenses/by-sa/3.0/': True,
            'https://creativecommons.org/licenses/by-sa/3.0/': True,
            'http://creativecommons.org/licenses/by-sa/3.0/us/': True,
            'https://creativecommons.org/licenses/by-sa/3.0/us/': True
        }
        allow = self.get_option_list('allow')
        if allow is not None:
            for url in allow:
                hrefs[url] = True
        retcode = 0

        # lxml spells namespaced attributes as '{namespace-uri}local'.
        # The URI must be the full XLink namespace, scheme included, or
        # the lookup never matches.
        xlink_href = '{' + NAMESPACES['xlink'] + '}href'

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            for el in xml.xpath('//*[@href | @xlink:href | self::ulink/@url]',
                                namespaces=NAMESPACES):
                href = el.get('href', None)
                if href is None:
                    href = el.get(xlink_href)
                if href is None:
                    href = el.get('url')
                if href is None:
                    continue
                if href.startswith('mailto:'):
                    continue
                if href not in hrefs:
                    try:
                        # Close the response as soon as the status is known.
                        with urllib.request.urlopen(href) as resp:
                            hrefs[href] = (resp.status == 200)
                    except Exception:
                        # Any failure at all (bad scheme, DNS, HTTP error,
                        # timeout) means the link is reported as broken.
                        hrefs[href] = False
                if not hrefs[href]:
                    retcode = 1
                    print(infile.sitefilename + ': ' + href)

        return retcode
self.get_xml(infile) - isok = False - pageid = None - if infile.filename.endswith('.page'): - try: - pageid = xml.getroot().get('id') - isok = (pageid == os.path.basename(infile.filename)[:-5]) - except: - isok = False - if not isok: - retcode = 1 - print(infile.sitefilename + ': ' + (pageid or '')) - - return retcode - - -class LinksChecker (Checker): - name = 'links' - desc = 'Find broken xref or linkend links in a document' - blurb = ('Find broken xref links in FILES in a Mallard document,\n' + - 'or broken linkend links in FILES in a DocBook document.') - formats = ['docbook4', 'docbook5', 'mallard'] - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), - ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), - ('ignore', '-i', None, 'Ignore xrefs where href is present') - ] - - def __init__(self, yelpcheck): - super().__init__(yelpcheck) - self.idstoxrefs = {} - self.idstolinkends = {} - - def _accumulate_mal(self, node, pageid, sectid, xrefs, sitedir=None): - thisid = node.get('id') - if thisid is not None: - if node.tag == '{' + NAMESPACES['mal'] + '}page': - pageid = thisid - else: - sectid = thisid - curid = pageid - ignore = self.get_option_bool('ignore') - if curid is not None: - if sectid is not None: - # id attrs in cache files are already fully formed - if '#' in sectid: - curid = sectid - else: - curid = curid + '#' + sectid - if sitedir is not None: - # id attrs in cache files already have sitedir prefixed - if curid[0] != '/': - curid = sitedir + curid - self.idstoxrefs.setdefault(curid, []) - if xrefs: - xref = node.get('xref') - if xref is not None: - if not (ignore and (node.get('href') is not None)): - self.idstoxrefs[curid].append(xref) - for child in node: - self._accumulate_mal(child, pageid, sectid, xrefs, sitedir) - - def _accumulate_db(self, node, nodeid): - thisid = node.get('id') - if thisid is None: - thisid = node.get(XML_ID) - if thisid is 
class MediaChecker (Checker):
    """Report references to media files that do not exist on disk."""

    name = 'media'
    desc = 'Find broken references to media files'
    blurb = ('Find broken references to media files. In Mallard, this\n' +
             'checks media and thumb elements. In DocBook, this checks\n' +
             'audiodata, imagedata, and videodata elements.')
    formats = ['docbook4', 'docbook5', 'mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
    ]

    def main(self, args):
        """Print every media reference whose file is missing.

        References are resolved relative to the directory of the file
        that contains them and printed as ``<file>: <reference>``.

        Returns 1 if the arguments are invalid or any reference is broken,
        otherwise 0.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        retcode = 0

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            fmt = get_format(xml.getroot())
            srcs = []
            if fmt == 'mallard':
                srcs = [el.get('src') for el in
                        xml.xpath('//mal:media[@src] | //uix:thumb | //ui:thumb | //e:mouseover',
                                  namespaces=NAMESPACES)]
            elif fmt == 'docbook5':
                # FIXME: do we care about entityref?
                srcs = [el.get('fileref') for el in
                        xml.xpath('//db:audiodata | //db:imagedata | //db:videodata',
                                  namespaces=NAMESPACES)]
            elif fmt == 'docbook4':
                srcs = [el.get('fileref') for el in
                        xml.xpath('//audiodata | //imagedata | //videodata')]
            for src in srcs:
                # Only media[@src] is filtered by the XPath; the other
                # elements may lack the attribute.  There is nothing to
                # check then, and os.path.join() would raise on None.
                if src is None:
                    continue
                fsrc = os.path.join(infile.absdir, src)
                if not os.path.exists(fsrc):
                    print(infile.sitefilename + ': ' + src)
                    retcode = 1

        return retcode
+ pageid - else: - sitedir = pageid[:pageid.rfind('/')+1] - self.guidelinks.setdefault(pageid, set()) - # For the purposes of finding orphans, we'll just pretend that - # all links to or from sections are just to or from pages. - for el in node.xpath('//mal:info/mal:link[@type="guide"]', - namespaces=NAMESPACES): - xref = el.get('xref') - if xref is None or xref == '': - continue - if xref[0] == '#': - continue - if '#' in xref: - xref = xref[:xref.find('#')] - if sitedir is not None and sitedir != '': - if xref[0] != '/': - xref = sitedir + xref - self.guidelinks[pageid].add(xref) - for el in node.xpath('//mal:info/mal:link[@type="topic"]', - namespaces=NAMESPACES): - xref = el.get('xref') - if xref is None or xref == '': - continue - if xref[0] == '#': - continue - if '#' in xref: - xref = xref[:xref.find('#')] - if sitedir is not None and sitedir != '': - if xref[0] != '/': - xref = sitedir + xref - self.guidelinks.setdefault(xref, set()) - self.guidelinks[xref].add(pageid) - for el in node.xpath('//mal:links[@type="site-subdirs" or @type="site:subdirs"]', - namespaces=NAMESPACES): - self.sitesubdirs.add(pageid) - - def main(self, args): - if self.parse_args(args) != 0: - return 1 - if 'help' in self.options: - self.print_help() - return 0 - - retcode = 0 - - cachefile = self.get_option_str('cache') - if cachefile is not None: - xml = self.get_xml(InputFile(os.getcwd(), cachefile)) - for page in xml.getroot(): - if page.tag == '{' + NAMESPACES['mal'] + '}page': - pageid = page.get('id') - if pageid is None or pageid == '': - continue - self._collect_links(page, page.get('{http://projectmallard.org/site/1.0/}dir', '')) - - pageids = set() - for infile in self.iter_files(): - xml = self.get_xml(infile) - pageid = xml.getroot().get('id') - if pageid is None: - continue - pageids.add(infile.sitedir + pageid) - self._collect_links(xml.getroot(), infile.sitedir) - - siteupdirs = {} - for pageid in self.sitesubdirs: - dirname = pageid[:pageid.rfind('/')+1] - for subid 
in self.guidelinks: - if subid.startswith(dirname): - if subid.endswith('/index'): - mid = subid[len(dirname):-6] - if mid != '' and '/' not in mid: - siteupdirs[subid] = pageid - - if self.get_option_bool('site'): - okpages = set(['/index']) - else: - okpages = set(['index']) - for pageid in sorted(pageids): - if pageid in okpages: - isok = True - else: - isok = False - guides = [g for g in self.guidelinks[pageid]] - if pageid in siteupdirs: - updir = siteupdirs[pageid] - if updir not in guides: - guides.append(updir) - cur = 0 - while cur < len(guides): - if guides[cur] in okpages: - isok = True - break - if guides[cur] in self.guidelinks: - for guide in self.guidelinks[guides[cur]]: - if guide not in guides: - guides.append(guide) - cur += 1 - if isok: - okpages.add(pageid) - else: - print(pageid) - retcode = 1 - - return retcode - - -class ValidateChecker (Checker): - name = 'validate' - desc = 'Validate files against a DTD or RNG' - blurb = ('Validate FILES against the appropriate DTD or RNG.\n' + - 'For Mallard pages, perform automatic RNG merging\n' + - 'based on the version attribute.') - formats = ['docbook4', 'docbook5', 'mallard'] - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), - ('strict', None, None, 'Disallow unknown namespaces'), - ('allow', None, 'NS', 'Explicitly allow namespace NS in strict mode'), - ('jing', None, None, 'Use jing instead of xmllint for RNG validation') - ] - postblurb = 'NS may be a comma- and/or space-separated list, or specified\nmultiple times.' 
- - def main(self, args): - if self.parse_args(args) != 0: - return 1 - if 'help' in self.options: - self.print_help() - return 0 - - retcode = 0 - - for infile in self.iter_files(): - xml = self.get_xml(infile) - format = get_format(xml.getroot()) - command = None - if format == 'mallard': - version = xml.getroot().get('version') - if version is None or version == '': - tag = xml.getroot().tag - if tag == '{' + NAMESPACES['mal'] + '}stack': - # 1.2 isn't final yet as of 2020-01-09. Stacks will - # likely be in 1.2, so we can assume at least that. - version = '1.2' - elif tag == '{' + NAMESPACES['cache'] + '}cache': - version = 'cache/1.0' - else: - version = '1.0' - self.create_tmpdir() - rng = os.path.join(self.tmpdir, - version.replace('/', '__').replace(' ', '__')) - if not os.path.exists(rng): - strict = 'true()' if self.get_option_bool('strict') else 'false()' - allow = self.get_option_list('allow') - if allow is None: - allow = '' - else: - allow = ' '.join(allow) - subprocess.call(['xsltproc', '-o', rng, - '--param', 'rng.strict', strict, - '--stringparam', 'rng.strict.allow', allow, - os.path.join(DATADIR, 'xslt', 'mal-rng.xsl'), - infile.absfile]) - if self.get_option_bool('jing'): - command = ['jing', '-i', rng, infile.filename] - else: - command = ['xmllint', '--noout', '--xinclude', '--noent', - '--relaxng', rng, infile.filename] - elif format == 'docbook4': - if xml.docinfo.doctype.startswith('<!DOCTYPE'): - command = ['xmllint', '--noout', '--xinclude', '--noent', - '--postvalid', infile.filename] - else: - command = ['xmllint', '--noout', '--xinclude', '--noent', - '--dtdvalid', - 'http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd', - infile.filename] - elif format == 'docbook5': - version = xml.getroot().get('version') - if version is None or version == '': - version = '5.0' - # Canonical URIs are http, but they 301 redirect to https. jing - # can handle https fine, but not the redirect. And jing doesn't - # look at catalogs. 
So just always feed jing an https URI. - rnghttp = 'http://docbook.org/xml/' + version + '/rng/docbook.rng' - rnghttps = 'https://docbook.org/xml/' + version + '/rng/docbook.rng' - if self.get_option_bool('jing'): - command = ['jing', '-i', rnghttps, infile.filename] - else: - # xmllint, on the other hand, does support catalogs. It also - # doesn't do the redirect, but it wouldn't matter if it did - # because it doesn't do https. So if the schema is available - # locally in the catalog, hand xmllint the http URI so it - # can use the local copy. Otherwise, we have to get curl - # involved to do https. - try: - catfile = subprocess.check_output(['xmlcatalog', - '/etc/xml/catalog', - rnghttp], - stderr=subprocess.DEVNULL, - text=True) - for catline in catfile.split('\n'): - if catline.startswith('file://'): - command = ['xmllint', '--noout', '--xinclude', '--noent', - '--relaxng', rnghttp, infile.filename] - except: - pass - if command is None: - self.create_tmpdir() - rngfile = os.path.join(self.tmpdir, 'docbook-' + version + '.rng') - if not os.path.exists(rngfile): - urllib.request.urlretrieve(rnghttps, rngfile) - command = ['xmllint', '--noout', '--xinclude', '--noent', - '--relaxng', rngfile, infile.filename] - if command is not None: - try: - subprocess.check_output(command, - cwd=infile.filepath, - stderr=subprocess.STDOUT, - text=True) - except subprocess.CalledProcessError as e: - retcode = e.returncode - print(e.output) - else: - retcode = 1 - - return retcode - - -class CommentsChecker (Checker): - name = 'comments' - desc = 'Print the editorial comments in a document' - blurb = ('Print the editorial comments in the files FILES, using the\n' + - 'comment element in Mallard and the remark element in DocBook.') - formats = ['docbook4', 'docbook5', 'mallard'] - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('site', '-s', None, 'Treat pages as belonging to a Mallard site') - ] - - def main(self, args): - if self.parse_args(args) != 0: - 
return 1 - if 'help' in self.options: - self.print_help() - return 0 - - for infile in self.iter_files(): - xml = self.get_xml(infile) - format = get_format(xml.getroot()) - if format == 'mallard': - for el in xml.xpath('//mal:comment', namespaces=NAMESPACES): - thisid = xml.getroot().get('id') - par = el - while par is not None: - if par.tag == '{' + NAMESPACES['mal'] + '}section': - sectid = par.get('id') - if sectid is not None: - thisid = thisid + '#' + sectid - break - par = par.getparent() - print('Page: ' + infile.sitedir + thisid) - for ch in el.xpath('mal:cite[1]', namespaces=NAMESPACES): - name = _stringify(ch).strip() - href = ch.get('href') - if href is not None and href.startswith('mailto:'): - name = name + ' <' + href[7:] + '>' - print('From: ' + name) - date = ch.get('date') - if date is not None: - print('Date: ' + date) - print('') - for ch in el: - if isinstance(ch, lxml.etree._ProcessingInstruction): - continue - elif ch.tag == '{' + NAMESPACES['mal'] + '}cite': - continue - elif ch.tag in ('{' + NAMESPACES['mal'] + '}p', - '{' + NAMESPACES['mal'] + '}title'): - for s in _stringify(ch).strip().split('\n'): - print(' ' + s.strip()) - print('') - else: - name = lxml.etree.QName(ch).localname - print(' <' + name + '>...</' + name + '>\n') - elif format in ('docbook4', 'docbook5'): - if format == 'docbook4': - dbxpath = '//remark' - else: - dbxpath = '//db:remark' - for el in xml.xpath(dbxpath, namespaces=NAMESPACES): - thisid = infile.filename - par = el - while par is not None: - sectid = par.get('id') - if sectid is None: - sectid = par.get(XML_ID) - if sectid is not None: - thisid = thisid + '#' + sectid - break - par = par.getparent() - print('Page: ' + thisid) - flag = el.get('revisionflag') - if flag is not None: - print('Flag: ' + flag) - print('') - for s in _stringify(el).strip().split('\n'): - print(' ' + s.strip()) - print('') - - return 0 - - -class LicenseChecker (Checker): - name = 'license' - desc = 'Report the license of Mallard 
def get_license(self, href):
    """Return a short identifier for the license URL ``href``.

    This is a method of LicenseChecker; it does not read instance state.

    Creative Commons URLs become ``cc-`` followed by the path segments
    after ``/licenses/`` joined with ``-`` (for example ``cc-by-sa-3.0``).
    GNU URLs become their last path segment without a trailing ``.html``
    (for example ``gpl-3.0``).  A missing href, any other URL, or a URL
    with nothing after ``/licenses/`` gives ``unknown``.
    """
    if href is None:
        return 'unknown'
    # The first three non-empty segments are the scheme, the host and
    # 'licenses'.  Dropping empty segments makes a trailing slash harmless.
    tail = [x for x in href.split('/') if x][3:]
    if href.startswith(('http://creativecommons.org/licenses/',
                        'https://creativecommons.org/licenses/')):
        if not tail:
            return 'unknown'
        return 'cc-' + '-'.join(tail)
    if href.startswith(('http://www.gnu.org/licenses/',
                        'https://www.gnu.org/licenses/')):
        if not tail:
            return 'unknown'
        ident = tail[-1]
        # Strip the extension only as a suffix, never from mid-name.
        if ident.endswith('.html'):
            ident = ident[:-len('.html')]
        return ident or 'unknown'
    return 'unknown'
class StatusChecker (Checker):
    """Report the status recorded in each page's revision elements."""

    name = 'status'
    desc = 'Report the status of Mallard pages'
    blurb = ('Report the status of the Mallard page files FILES. Each\n' +
             'matching page is reporting along with its status.')
    formats = ['mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
        ('version', None, 'VER', 'Select revisions with the version attribute VER'),
        ('docversion', None, 'VER', 'Select revisions with the docversion attribute VER'),
        ('pkgversion', None, 'VER', 'Select revisions with the pkgversion attribute VER'),
        ('older', None, 'DATE', 'Only show pages older than DATE'),
        ('newer', None, 'DATE', 'Only show pages newer than DATE'),
        ('only', None, 'STATUSES', 'Only show pages whose status is in STATUSES'),
        ('except', None, 'STATUSES', 'Exclude pages whose status is in STATUSES'),
        ('totals', None, None, 'Show total counts for each status')
    ]
    postblurb = 'VER and STATUSES may be comma- and/or space-separated lists, or specified\nmultiple times.'

    def main(self, args):
        """Print the status of each page, or per-status totals.

        For every page the revision that satisfies all of the version
        options and has the latest date is chosen.  Its ``status`` is
        reported, or ``none`` when no revision qualifies or it has no
        status.  Pages are then filtered by ``--older``, ``--newer``,
        ``--only`` and ``--except``.  Dates are compared as strings.

        Returns 1 if the arguments could not be parsed, otherwise 0.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        totals = {}

        # Each entry is a list of acceptable version tokens.  A revision
        # must match at least one token from every entry.
        checks = []
        ver = self.get_option_list('version')
        if ver is not None:
            checks.append(ver)
        ver = self.get_option_list('docversion')
        if ver is not None:
            checks.append(['doc:' + v for v in ver])
        ver = self.get_option_list('pkgversion')
        if ver is not None:
            checks.append(['pkg:' + v for v in ver])

        # The filters are the same for every file, so read them once.
        older = self.get_option_str('older')
        newer = self.get_option_str('newer')
        only = self.get_option_list('only')
        cept = self.get_option_list('except')
        show_totals = self.get_option_bool('totals')

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            # Fall back to the file name so that a page without an id is
            # still reported instead of raising TypeError on str + None.
            pageid = xml.getroot().get('id') or infile.filename
            bestrev = None
            for rev in xml.xpath('/mal:page/mal:info/mal:revision', namespaces=NAMESPACES):
                revversion = (rev.get('version') or '').split()
                docversion = rev.get('docversion')
                if docversion is not None:
                    revversion.append('doc:' + docversion)
                pkgversion = rev.get('pkgversion')
                if pkgversion is not None:
                    revversion.append('pkg:' + pkgversion)
                if not all(any(v in revversion for v in check) for check in checks):
                    continue
                if bestrev is None:
                    bestrev = rev
                    continue
                bestdate = bestrev.get('date')
                thisdate = rev.get('date')
                # An undated best revision is always replaced; a dated one
                # is replaced only by an equal or later dated revision.
                if bestdate is None or (thisdate is not None and thisdate >= bestdate):
                    bestrev = rev
            if bestrev is not None:
                status = bestrev.get('status') or 'none'
                date = bestrev.get('date') or None
            else:
                status = 'none'
                date = None

            # Undated pages never satisfy a date filter.
            if older is not None and (date is None or date >= older):
                continue
            if newer is not None and (date is None or date <= newer):
                continue
            if only is not None and status not in only:
                continue
            if cept is not None and status in cept:
                continue

            if show_totals:
                totals[status] = totals.get(status, 0) + 1
            else:
                print(infile.sitedir + pageid + ': ' + status)

        if show_totals:
            for st in sorted(totals):
                print(st + ': ' + str(totals[st]))

        return 0
+ str(totals[st])) - - return 0 - - -class CustomChecker(Checker): - formats = ['docbook4', 'docbook5', 'mallard'] - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('site', '-s', None, 'Treat pages as belonging to a Mallard site') - ] - - def __init__(self, name, yelpcheck): - super().__init__(yelpcheck) - self.name = name - - def main(self, args): - if self.parse_args(args) != 0: - return 1 - - sect = 'check:' + self.name - if sect not in self.config.sections(): - print('Unrecognized command: ' + self.name, file=sys.stderr) - return 1 - self.blurb = self.config.get(sect, 'blurb', fallback=None) - if self.blurb is not None: - self.blurb = '\n'.join(textwrap.wrap(self.blurb)) - - if 'help' in self.options: - self.print_help() - return 0 - - assertexpr = self.config.get(sect, 'assert', fallback=None) - if assertexpr is not None: - return self.run_assert(assertexpr) - - print('No action found for command: ' + self.name, file=sys.stderr) - return 1 - - def run_assert(self, assertexpr): - sect = 'check:' + self.name - selectexpr = self.config.get(sect, 'select', fallback='/') - message = self.config.get(sect, 'message', fallback='Assertion failed') - self.xinclude = self.config.get(sect, 'xinclude', fallback='true') != 'false' - - namespaces = {} - if 'namespaces' in self.config.sections(): - for ns in self.config.options('namespaces'): - namespaces[ns] = self.config.get('namespaces', ns) - - for infile in self.iter_files(): - xml = self.get_xml(infile) - thisid = xml.getroot().get('id') or infile.filename - for root in xml.xpath(selectexpr, namespaces=namespaces): - if not bool(root.xpath(assertexpr, namespaces=namespaces)): - print(infile.sitedir + thisid + ': ' + message) - # check if self.config has section check:self.name - # check if section has select, assert, message - - -class YelpCheck: - def __init__(self): - pass - - def main(self): - if len(sys.argv) < 2: - self.print_usage() - return 1 - - checker = None - for cls in 
Checker.__subclasses__(): - if sys.argv[1] == cls.name: - checker = cls(self) - - if checker is None: - checker = CustomChecker(sys.argv[1], self) - - return checker.main(sys.argv[2:]) - - def print_usage(self): - print('Usage: yelp-check <COMMAND> [OPTIONS] [FILES]') - namelen = 2 - checks = [] - reports = [] - others = [] - for cls in sorted(Checker.__subclasses__(), key=(lambda cls: cls.name or '')): - if cls is CustomChecker: - continue - namelen = max(namelen, len(cls.name) + 2) - if cls in (HrefsChecker, IdsChecker, LinksChecker, - MediaChecker, OrphansChecker, ValidateChecker): - checks.append(cls) - elif cls in (CommentsChecker, LicenseChecker, StatusChecker, - StyleChecker): - reports.append(cls) - else: - others.append(cls) - if len(checks) > 0: - print('\nCheck commands:') - for cls in checks: - print(' ' + cls.name.ljust(namelen) + cls.desc) - if len(reports) > 0: - print('\nReport commands:') - for cls in reports: - print(' ' + cls.name.ljust(namelen) + cls.desc) - if len(others) > 0: - print('\nOther commands:') - for cls in others: - print(' ' + cls.name.ljust(namelen) + cls.desc) - config = configparser.ConfigParser() - try: - config.read('.yelp-tools.cfg') - except: - return - customs = [] - for sect in config.sections(): - if sect.startswith('check:'): - name = sect[6:] - skip = False - for cls in Checker.__subclasses__(): - if name == cls.name: - skip = True - break - if skip: - continue - if config.get(sect, 'assert', fallback=None) == None: - continue - desc = config.get(sect, 'desc', fallback='') - namelen = max(namelen, len(name) + 2) - customs.append((name, desc)) - if len(customs) > 0: - print('\nCustom commands:') - for name, desc in customs: - print(' ' + name.ljust(namelen) + desc) - - -if __name__ == '__main__': - try: - sys.exit(YelpCheck().main()) - except KeyboardInterrupt: - sys.exit(1) diff --git a/tools/yelp-new.in b/tools/yelp-new.in index 76bd101..cb923d5 100755 --- a/tools/yelp-new.in +++ b/tools/yelp-new.in @@ -1,6 +1,7 @@ 
-#!/bin/sh +#!/bin/python3 +# # yelp-new -# Copyright (C) 2010 Shaun McCance <shaunm@gnome.org> +# Copyright (C) 2010-2020 Shaun McCance <shaunm@gnome.org> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,147 +17,301 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -tmpldir="@YELP_TMPL_DIR@" - -yelp_describe_tmpl () { - line=" "`basename "$1" | sed -e 's/\.'$2'$//'` - desc=`cat "$f" | grep '<\?yelp-tmpl-desc' | sed -e 's/<?yelp-tmpl-desc //' -e 's/?>$//'` - if [ "x$desc" != "x" ]; then - line="$line - $desc" - fi - echo "$line" -} - -yelp_get_extension () { - echo "$1" | awk -F . '{print $NF}' -} - -yelp_usage() { - echo "Usage: yelp-new [OPTIONS] <TEMPLATE> <ID> [TITLE]" - echo "" - echo "Options:" - echo " --stub Create a .page.stub file instead of a .page file" - echo " --tmpl Copy an installed template to a local template" - if [ -f *.page.tmpl ]; then - echo "" - echo "Local Mallard Templates:" - for f in *.page.tmpl; do - yelp_describe_tmpl "$f" "page.tmpl" - done - fi - if [ -f ${tmpldir}*.page ]; then - echo "" - echo "Mallard Templates:" - for f in ${tmpldir}*.page; do - yelp_describe_tmpl "$f" "page" - done - fi - if [ -f *.docbook.tmpl ]; then - echo "" - echo "Local DocBook Templates:" - for f in *.docbook.tmpl; do - yelp_describe_tmpl "$f" "xml.tmpl" - done - fi - if [ -f ${tmpldir}*.docbook ]; then - echo "" - echo "DocBook Templates:" - for f in ${tmpldir}*.docbook; do - yelp_describe_tmpl "$f" "xml" - done - fi -} - -if [ $# -lt 2 ]; then - yelp_usage - exit 1 -fi - -# Process options -spec="" -while [ $# -gt 0 ]; do - case "$1" in - --stub) - spec=".stub" - shift;; - --tmpl) - spec=".tmpl" - shift;; - -h | --help) - yelp_usage - exit 0;; - *) - break - esac -done - -# Locate the template file -if [ $(yelp_get_extension ${1}) = "tmpl" -a -f "${1}" ]; then - 
infile="${1}" - outext="."$(yelp_get_extension $(basename "${1}" ".tmpl")) -elif [ -f "${1}.page.tmpl" ]; then - infile="${1}.page.tmpl" - outext=".page" -elif [ -f "${tmpldir}${1}.page" ]; then - infile="${tmpldir}${1}.page" - outext=".page" -elif [ -f "${1}.docbook.tmpl" ]; then - infile="${1}.docbook.tmpl" - outext=".docbook" -elif [ -f "${tmpldir}${1}.docbook" ]; then - infile="${tmpldir}${1}.docbook" - outext=".docbook" -else - echo "Error: No template named ${1} found" - exit 1 -fi - -# Set up some variables for substitution -if type git >/dev/null 2>&1; then - username=`git config user.name` - useremail=`git config user.email` -fi -if [ "x$username" = "x" -a "x$useremail" = "x" ]; then - if type bzr >/dev/null 2>&1; then - username=`bzr whoami | sed -e 's/ <.*//'` - useremail=`bzr whoami --email` - fi -fi -if [ "x$username" = "x" -a "x$useremail" = "x" ]; then - username='YOUR NAME' - useremail='YOUR EMAIL ADDRESS' -fi -pagetitle="$3" -if [ "x$pagetitle" = "x" ]; then - pagetitle="TITLE" -fi - -outid=$(basename "${2}") - -if [ "x$spec" != "x" ]; then - if [ "."$(yelp_get_extension "${2}") = "$spec" ]; then - outfile="${2}" - elif [ "."$(yelp_get_extension "${2}") = "$outext" ]; then - outfile="${2}${spec}" - else - outfile="${2}${outext}${spec}" - fi -elif [ "."$(yelp_get_extension ${2}) = "$outext" ]; then - outfile="${2}" -else - outfile="${2}${outext}" -fi - -if [ "x$spec" = "x.tmpl" ]; then - cp "$infile" "$outfile" -else - cat "$infile" | grep -v '<\?yelp-tmpl-desc' | sed \ - -e s/@ID@/"$outid"/ \ - -e s/@DATE@/`date +%Y-%m-%d`/ \ - -e s/@YEAR@/`date +%Y`/ \ - -e s/@NAME@/"$username"/ \ - -e s/@EMAIL@/"$useremail"/ \ - -e s/@TITLE@/"$pagetitle"/ \ - > "$outfile" -fi +import configparser +import datetime +import os +import subprocess +import sys + + +DATADIR = '@DATADIR@' + +class YelpNew: + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('stub', None, None, 'Create a stub file with .stub appended'), + ('tmpl', None, None, 'Copy an 
installed template to a local template'), + ('version', '-v', 'VERS', 'Specify the version number to substitute') + ] + + def __init__(self): + self.options = {} + self.fileargs = [] + self.parse_args(sys.argv[1:]) + self.config = configparser.ConfigParser() + try: + self.config.read('.yelp-tools.cfg') + except: + self.config = None + + + def parse_args(self, args): + while len(args) > 0: + argdef = None + if args[0].startswith('--'): + for arg_ in self.arguments: + if args[0] == '--' + arg_[0]: + argdef = arg_ + break + if argdef is None: + self.print_usage() + return 1 + elif args[0].startswith('-'): + for arg_ in self.arguments: + if args[0] == arg_[1]: + argdef = arg_ + break + if argdef is None: + self.print_usage() + return 1 + if argdef is not None: + takesarg = (argdef[2] is not None) + if takesarg: + if len(args) < 2: + self.print_usage() + return 1 + self.options.setdefault(argdef[0], []) + self.options[argdef[0]].append(args[1]) + args = args[2:] + else: + self.options[argdef[0]] = True + args = args[1:] + else: + self.fileargs.append(args[0]) + args = args[1:] + + + def get_option_bool(self, arg): + if arg in self.options: + return self.options[arg] == True + if self.config is not None: + val = self.config.get('new', arg, fallback=None) + if val is not None: + return (val == 'true') + val = self.config.get('default', arg, fallback=None) + if val is not None: + return (val == 'true') + return False + + + def get_option_str(self, arg): + if arg in self.options: + if isinstance(self.options[arg], list): + return self.options[arg][-1] + if self.config is not None: + val = self.config.get('new', arg, fallback=None) + if val is not None: + return val + val = self.config.get('default', arg, fallback=None) + if val is not None: + return val + return None + + + def get_replacements(self, pageid): + repl = {'ID' : pageid} + if len(self.fileargs) > 2: + repl['TITLE'] = ' '.join(self.fileargs[2:]) + else: + repl['TITLE'] = 'TITLE' + today = datetime.datetime.now() 
+ repl['DATE'] = today.strftime('%Y-%m-%d') + repl['YEAR'] = today.strftime('%Y') + + username = None + useremail = None + isgit = False + isbzr = False + cwd = os.getcwd() + while cwd: + if os.path.exists(os.path.join(cwd, '.git')): + isgit = True + break + if os.path.exists(os.path.join(cwd, '.bzr')): + isbzr = True + break + newcwd = os.path.dirname(cwd) + if newcwd == cwd: + break + cwd = newcwd + if isbzr: + try: + who = subprocess.run(['bzr', 'whoami'], check=True, + capture_output=True, encoding='utf8') + username, useremail = who.stdout.split('<') + username = username.strip() + useremail = useremail.split('>')[0].strip() + except: + username = None + useremail = None + if username is None: + try: + who = subprocess.run(['git', 'config', 'user.name'], check=True, + capture_output=True, encoding='utf8') + username = who.stdout.strip() + who = subprocess.run(['git', 'config', 'user.email'], check=True, + capture_output=True, encoding='utf8') + useremail = who.stdout.strip() + except: + username = None + useremail = None + repl['NAME'] = username or 'YOUR NAME' + repl['EMAIL'] = useremail or 'YOUR EMAIL ADDRESS' + repl['VERSION'] = self.get_option_str('version') or 'VERSION.NUMBER' + return repl + + + def main(self): + if len(self.fileargs) < 2: + self.print_usage() + return 1 + + tmpl = self.fileargs[0] + if '.' 
not in tmpl: + tmpl = tmpl + '.page' + ext = '.page' + elif tmpl.endswith('.page'): + ext = '.page' + elif tmpl.endswith('.duck'): + ext = '.duck' + if self.get_option_bool('stub'): + ext = ext + '.stub' + tmplfile = os.path.join(os.getcwd(), tmpl + '.tmpl') + if not os.path.exists(tmplfile): + tmplfile = os.path.join(DATADIR, 'templates', tmpl) + if not os.path.exists(tmplfile): + print('No template found named ' + tmpl, file=sys.stderr) + sys.exit(1) + pageid = self.fileargs[1] + istmpl = self.get_option_bool('tmpl') + if istmpl: + ext = ext + '.tmpl' + repl = {} + else: + repl = self.get_replacements(pageid) + def _writeout(outfile, infilename, depth=0): + if depth > 10: + # We could do this smarter by keeping a stack of infilenames, but why? + print('Recursion limit reached for template includes', file=sys.stderr) + sys.exit(1) + for line in open(infilename): + if (not istmpl) and line.startswith('<?yelp-tmpl-desc'): + continue + if (not istmpl) and line.startswith('[-] yelp-tmpl-desc'): + continue + while line is not None and '{{' in line: + before, after = line.split('{{', maxsplit=1) + if '}}' in after: + var, after = after.split('}}', maxsplit=1) + outfile.write(before) + isinclude = var.startswith('INCLUDE ') + if isinclude: + newfile = os.path.join(os.path.dirname(infilename), var[8:].strip()) + _writeout(outfile, newfile, depth=depth+1) + elif istmpl: + outfile.write('{{' + var + '}}') + else: + outfile.write(repl.get(var, '{{' + var + '}}')) + if isinclude and after == '\n': + line = None + else: + line = after + else: + outfile.write(line) + line = None + if line is not None: + outfile.write(line) + + if os.path.exists(pageid + ext): + print('Output file ' + pageid + ext + ' already exists', file=sys.stderr) + sys.exit(1) + with open(pageid + ext, 'w') as outfile: + _writeout(outfile, tmplfile) + + + def print_usage(self): + print('Usage: yelp-new [OPTIONS] <TEMPLATE> <ID> [TITLE]\n') + print('Create a new file from an installed or local template 
file,\n' + + 'or create a new local template. TEMPLATE must be the name of\n' + + 'an installed or local template. ID is a page ID (and base\n' + + 'filename) for the new page. The optional TITLE argument\n' + 'provides the page title\n') + print('Options:') + maxarglen = 2 + args = [] + for arg in self.arguments: + argkey = '--' + arg[0] + if arg[1] is not None: + argkey = arg[1] + ', ' + argkey + if arg[2] is not None: + argkey = argkey + ' ' + arg[2] + args.append((argkey, arg[3])) + for arg in args: + maxarglen = max(maxarglen, len(arg[0]) + 1) + for arg in args: + print(' ' + (arg[0]).ljust(maxarglen) + ' ' + arg[1]) + localpages = [] + localducks = [] + installedpages = [] + installedducks = [] + descs = {} + maxlen = 0 + def _getdesc(fpath): + for line in open(fpath): + if line.startswith('<?yelp-tmpl-desc '): + s = line[16:].strip() + if s.endswith('?>'): + s = s[:-2] + return s + if line.startswith('[-] yelp-tmpl-desc'): + return line[18:].strip() + return '' + for fname in os.listdir(os.getcwd()): + if fname.endswith('.page.tmpl'): + fname = fname[:-5] + maxlen = max(maxlen, len(fname)) + localpages.append(fname) + elif fname.endswith('.duck.tmpl'): + fname = fname[:-5] + maxlen = max(maxlen, len(fname)) + localducks.append(fname) + else: + continue + descs[fname] = _getdesc(os.path.join(os.getcwd(), fname + '.tmpl')) + for fname in os.listdir(os.path.join(DATADIR, 'templates')): + if fname.endswith('.page'): + if fname in localpages: + continue + maxlen = max(maxlen, len(fname)) + installedpages.append(fname) + elif fname.endswith('.duck'): + if fname in localducks: + continue + maxlen = max(maxlen, len(fname)) + installedducks.append(fname) + else: + continue + descs[fname] = _getdesc(os.path.join(DATADIR, 'templates', fname)) + if len(localpages) > 0: + print('\nLocal Mallard Templates:') + for page in localpages: + print(' ' + page.ljust(maxlen) + ' ' + descs.get(page, '')) + if len(localducks) > 0: + print('\nLocal Ducktype Templates:') + for duck in 
localducks: + print(' ' + duck.ljust(maxlen) + ' ' + descs.get(duck, '')) + if len(installedpages) > 0: + print('\nInstalled Mallard Templates:') + for page in installedpages: + print(' ' + page.ljust(maxlen) + ' ' + descs.get(page, '')) + if len(installedducks) > 0: + print('\nInstalled Ducktype Templates:') + for duck in installedducks: + print(' ' + duck.ljust(maxlen) + ' ' + descs.get(duck, '')) + +if __name__ == '__main__': + try: + sys.exit(YelpNew().main()) + except KeyboardInterrupt: + sys.exit(1) diff --git a/tools/yelp-new.py b/tools/yelp-new.py deleted file mode 100644 index 5b8f6f0..0000000 --- a/tools/yelp-new.py +++ /dev/null @@ -1,317 +0,0 @@ -#!/bin/python3 -# -# yelp-new -# Copyright (C) 2010-2020 Shaun McCance <shaunm@gnome.org> -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- -import configparser -import datetime -import os -import subprocess -import sys - -# FIXME: don't hardcode this -DATADIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', 'templates', 'py') - -class YelpNew: - arguments = [ - ('help', '-h', None, 'Show this help and exit'), - ('stub', None, None, 'Create a stub file with .stub appended'), - ('tmpl', None, None, 'Copy an installed template to a local template'), - ('version', '-v', 'VERS', 'Specify the version number to substitute') - ] - - def __init__(self): - self.options = {} - self.fileargs = [] - self.parse_args(sys.argv[1:]) - self.config = configparser.ConfigParser() - try: - self.config.read('.yelp-tools.cfg') - except: - self.config = None - - - def parse_args(self, args): - while len(args) > 0: - argdef = None - if args[0].startswith('--'): - for arg_ in self.arguments: - if args[0] == '--' + arg_[0]: - argdef = arg_ - break - if argdef is None: - self.print_usage() - return 1 - elif args[0].startswith('-'): - for arg_ in self.arguments: - if args[0] == arg_[1]: - argdef = arg_ - break - if argdef is None: - self.print_usage() - return 1 - if argdef is not None: - takesarg = (argdef[2] is not None) - if takesarg: - if len(args) < 2: - self.print_usage() - return 1 - self.options.setdefault(argdef[0], []) - self.options[argdef[0]].append(args[1]) - args = args[2:] - else: - self.options[argdef[0]] = True - args = args[1:] - else: - self.fileargs.append(args[0]) - args = args[1:] - - - def get_option_bool(self, arg): - if arg in self.options: - return self.options[arg] == True - if self.config is not None: - val = self.config.get('new', arg, fallback=None) - if val is not None: - return (val == 'true') - val = self.config.get('default', arg, fallback=None) - if val is not None: - return (val == 'true') - return False - - - def get_option_str(self, arg): - if arg in self.options: - if isinstance(self.options[arg], list): - return self.options[arg][-1] - if self.config is not None: - val = 
self.config.get('new', arg, fallback=None) - if val is not None: - return val - val = self.config.get('default', arg, fallback=None) - if val is not None: - return val - return None - - - def get_replacements(self, pageid): - repl = {'ID' : pageid} - if len(self.fileargs) > 2: - repl['TITLE'] = ' '.join(self.fileargs[2:]) - else: - repl['TITLE'] = 'TITLE' - today = datetime.datetime.now() - repl['DATE'] = today.strftime('%Y-%m-%d') - repl['YEAR'] = today.strftime('%Y') - - username = None - useremail = None - isgit = False - isbzr = False - cwd = os.getcwd() - while cwd: - if os.path.exists(os.path.join(cwd, '.git')): - isgit = True - break - if os.path.exists(os.path.join(cwd, '.bzr')): - isbzr = True - break - newcwd = os.path.dirname(cwd) - if newcwd == cwd: - break - cwd = newcwd - if isbzr: - try: - who = subprocess.run(['bzr', 'whoami'], check=True, - capture_output=True, encoding='utf8') - username, useremail = who.stdout.split('<') - username = username.strip() - useremail = useremail.split('>')[0].strip() - except: - username = None - useremail = None - if username is None: - try: - who = subprocess.run(['git', 'config', 'user.name'], check=True, - capture_output=True, encoding='utf8') - username = who.stdout.strip() - who = subprocess.run(['git', 'config', 'user.email'], check=True, - capture_output=True, encoding='utf8') - useremail = who.stdout.strip() - except: - username = None - useremail = None - repl['NAME'] = username or 'YOUR NAME' - repl['EMAIL'] = useremail or 'YOUR EMAIL ADDRESS' - repl['VERSION'] = self.get_option_str('version') or 'VERSION.NUMBER' - return repl - - - def main(self): - if len(self.fileargs) < 2: - self.print_usage() - return 1 - - tmpl = self.fileargs[0] - if '.' 
not in tmpl: - tmpl = tmpl + '.page' - ext = '.page' - elif tmpl.endswith('.page'): - ext = '.page' - elif tmpl.endswith('.duck'): - ext = '.duck' - if self.get_option_bool('stub'): - ext = ext + '.stub' - tmplfile = os.path.join(os.getcwd(), tmpl + '.tmpl') - if not os.path.exists(tmplfile): - tmplfile = os.path.join(DATADIR, 'templates', tmpl) - if not os.path.exists(tmplfile): - print('No template found named ' + tmpl, file=sys.stderr) - sys.exit(1) - pageid = self.fileargs[1] - istmpl = self.get_option_bool('tmpl') - if istmpl: - ext = ext + '.tmpl' - repl = {} - else: - repl = self.get_replacements(pageid) - def _writeout(outfile, infilename, depth=0): - if depth > 10: - # We could do this smarter by keeping a stack of infilenames, but why? - print('Recursion limit reached for template includes', file=sys.stderr) - sys.exit(1) - for line in open(infilename): - if (not istmpl) and line.startswith('<?yelp-tmpl-desc'): - continue - if (not istmpl) and line.startswith('[-] yelp-tmpl-desc'): - continue - while line is not None and '{{' in line: - before, after = line.split('{{', maxsplit=1) - if '}}' in after: - var, after = after.split('}}', maxsplit=1) - outfile.write(before) - isinclude = var.startswith('INCLUDE ') - if isinclude: - newfile = os.path.join(os.path.dirname(infilename), var[8:].strip()) - _writeout(outfile, newfile, depth=depth+1) - elif istmpl: - outfile.write('{{' + var + '}}') - else: - outfile.write(repl.get(var, '{{' + var + '}}')) - if isinclude and after == '\n': - line = None - else: - line = after - else: - outfile.write(line) - line = None - if line is not None: - outfile.write(line) - - if os.path.exists(pageid + ext): - print('Output file ' + pageid + ext + ' already exists', file=sys.stderr) - sys.exit(1) - with open(pageid + ext, 'w') as outfile: - _writeout(outfile, tmplfile) - - - def print_usage(self): - print('Usage: yelp-new [OPTIONS] <TEMPLATE> <ID> [TITLE]\n') - print('Create a new file from an installed or local template 
file,\n' + - 'or create a new local template. TEMPLATE must be the name of\n' + - 'an installed or local template. ID is a page ID (and base\n' + - 'filename) for the new page. The optional TITLE argument\n' - 'provides the page title\n') - print('Options:') - maxarglen = 2 - args = [] - for arg in self.arguments: - argkey = '--' + arg[0] - if arg[1] is not None: - argkey = arg[1] + ', ' + argkey - if arg[2] is not None: - argkey = argkey + ' ' + arg[2] - args.append((argkey, arg[3])) - for arg in args: - maxarglen = max(maxarglen, len(arg[0]) + 1) - for arg in args: - print(' ' + (arg[0]).ljust(maxarglen) + ' ' + arg[1]) - localpages = [] - localducks = [] - installedpages = [] - installedducks = [] - descs = {} - maxlen = 0 - def _getdesc(fpath): - for line in open(fpath): - if line.startswith('<?yelp-tmpl-desc '): - s = line[16:].strip() - if s.endswith('?>'): - s = s[:-2] - return s - if line.startswith('[-] yelp-tmpl-desc'): - return line[18:].strip() - return '' - for fname in os.listdir(os.getcwd()): - if fname.endswith('.page.tmpl'): - fname = fname[:-5] - maxlen = max(maxlen, len(fname)) - localpages.append(fname) - elif fname.endswith('.duck.tmpl'): - fname = fname[:-5] - maxlen = max(maxlen, len(fname)) - localducks.append(fname) - else: - continue - descs[fname] = _getdesc(os.path.join(os.getcwd(), fname + '.tmpl')) - for fname in os.listdir(os.path.join(DATADIR, 'templates')): - if fname.endswith('.page'): - if fname in localpages: - continue - maxlen = max(maxlen, len(fname)) - installedpages.append(fname) - elif fname.endswith('.duck'): - if fname in localducks: - continue - maxlen = max(maxlen, len(fname)) - installedducks.append(fname) - else: - continue - descs[fname] = _getdesc(os.path.join(DATADIR, 'templates', fname)) - if len(localpages) > 0: - print('\nLocal Mallard Templates:') - for page in localpages: - print(' ' + page.ljust(maxlen) + ' ' + descs.get(page, '')) - if len(localducks) > 0: - print('\nLocal Ducktype Templates:') - for duck in 
localducks: - print(' ' + duck.ljust(maxlen) + ' ' + descs.get(duck, '')) - if len(installedpages) > 0: - print('\nInstalled Mallard Templates:') - for page in installedpages: - print(' ' + page.ljust(maxlen) + ' ' + descs.get(page, '')) - if len(installedducks) > 0: - print('\nInstalled Ducktype Templates:') - for duck in installedducks: - print(' ' + duck.ljust(maxlen) + ' ' + descs.get(duck, '')) - - -if __name__ == '__main__': - try: - sys.exit(YelpNew().main()) - except KeyboardInterrupt: - sys.exit(1) |