summary | refs | log | tree | commit | diff
path: root/tools/yelp-check.in
diff options
context:
space:
mode:
Diffstat (limited to 'tools/yelp-check.in')
-rwxr-xr-x tools/yelp-check.in 2438
1 files changed, 1227 insertions, 1211 deletions
diff --git a/tools/yelp-check.in b/tools/yelp-check.in
index d46e004..2578800 100755
--- a/tools/yelp-check.in
+++ b/tools/yelp-check.in
@@ -1,8 +1,7 @@
-#!/bin/sh
-# -*- indent-tabs-mode: nil -*-
+#!/bin/python3
#
# yelp-check
-# Copyright (C) 2011-2015 Shaun McCance <shaunm@gnome.org>
+# Copyright (C) 2011-2020 Shaun McCance <shaunm@gnome.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -18,1214 +17,1231 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-xsl_mal_link='@XSL_MAL_LINK@'
-xsl_mal_license='@DATADIR@/xslt/mal-license.xsl'
-xsl_mal_status='@DATADIR@/xslt/mal-status.xsl'
-xsl_mal_rng='@DATADIR@/xslt/mal-rng.xsl'
-xsl_comments='@DATADIR@/xslt/comments.xsl'
-xsl_media='@DATADIR@/xslt/media.xsl'
-
-yelp_check_retval="0"
-
-urlencode () {
- # We usually don't want to urlencode slashes, because we're
- # usually converting file paths to URIs. But we do want to
- # urlencode slases for names of RNG files in validate_page.
- if [ "x$2" = "x/" ]; then
- urlencode_slash=''
- else
- urlencode_slash='\/'
- fi
- echo "$1" | LC_ALL=C awk '
-BEGIN {
- for (i = 1; i <= 255; i++) chars[sprintf("%c", i)] = i;
-}
-{
- ret = "";
- for (i = 1; i <= length($0); i++) {
- c = substr($0, i, 1);
- if (c ~ /['$urlencode_slash'a-zA-Z0-9._-]/)
- ret = ret c;
- else
- ret = ret sprintf("%%%X%X", int(chars[c] / 16), chars[c] % 16);
- }
- print ret;
-}'
-}
-
-urldecode () {
- echo "$1" | LC_ALL=C awk '
-BEGIN {
- for(i = 0; i < 10; i++) hex[i] = i;
- hex["A"] = hex["a"] = 10;
- hex["B"] = hex["b"] = 11;
- hex["C"] = hex["c"] = 12;
- hex["D"] = hex["d"] = 13;
- hex["E"] = hex["e"] = 14;
- hex["F"] = hex["f"] = 15;
-}
-{
- ret = "";
- for (i = 1; i <= length($0); i++) {
- c = substr($0, i, 1);
- if (c == "+") {
- ret = ret " ";
- }
- else if (c == "%") {
- c = sprintf("%c", hex[substr($0, i + 1, 1)] * 16 + hex[substr($0, i + 2, 1)]);
- ret = ret c;
- i += 2;
- }
- else {
- ret = ret c;
+import configparser
+import lxml.etree
+import os
+import sys
+import urllib.request
+import shutil
+import subprocess
+import tempfile
+import textwrap
+
+
+# Data directory. '@DATADIR@' is a placeholder token; presumably it is
+# substituted when this .in template is processed at build time -- TODO confirm.
+DATADIR = '@DATADIR@'
+
+# Name of the xml:id attribute, written as {namespace-uri}localname.
+XML_ID = '{http://www.w3.org/XML/1998/namespace}id'
+# Prefix -> namespace URI map; passed as namespaces= to XPath queries so
+# expressions can use these prefixes.
+NAMESPACES = {
+ 'mal': 'http://projectmallard.org/1.0/',
+ 'cache': 'http://projectmallard.org/cache/1.0/',
+ 'db': 'http://docbook.org/ns/docbook',
+ 'e': 'http://projectmallard.org/experimental/',
+ 'ui': 'http://projectmallard.org/ui/1.0/',
+ 'uix': 'http://projectmallard.org/experimental/ui/',
+ 'xlink': 'http://www.w3.org/1999/xlink'
}
- }
- print ret;
-}'
-}
-
-docbook_version='
-<xsl:stylesheet
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:db="http://docbook.org/ns/docbook"
- version="1.0">
-<xsl:output method="text"/>
-<xsl:template match="/">
- <xsl:choose>
- <xsl:when test="/db:*/@version">
- <xsl:value-of select="/db:*/@version"/>
- </xsl:when>
- <xsl:when test="/db:*">
- <xsl:text>5.0</xsl:text>
- </xsl:when>
- <xsl:otherwise>
- <xsl:text>4</xsl:text>
- </xsl:otherwise>
- </xsl:choose>
-</xsl:template>
-</xsl:stylesheet>
-'
-
-mallard_style='
-<xsl:stylesheet
- xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
- xmlns:mal="http://projectmallard.org/1.0/"
- version="1.0">
-<xsl:output method="text"/>
-<xsl:template match="/mal:page"><xsl:value-of select="@style"/></xsl:template>
-</xsl:stylesheet>'
-
-yelp_usage() {
- (
- echo "Usage: yelp-check <COMMAND> [OPTIONS] [FILES]"
- echo ""
- echo "Commands:"
- echo " comments Print the editorial comments in a document"
- echo " hrefs Find broken external links in a document"
- echo " ids Find Mallard page IDs that do not match file names"
- echo " license Report the license of Mallard pages"
- echo " links Find broken xref or linkend links in a document"
- echo " media Find broken references to media files"
- echo " orphans Find orphaned pages in a Mallard document"
- echo " status Report the status of Mallard pages"
- echo " style Report the style attribute of Mallard pages"
- echo " validate Validate files against a DTD or RNG"
- ) 1>&2
-}
-yelp_usage_hrefs () {
- (
- echo "Usage: yelp-check hrefs <FILES>"
- echo ""
- echo " Find broken href links in FILES in a Mallard document, or"
- echo " broken ulink or XLink links in FILES in a DocBook document."
- echo ""
- echo "Options:"
- echo " -s Treat pages as belonging to a Mallard site"
- ) 1>&2
-}
-yelp_usage_ids () {
- (
- echo "Usage: yelp-check ids <FILES>"
- echo ""
- echo " Find pages in a Mallard document whose page ID does not match"
- echo " the base file name of the page file."
- echo ""
- echo "Options:"
- echo " -s Treat pages as belonging to a Mallard site"
- ) 1>&2
-}
-yelp_usage_links () {
- (
- echo "Usage: yelp-check links <FILES>"
- echo ""
- echo " Find broken xref links in FILES in a Mallard document,"
- echo " or broken linkend links in FILES in a DocBook document."
- echo ""
- echo "Options:"
- echo " -c CACHE Use the existing Mallard cache CACHE"
- echo " -s Treat pages as belonging to a Mallard site"
- echo " -i Ignore xrefs where href is present"
- ) 1>&2
-}
-yelp_usage_media () {
- (
- echo "Usage: yelp-check media <FILES>"
- echo ""
- echo " Find broken references to media files. In Mallard, this"
- echo " checks media and thumb elements. In DocBook, this checks"
- echo " audiodata, imagedata, and videodata elements."
- echo ""
- echo "Options:"
- echo " -s Treat pages as belonging to a Mallard site"
- ) 1>&2
-}
-yelp_usage_orphans () {
- (
- echo "Usage: yelp-check orphans <FILES>"
- echo ""
- echo " Locate orphaned pages among FILES in a Mallard document."
- echo " Orphaned pages are any pages that cannot be reached by"
- echo " topic links alone from the index page."
- echo ""
- echo "Options:"
- echo " -c CACHE Use the existing Mallard cache CACHE"
- echo " -s Treat pages as belonging to a Mallard site"
- ) 1>&2
-}
-yelp_usage_comments () {
- (
- echo "Usage: yelp-check comments <FILES>"
- echo ""
- echo " Print the editorial comments in the files FILES, using the"
- echo " comment element in Mallard and the remark element in DocBook."
- echo ""
- echo "Options:"
- echo " -s Treat pages as belonging to a Mallard site"
- ) 1>&2
-}
-yelp_usage_license () {
- (
- echo "Usage: yelp-check license <FILES>"
- echo ""
- echo " Report the license of the Mallard page files FILES. Each"
- echo " matching page is reporting along with its license, reported"
- echo " based on the href attribute of the license element. Common"
- echo " licenses use a shortened identifier. Pages with multiple"
- echo " licenses have the identifiers separated by spaces. Pages"
- echo " with no license element report 'none'. Licenses with no"
- echo " href attribute are reported as 'unknown'."
- echo ""
- echo "Options:"
- echo " -s Treat pages as belonging to a Mallard site"
- echo " --only LICENSES Only show pages whose license is in LICENSES"
- echo " --except LICENSES Exclude pages whose license is in LICENSES"
- echo " --totals Show total counts for each license"
- echo "LICENSES may be a comma- and/or space-separated list."
- ) 1>&2
-}
-yelp_usage_style () {
- (
- echo "Usage: yelp-check style <FILES>"
- echo ""
- echo " Report the page style attribute of the Mallard page files"
- echo " FILES. Each matching page is reporting along with its status."
- echo ""
- echo "Options:"
- echo " -s Treat pages as belonging to a Mallard site"
- echo " --only STYLES Only show pages whose style is in STATUSES"
- echo " --except STYLES Exclude pages whose style is in STATUSES"
- echo " --totals Show total counts for each style"
- echo "STYLES may be comma- and/or space-separated lists."
- ) 1>&2
-}
-yelp_usage_status () {
- (
- echo "Usage: yelp-check status <FILES>"
- echo ""
- echo " Report the status of the Mallard page files FILES. Each"
- echo " matching page is reporting along with its status."
- echo ""
- echo "Options:"
- echo " -s Treat pages as belonging to a Mallard site"
- echo " --version VER Select revisions with the version attribute VER"
- echo " --docversion VER Select revisions with the docversion attribute VER"
- echo " --pkgversion VER Select revisions with the pkgversion attribute VER"
- echo " --older DATE Only show pages older than DATE"
- echo " --newer DATE Only show pages newer than DATE"
- echo " --only STATUSES Only show pages whose status is in STATUSES"
- echo " --except STATUSES Exclude pages whose status is in STATUSES"
- echo " --totals Show total counts for each status"
- echo "VER and STATUSES may be comma- and/or space-separated lists."
- ) 1>&2
-}
-yelp_usage_validate () {
- (
- echo "Usage: yelp-check validate <FILES>"
- echo ""
- echo " Validate FILES against the appropriate DTD or RNG."
- echo " For Mallard pages, perform automatic RNG merging"
- echo " based on the version attribute."
- echo ""
- echo "Options:"
- echo " -s Treat pages as belonging to a Mallard site"
- echo " --strict Disallow unknown namespaces"
- echo " --allow NS Explicitly allow namespace NS in strict mode"
- echo " --jing Use jing instead of xmllint for RNG validation"
- ) 1>&2
-}
-
-if [ $# = 0 ]; then
- yelp_usage
- exit 1
-fi
-
-yelp_check_iter_site () {
- for dir in "$1"/*; do
- if [ -d "$dir" ]; then
- if [ $(basename "$dir") != "__pintail__" ]; then
- yelp_check_iter_site "$dir"
- fi
- fi
- done
- for page in "$1"/*.page; do
- if [ -e "$page" ]; then
- $check_page "$page" || yelp_check_retval="$?"
- fi
- done
-}
-
-yelp_check_iter_args () {
- for arg in "$@"; do
- ext=$(echo "$arg" | sed -e 's/.*\.//')
- if [ -d "$arg" ]; then
- if [ "x$check_site" = "x1" ]; then
- yelp_check_iter_site "$arg"
- else
- for page in "${arg%%/}"/*.page; do
- if [ -e "$page" ]; then
- $check_page "$page"
- fi
- done
- fi
- elif [ "x$ext" = "xpage" -o "x$ext" = "xstub" -o "x$ext" = "xcache" ]; then
- $check_page "$arg" || yelp_check_retval="$?"
- elif [ "x$check_db" != "x" -a \( "x$ext" = "xdocbook" -o "x$ext" = "xxml" \) ]; then
- $check_db "$arg" || yelp_check_retval="$?"
- else
- echo "Unrecognized page $arg" 1>&2
- exit 1
- fi
- done
- return $yelp_check_retval
-}
-
-yelp_hrefs_page () {
- base=$(dirname "$1")
- if [ "x$check_site" = "x1" ]; then
- sdir=$(cd $(dirname "$1") && pwd)
- sdir=${sdir##${check_site_root}}/
- fi
- (
- echo '<xsl:stylesheet'
- echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
- echo ' xmlns:mal="http://projectmallard.org/1.0/"'
- echo ' xmlns:db="http://docbook.org/ns/docbook"'
- echo ' xmlns:xlink="www.w3.org/1999/xlink"'
- echo ' version="1.0">'
- echo '<xsl:output method="text"/>'
- echo '<xsl:template match="/mal:page">'
- echo ' <xsl:for-each select="//*[@href]">'
- echo ' <xsl:if test="not(starts-with(@href, '\''mailto:'\''))">'
- echo ' <xsl:value-of select="/mal:page/@id"/>'
- echo ' <xsl:text> </xsl:text>'
- echo ' <xsl:value-of select="@href"/>'
- echo ' <xsl:text>&#x000A;</xsl:text>'
- echo ' </xsl:if>'
- echo ' </xsl:for-each>'
- echo '</xsl:template>'
- echo '<xsl:template match="/*[namespace-uri(.) = '\'\''] | /db:*">'
- echo ' <xsl:for-each select="//ulink/@url | //*/xlink:href">'
- echo ' <xsl:if test="not(starts-with(string(.), '\''mailto:'\''))">'
- echo ' <xsl:value-of select="(ancestor-or-self::*/@id | ancestor-or-self::*/@xml:id)[last()]"/>'
- echo ' <xsl:text> </xsl:text>'
- echo ' <xsl:value-of select="string(.)"/>'
- echo ' <xsl:text>&#x000A;</xsl:text>'
- echo ' </xsl:if>'
- echo ' </xsl:for-each>'
- echo '</xsl:template>'
- echo '</xsl:stylesheet>'
- ) | xsltproc --xinclude - "$1" | sort | uniq | \
- while read id url; do
- colon=`echo "$url" | cut -d: -f1`
- if [ "x$colon" = "x$url" ]; then
- test -f "$base/"$(urldecode "$url") || echo "$sdir$id: $url"
- else
- status=$(cat "$check_href_cache" | while read trystatus tryurl; do
- if [ "x$tryurl" = "x$url" ]; then echo "$trystatus"; break; fi
- done)
- if [ "x$status" = "x1" ]; then
- true
- elif [ "x$status" = "x0" ]; then
- echo "$sdir$id: $url"
- else
- (curl -s -I -L "$url" | \
- grep '^HTTP/' | tail -n 1 | head -n 1 | \
- grep -q 'HTTP/.\.. 200 .*') \
- && (echo "1 $url" >> "$check_href_cache") \
- || (echo "0 $url" >> "$check_href_cache"; echo "$sdir$id: $url")
- fi
- fi
- done
-}
-
-yelp_hrefs () {
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_hrefs
- exit 1
- fi
- while [ "$#" != "0" ]; do
- case "$1" in
- "-s")
- check_site="1"
- check_site_root=$(pwd)
- shift
- ;;
- *)
- break
- ;;
- esac
- done
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_hrefs
- exit 1
- fi
- check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
- check_href_cache=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
- echo > "$check_href_cache"
- check_db=yelp_hrefs_page
- check_page=yelp_hrefs_page
- yelp_check_iter_args "$@" > "$check_out_file"
- yelp_check_retval=$(wc -l < "$check_out_file")
- if test "x$yelp_check_retval" != "x0"; then
- yelp_check_retval=1
- fi
- cat "$check_out_file"
- rm "$check_out_file"
- rm "$check_href_cache"
- exit $yelp_check_retval
-}
-
-yelp_ids_page () {
- pageid=$(
- (
- echo '<xsl:stylesheet'
- echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
- echo ' xmlns:mal="http://projectmallard.org/1.0/"'
- echo ' version="1.0">'
- echo '<xsl:output method="text"/>'
- echo '<xsl:template match="/mal:page">'
- echo '<xsl:value-of select="@id"/>'
- echo '</xsl:template>'
- echo '</xsl:stylesheet>'
- ) | xsltproc --xinclude - "$1")
- dname=$(dirname "$1")
- bname=$(basename "$1")
- if [ "x$pageid.page" != "x$bname" ]; then
- if [ "x$check_site" = "x1" ]; then
- sdir=$(cd $(dirname "$1") && pwd)
- sdir=${sdir##${check_site_root}}/
- echo $sdir$(basename "$1")": $pageid"
- elif [ "x$dname" = 'x.' ]; then
- echo "$bname: $pageid"
- else
- echo "$1: $pageid"
- fi
- yelp_check_retval=1
- fi
-}
-
-yelp_ids () {
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_ids
- exit 1
- fi
- while [ "$#" != "0" ]; do
- case "$1" in
- "-s")
- check_site="1"
- check_site_root=$(pwd)
- shift
- ;;
- *)
- break
- ;;
- esac
- done
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_ids
- exit 1
- fi
- check_db=
- check_page=yelp_ids_page
- yelp_check_iter_args "$@"
- exit $yelp_check_retval
-}
-
-yelp_links_db () {
- (
- echo '<xsl:stylesheet'
- echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
- echo ' xmlns:db="http://docbook.org/ns/docbook"'
- echo ' xmlns:exsl="http://exslt.org/common"'
- echo ' extension-element-prefixes="exsl"'
- echo ' version="1.0">'
- echo '<xsl:output method="text"/>'
- echo '<xsl:key name="idkey" match="*[@id or @xml:id]" use="@id | @xml:id"/>'
- echo '<xsl:template match="/">'
- echo ' <xsl:for-each select="//*[@linkend]">'
- echo ' <xsl:if test="not(key('"'idkey'"', @linkend))">'
- echo ' <xsl:value-of select="(ancestor-or-self::*/@id | ancestor-or-self::*/@xml:id)[last()]"/>'
- echo ' <xsl:text>: </xsl:text>'
- echo ' <xsl:value-of select="@linkend"/>'
- echo ' <xsl:text>&#x000A;</xsl:text>'
- echo ' </xsl:if>'
- echo ' </xsl:for-each>'
- echo '</xsl:template>'
- echo '</xsl:stylesheet>'
- ) | xsltproc --xinclude - "$1"
-}
-
-yelp_links_page () {
- if [ "x$check_site" = "x1" ]; then
- sdir=$(cd $(dirname "$1") && pwd)
- sdir=${sdir##${check_site_root}}/
- fi
- (
- echo '<xsl:stylesheet'
- echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
- echo ' xmlns:mal="http://projectmallard.org/1.0/"'
- echo ' xmlns:site="http://projectmallard.org/site/1.0/"'
- echo ' xmlns:exsl="http://exslt.org/common"'
- echo ' extension-element-prefixes="exsl"'
- echo ' version="1.0">'
- xsl='file://'`urlencode "$xsl_mal_link"`
- echo '<xsl:import href="'"$xsl"'"/>'
- check_cache_url='file://'`urlencode "$check_cache_file"`
- echo '<xsl:param name="mal.cache.file" select="'"'$check_cache_url'"'"/>'
- echo '<xsl:variable name="site.dir" select="'"'$sdir'"'"/>'
- echo '<xsl:output method="text"/>'
- echo '<xsl:key name="__site.cache.key" match="mal:page | mal:section"'
- echo ' use="concat(ancestor-or-self::mal:page/@site:dir, @id)"/>'
- echo '<xsl:template match="/mal:page">'
- echo ' <xsl:variable name="page" select="@id"/>'
- if [ "x$check_links_ignore" = "x1" ]; then
- echo ' <xsl:for-each select="//*[@xref][not(@href)]">'
- else
- echo ' <xsl:for-each select="//*[@xref]">'
- fi
- echo ' <xsl:variable name="xref" select="@xref"/>'
- echo ' <xsl:variable name="linkid">'
- echo ' <xsl:call-template name="mal.link.xref.linkid"/>'
- echo ' </xsl:variable>'
- echo ' <xsl:for-each select="$mal.cache">'
- echo ' <xsl:if test="count(key('"'mal.cache.key'"', $linkid) | '
- echo ' key('"'__site.cache.key'"', $linkid)) = 0">'
- echo ' <xsl:value-of select="$site.dir"/>'
- echo ' <xsl:value-of select="$page"/>'
- echo ' <xsl:text>: </xsl:text>'
- echo ' <xsl:value-of select="$xref"/>'
- echo ' <xsl:text>&#x000A;</xsl:text>'
- echo ' </xsl:if>'
- echo ' </xsl:for-each>'
- echo ' </xsl:for-each>'
- echo '</xsl:template>'
- echo '</xsl:stylesheet>'
- ) | xsltproc --xinclude - "$1"
-}
-
-yelp_links () {
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_links
- exit 1
- fi
- while [ "$#" != "0" ]; do
- case "$1" in
- "-c")
- shift
- check_cache_file="$1"
- shift
- ;;
- "-s")
- check_site="1"
- check_site_root=$(pwd)
- shift
- ;;
- "-i")
- shift
- check_links_ignore="1"
- ;;
- *)
- break
- ;;
- esac
- done
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_links
- exit 1
- fi
- if [ "x$check_cache_file" != "x" ]; then
- check_cache_dir=$(dirname "$check_cache_file")
- check_cache_dir=$(cd "$check_cache_dir" && pwd)
- check_cache_file="$check_cache_dir/"$(basename "$check_cache_file")
- elif [ -d "$1" ]; then
- check_cache_file=1
- else
- case "$1" in
- *.page | *.stub | *.cache)
- check_cache_file=1
- ;;
- *)
- break
- ;;
- esac
- fi
- if [ "x$check_cache_file" = "x1" ]; then
- check_cache_file_is_tmp="yes"
- check_cache_file=$(mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX)
- if [ "x$check_site" = "x1" ]; then
- yelp-build cache -s -o "$check_cache_file" "$@"
- else
- yelp-build cache -o "$check_cache_file" "$@"
- fi
- fi
-
- check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
- check_db=yelp_links_db
- check_page=yelp_links_page
- yelp_check_iter_args "$@" > "$check_out_file"
- yelp_check_retval=$(wc -l < "$check_out_file")
- if test "x$yelp_check_retval" != "x0"; then
- yelp_check_retval=1
- fi
- cat "$check_out_file"
- rm "$check_out_file"
- if [ "x$check_cache_file_is_tmp" = "xyes" ]; then
- rm "$check_cache_file"
- fi
- exit $yelp_check_retval
-}
-
-yelp_media_page () {
- ext=$(echo "$1" | sed -e 's/.*\.//')
- bname=$(basename "$1" ".$ext")
- dname=$(dirname "$1")
- if [ "x$dname" = "x." ]; then
- dname=""
- else
- dname="$dname"/
- fi;
- if [ "x$check_site" = "x1" ]; then
- sdir=$(cd "$dname" && pwd)
- sdir=${sdir##${check_site_root}}/
- else
- sdir="$dname"
- fi
- xsltproc "$xsl_media" "$1" | \
- sort | uniq | \
- while read line; do
- src=$(urldecode "$line")
- if [ ! -f "$dname$src" ]; then
- echo "$sdir$bname: $line"
- fi
- done
-}
-
-yelp_media () {
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_media
- exit 1
- fi
- while [ "$#" != "0" ]; do
- case "$1" in
- "-s")
- check_site="1"
- check_site_root=$(pwd)
- shift
- ;;
- *)
- break
- ;;
- esac
- done
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_media
- exit 1
- fi
- check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
- check_db=yelp_media_page
- check_page=yelp_media_page
- yelp_check_iter_args "$@" > "$check_out_file"
- yelp_check_retval=$(wc -l < "$check_out_file")
- if test "x$yelp_check_retval" != "x0"; then
- yelp_check_retval=1
- fi
- cat "$check_out_file"
- rm "$check_out_file"
- exit $yelp_check_retval
-}
-
-yelp_orphans_page () {
- if [ "x$check_site" = "x1" ]; then
- sdir=$(cd $(dirname "$1") && pwd)
- sdir=${sdir##${check_site_root}}/
- fi
- (
- echo '<xsl:stylesheet'
- echo ' xmlns:xsl="http://www.w3.org/1999/XSL/Transform"'
- echo ' xmlns:mal="http://projectmallard.org/1.0/"'
- echo ' xmlns:exsl="http://exslt.org/common"'
- echo ' extension-element-prefixes="exsl"'
- echo ' version="1.0">'
- xsl='file://'`urlencode "$xsl_mal_link"`
- echo '<xsl:import href="'"$xsl"'"/>'
- check_cache_url='file://'`urlencode "$check_cache_file"`
- echo '<xsl:param name="mal.cache.file" select="'"'$check_cache_url'"'"/>'
- echo '<xsl:variable name="site.dir" select="'"'$sdir'"'"/>'
- echo '<xsl:output method="text"/>'
- echo '<xsl:template match="/mal:page">'
- echo ' <xsl:variable name="trails">'
- echo ' <xsl:call-template name="mal.link.linktrails"/>'
- echo ' </xsl:variable>'
- echo ' <xsl:if test="@id != '"'index'"' and count(exsl:node-set($trails)/*) = 0">'
- echo ' <xsl:value-of select="$site.dir"/>'
- echo ' <xsl:value-of select="@id"/>'
- echo ' <xsl:text>&#x000A;</xsl:text>'
- echo ' </xsl:if>'
- echo '</xsl:template>'
- echo '</xsl:stylesheet>'
- ) | xsltproc --xinclude - "$1"
-}
-
-yelp_orphans () {
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_orphans
- exit 1
- fi
- while [ "$#" != "0" ]; do
- case "$1" in
- "-s")
- check_site="1"
- check_site_root=$(pwd)
- shift
- ;;
- "-c")
- shift
- check_cache_file="$1"
- shift
- ;;
- *)
- break
- ;;
- esac
- done
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_orphans
- exit 1
- fi
- if [ "x$check_cache_file" != "x" ]; then
- check_cache_dir=$(dirname "$check_cache_file")
- check_cache_dir=$(cd "$check_cache_dir" && pwd)
- check_cache_file="$check_cache_dir/"$(basename "$check_cache_file")
- elif [ -d "$1" ]; then
- check_cache_file=1
- else
- case "$1" in
- *.page | *.stub | *.cache)
- check_cache_file=1
- ;;
- *)
- break
- ;;
- esac
- fi
- if [ "x$check_cache_file" = "x1" ]; then
- check_cache_file_is_tmp="yes"
- check_cache_file=$(mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX)
- if [ "x$check_site" = "x1" ]; then
- yelp-build cache -s -o "$check_cache_file" "$@"
- else
- yelp-build cache -o "$check_cache_file" "$@"
- fi
- fi
-
- check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
- check_db=
- check_page=yelp_orphans_page
- yelp_check_iter_args "$@" > "$check_out_file"
- yelp_check_retval=$(wc -l < "$check_out_file")
- if test "x$yelp_check_retval" != "x0"; then
- yelp_check_retval=1
- fi
- cat "$check_out_file"
- rm "$check_out_file"
- if [ "x$check_cache_file_is_tmp" = "xyes" ]; then
- rm "$check_cache_file"
- fi
- exit $yelp_check_retval
-}
-
-yelp_comments_page () {
- ext=$(echo "$1" | sed -e 's/.*\.//')
- bname=$(basename "$1" ".$ext")
- if [ "x$check_site" = "x1" ]; then
- sdir=$(cd $(dirname "$1") && pwd)
- sdir=${sdir##${check_site_root}}/
- fi
- xsltproc --stringparam basename "$bname" \
- --stringparam site.dir "$sdir" \
- "$xsl_comments" "$1"
-}
-
-yelp_comments () {
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_comments
- exit 1
- fi
- while [ "$#" != "0" ]; do
- case "$1" in
- "-s")
- check_site="1"
- check_site_root=$(pwd)
- shift
- ;;
- *)
- break
- ;;
- esac
- done
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_comments
- exit 1
- fi
- check_db=yelp_comments_page
- check_page=yelp_comments_page
- yelp_check_iter_args "$@"
- exit $yelp_check_retval
-}
-
-yelp_license_page () {
- if [ "x$check_site" = "x1" ]; then
- sdir=$(cd $(dirname "$1") && pwd)
- sdir=${sdir##${check_site_root}}/
- fi
- xsltproc --xinclude \
- --stringparam only "$check_only" \
- --stringparam except "$check_except" \
- --stringparam totals "$check_totals" \
- --stringparam site.dir "$sdir" \
- "$xsl_mal_license" "$1"
-}
-
-yelp_license () {
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_license
- exit 1
- fi
- while [ "$#" != "0" ]; do
- case "$1" in
- "-s")
- check_site="1"
- check_site_root=$(pwd)
- shift
- ;;
- "--only")
- shift
- check_only="$1"
- shift
- ;;
- "--except")
- shift
- check_except="$1"
- shift
- ;;
- "--totals")
- check_totals="1"
- shift
- ;;
- *)
- break
- ;;
- esac
- done
- if [ "$#" = "0" -o "x$1" = "x--help" -o "x$1" = "x-h" ]; then
- yelp_usage_license
- exit 1
- fi
- check_db=
- check_page=yelp_license_page
- if [ "x$check_totals" = "x1" ]; then
- yelp_check_iter_args "$@" | \
- sort | uniq -c | sed -e 's/^ *//' | awk '{print $2 ": " $1}'
- else
- yelp_check_iter_args "$@" | sort
- fi
-}
-
-yelp_style_page () {
- ext=$(echo "$1" | sed -e 's/.*\.//')
- bname=$(basename "$1" ".$ext")
- if [ "x$check_site" = "x1" ]; then
- sdir=$(cd $(dirname "$1") && pwd)
- sdir=${sdir##${check_site_root}}/
- fi
- style=$(echo "$mallard_style" | xsltproc - "$1")
- output=1
- if [ "x$check_only_defined" = "x1" ]; then
- output=0
- if [ "x$check_only" = "x" ]; then
- # We treat an empty --only '' as requesting pages with no style
- if [ "x$style" = "x" ]; then output=1; fi
- else
- for pstyle in "$style"; do
- for sstyle in $(echo "$check_only" | sed -e 's/,/ /g'); do
- if [ "$pstyle" = "$sstyle" ]; then
- output=1
+
+def _stringify(el):
+    """Return the concatenated text of EL and all of its descendants.
+
+    The result is EL's own text, followed by the stringified form of each
+    child in document order, followed by EL's tail text when it has one.
+    """
+    pieces = [el.text or '']
+    pieces.extend(_stringify(child) for child in el)
+    if el.tail is not None:
+        pieces.append(el.tail)
+    return ''.join(pieces)
+
+def get_format(node):
+    """Return the document format implied by NODE's namespace.
+
+    Returns 'mallard' for the Mallard page and cache namespaces,
+    'docbook5' for the DocBook namespace, 'docbook4' when NODE has no
+    namespace, and None for any other namespace.
+    """
+    namespace = lxml.etree.QName(node).namespace
+    if namespace is None:
+        # For now, just assume no ns means docbook4
+        return 'docbook4'
+    if namespace == NAMESPACES['db']:
+        return 'docbook5'
+    if namespace in (NAMESPACES['mal'], NAMESPACES['cache']):
+        return 'mallard'
+    return None
+
+class InputFile:
+    """One input file, with the paths used to open and to report it.
+
+    filepath is the directory the file was found under and filename its
+    name relative to that directory. sitedir is the site directory prefix
+    used when reporting the file; it defaults to the empty string.
+    """
+
+    def __init__(self, filepath, filename, sitedir=None):
+        self.filepath, self.filename = filepath, filename
+        self.sitedir = sitedir if sitedir else ''
+        # Path used to open the file, and the directory containing it
+        self.absfile = os.path.join(self.filepath, self.filename)
+        self.absdir = os.path.dirname(self.absfile)
+        # Name used when reporting the file
+        self.sitefilename = self.sitedir + self.filename
+
+
+class Checker:
+    """Base class for the yelp-check subcommands.
+
+    Subclasses fill in the class attributes below and override main().
+    Each entry of ``arguments`` is a 4-tuple
+    ``(long_name, short_flag_or_None, metavar_or_None, help_text)``; a
+    metavar of None marks a boolean flag, anything else an option that
+    takes a value and may be given more than once.
+    """
+
+    name = None
+    desc = None
+    blurb = None
+    formats = []
+    arguments = []
+    postblurb = None
+    xinclude = True
+    config = None
+
+    def __init__(self, yelpcheck):
+        self.yelpcheck = yelpcheck
+        # long option name -> True (flags) or list of given values
+        self.options = {}
+        self.fileargs = []
+        self.tmpdir = None
+
+    def __del__(self):
+        # getattr: __del__ also runs when __init__ did not complete.
+        tmpdir = getattr(self, 'tmpdir', None)
+        if tmpdir is not None:
+            # Cleanup is best effort; never raise from a finalizer.
+            shutil.rmtree(tmpdir, ignore_errors=True)
+            self.tmpdir = None
+
+    def _find_argdef(self, flag):
+        """Return the ``arguments`` entry matching FLAG, or None."""
+        for argdef in self.arguments:
+            if flag.startswith('--'):
+                if flag == '--' + argdef[0]:
+                    return argdef
+            elif flag == argdef[1]:
+                return argdef
+        return None
+
+    def _load_config(self):
+        """Read .yelp-tools.cfg into self.config when one exists.
+
+        The directory of the first file argument is tried first, then the
+        current working directory. A config file that cannot be read
+        prints the error to stderr and exits with status 1.
+        """
+        cfgfile = None
+        if self.fileargs:
+            cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg')
+            if not os.path.exists(cfgfile):
+                cfgfile = None
+        if cfgfile is None:
+            cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg')
+        if os.path.exists(cfgfile):
+            self.config = configparser.ConfigParser()
+            try:
+                self.config.read(cfgfile)
+            except Exception as e:
+                print(e, file=sys.stderr)
+                sys.exit(1)
+
+    def _config_get(self, arg):
+        """Return ARG from the config file, or None when it is not set.
+
+        Sections are consulted from most to least specific:
+        'check:<name>', then 'check', then 'default'.
+        """
+        if self.config is None:
+            return None
+        for section in ('check:' + self.name, 'check', 'default'):
+            val = self.config.get(section, arg, fallback=None)
+            if val is not None:
+                return val
+        return None
+
+    def parse_args(self, args):
+        """Parse ARGS into self.options and self.fileargs, then load config.
+
+        Returns 0 on success. An unknown option, or a value option given
+        without its value, prints the help text and returns 1.
+        """
+        pos = 0
+        while pos < len(args):
+            arg = args[pos]
+            if not arg.startswith('-'):
+                self.fileargs.append(arg)
+                pos += 1
+                continue
+            argdef = self._find_argdef(arg)
+            if argdef is None:
+                self.print_help()
+                return 1
+            if argdef[2] is None:
+                self.options[argdef[0]] = True
+                pos += 1
+            else:
+                if pos + 1 >= len(args):
+                    self.print_help()
+                    return 1
+                self.options.setdefault(argdef[0], []).append(args[pos + 1])
+                pos += 2
+        self._load_config()
+        return 0
+
+    def get_option_bool(self, arg):
+        """Return the boolean option ARG; the command line wins over config.
+
+        A config value counts as true only when it is exactly 'true'.
+        """
+        if arg in self.options:
+            return self.options[arg] is True
+        val = self._config_get(arg)
+        if val is not None:
+            return val == 'true'
+        return False
+
+    def get_option_str(self, arg):
+        """Return the last value given for ARG, else the config value, else None."""
+        if isinstance(self.options.get(arg), list):
+            return self.options[arg][-1]
+        return self._config_get(arg)
+
+    def get_option_list(self, arg):
+        """Return all values for ARG as a flat list, or None when unset.
+
+        Every given value (or the config value) is split on commas and
+        whitespace.
+        """
+        given = self.options.get(arg)
+        if isinstance(given, list):
+            ret = []
+            for opt in given:
+                ret.extend(opt.replace(',', ' ').split())
+            return ret
+        val = self._config_get(arg)
+        if val is not None:
+            return val.replace(',', ' ').split()
+        return None
+
+    def iter_files(self, sitedir=None):
+        """Yield an InputFile for every file selected by self.fileargs.
+
+        With no file arguments the current directory is used. Directory
+        arguments yield their *.page files, recursively in site mode.
+        Directory listings are sorted so the order is reproducible.
+        """
+        issite = self.get_option_bool('site')
+        if not self.fileargs:
+            self.fileargs.append('.')
+        for filearg in self.fileargs:
+            if os.path.isdir(filearg):
+                if issite:
+                    yield from self.iter_site(filearg, '/')
+                else:
+                    for fname in sorted(os.listdir(filearg)):
+                        if fname.endswith('.page'):
+                            yield InputFile(filearg, fname)
+            elif issite:
+                # Split into directory and base name so the reported name is
+                # sitedir + basename, exactly as iter_site() produces it.
+                # FIXME: should do some normalization here, I guess.
+                # It's hard to get this perfect without a defined start dir
+                dirname, basename = os.path.split(filearg)
+                reldir = os.path.normpath(dirname).strip('/') if dirname else ''
+                if reldir in ('', '.'):
+                    filesitedir = '/'
+                else:
+                    filesitedir = '/' + reldir + '/'
+                yield InputFile(os.path.join(os.getcwd(), dirname), basename, filesitedir)
+            else:
+                yield InputFile(os.getcwd(), filearg)
+
+    def iter_site(self, filepath, sitedir):
+        """Recursively yield the *.page files under FILEPATH.
+
+        SITEDIR is the site directory prefix of FILEPATH and is extended
+        with each subdirectory name on the way down.
+        """
+        for fname in sorted(os.listdir(filepath)):
+            newpath = os.path.join(filepath, fname)
+            if os.path.isdir(newpath):
+                # FIXME https://github.com/projectmallard/pintail/issues/36
+                if fname == '__pintail__':
+                    continue
+                yield from self.iter_site(newpath, sitedir + fname + '/')
+            elif fname.endswith('.page'):
+                yield InputFile(filepath, fname, sitedir)
+
+    def get_xml(self, xmlfile):
+        """Parse XMLFILE and return the tree, resolving XIncludes if enabled."""
+        # FIXME: we can cache these if we add a feature to run multiple
+        # checkers at once
+        tree = lxml.etree.parse(xmlfile.absfile)
+        if self.xinclude:
+            lxml.etree.XInclude()(tree.getroot())
+        return tree
+
+    def create_tmpdir(self):
+        """Create self.tmpdir on first use; it is removed in __del__."""
+        if self.tmpdir is None:
+            self.tmpdir = tempfile.mkdtemp()
+
+    def print_help(self):
+        """Print the usage text for this subcommand to standard output."""
+        print('Usage: yelp-check ' + self.name + ' [OPTIONS] [FILES]')
+        print('Formats: ' + ' '.join(self.formats) + '\n')
+        #FIXME: prettify names of formats
+        if self.blurb is not None:
+            print(self.blurb + '\n')
+        print('Options:')
+        args = []
+        for arg in self.arguments:
+            argkey = '--' + arg[0]
+            if arg[1] is not None:
+                argkey = arg[1] + ', ' + argkey
+            if arg[2] is not None:
+                argkey = argkey + ' ' + arg[2]
+            args.append((argkey, arg[3]))
+        # Pad every option to the width of the longest one
+        maxarglen = max([2] + [len(argkey) + 1 for argkey, _ in args])
+        for argkey, helptext in args:
+            print(' ' + argkey.ljust(maxarglen) + ' ' + helptext)
+        if self.postblurb is not None:
+            print(self.postblurb)
+
+    def main(self, args):
+        """Run the checker on ARGS; subclasses override this."""
+        pass
+
+
+class HrefsChecker (Checker):
+    """Checker that reports href, xlink:href and ulink url links that fail."""
+
+    name = 'hrefs'
+    desc = 'Find broken external links in a document'
+    blurb = ('Find broken href links in FILES in a Mallard document, or\n' +
+             'broken ulink or XLink links in FILES in a DocBook document.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('allow', None, 'URL', 'Allow URL or list of URLs without checking')
+    ]
+    postblurb = 'URL may be a comma- and/or space-separated list, or specified\nmultiple times.'
+
+    @staticmethod
+    def _check_url(url):
+        """Return True when URL can be opened and answers with status 200."""
+        try:
+            # The with block closes the response even on an early return.
+            with urllib.request.urlopen(url) as response:
+                return response.status == 200
+        except Exception:
+            # Any failure to fetch (bad URL, network, HTTP error) is a
+            # broken link, not a crash.
+            return False
+
+    def main(self, args):
+        """Check every link in the input files.
+
+        Prints one "<file>: <href>" line per broken link. Returns 0 when
+        all links are fine and 1 when any link is broken or the arguments
+        are invalid.
+        """
+        from urllib.parse import unquote
+
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        # Cache of URL -> is-ok, seeded with the safelist below.
+        # safelisting URLs that we use as identifiers
+        hrefs = {
+            'http://creativecommons.org/licenses/by-sa/3.0/': True,
+            'https://creativecommons.org/licenses/by-sa/3.0/': True,
+            'http://creativecommons.org/licenses/by-sa/3.0/us/': True,
+            'https://creativecommons.org/licenses/by-sa/3.0/us/': True
+        }
+        allow = self.get_option_list('allow')
+        if allow is not None:
+            for url in allow:
+                hrefs[url] = True
+        # Must use the same namespace URI as the xlink prefix in the XPath
+        # below, otherwise the attribute is selected but never read.
+        xlink_href = '{' + NAMESPACES['xlink'] + '}href'
+        retcode = 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            for el in xml.xpath('//*[@href | @xlink:href | self::ulink/@url]',
+                                namespaces=NAMESPACES):
+                href = el.get('href')
+                if href is None:
+                    href = el.get(xlink_href)
+                if href is None:
+                    href = el.get('url')
+                if href is None or href.startswith('mailto:'):
+                    continue
+                if href in hrefs:
+                    isok = hrefs[href]
+                elif ':' not in href:
+                    # No scheme: a link to a local file, relative to the
+                    # directory of the page. Not cached, because the result
+                    # depends on which page contains the link.
+                    isok = os.path.isfile(os.path.join(infile.absdir, unquote(href)))
+                else:
+                    isok = self._check_url(href)
+                    hrefs[href] = isok
+                if not isok:
+                    retcode = 1
+                    print(infile.sitefilename + ': ' + href)
+
+        return retcode
+
+
class IdsChecker(Checker):
    """Report Mallard pages whose id does not match the file's base name."""

    name = 'ids'
    desc = 'Find Mallard page IDs that do not match file names'
    blurb = ('Find pages in a Mallard document whose page ID does not match\n' +
             'the base file name of the page file.')
    formats = ['mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
    ]

    def main(self, args):
        """Compare each page's id attribute with its file name.

        Prints ``<file>: <id>`` for every mismatch (the id is empty when it
        could not be read) and for every input file that does not end in
        ``.page``. Returns 1 if anything was printed or argument parsing
        failed, 0 otherwise.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        retcode = 0
        suffix = '.page'

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            isok = False
            pageid = None
            if infile.filename.endswith(suffix):
                try:
                    pageid = xml.getroot().get('id')
                    basename = os.path.basename(infile.filename)
                    isok = (pageid == basename[:-len(suffix)])
                except Exception:
                    # Best effort: an unreadable page counts as a mismatch,
                    # but Ctrl-C and SystemExit must still propagate.
                    isok = False
            if not isok:
                retcode = 1
                print(infile.sitefilename + ': ' + (pageid or ''))

        return retcode
+
+
class LinksChecker(Checker):
    """Report Mallard xrefs and DocBook linkends that point at unknown ids."""

    name = 'links'
    desc = 'Find broken xref or linkend links in a document'
    blurb = ('Find broken xref links in FILES in a Mallard document,\n' +
             'or broken linkend links in FILES in a DocBook document.')
    formats = ['docbook4', 'docbook5', 'mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
        ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
        ('ignore', '-i', None, 'Ignore xrefs where href is present')
    ]

    def __init__(self, yelpcheck):
        super().__init__(yelpcheck)
        # Mallard: full id ("page" or "page#sect") -> xrefs found under it.
        self.idstoxrefs = {}
        # DocBook: element id -> linkends found under it.
        self.idstolinkends = {}

    def _accumulate_mal(self, node, pageid, sectid, xrefs, sitedir=None):
        """Recursively record ids (and optionally xrefs) of a Mallard tree.

        node    -- element to inspect; its children are visited recursively
        pageid  -- id of the enclosing page, or None if not yet known
        sectid  -- id of the nearest enclosing non-page element, or None
        xrefs   -- when true, also record xref attributes under the current id
        sitedir -- if not None, prefixed to ids that do not start with '/'
        """
        thisid = node.get('id')
        if thisid is not None:
            if node.tag == '{' + NAMESPACES['mal'] + '}page':
                pageid = thisid
            else:
                sectid = thisid
        curid = pageid
        ignore = self.get_option_bool('ignore')
        if curid is not None:
            if sectid is not None:
                # id attrs in cache files are already fully formed
                if '#' in sectid:
                    curid = sectid
                else:
                    curid = curid + '#' + sectid
            if sitedir is not None:
                # id attrs in cache files already have sitedir prefixed.
                # startswith() rather than [0] so an empty id cannot raise.
                if not curid.startswith('/'):
                    curid = sitedir + curid
            self.idstoxrefs.setdefault(curid, [])
            if xrefs:
                xref = node.get('xref')
                if xref is not None:
                    if not (ignore and (node.get('href') is not None)):
                        self.idstoxrefs[curid].append(xref)
        for child in node:
            self._accumulate_mal(child, pageid, sectid, xrefs, sitedir)

    def _accumulate_db(self, node, nodeid):
        """Recursively record ids and linkends of a DocBook tree.

        *nodeid* is the id of the nearest enclosing element that has one;
        linkends are filed under it so reports can say where they occur.
        """
        thisid = node.get('id')
        if thisid is None:
            thisid = node.get(XML_ID)
        if thisid is not None:
            nodeid = thisid
            self.idstolinkends.setdefault(nodeid, [])
        if nodeid is not None:
            linkend = node.get('linkend')
            if linkend is not None:
                self.idstolinkends[nodeid].append(linkend)
        for child in node:
            self._accumulate_db(child, nodeid)

    def main(self, args):
        """Collect ids and links from the inputs and print the broken ones.

        Output lines are ``<containing id>: <target>``. Returns 1 if any
        broken link was found or argument parsing failed, 0 otherwise.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        retcode = 0

        cachefile = self.get_option_str('cache')
        if cachefile is not None:
            # The cache only contributes known ids, not xrefs to check.
            xml = self.get_xml(InputFile(os.getcwd(), cachefile))
            self._accumulate_mal(xml.getroot(), None, None, False)

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            fmt = get_format(xml.getroot())
            if fmt == 'mallard':
                self._accumulate_mal(xml.getroot(), None, None, True, infile.sitedir)
            elif fmt in ('docbook4', 'docbook5'):
                # For DocBook, we assume each filearg is its own document, so
                # we reset the dict each time and only check within the file.
                # Note that XInclude and SYSTEM includes DO happen first.
                self.idstolinkends = {}
                self._accumulate_db(xml.getroot(), None)
                for curid in self.idstolinkends:
                    for linkend in self.idstolinkends[curid]:
                        if linkend not in self.idstolinkends:
                            print(curid + ': ' + linkend)
                            retcode = 1

        # Mallard links can cross pages, so they are checked only after every
        # input has been accumulated.
        for curid in self.idstoxrefs:
            for xref in self.idstoxrefs[curid]:
                checkref = xref
                # startswith() keeps an empty xref="" from raising; it then
                # simply fails the lookup below and is reported as broken.
                if checkref.startswith('#'):
                    # Section-only reference: resolve against this page.
                    checkref = curid.split('#')[0] + checkref
                if curid.startswith('/') and not checkref.startswith('/'):
                    # Site mode: relative references resolve against the
                    # directory part of the containing id.
                    checkref = curid[:curid.rfind('/')+1] + checkref
                if checkref not in self.idstoxrefs:
                    print(curid + ': ' + xref)
                    retcode = 1

        return retcode
+
+
class MediaChecker(Checker):
    """Report media references whose target file does not exist on disk."""

    name = 'media'
    desc = 'Find broken references to media files'
    blurb = ('Find broken references to media files. In Mallard, this\n' +
             'checks media and thumb elements. In DocBook, this checks\n' +
             'audiodata, imagedata, and videodata elements.')
    formats = ['docbook4', 'docbook5', 'mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
    ]

    def main(self, args):
        """Check that every referenced media file exists.

        References are resolved relative to the directory of the file that
        contains them (``infile.absdir``). Prints ``<file>: <src>`` for each
        missing target. Returns 1 if any is missing or argument parsing
        failed, 0 otherwise.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        retcode = 0

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            fmt = get_format(xml.getroot())
            attr = None
            elements = []
            if fmt == 'mallard':
                attr = 'src'
                elements = xml.xpath('//mal:media[@src] | //uix:thumb | //ui:thumb | //e:mouseover',
                                     namespaces=NAMESPACES)
            elif fmt == 'docbook5':
                # FIXME: do we care about entityref?
                attr = 'fileref'
                elements = xml.xpath('//db:audiodata | //db:imagedata | //db:videodata',
                                     namespaces=NAMESPACES)
            elif fmt == 'docbook4':
                attr = 'fileref'
                elements = xml.xpath('//audiodata | //imagedata | //videodata')

            for el in elements:
                src = el.get(attr)
                if src is None:
                    # Only mal:media is filtered on @src by the XPath; the
                    # other elements may lack the attribute (e.g. DocBook
                    # entityref). There is no path to check, and
                    # os.path.join() would raise on None.
                    continue
                fsrc = os.path.join(infile.absdir, src)
                if not os.path.exists(fsrc):
                    print(infile.sitefilename + ': ' + src)
                    retcode = 1

        return retcode
+
+
class OrphansChecker(Checker):
    """Report Mallard pages that cannot be reached from the index page."""

    name = 'orphans'
    desc = 'Find orphaned pages in a Mallard document'
    blurb = ('Locate orphaned pages among FILES in a Mallard document.\n' +
             'Orphaned pages are any pages that cannot be reached by\n' +
             'topic links alone from the index page.')
    formats = ['mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
        ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE')
    ]

    def __init__(self, yelpcheck):
        super().__init__(yelpcheck)
        # page id -> set of page ids that act as guides for it
        self.guidelinks = {}
        # ids of pages containing a site-subdirs links element
        self.sitesubdirs = set()

    @staticmethod
    def _resolve_xref(xref, sitedir):
        """Reduce an xref to a page id, or return None if it should be skipped.

        Missing, empty, and same-page ('#sect') references are skipped. Any
        '#section' suffix is dropped, and *sitedir* (when non-empty) is
        prefixed to references that do not start with '/'.
        """
        if xref is None or xref == '':
            return None
        if xref.startswith('#'):
            return None
        if '#' in xref:
            xref = xref[:xref.find('#')]
        if sitedir is not None and sitedir != '':
            if not xref.startswith('/'):
                xref = sitedir + xref
        return xref

    def _collect_links(self, node, sitedir):
        """Record the guide relationships declared by the page *node*.

        Guide links on the page add their target as a guide of this page;
        topic links add this page as a guide of their target.
        """
        pageid = node.get('id')
        if not pageid:
            # Nothing to file links under; callers normally filter these out.
            return
        if not pageid.startswith('/'):
            pageid = sitedir + pageid
        else:
            # id attrs in cache files already have sitedir prefixed
            sitedir = pageid[:pageid.rfind('/')+1]
        self.guidelinks.setdefault(pageid, set())
        # For the purposes of finding orphans, we'll just pretend that
        # all links to or from sections are just to or from pages.
        #
        # The paths start with './/' so they stay inside *node*. In lxml a
        # leading '//' is evaluated from the document root, which for a cache
        # file would attribute every page's links to every other page.
        for el in node.xpath('.//mal:info/mal:link[@type="guide"]',
                             namespaces=NAMESPACES):
            xref = self._resolve_xref(el.get('xref'), sitedir)
            if xref is not None:
                self.guidelinks[pageid].add(xref)
        for el in node.xpath('.//mal:info/mal:link[@type="topic"]',
                             namespaces=NAMESPACES):
            xref = self._resolve_xref(el.get('xref'), sitedir)
            if xref is not None:
                self.guidelinks.setdefault(xref, set()).add(pageid)
        if node.xpath('.//mal:links[@type="site-subdirs" or @type="site:subdirs"]',
                      namespaces=NAMESPACES):
            self.sitesubdirs.add(pageid)

    def main(self, args):
        """Print the id of every input page that is not reachable from index.

        Returns 1 if any orphan was found or argument parsing failed,
        0 otherwise.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        retcode = 0

        cachefile = self.get_option_str('cache')
        if cachefile is not None:
            xml = self.get_xml(InputFile(os.getcwd(), cachefile))
            for page in xml.getroot():
                if page.tag == '{' + NAMESPACES['mal'] + '}page':
                    pageid = page.get('id')
                    if pageid is None or pageid == '':
                        continue
                    self._collect_links(page, page.get('{http://projectmallard.org/site/1.0/}dir', ''))

        pageids = set()
        for infile in self.iter_files():
            xml = self.get_xml(infile)
            pageid = xml.getroot().get('id')
            if pageid is None or pageid == '':
                continue
            pageids.add(infile.sitedir + pageid)
            self._collect_links(xml.getroot(), infile.sitedir)

        # A page with a site-subdirs links element implicitly acts as a guide
        # for the index page of each immediate subdirectory.
        siteupdirs = {}
        for pageid in self.sitesubdirs:
            dirname = pageid[:pageid.rfind('/')+1]
            for subid in self.guidelinks:
                if subid.startswith(dirname) and subid.endswith('/index'):
                    mid = subid[len(dirname):-len('/index')]
                    if mid != '' and '/' not in mid:
                        siteupdirs[subid] = pageid

        if self.get_option_bool('site'):
            okpages = {'/index'}
        else:
            okpages = {'index'}
        for pageid in sorted(pageids):
            isok = pageid in okpages
            if not isok:
                # Breadth-first walk up the guide graph; 'guides' doubles as
                # the work queue and the visited list.
                guides = list(self.guidelinks.get(pageid, ()))
                if pageid in siteupdirs:
                    updir = siteupdirs[pageid]
                    if updir not in guides:
                        guides.append(updir)
                cur = 0
                while cur < len(guides):
                    if guides[cur] in okpages:
                        isok = True
                        break
                    if guides[cur] in self.guidelinks:
                        for guide in self.guidelinks[guides[cur]]:
                            if guide not in guides:
                                guides.append(guide)
                    cur += 1
            if isok:
                # Remember reachable pages so later walks can stop early.
                okpages.add(pageid)
            else:
                print(pageid)
                retcode = 1

        return retcode
+
+
class ValidateChecker(Checker):
    """Validate Mallard and DocBook files with xmllint or jing."""

    name = 'validate'
    desc = 'Validate files against a DTD or RNG'
    blurb = ('Validate FILES against the appropriate DTD or RNG.\n' +
             'For Mallard pages, perform automatic RNG merging\n' +
             'based on the version attribute.')
    formats = ['docbook4', 'docbook5', 'mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
        ('strict', None, None, 'Disallow unknown namespaces'),
        ('allow', None, 'NS', 'Explicitly allow namespace NS in strict mode'),
        ('jing', None, None, 'Use jing instead of xmllint for RNG validation')
    ]
    postblurb = 'NS may be a comma- and/or space-separated list, or specified\nmultiple times.'

    # Leading arguments shared by every xmllint invocation below.
    _XMLLINT = ['xmllint', '--noout', '--xinclude', '--noent']

    def _mallard_command(self, xml, infile):
        """Return the validator command for a Mallard file.

        Generates the merged RNG for the file's version string with
        mal-rng.xsl unless one already exists in the temporary directory.
        """
        root = xml.getroot()
        version = root.get('version')
        if version is None or version == '':
            tag = root.tag
            if tag == '{' + NAMESPACES['mal'] + '}stack':
                # 1.2 isn't final yet as of 2020-01-09. Stacks will
                # likely be in 1.2, so we can assume at least that.
                version = '1.2'
            elif tag == '{' + NAMESPACES['cache'] + '}cache':
                version = 'cache/1.0'
            else:
                version = '1.0'
        self.create_tmpdir()
        # One RNG per distinct version string; '/' and ' ' are replaced so
        # the string can serve as a single file name.
        rng = os.path.join(self.tmpdir,
                           version.replace('/', '__').replace(' ', '__'))
        if not os.path.exists(rng):
            strict = 'true()' if self.get_option_bool('strict') else 'false()'
            allow = self.get_option_list('allow')
            allow = '' if allow is None else ' '.join(allow)
            # The exit status is not checked here; if no RNG is produced the
            # validator run below fails and reports it.
            subprocess.call(['xsltproc', '-o', rng,
                             '--param', 'rng.strict', strict,
                             '--stringparam', 'rng.strict.allow', allow,
                             os.path.join(DATADIR, 'xslt', 'mal-rng.xsl'),
                             infile.absfile])
        if self.get_option_bool('jing'):
            return ['jing', '-i', rng, infile.filename]
        return self._XMLLINT + ['--relaxng', rng, infile.filename]

    def _docbook4_command(self, xml, infile):
        """Return the xmllint command for a DocBook 4 file.

        Uses the document's own DOCTYPE when it has one, and the DocBook 4.5
        DTD otherwise.
        """
        if xml.docinfo.doctype.startswith('<!DOCTYPE'):
            return self._XMLLINT + ['--postvalid', infile.filename]
        return self._XMLLINT + [
            '--dtdvalid',
            'http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd',
            infile.filename]

    def _docbook5_command(self, xml, infile):
        """Return the validator command for a DocBook 5 file."""
        version = xml.getroot().get('version')
        if version is None or version == '':
            version = '5.0'
        # Canonical URIs are http, but they 301 redirect to https. jing
        # can handle https fine, but not the redirect. And jing doesn't
        # look at catalogs. So just always feed jing an https URI.
        rnghttp = 'http://docbook.org/xml/' + version + '/rng/docbook.rng'
        rnghttps = 'https://docbook.org/xml/' + version + '/rng/docbook.rng'
        if self.get_option_bool('jing'):
            return ['jing', '-i', rnghttps, infile.filename]

        # xmllint, on the other hand, does support catalogs. It also
        # doesn't do the redirect, but it wouldn't matter if it did
        # because it doesn't do https. So if the schema is available
        # locally in the catalog, hand xmllint the http URI so it
        # can use the local copy. Otherwise, we have to download the
        # schema over https ourselves.
        try:
            catfile = subprocess.check_output(['xmlcatalog',
                                               '/etc/xml/catalog',
                                               rnghttp],
                                              stderr=subprocess.DEVNULL,
                                              text=True)
        except (OSError, subprocess.SubprocessError):
            # xmlcatalog is missing or did not resolve the URI: fall through
            # to the download path.
            catfile = ''
        if any(line.startswith('file://') for line in catfile.split('\n')):
            return self._XMLLINT + ['--relaxng', rnghttp, infile.filename]

        self.create_tmpdir()
        rngfile = os.path.join(self.tmpdir, 'docbook-' + version + '.rng')
        if not os.path.exists(rngfile):
            urllib.request.urlretrieve(rnghttps, rngfile)
        return self._XMLLINT + ['--relaxng', rngfile, infile.filename]

    def main(self, args):
        """Validate each input file and print the validator's error output.

        Returns 1 if argument parsing failed or a file has an unrecognised
        format; otherwise the exit status of the most recent failing
        validator run, or 0 if everything validated.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        retcode = 0
        builders = {
            'mallard': self._mallard_command,
            'docbook4': self._docbook4_command,
            'docbook5': self._docbook5_command,
        }

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            builder = builders.get(get_format(xml.getroot()))
            if builder is None:
                # Unrecognised format: nothing to validate against.
                retcode = 1
                continue
            command = builder(xml, infile)
            try:
                # Output of successful runs is discarded; only failures are
                # shown. The command runs in the file's own directory
                # because it is given the bare file name.
                subprocess.check_output(command,
                                        cwd=infile.filepath,
                                        stderr=subprocess.STDOUT,
                                        text=True)
            except subprocess.CalledProcessError as e:
                retcode = e.returncode
                print(e.output)

        return retcode
+
+
class CommentsChecker(Checker):
    """Print editorial comments (Mallard comment, DocBook remark)."""

    name = 'comments'
    desc = 'Print the editorial comments in a document'
    blurb = ('Print the editorial comments in the files FILES, using the\n' +
             'comment element in Mallard and the remark element in DocBook.')
    formats = ['docbook4', 'docbook5', 'mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
    ]

    def _print_mallard(self, xml, infile):
        """Print every mal:comment in *xml* with page, author and date."""
        mal = '{' + NAMESPACES['mal'] + '}'
        # Fall back to the file name so a page without an id is still
        # reported instead of raising on None + str.
        pageid = xml.getroot().get('id') or infile.filename
        for el in xml.xpath('//mal:comment', namespaces=NAMESPACES):
            thisid = pageid
            # Walk up to the nearest enclosing section that has an id.
            par = el
            while par is not None:
                if par.tag == mal + 'section':
                    sectid = par.get('id')
                    if sectid is not None:
                        thisid = thisid + '#' + sectid
                        break
                par = par.getparent()
            print('Page: ' + infile.sitedir + thisid)
            for ch in el.xpath('mal:cite[1]', namespaces=NAMESPACES):
                name = _stringify(ch).strip()
                href = ch.get('href')
                if href is not None and href.startswith('mailto:'):
                    name = name + ' <' + href[len('mailto:'):] + '>'
                print('From: ' + name)
                date = ch.get('date')
                if date is not None:
                    print('Date: ' + date)
            print('')
            for ch in el:
                if not isinstance(ch.tag, str):
                    # Processing instructions, XML comments and entity
                    # references have a non-string tag in lxml; they carry
                    # no content to show.
                    continue
                if ch.tag == mal + 'cite':
                    continue
                if ch.tag in (mal + 'p', mal + 'title'):
                    for s in _stringify(ch).strip().split('\n'):
                        print(' ' + s.strip())
                    print('')
                else:
                    # Other block content is only hinted at, not rendered.
                    name = lxml.etree.QName(ch).localname
                    print(' <' + name + '>...</' + name + '>\n')

    def _print_docbook(self, xml, infile, fmt):
        """Print every remark in *xml* with its location and revision flag."""
        dbxpath = '//remark' if fmt == 'docbook4' else '//db:remark'
        for el in xml.xpath(dbxpath, namespaces=NAMESPACES):
            thisid = infile.filename
            # Walk up to the nearest element (including the remark itself)
            # that has an id or xml:id.
            par = el
            while par is not None:
                sectid = par.get('id')
                if sectid is None:
                    sectid = par.get(XML_ID)
                if sectid is not None:
                    thisid = thisid + '#' + sectid
                    break
                par = par.getparent()
            print('Page: ' + thisid)
            flag = el.get('revisionflag')
            if flag is not None:
                print('Flag: ' + flag)
            print('')
            for s in _stringify(el).strip().split('\n'):
                print(' ' + s.strip())
            print('')

    def main(self, args):
        """Print the comments found in each input file.

        Returns 1 if argument parsing failed, 0 otherwise.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            fmt = get_format(xml.getroot())
            if fmt == 'mallard':
                self._print_mallard(xml, infile)
            elif fmt in ('docbook4', 'docbook5'):
                self._print_docbook(xml, infile, fmt)

        return 0
+
+
class LicenseChecker(Checker):
    """Report the license identifiers declared by Mallard pages."""

    name = 'license'
    desc = 'Report the license of Mallard pages'
    blurb = ('Report the license of the Mallard page files FILES. Each\n' +
             'matching page is reporting along with its license, reported\n' +
             'based on the href attribute of the license element. Common\n' +
             'licenses use a shortened identifier. Pages with multiple\n' +
             'licenses have the identifiers separated by spaces. Pages\n' +
             'with no license element report \'none\'. Licenses with no\n' +
             'href attribute are reported as \'unknown\'')
    formats = ['mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
        ('only', None, 'LICENSES', 'Only show pages whose license is in LICENSES'),
        ('except', None, 'LICENSES', 'Exclude pages whose license is in LICENSES'),
        ('totals', None, None, 'Show total counts for each license')
    ]
    postblurb = 'LICENSES may be a comma- and/or space-separated list, or specified\nmultiple times.'

    def get_license(self, href):
        """Map a license URL to a short identifier.

        Creative Commons URLs become 'cc-' followed by the path segments
        after 'licenses' joined with '-'. GNU URLs become their last path
        segment without '.html'. Anything else, including None, is 'unknown'.
        """
        if href is not None:
            if href.startswith(('http://creativecommons.org/licenses/',
                                'https://creativecommons.org/licenses/')):
                segments = [part for part in href.split('/') if part]
                # segments[0:3] are the scheme, the host and 'licenses'.
                return 'cc-' + '-'.join(segments[3:])
            if href.startswith(('http://www.gnu.org/licenses/',
                                'https://www.gnu.org/licenses/')):
                lastpart = href.split('/')[-1]
                return lastpart.replace('.html', '')
        return 'unknown'

    def main(self, args):
        """Print each page's licenses, or per-license totals with --totals.

        Returns 1 if argument parsing failed, 0 otherwise.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        counts = {}

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            thisid = xml.getroot().get('id') or infile.filename
            licenses = [self.get_license(el.get('href'))
                        for el in xml.xpath('/mal:page/mal:info/mal:license',
                                            namespaces=NAMESPACES)]
            if not licenses:
                licenses.append('none')

            # --only keeps a page when at least one of its licenses is listed
            wanted = self.get_option_list('only')
            if wanted is not None:
                if not any(lic in wanted for lic in licenses):
                    continue
            # --except drops a page when any of its licenses is listed
            unwanted = self.get_option_list('except')
            if unwanted is not None:
                if any(lic in unwanted for lic in licenses):
                    continue

            if not self.get_option_bool('totals'):
                print(infile.sitedir + thisid + ': ' + ' '.join(licenses))
            else:
                for lic in licenses:
                    counts[lic] = counts.get(lic, 0) + 1

        if self.get_option_bool('totals'):
            for lic in sorted(counts):
                print(lic + ': ' + str(counts[lic]))

        return 0
+
+
class StatusChecker(Checker):
    """Report the status of Mallard pages from their revision elements."""

    name = 'status'
    desc = 'Report the status of Mallard pages'
    blurb = ('Report the status of the Mallard page files FILES. Each\n' +
             'matching page is reporting along with its status.')
    formats = ['mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
        ('version', None, 'VER', 'Select revisions with the version attribute VER'),
        ('docversion', None, 'VER', 'Select revisions with the docversion attribute VER'),
        ('pkgversion', None, 'VER', 'Select revisions with the pkgversion attribute VER'),
        ('older', None, 'DATE', 'Only show pages older than DATE'),
        ('newer', None, 'DATE', 'Only show pages newer than DATE'),
        ('only', None, 'STATUSES', 'Only show pages whose status is in STATUSES'),
        ('except', None, 'STATUSES', 'Exclude pages whose status is in STATUSES'),
        ('totals', None, None, 'Show total counts for each status')
    ]
    postblurb = 'VER and STATUSES may be comma- and/or space-separated lists, or specified\nmultiple times.'

    @staticmethod
    def _best_revision(xml, checks):
        """Return the revision element that best describes the page, or None.

        Only revisions satisfying every entry of *checks* are considered
        (each entry is a list of acceptable version tokens, at least one of
        which must be present). Among those, the one with the greatest date
        string wins and later elements win ties; a dated revision replaces
        an undated one but not vice versa.
        """
        bestrev = None
        for rev in xml.xpath('/mal:page/mal:info/mal:revision', namespaces=NAMESPACES):
            revversion = (rev.get('version') or '').split()
            docversion = rev.get('docversion')
            if docversion is not None:
                revversion.append('doc:' + docversion)
            pkgversion = rev.get('pkgversion')
            if pkgversion is not None:
                revversion.append('pkg:' + pkgversion)
            if not all(any(v in revversion for v in check) for check in checks):
                continue
            if bestrev is None:
                bestrev = rev
                continue
            bestdate = bestrev.get('date')
            thisdate = rev.get('date')
            if bestdate is None:
                bestrev = rev
            elif thisdate is None:
                pass
            elif thisdate >= bestdate:
                # Plain string comparison of the date attributes.
                bestrev = rev
        return bestrev

    def main(self, args):
        """Print each page's status, or per-status totals with --totals.

        Returns 1 if argument parsing failed, 0 otherwise.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        totals = {}

        # Each selected option contributes one list of acceptable tokens;
        # doc/pkg versions are tagged so they cannot collide with plain ones.
        checks = []
        ver = self.get_option_list('version')
        if ver is not None:
            checks.append(ver)
        ver = self.get_option_list('docversion')
        if ver is not None:
            checks.append(['doc:' + v for v in ver])
        ver = self.get_option_list('pkgversion')
        if ver is not None:
            checks.append(['pkg:' + v for v in ver])

        # Filters do not change between files, so look them up once.
        # NOTE(review): assumes the get_option_* helpers are plain lookups.
        older = self.get_option_str('older')
        newer = self.get_option_str('newer')
        only = self.get_option_list('only')
        cept = self.get_option_list('except')
        show_totals = self.get_option_bool('totals')

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            # Fall back to the file name so a page without an id can still
            # be printed instead of raising on str + None.
            pageid = xml.getroot().get('id') or infile.filename
            bestrev = self._best_revision(xml, checks)
            if bestrev is not None:
                status = bestrev.get('status') or 'none'
                date = bestrev.get('date') or None
            else:
                status = 'none'
                date = None
            # Pages without a date never match a date filter.
            if older is not None:
                if date is None or date >= older:
                    continue
            if newer is not None:
                if date is None or date <= newer:
                    continue
            if only is not None:
                if status not in only:
                    continue
            if cept is not None:
                if status in cept:
                    continue
            if show_totals:
                totals[status] = totals.get(status, 0) + 1
            else:
                print(infile.sitedir + pageid + ': ' + status)

        if show_totals:
            for st in sorted(totals):
                print(st + ': ' + str(totals[st]))

        return 0
+
+
class StyleChecker(Checker):
    """Report the style attribute of Mallard pages."""

    name = 'style'
    desc = 'Report the style attribute of Mallard pages'
    blurb = ('Report the page style attribute of the Mallard page files\n' +
             'FILES. Each matching page is reported along with its style.')
    formats = ['mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
        ('only', None, 'STYLES', 'Only show pages whose style is in STYLES'),
        ('except', None, 'STYLES', 'Exclude pages whose style is in STYLES'),
        ('totals', None, None, 'Show total counts for each style')
    ]
    postblurb = 'STYLES may be comma- and/or space-separated lists, or specified\nmultiple times.'

    def main(self, args):
        """Print each page's style, or per-style totals with --totals.

        Pages with no style (missing or blank attribute) are reported as
        'none'. Returns 1 if argument parsing failed, 0 otherwise.
        """
        if self.parse_args(args) != 0:
            return 1
        if 'help' in self.options:
            self.print_help()
            return 0

        totals = {}

        # Filters do not change between files, so look them up once.
        # NOTE(review): assumes the get_option_* helpers are plain lookups.
        only = self.get_option_list('only')
        cept = self.get_option_list('except')
        show_totals = self.get_option_bool('totals')

        for infile in self.iter_files():
            xml = self.get_xml(infile)
            root = xml.getroot()
            # Fall back to the file name so a page without an id can still
            # be printed instead of raising on str + None.
            thisid = root.get('id') or infile.filename
            style = root.get('style')
            if style is None or style.strip() == '':
                # A blank attribute is treated like a missing one.
                style = 'none'
            styles = style.split()

            show = True
            if only is not None:
                if len(only) == 0:
                    # We treat a blank --only as requesting pages with no style
                    show = (style == 'none')
                else:
                    show = any(st in only for st in styles)
            if show and cept is not None:
                if len(cept) == 0:
                    # Likewise, a blank --except excludes pages with no style
                    show = (style != 'none')
                else:
                    show = not any(st in cept for st in styles)
            if not show:
                continue

            if show_totals:
                for st in styles:
                    totals[st] = totals.get(st, 0) + 1
            else:
                print(infile.sitedir + thisid + ': ' + style)

        if show_totals:
            for st in sorted(totals):
                print(st + ': ' + str(totals[st]))

        return 0
+
+
+class CustomChecker(Checker):
+    """Checker whose behavior is defined by a ``check:NAME`` config section.
+
+    The section may provide ``blurb`` (help text) and ``assert`` (an XPath
+    expression that must evaluate to true), plus the optional ``select``,
+    ``message`` and ``xinclude`` keys read by run_assert().
+    """
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def __init__(self, name, yelpcheck):
+        """Create a checker for the custom command called name."""
+        super().__init__(yelpcheck)
+        self.name = name
+
+    def main(self, args):
+        """Run the custom command.
+
+        Returns 1 when the arguments cannot be parsed, when there is no
+        ``check:NAME`` section for this command, or when the section
+        defines no ``assert`` action. Returns 0 after printing help.
+        Otherwise returns the result of run_assert().
+        """
+        if self.parse_args(args) != 0:
+            return 1
+
+        sect = 'check:' + self.name
+        if self.config is None or (sect not in self.config.sections()):
+            print('Unrecognized command: ' + self.name, file=sys.stderr)
+            return 1
+        self.blurb = self.config.get(sect, 'blurb', fallback=None)
+        if self.blurb is not None:
+            self.blurb = '\n'.join(textwrap.wrap(self.blurb))
+
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        assertexpr = self.config.get(sect, 'assert', fallback=None)
+        if assertexpr is not None:
+            return self.run_assert(assertexpr)
+
+        print('No action found for command: ' + self.name, file=sys.stderr)
+        return 1
+
+    def run_assert(self, assertexpr):
+        """Evaluate assertexpr against every node selected in every file.
+
+        For each input file, the ``select`` XPath (default ``/``) picks
+        the context nodes, and assertexpr is evaluated relative to each
+        of them. Every node for which it is false is reported as
+        ``<sitedir><id>: <message>``.
+
+        Returns 1 if at least one assertion failed, otherwise 0, so that
+        the process exit status reflects the failures.
+        """
+        sect = 'check:' + self.name
+        selectexpr = self.config.get(sect, 'select', fallback='/')
+        message = self.config.get(sect, 'message', fallback='Assertion failed')
+        # NOTE(review): presumably consulted by get_xml() to decide whether
+        # to process XInclude -- confirm against the Checker base class.
+        self.xinclude = self.config.get(sect, 'xinclude', fallback='true') != 'false'
+
+        # Prefix -> URI bindings available to both XPath expressions
+        namespaces = {}
+        if 'namespaces' in self.config.sections():
+            for ns in self.config.options('namespaces'):
+                namespaces[ns] = self.config.get('namespaces', ns)
+
+        retval = 0
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            thisid = xml.getroot().get('id') or infile.filename
+            # FIXME check these expressions and give better errors
+            for root in xml.xpath(selectexpr, namespaces=namespaces):
+                if not bool(root.xpath(assertexpr, namespaces=namespaces)):
+                    print(infile.sitedir + thisid + ': ' + message)
+                    retval = 1
+        return retval
+
+
+class YelpCheck:
+    """Command-line entry point: dispatches ``yelp-check COMMAND`` to a Checker."""
+
+    def __init__(self):
+        pass
+
+    def main(self):
+        """Run the command named by sys.argv[1] and return its exit status.
+
+        With no command, the usage text is printed and 1 is returned.
+        A command that matches no built-in Checker subclass is handed
+        to CustomChecker.
+        """
+        if len(sys.argv) < 2:
+            self.print_usage()
+            return 1
+
+        command = sys.argv[1]
+        checker = None
+        for cls in Checker.__subclasses__():
+            if command == cls.name:
+                checker = cls(self)
+                break
+
+        if checker is None:
+            checker = CustomChecker(command, self)
+
+        return checker.main(sys.argv[2:])
+
+    @staticmethod
+    def _print_commands(title, commands, namelen):
+        """Print one titled usage section; commands is a list of (name, desc) pairs."""
+        if not commands:
+            return
+        print('\n' + title)
+        for name, desc in commands:
+            print(' ' + name.ljust(namelen) + desc)
+
+    def print_usage(self):
+        """Print the usage line followed by the available commands.
+
+        Built-in commands are grouped into check, report and other
+        commands. Custom commands are then read from ``check:NAME``
+        sections of ``.yelp-tools.cfg`` in the current directory. A
+        section is listed only if it has an ``assert`` key and its name
+        does not match a built-in command.
+        """
+        print('Usage: yelp-check <COMMAND> [OPTIONS] [FILES]')
+        namelen = 2
+        checks = []
+        reports = []
+        others = []
+        for cls in sorted(Checker.__subclasses__(), key=(lambda cls: cls.name or '')):
+            if cls is CustomChecker:
+                continue
+            namelen = max(namelen, len(cls.name) + 2)
+            entry = (cls.name, cls.desc)
+            if cls in (HrefsChecker, IdsChecker, LinksChecker,
+                       MediaChecker, OrphansChecker, ValidateChecker):
+                checks.append(entry)
+            elif cls in (CommentsChecker, LicenseChecker, StatusChecker,
+                         StyleChecker):
+                reports.append(entry)
+            else:
+                others.append(entry)
+        self._print_commands('Check commands:', checks, namelen)
+        self._print_commands('Report commands:', reports, namelen)
+        self._print_commands('Other commands:', others, namelen)
+
+        config = configparser.ConfigParser()
+        try:
+            config.read('.yelp-tools.cfg')
+        except (configparser.Error, UnicodeError):
+            # Listing custom commands is best-effort: an unparsable config
+            # file must not break the usage message. Only parse/decoding
+            # errors are swallowed, so Ctrl-C still propagates.
+            return
+
+        builtin_names = {cls.name for cls in Checker.__subclasses__()}
+        customs = []
+        for sect in config.sections():
+            if not sect.startswith('check:'):
+                continue
+            name = sect[6:]
+            # A custom section cannot override a built-in command
+            if name in builtin_names:
+                continue
+            if config.get(sect, 'assert', fallback=None) is None:
+                continue
+            desc = config.get(sect, 'desc', fallback='')
+            namelen = max(namelen, len(name) + 2)
+            customs.append((name, desc))
+        self._print_commands('Custom commands:', customs, namelen)
+
+
+if __name__ == '__main__':
+    # Run the tool and use its return value as the process exit status.
+    # An interrupt (Ctrl-C) ends the program with status 1 instead of a
+    # traceback.
+    try:
+        exit_status = YelpCheck().main()
+    except KeyboardInterrupt:
+        exit_status = 1
+    sys.exit(exit_status)