Merge branch 'master' into tchaik/yelp-tools-wip/tchaik/meson

author: Shaun McCance <shaunm@redhat.com> 2020-12-26 12:33:16 -0500
committer: Shaun McCance <shaunm@redhat.com> 2020-12-26 12:33:16 -0500
commit: c08dcf356862dbac10dd8725dfa8359ee07cb643 (patch)
tree: 90dcac6f523dbc3715385213be99144275214d5e
parent: 36cac1db2806a6f50baaec01469f710061b42910 (diff)
parent: 3eb7368983d5c538df4fa7cb181b7f262a1b115e (diff)
download: yelp-tools-c08dcf356862dbac10dd8725dfa8359ee07cb643.tar.gz
15 files changed, 1805 insertions, 20 deletions
diff --git a/NEWS b/NEWS
index 587afd3..e848e65 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,27 @@
+3.38.0
+======
+* Stable release. No changes since 3.37.90
+
+3.37.90
+=======
+* yelp-new: Added new templates
+
+3.32.2
+======
+* yelp-build: Fix spurious warning about missing stack files
+
+3.32.1
+======
+* yelp-check: Fix validation for DocBook 5 with https redirects
+
+3.32.0
+======
+* Stable release. No changes since 3.31.90
+
+3.31.90
+=======
+* Initial support for Mallard 1.2 stack files
+
 3.28.0
 ======
 * Stable release. No changes since 3.27.90
diff --git a/configure.ac b/configure.ac
index 8093c7a..67f8e7d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([yelp-tools], [3.28.0],
+AC_INIT([yelp-tools], [3.38.0],
         [http://bugzilla.gnome.org/enter_bug.cgi?product=yelp-tools])
 AM_INIT_AUTOMAKE([1.9 no-dist-gzip dist-bzip2])
 
diff --git a/templates/Makefile.am b/templates/Makefile.am
index d4bbb75..1535f3a 100644
--- a/templates/Makefile.am
+++ b/templates/Makefile.am
@@ -1,4 +1,12 @@
 tmpldir = $(datadir)/yelp-tools/templates
-tmpl_DATA = task.page
+tmpl_DATA = 		\
+	concept.duck	\
+	concept.page	\
+	guide.duck	\
+	guide.page	\
+	reference.duck	\
+	reference.page	\
+	task.duck	\
+	task.page
 
 EXTRA_DIST = $(tmpl_DATA)
diff --git a/templates/concept.duck b/templates/concept.duck
new file mode 100644
index 0000000..c349e88
--- /dev/null
+++ b/templates/concept.duck
@@ -0,0 +1,47 @@
+@ducktype/1.0
+[-] yelp-tmpl-desc Explanation of a concept or background information
+
+= @TITLE@
+  [topic .concept version=1.1]
+
+[--
+  Recommended statuses: stub incomplete draft outdated review candidate final
+  Remove version attributes you don't use.
+--]
+@revision[version=0.1 docversion=0.1 pkgversion=0.1 date=@DATE@ status=stub]
+
+@credit[author copyright]
+  @name @NAME@
+  @email @EMAIL@
+  @years @YEAR@
+
+[--
+  This puts a link to this topic on the index page.
+  Change the xref to link it from another guide.
+--]
+@link[guide >index]
+
+[--
+  Think about whether other pages should be in the seealso list.
+  The target page will automatically get a seealso link back.
+@link[seealso >someotherid]
+--]
+
+[--
+  Think about whether external resources should be in the seealso
+  list. These require a title.
+@link[seealso >>http://someurl]
+  @title Link title
+--]
+
+@desc Write a short page description here.
+
+@keywords comma-separated list, of keywords, for search
+
+
+Provide as many paragraphs, lists, or media as necessary to explain.
+
+[list]
+. Next steps
+* Optionally, links to other things the user might do now.
+* But consider using seealso and other info links instead.
diff --git a/templates/concept.page b/templates/concept.page
new file mode 100644
index 0000000..92fbbb8
--- /dev/null
+++ b/templates/concept.page
@@ -0,0 +1,52 @@
+<?yelp-tmpl-desc Explanation of a concept or background information?>
+<page xmlns="http://projectmallard.org/1.0/"
+      type="topic" style="concept" version="1.1"
+      id="@ID@">
+  <info>
+    <!--
+        Recommended statuses: stub incomplete draft outdated review candidate final
+        Remove version attributes you don't use.
+    -->
+    <revision version="0.1" docversion="0.1" pkgversion="0.1" date="@DATE@" status="stub"/>
+
+    <credit type="author copyright">
+      <name>@NAME@</name>
+      <email>@EMAIL@</email>
+      <years>@YEAR@</years>
+    </credit>
+
+    <!--
+        This puts a link to this topic on the index page.
+        Change the xref to link it from another guide.
+    -->
+    <link type="guide" xref="index"/>
+
+    <!--
+        Think about whether other pages should be in the seealso list.
+        The target page will automatically get a seealso link back.
+    <link type="seealso" xref="someotherid"/>
+    -->
+
+    <!--
+        Think about whether external resources should be in the seealso
+        list. These require a title.
+    <link type="seealso" href="http://someurl">
+      <title>Link title</title>
+    </link>
+    -->
+
+    <desc>Write a short page description here.</desc>
+
+    <keywords>comma-separated list, of keywords, for search</keywords>
+  </info>
+
+  <title>@TITLE@</title>
+
+  <p>Provide as many paragraphs, lists, or media as necessary to explain.</p>
+
+  <list>
+    <title>Next steps</title>
+    <item><p>Optionally, links to other things the user might do now.</p></item>
+    <item><p>But consider using seealso and other info links instead.</p></item>
+  </list>
+</page>
diff --git a/templates/guide.duck b/templates/guide.duck
new file mode 100644
index 0000000..61cd746
--- /dev/null
+++ b/templates/guide.duck
@@ -0,0 +1,49 @@
+@ducktype/1.0
+[-] yelp-tmpl-desc Navigational glue for Mallard documents
+
+= @TITLE@
+  [guide version=1.1]
+
+[--
+  Recommended statuses: stub incomplete draft outdated review candidate final
+  Remove version attributes you don't use.
+--]
+@revision[version=0.1 docversion=0.1 pkgversion=0.1 date=@DATE@ status=stub]
+
+@credit[author copyright]
+  @name @NAME@
+  @email @EMAIL@
+  @years @YEAR@
+
+[--
+  This puts a link to this topic on the index page.
+  Change the xref to link it from another guide.
+--]
+@link[guide >index]
+
+[--
+  Think about whether other pages should be in the seealso list.
+  The target page will automatically get a seealso link back.
+@link[seealso >someotherid]
+--]
+
+[--
+  Think about whether external resources should be in the seealso
+  list. These require a title.
+@link[seealso >>http://someurl]
+  @title Link title
+--]
+
+@desc Write a short page description here.
+
+@keywords comma-separated list, of keywords, for search
+
+
+Optionally, an introductory paragraph.
+
+[--
+  The links element is implicit, but you might want to add one
+  explicitly if you want to do link grouping or styles.
+[links topic .STYLE groups="GROUPS"]
+  . Optional title
+--]
diff --git a/templates/guide.page b/templates/guide.page
new file mode 100644
index 0000000..894b276
--- /dev/null
+++ b/templates/guide.page
@@ -0,0 +1,55 @@
+<?yelp-tmpl-desc Navigational glue for Mallard documents?>
+<page xmlns="http://projectmallard.org/1.0/"
+      type="guide" version="1.1"
+      id="@ID@">
+  <info>
+    <!--
+        Recommended statuses: stub incomplete draft outdated review candidate final
+        Remove version attributes you don't use.
+    -->
+    <revision version="0.1" docversion="0.1" pkgversion="0.1" date="@DATE@" status="stub"/>
+
+    <credit type="author copyright">
+      <name>@NAME@</name>
+      <email>@EMAIL@</email>
+      <years>@YEAR@</years>
+    </credit>
+
+    <!--
+        This puts a link to this topic on the index page.
+        Change the xref to link it from another guide.
+    -->
+    <link type="guide" xref="index"/>
+
+    <!--
+        Think about whether other pages should be in the seealso list.
+        The target page will automatically get a seealso link back.
+    <link type="seealso" xref="someotherid"/>
+    -->
+
+    <!--
+        Think about whether external resources should be in the seealso
+        list. These require a title.
+    <link type="seealso" href="http://someurl">
+      <title>Link title</title>
+    </link>
+    -->
+
+    <desc>Write a short page description here.</desc>
+
+    <keywords>comma-separated list, of keywords, for search</keywords>
+  </info>
+
+  <title>@TITLE@</title>
+
+  <p>Optionally, an introductory paragraph.</p>
+
+  <!--
+      The links element is implicit, but you might want to add one
+      explicitly if you want to do link grouping or styles.
+  <links type="topic" style="STYLE" groups="GROUPS">
+    <title>Optional title</title>
+  </links>
+  -->
+
+</page>
diff --git a/templates/reference.duck b/templates/reference.duck
new file mode 100644
index 0000000..0aae96b
--- /dev/null
+++ b/templates/reference.duck
@@ -0,0 +1,90 @@
+@ducktype/1.0
+[-] yelp-tmpl-desc Lists or tables of information for quick lookup
+
+= @TITLE@
+  [topic .reference version=1.1]
+
+[--
+  Recommended statuses: stub incomplete draft outdated review candidate final
+  Remove version attributes you don't use.
+--]
+@revision[version=0.1 docversion=0.1 pkgversion=0.1 date=@DATE@ status=stub]
+
+@credit[author copyright]
+  @name @NAME@
+  @email @EMAIL@
+  @years @YEAR@
+
+[--
+  This puts a link to this topic on the index page.
+  Change the xref to link it from another guide.
+--]
+@link[guide >index]
+
+[--
+  Think about whether other pages should be in the seealso list.
+  The target page will automatically get a seealso link back.
+@link[seealso >someotherid]
+--]
+
+[--
+  Think about whether external resources should be in the seealso
+  list. These require a title.
+@link[seealso >>http://someurl]
+  @title Link title
+--]
+
+@desc Write a short page description here.
+
+@keywords comma-separated list, of keywords, for search
+
+
+Optionally provide introductory text.
+Use terms lists, tables, and bullet lists for reference material.
+
+
+[terms]
+. Example terms list (title optional)
+
+- Term 1
+* Description
+
+- Term 2
+* Terms can have multiple paragraphs.
+
+  [screen]
+  Or any other block element.
+
+- Term 3.1
+- Term 3.2
+* Terms can also have multiple titles.
+
+
+[table]
+. Example table (title optional)
+
+[thead]
+[tr]
+- Column 1
+- Column 2
+
+[tbody]
+[tr]
+* Row 1, column 1
+* Row 1, column 2
+
+[tr]
+* Row 2, column 1
+* Row 2, column 2
+
+
+[list]
+. Example list (title optional)
+
+* Item 1
+
+* Item 2
+  * Subitem 2.1
+  * Subitem 2.2
+
+* Item 3
diff --git a/templates/reference.page b/templates/reference.page
new file mode 100644
index 0000000..c7364ee
--- /dev/null
+++ b/templates/reference.page
@@ -0,0 +1,99 @@
+<?yelp-tmpl-desc Lists or tables of information for quick lookup?>
+<page xmlns="http://projectmallard.org/1.0/"
+      type="topic" style="reference" version="1.1"
+      id="@ID@">
+  <info>
+    <!--
+        Recommended statuses: stub incomplete draft outdated review candidate final
+        Remove version attributes you don't use.
+    -->
+    <revision version="0.1" docversion="0.1" pkgversion="0.1" date="@DATE@" status="stub"/>
+
+    <credit type="author copyright">
+      <name>@NAME@</name>
+      <email>@EMAIL@</email>
+      <years>@YEAR@</years>
+    </credit>
+
+    <!--
+        This puts a link to this topic on the index page.
+        Change the xref to link it from another guide.
+    -->
+    <link type="guide" xref="index"/>
+
+    <!--
+        Think about whether other pages should be in the seealso list.
+        The target page will automatically get a seealso link back.
+    <link type="seealso" xref="someotherid"/>
+    -->
+
+    <!--
+        Think about whether external resources should be in the seealso
+        list. These require a title.
+    <link type="seealso" href="http://someurl">
+      <title>Link title</title>
+    </link>
+    -->
+
+    <desc>Write a short page description here.</desc>
+
+    <keywords>comma-separated list, of keywords, for search</keywords>
+  </info>
+
+  <title>@TITLE@</title>
+
+  <p>Optionally provide introductory text.
+  Use terms lists, tables, and bullet lists for reference material.</p>
+
+  <terms>
+    <title>Example terms list (title optional)</title>
+    <item>
+      <title>Term 1</title>
+      <para>Description</para>
+    </item>
+    <item>
+      <title>Term 2</title>
+      <p>Terms can have multiple paragraphs.</p>
+      <screen>Or any other block element.</screen>
+    </item>
+    <item>
+      <title>Term 3.1</title>
+      <title>Term 3.2</title>
+      <p>Terms can also have multiple titles.</p>
+    </item>
+  </terms>
+
+  <table>
+    <title>Example table (title optional)</title>
+    <thead>
+      <tr>
+        <th>Column 1</th>
+        <th>Column 2</th>
+      </tr>
+    </thead>
+    <tbody>
+      <tr>
+        <td>Row 1, column 1</td>
+        <td>Row 1, column 2</td>
+      </tr>
+      <tr>
+        <td>Row 2, column 1</td>
+        <td>Row 2, column 2</td>
+      </tr>
+    </tbody>
+  </table>
+
+  <list>
+    <title>Example list (title optional)</title>
+    <item><p>Item 1</p></item>
+    <item>
+      <p>Item 2</p>
+      <list>
+        <item><p>Subitem 2.1</p></item>
+        <item><p>Subitem 2.2</p></item>
+      </list>
+    </item>
+    <item><p>Item 3</p></item>
+  </list>
+
+</page>
diff --git a/templates/task.duck b/templates/task.duck
new file mode 100644
index 0000000..75c8243
--- /dev/null
+++ b/templates/task.duck
@@ -0,0 +1,63 @@
+@ducktype/1.0
+[-] yelp-tmpl-desc Description of how to accomplish a user task
+
+= @TITLE@
+  [topic .task version=1.1]
+
+[--
+  Recommended statuses: stub incomplete draft outdated review candidate final
+  Remove version attributes you don't use.
+--]
+@revision[version=0.1 docversion=0.1 pkgversion=0.1 date=@DATE@ status=stub]
+
+@credit[author copyright]
+  @name @NAME@
+  @email @EMAIL@
+  @years @YEAR@
+
+[--
+  This puts a link to this topic on the index page.
+  Change the xref to link it from another guide.
+--]
+@link[guide >index]
+
+[--
+  Think about whether other pages should be in the seealso list.
+  The target page will automatically get a seealso link back.
+@link[seealso >someotherid]
+--]
+
+[--
+  Think about whether external resources should be in the seealso
+  list. These require a title.
+@link[seealso >>http://someurl]
+  @title Link title
+--]
+
+@desc Write a short page description here.
+
+@keywords comma-separated list, of keywords, for search
+
+
+Short introductory text: Write a couple sentences about what the
+user is doing here, and why they might want to do it.
+
+[list]
+. Prerequisites
+* Optionally, list things the user needs to know or do first.
+* Use links to other pages whenever they make sense.
+
+[steps]
+. Optional title if different from page title or if prereqs present
+* First step...
+* Second step...
+* Third step...
+
+Optionally, write expected results if non-obvious.
+
+[list]
+. Next steps
+* Optionally, links to other things the user might do now.
+* But consider using seealso and other info links instead.
+
+
diff --git a/templates/task.page b/templates/task.page
index 225aefe..ba7be5f 100644
--- a/templates/task.page
+++ b/templates/task.page
@@ -1,9 +1,13 @@
 <?yelp-tmpl-desc Description of how to accomplish a user task?>
 <page xmlns="http://projectmallard.org/1.0/"
-      type="topic" style="task"
+      type="topic" style="task" version="1.1"
       id="@ID@">
   <info>
-    <revision version="0.1" date="@DATE@" status="stub"/>
+    <!--
+        Recommended statuses: stub incomplete draft outdated review candidate final
+        Remove version attributes you don't use.
+    -->
+    <revision version="0.1" docversion="0.1" pkgversion="0.1" date="@DATE@" status="stub"/>
 
     <credit type="author copyright">
       <name>@NAME@</name>
@@ -11,23 +15,55 @@
       <years>@YEAR@</years>
     </credit>
 
-    <desc></desc>
+    <!--
+        This puts a link to this topic on the index page.
+        Change the xref to link it from another guide.
+    -->
+    <link type="guide" xref="index"/>
+
+    <!--
+        Think about whether other pages should be in the seealso list.
+        The target page will automatically get a seealso link back.
+    <link type="seealso" xref="someotherid"/>
+    -->
+
+    <!--
+        Think about whether external resources should be in the seealso
+        list. These require a title.
+    <link type="seealso" href="http://someurl">
+      <title>Link title</title>
+    </link>
+    -->
+
+    <desc>Write a short page description here.</desc>
+
+    <keywords>comma-separated list, of keywords, for search</keywords>
   </info>
 
   <title>@TITLE@</title>
 
-  <comment>
-    <cite date="@DATE@" href="mailto:@EMAIL@">@NAME@</cite>
-    <p>This assumes the reader knows how to.... By the end of this page,
-    the reader will be able to....</p>
-  </comment>
+  <p>Short introductory text: Write a couple sentences about what the
+  user is doing here, and why they might want to do it.</p>
 
-  <p>Short introductory text...</p>
+  <list>
+    <title>Prerequisites</title>
+    <item><p>Optionally, list things the user needs to know or do first.</p></item>
+    <item><p>Use links to other pages whenever they make sense.</p></item>
+  </list>
 
   <steps>
+    <title>Optional title if different from page title or if prereqs present</title>
     <item><p>First step...</p></item>
     <item><p>Second step...</p></item>
     <item><p>Third step...</p></item>
   </steps>
 
+  <p>Optionally, write expected results if non-obvious.</p>
+
+  <list>
+    <title>Next steps</title>
+    <item><p>Optionally, links to other things the user might do now.</p></item>
+    <item><p>But consider using seealso and other info links instead.</p></item>
+  </list>
+
 </page>
diff --git a/tools/yelp-build.in b/tools/yelp-build.in
index 898325b..f3e21d8 100755
--- a/tools/yelp-build.in
+++ b/tools/yelp-build.in
@@ -154,13 +154,18 @@ yelp_paths_normalize () {
 
 yelp_cache_in_page () {
     fbase=$(basename "$1")
+    ext=$(echo "$fbase" | sed -e 's/.*\.//')
     fdir=$( (cd $(dirname "$1") && pwd) )
     sdir=${fdir##${cache_site_root}}/
     url=file://$(echo "$fdir/$fbase" | urlencode)
     if [ "x$cache_site" = "x1" ]; then
         siteattr=' site:dir="'"$sdir"'"'
     fi
-    echo '<page cache:href="'"$url"'"'"$siteattr"'/>'
+    if [ "x$ext" = "xstack" ]; then
+        echo '<stack cache:href="'"$url"'"'"$siteattr"'/>'
+    else
+        echo '<page cache:href="'"$url"'"'"$siteattr"'/>'
+    fi
 }
 
 yelp_cache_in_site () {
@@ -171,7 +176,7 @@ yelp_cache_in_site () {
             fi
         fi
     done
-    for page in "$1"/*.page; do
+    for page in "$1"/*.page "$1"/*.stack; do
         if [ -e "$page" ]; then
             yelp_cache_in_page "$page"
         fi
@@ -189,8 +194,10 @@ yelp_cache_in () {
             if [ "x$cache_site" = "x1" ]; then
                 yelp_cache_in_site "$page"
             else
-                for sub in "$page"/*.page; do
-                    yelp_cache_in_page "$sub"
+                for sub in "$page"/*.page "$page"/*.stack; do
+                    if [ -e "$sub" ]; then
+                        yelp_cache_in_page "$sub"
+                    fi
                 done
             fi
         else
@@ -376,15 +383,17 @@ yelp_html_mal2html () {
         html_cache_url='file://'`echo "$html_cache_file" | urlencode`
         echo '<xsl:param name="mal.cache.file" select="'"'$html_cache_url'"'"/>'
         echo '<xsl:template match="/">'
-        echo '<xsl:for-each select="cache:cache/mal:page">'
+        echo '<xsl:for-each select="cache:cache/mal:page | cache:cache/mal:stack">'
         echo '<xsl:variable name="href" select="@cache:href"/>'
-        echo '<xsl:for-each select="document(@cache:href)/mal:page">'
+        echo '<xsl:for-each select="document(@cache:href)">'
+        echo '<xsl:for-each select="mal:page | mal:stack/mal:page">'
         echo '<xsl:call-template name="html.output"/>'
         echo '<xsl:call-template name="mal.files.copy">'
         echo ' <xsl:with-param name="href" select="substring-after($href, '\''file://'\'')"/>'
         echo '</xsl:call-template>'
         echo '</xsl:for-each>'
         echo '</xsl:for-each>'
+        echo '</xsl:for-each>'
         echo '</xsl:template>'
         echo '</xsl:stylesheet>'
     ) | (cd "$html_out" && xsltproc $html_profile \
diff --git a/tools/yelp-check.in b/tools/yelp-check.in
index 526679d..d46e004 100755
--- a/tools/yelp-check.in
+++ b/tools/yelp-check.in
@@ -1060,11 +1060,29 @@ yelp_validate_db () {
     major=$(echo "$version" | cut -c1)
     if [ "x$major" = "x5" ]; then
         check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
-        rng_uri="http://docbook.org/xml/$version/rng/docbook.rng"
+        # Canonical URIs are http, but they 301 redirect to https. jing can handle
+        # https fine, but not the redirect. And jing doesn't look at catalogs. So
+        # just always feed jing an https URI.
         if [ "x$check_jing" = "x1" ]; then
+            rng_uri="https://docbook.org/xml/$version/rng/docbook.rng"
             jing -i "$rng_uri" "$1" > "$check_out_file" 2>&1
         else
-            xmllint --noout --xinclude --noent --relaxng "$rng_uri" "$1" > "$check_out_file" 2>&1
+            # xmllint, on the other hand, does support catalogs. It also doesn't
+            # do the redirect, but it wouldn't matter if it did because it doesn't
+            # do https. So if the schema is available locally in the catalog, hand
+            # xmllint the http URI so it can use the local copy. Otherwise, we have
+            # to get curl involved to do https.
+            rng_uri="http://docbook.org/xml/$version/rng/docbook.rng"
+            incat=$(xmlcatalog /etc/xml/catalog "$rng_uri" | grep -c '^file:')
+            if [ "x$incat" != "x0" ]; then
+                xmllint --noout --xinclude --noent --relaxng "$rng_uri" "$1" > "$check_out_file" 2>&1
+            else
+                rng_uri="https://docbook.org/xml/$version/rng/docbook.rng"
+                check_rng_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX`
+                curl -sL -o "$check_rng_file" "$rng_uri"
+                xmllint --noout --xinclude --noent --relaxng "$check_rng_file" "$1" > "$check_out_file" 2>&1
+                rm "$check_rng_file"
+            fi
         fi
         yelp_check_retval="$?"
         cat "$check_out_file" | grep -v 'validates$'
diff --git a/tools/yelp-check.py b/tools/yelp-check.py
new file mode 100644
index 0000000..652062e
--- /dev/null
+++ b/tools/yelp-check.py
@@ -0,0 +1,1235 @@
+#!/bin/python3
+#
+# yelp-check
+# Copyright (C) 2011-2020 Shaun McCance <shaunm@gnome.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+import configparser
+import lxml.etree
+import os
+import sys
+import urllib.request
+import shutil
+import subprocess
+import tempfile
+import textwrap
+
+# FIXME: don't hardcode this
+DATADIR = '/usr/share/yelp-tools'
+
+XML_ID = '{http://www.w3.org/XML/1998/namespace}id'
+NAMESPACES = {
+    'mal':   'http://projectmallard.org/1.0/',
+    'cache': 'http://projectmallard.org/cache/1.0/',
+    'db':    'http://docbook.org/ns/docbook',
+    'e':     'http://projectmallard.org/experimental/',
+    'ui':    'http://projectmallard.org/ui/1.0/',
+    'uix':   'http://projectmallard.org/experimental/ui/',
+    'xlink': 'http://www.w3.org/1999/xlink'
+    }
+
+def _stringify(el):
+    ret = el.text or ''
+    for ch in el:
+        ret = ret + _stringify(ch)
+    if el.tail is not None:
+        ret = ret + el.tail
+    return ret
+
+def get_format(node):
+    ns = lxml.etree.QName(node).namespace
+    if ns in (NAMESPACES['mal'], NAMESPACES['cache']):
+        return 'mallard'
+    elif ns == NAMESPACES['db']:
+        return 'docbook5'
+    elif ns is None:
+        # For now, just assume no ns means docbook4
+        return 'docbook4'
+    else:
+        return None
+
+class InputFile:
+    def __init__(self, filepath, filename, sitedir=None):
+        self.filepath = filepath
+        self.filename = filename
+        self.absfile = os.path.join(filepath, filename)
+        self.absdir = os.path.dirname(self.absfile)
+        self.sitedir = sitedir or ''
+        self.sitefilename = self.sitedir + self.filename
+
+
+class Checker:
+    name = None
+    desc = None
+    blurb = None
+    formats = []
+    arguments = []
+    postblurb = None
+    xinclude = True
+    config = None
+
+    def __init__(self, yelpcheck):
+        self.yelpcheck = yelpcheck
+        self.options = {}
+        self.fileargs = []
+        self.tmpdir = None
+
+    def __del__(self):
+        if self.tmpdir is not None:
+            shutil.rmtree(self.tmpdir)
+            self.tmpdir = None
+
+    def parse_args(self, args):
+        while len(args) > 0:
+            argdef = None
+            if args[0].startswith('--'):
+                for arg_ in self.arguments:
+                    if args[0] == '--' + arg_[0]:
+                        argdef = arg_
+                        break
+                if argdef is None:
+                    self.print_help()
+                    return 1
+            elif args[0].startswith('-'):
+                for arg_ in self.arguments:
+                    if args[0] == arg_[1]:
+                        argdef = arg_
+                        break
+                if argdef is None:
+                    self.print_help()
+                    return 1
+            if argdef is not None:
+                takesarg = (argdef[2] is not None)
+                if takesarg:
+                    if len(args) < 2:
+                        self.print_help()
+                        return 1
+                    self.options.setdefault(argdef[0], [])
+                    self.options[argdef[0]].append(args[1])
+                    args = args[2:]
+                else:
+                    self.options[argdef[0]] = True
+                    args = args[1:]
+            else:
+                self.fileargs.append(args[0])
+                args = args[1:]
+        cfgfile = None
+        if len(self.fileargs) > 0:
+            cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg')
+            if not os.path.exists(cfgfile):
+                cfgfile = None
+        if cfgfile is None:
+            cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg')
+        if os.path.exists(cfgfile):
+            self.config = configparser.ConfigParser()
+            try:
+                self.config.read(cfgfile)
+            except Exception as e:
+                print(e, file=sys.stderr)
+                sys.exit(1)
+        return 0
+
+    def get_option_bool(self, arg):
+        if arg in self.options:
+            return self.options[arg] == True
+        if self.config is not None:
+            val = self.config.get('check:' + self.name, arg, fallback=None)
+            if val is not None:
+                return (val == 'true')
+            val = self.config.get('check', arg, fallback=None)
+            if val is not None:
+                return (val == 'true')
+        return False
+
+    def get_option_str(self, arg):
+        if arg in self.options:
+            if isinstance(self.options[arg], list):
+                return self.options[arg][-1]
+        if self.config is not None:
+            val = self.config.get('check:' + self.name, arg, fallback=None)
+            if val is not None:
+                return val
+            val = self.config.get('check', arg, fallback=None)
+            if val is not None:
+                return val
+        return None
+
+    def get_option_list(self, arg):
+        if arg in self.options:
+            if isinstance(self.options[arg], list):
+                ret = []
+                for opt in self.options[arg]:
+                    ret.extend(opt.replace(',', ' ').split())
+                return ret
+        if self.config is not None:
+            val = self.config.get('check:' + self.name, arg, fallback=None)
+            if val is not None:
+                return val.replace(',', ' ').split()
+            val = self.config.get('check', arg, fallback=None)
+            if val is not None:
+                return val.replace(',', ' ').split()
+        return None
+
+    def iter_files(self, sitedir=None):
+        issite = self.get_option_bool('site')
+        if len(self.fileargs) == 0:
+            self.fileargs.append('.')
+        for filearg in self.fileargs:
+            if os.path.isdir(filearg):
+                if issite:
+                    for infile in self.iter_site(filearg, '/'):
+                        yield infile
+                else:
+                    for fname in os.listdir(filearg):
+                        if fname.endswith('.page'):
+                            yield InputFile(filearg, fname)
+            else:
+                if issite:
+                    # FIXME: should do some normalization here, I guess.
+                    # It's hard to get this perfect without a defined start dir
+                    yield InputFile(os.getcwd(), filearg, '/' + os.path.dirname(filearg))
+                else:
+                    yield InputFile(os.getcwd(), filearg)
+
+    def iter_site(self, filepath, sitedir):
+        for fname in os.listdir(filepath):
+            newpath = os.path.join(filepath, fname)
+            if os.path.isdir(newpath):
+                if fname == '__pintail__':
+                    continue
+                for infile in self.iter_site(newpath, sitedir + fname + '/'):
+                    yield infile
+            elif fname.endswith('.page'):
+                yield InputFile(filepath, fname, sitedir)
+
+    def get_xml(self, xmlfile):
+        # FIXME: we can cache these if we add a feature to run multiple
+        # checkers at once
+        tree = lxml.etree.parse(xmlfile.absfile)
+        if self.xinclude:
+            lxml.etree.XInclude()(tree.getroot())
+        return tree
+
+    def create_tmpdir(self):
+        if self.tmpdir is None:
+            self.tmpdir = tempfile.mkdtemp()
+
+    def print_help(self):
+        print('Usage:   yelp-check ' + self.name + ' [OPTIONS] [FILES]')
+        print('Formats: ' + ' '.join(self.formats) + '\n')
+        #FIXME: prettify names of formats
+        if self.blurb is not None:
+            print(self.blurb + '\n')
+        print('Options:')
+        maxarglen = 2
+        args = []
+        for arg in self.arguments:
+            argkey = '--' + arg[0]
+            if arg[1] is not None:
+                argkey = arg[1] + ', ' + argkey
+            if arg[2] is not None:
+                argkey = argkey + ' ' + arg[2]
+            args.append((argkey, arg[3]))
+        for arg in args:
+            maxarglen = max(maxarglen, len(arg[0]) + 1)
+        for arg in args:
+            print('  ' + (arg[0]).ljust(maxarglen) + '  ' + arg[1])
+        if self.postblurb is not None:
+            print(self.postblurb)
+
+    def main(self, args):
+        pass
+
+
+class HrefsChecker (Checker):
+    name = 'hrefs'
+    desc = 'Find broken external links in a document'
+    blurb = ('Find broken href links in FILES in a Mallard document, or\n' +
+             'broken ulink or XLink links in FILES in a DocBook document.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('allow', None, 'URL', 'Allow URL or list of URLs without checking')
+    ]
+    postblurb = 'URL may be a comma- and/or space-separated list, or specified\nmultiple times.'
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        # safelisting URLs that we use as identifiers
+        hrefs = {
+             'http://creativecommons.org/licenses/by-sa/3.0/': True,
+            'https://creativecommons.org/licenses/by-sa/3.0/': True,
+             'http://creativecommons.org/licenses/by-sa/3.0/us/': True,
+            'https://creativecommons.org/licenses/by-sa/3.0/us/': True
+        }
+        allow = self.get_option_list('allow')
+        if allow is not None:
+            for url in allow:
+                hrefs[url] = True
+        retcode = 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            for el in xml.xpath('//*[@href | @xlink:href | self::ulink/@url]',
+                                namespaces=NAMESPACES):
+                href = el.get('href', None)
+                if href is None:
+                    href = el.get('{www.w3.org/1999/xlink}href')
+                if href is None:
+                    href = el.get('url')
+                if href is None:
+                    continue
+                if href.startswith('mailto:'):
+                    continue
+                if href not in hrefs:
+                    try:
+                        req = urllib.request.urlopen(href)
+                        hrefs[href] = (req.status == 200)
+                    except Exception as e:
+                        hrefs[href] = False
+                if not hrefs[href]:
+                    retcode = 1
+                    print(infile.sitefilename + ': ' + href)
+
+        return retcode
+
+
+class IdsChecker (Checker):
+    name = 'ids'
+    desc = 'Find Mallard page IDs that do not match file names'
+    blurb = ('Find pages in a Mallard document whose page ID does not match\n' +
+             'the base file name of the page file.')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            isok = False
+            pageid = None
+            if infile.filename.endswith('.page'):
+                try:
+                    pageid = xml.getroot().get('id')
+                    isok = (pageid == os.path.basename(infile.filename)[:-5])
+                except:
+                    isok = False
+            if not isok:
+                retcode = 1
+                print(infile.sitefilename + ': ' + (pageid or ''))
+
+        return retcode
+
+
+class LinksChecker (Checker):
+    name = 'links'
+    desc = 'Find broken xref or linkend links in a document'
+    blurb = ('Find broken xref links in FILES in a Mallard document,\n' +
+             'or broken linkend links in FILES in a DocBook document.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'),
+        ('ignore', '-i', None, 'Ignore xrefs where href is present')
+    ]
+
+    def __init__(self, yelpcheck):
+        super().__init__(yelpcheck)
+        self.idstoxrefs = {}
+        self.idstolinkends = {}
+
+    def _accumulate_mal(self, node, pageid, sectid, xrefs, sitedir=None):
+        thisid = node.get('id')
+        if thisid is not None:
+            if node.tag == '{' + NAMESPACES['mal'] + '}page':
+                pageid = thisid
+            else:
+                sectid = thisid
+        curid = pageid
+        ignore = self.get_option_bool('ignore')
+        if curid is not None:
+            if sectid is not None:
+                # id attrs in cache files are already fully formed
+                if '#' in sectid:
+                    curid = sectid
+                else:
+                    curid = curid + '#' + sectid
+            if sitedir is not None:
+                # id attrs in cache files already have sitedir prefixed
+                if curid[0] != '/':
+                    curid = sitedir + curid
+            self.idstoxrefs.setdefault(curid, [])
+            if xrefs:
+                xref = node.get('xref')
+                if xref is not None:
+                    if not (ignore and (node.get('href') is not None)):
+                        self.idstoxrefs[curid].append(xref)
+        for child in node:
+            self._accumulate_mal(child, pageid, sectid, xrefs, sitedir)
+
+    def _accumulate_db(self, node, nodeid):
+        thisid = node.get('id')
+        if thisid is None:
+            thisid = node.get(XML_ID)
+        if thisid is not None:
+            nodeid = thisid
+            self.idstolinkends.setdefault(nodeid, [])
+        if nodeid is not None:
+            linkend = node.get('linkend')
+            if linkend is not None:
+                self.idstolinkends[nodeid].append(linkend)
+        for child in node:
+            self._accumulate_db(child, nodeid)
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+
+        cachefile = self.get_option_str('cache')
+        if cachefile is not None:
+            xml = self.get_xml(InputFile(os.getcwd(), cachefile))
+            self._accumulate_mal(xml.getroot(), None, None, False)
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            format = get_format(xml.getroot())
+            if format == 'mallard':
+                self._accumulate_mal(xml.getroot(), None, None, True, infile.sitedir)
+            elif format in ('docbook4', 'docbook5'):
+                # For DocBook, we assume each filearg is its own document, so
+                # we reset the dict each time and only check within the file.
+                # Note that XInclude and SYSTEM includes DO happen first.
+                self.idstolinkends = {}
+                self._accumulate_db(xml.getroot(), None)
+                for curid in self.idstolinkends:
+                    for linkend in self.idstolinkends[curid]:
+                        if linkend not in self.idstolinkends:
+                            print(curid + ': ' + linkend)
+                            retcode = 1
+
+        for curid in self.idstoxrefs:
+            for xref in self.idstoxrefs[curid]:
+                checkref = xref
+                if checkref[0] == '#':
+                    checkref = curid.split('#')[0] + checkref
+                if curid[0] == '/' and checkref[0] != '/':
+                    checkref = curid[:curid.rfind('/')+1] + checkref
+                if checkref not in self.idstoxrefs:
+                    print(curid + ': ' + xref)
+                    retcode = 1
+
+        return retcode
+
+
+class MediaChecker (Checker):
+    name = 'media'
+    desc = 'Find broken references to media files'
+    blurb = ('Find broken references to media files. In Mallard, this\n' +
+             'checks media and thumb elements. In DocBook, this checks\n' +
+             'audiodata, imagedata, and videodata elements.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            format = get_format(xml.getroot())
+            srcs = []
+            if format == 'mallard':
+                for el in xml.xpath('//mal:media[@src] | //uix:thumb | //ui:thumb | //e:mouseover',
+                                    namespaces=NAMESPACES):
+                    srcs.append(el.get('src'))
+            elif format == 'docbook5':
+                # FIXME: do we care about entityref?
+                for el in xml.xpath('//db:audiodata | //db:imagedata | //db:videodata',
+                                    namespaces=NAMESPACES):
+                    srcs.append(el.get('fileref'))
+            elif format == 'docbook4':
+                for el in xml.xpath('//audiodata | //imagedata | //videodata'):
+                    srcs.append(el.get('fileref'))
+            for src in srcs:
+                fsrc = os.path.join(infile.absdir, src)
+                if not os.path.exists(fsrc):
+                    print(infile.sitefilename + ': ' + src)
+                    retcode = 1
+
+        return retcode
+
+
+class OrphansChecker (Checker):
+    name = 'orphans'
+    desc = 'Find orphaned pages in a Mallard document'
+    blurb = ('Locate orphaned pages among FILES in a Mallard document.\n' +
+             'Orphaned pages are any pages that cannot be reached by\n' +
+             'topic links alone from the index page.')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE')
+    ]
+
+    def __init__(self, yelpcheck):
+        super().__init__(yelpcheck)
+        self.guidelinks = {}
+        self.sitesubdirs = set()
+
+    def _collect_links(self, node, sitedir):
+        pageid = node.get('id')
+        if pageid[0] != '/':
+            # id attrs in cache files already have sitedir prefixed
+            pageid = sitedir + pageid
+        else:
+            sitedir = pageid[:pageid.rfind('/')+1]
+        self.guidelinks.setdefault(pageid, set())
+        # For the purposes of finding orphans, we'll just pretend that
+        # all links to or from sections are just to or from pages.
+        for el in node.xpath('//mal:info/mal:link[@type="guide"]',
+                             namespaces=NAMESPACES):
+            xref = el.get('xref')
+            if xref is None or xref == '':
+                continue
+            if xref[0] == '#':
+                continue
+            if '#' in xref:
+                xref = xref[:xref.find('#')]
+            if sitedir is not None and sitedir != '':
+                if xref[0] != '/':
+                    xref = sitedir + xref
+            self.guidelinks[pageid].add(xref)
+        for el in node.xpath('//mal:info/mal:link[@type="topic"]',
+                             namespaces=NAMESPACES):
+            xref = el.get('xref')
+            if xref is None or xref == '':
+                continue
+            if xref[0] == '#':
+                continue
+            if '#' in xref:
+                xref = xref[:xref.find('#')]
+            if sitedir is not None and sitedir != '':
+                if xref[0] != '/':
+                    xref = sitedir + xref
+            self.guidelinks.setdefault(xref, set())
+            self.guidelinks[xref].add(pageid)
+        for el in node.xpath('//mal:links[@type="site-subdirs" or @type="site:subdirs"]',
+                             namespaces=NAMESPACES):
+            self.sitesubdirs.add(pageid)
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+
+        cachefile = self.get_option_str('cache')
+        if cachefile is not None:
+            xml = self.get_xml(InputFile(os.getcwd(), cachefile))
+            for page in xml.getroot():
+                if page.tag == '{' + NAMESPACES['mal'] + '}page':
+                    pageid = page.get('id')
+                    if pageid is None or pageid == '':
+                        continue
+                    self._collect_links(page, page.get('{http://projectmallard.org/site/1.0/}dir', ''))
+
+        pageids = set()
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            pageid = xml.getroot().get('id')
+            if pageid is None:
+                continue
+            pageids.add(infile.sitedir + pageid)
+            self._collect_links(xml.getroot(), infile.sitedir)
+
+        siteupdirs = {}
+        for pageid in self.sitesubdirs:
+            dirname = pageid[:pageid.rfind('/')+1]
+            for subid in self.guidelinks:
+                if subid.startswith(dirname):
+                    if subid.endswith('/index'):
+                        mid = subid[len(dirname):-6]
+                        if mid != '' and '/' not in mid:
+                            siteupdirs[subid] = pageid
+
+        if self.get_option_bool('site'):
+            okpages = set(['/index'])
+        else:
+            okpages = set(['index'])
+        for pageid in sorted(pageids):
+            if pageid in okpages:
+                isok = True
+            else:
+                isok = False
+                guides = [g for g in self.guidelinks[pageid]]
+                if pageid in siteupdirs:
+                    updir = siteupdirs[pageid]
+                    if updir not in guides:
+                        guides.append(updir)
+                cur = 0
+                while cur < len(guides):
+                    if guides[cur] in okpages:
+                        isok = True
+                        break
+                    if guides[cur] in self.guidelinks:
+                        for guide in self.guidelinks[guides[cur]]:
+                            if guide not in guides:
+                                guides.append(guide)
+                    cur += 1
+            if isok:
+                okpages.add(pageid)
+            else:
+                print(pageid)
+                retcode = 1
+
+        return retcode
+
+
+class ValidateChecker (Checker):
+    name = 'validate'
+    desc = 'Validate files against a DTD or RNG'
+    blurb = ('Validate FILES against the appropriate DTD or RNG.\n' +
+             'For Mallard pages, perform automatic RNG merging\n' +
+             'based on the version attribute.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('strict', None, None, 'Disallow unknown namespaces'),
+        ('allow', None, 'NS', 'Explicitly allow namespace NS in strict mode'),
+        ('jing', None, None, 'Use jing instead of xmllint for RNG validation')
+    ]
+    postblurb = 'NS may be a comma- and/or space-separated list, or specified\nmultiple times.'
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        retcode = 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            format = get_format(xml.getroot())
+            command = None
+            if format == 'mallard':
+                version = xml.getroot().get('version')
+                if version is None or version == '':
+                    tag = xml.getroot().tag
+                    if tag == '{' + NAMESPACES['mal'] + '}stack':
+                        # 1.2 isn't final yet as of 2020-01-09. Stacks will
+                        # likely be in 1.2, so we can assume at least that.
+                        version = '1.2'
+                    elif tag == '{' + NAMESPACES['cache'] + '}cache':
+                        version = 'cache/1.0'
+                    else:
+                        version = '1.0'
+                self.create_tmpdir()
+                rng = os.path.join(self.tmpdir,
+                                   version.replace('/', '__').replace(' ', '__'))
+                if not os.path.exists(rng):
+                    strict = 'true()' if self.get_option_bool('strict') else 'false()'
+                    allow = self.get_option_list('allow')
+                    if allow is None:
+                        allow = ''
+                    else:
+                        allow = ' '.join(allow)
+                    subprocess.call(['xsltproc', '-o', rng,
+                                    '--param', 'rng.strict', strict,
+                                    '--stringparam', 'rng.strict.allow', allow,
+                                    os.path.join(DATADIR, 'xslt', 'mal-rng.xsl'),
+                                    infile.absfile])
+                if self.get_option_bool('jing'):
+                    command = ['jing', '-i', rng, infile.filename]
+                else:
+                    command = ['xmllint', '--noout', '--xinclude', '--noent',
+                               '--relaxng', rng, infile.filename]
+            elif format == 'docbook4':
+                if xml.docinfo.doctype.startswith('<!DOCTYPE'):
+                    command = ['xmllint', '--noout', '--xinclude', '--noent',
+                               '--postvalid', infile.filename]
+                else:
+                    command = ['xmllint', '--noout', '--xinclude', '--noent',
+                               '--dtdvalid',
+                               'http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd',
+                               infile.filename]
+            elif format == 'docbook5':
+                version = xml.getroot().get('version')
+                if version is None or version == '':
+                    version = '5.0'
+                # Canonical URIs are http, but they 301 redirect to https. jing
+                # can handle https fine, but not the redirect. And jing doesn't
+                # look at catalogs. So just always feed jing an https URI.
+                rnghttp = 'http://docbook.org/xml/' + version + '/rng/docbook.rng'
+                rnghttps = 'https://docbook.org/xml/' + version + '/rng/docbook.rng'
+                if self.get_option_bool('jing'):
+                    command = ['jing', '-i', rnghttps, infile.filename]
+                else:
+                    # xmllint, on the other hand, does support catalogs. It also
+                    # doesn't do the redirect, but it wouldn't matter if it did
+                    # because it doesn't do https. So if the schema is available
+                    # locally in the catalog, hand xmllint the http URI so it
+                    # can use the local copy. Otherwise, we have to get curl
+                    # involved to do https.
+                    try:
+                        catfile = subprocess.check_output(['xmlcatalog',
+                                                           '/etc/xml/catalog',
+                                                           rnghttp],
+                                                          stderr=subprocess.DEVNULL,
+                                                          text=True)
+                        for catline in catfile.split('\n'):
+                            if catline.startswith('file://'):
+                                command = ['xmllint', '--noout', '--xinclude',  '--noent',
+                                           '--relaxng', rnghttp, infile.filename]
+                    except:
+                        pass
+                    if command is None:
+                        self.create_tmpdir()
+                        rngfile = os.path.join(self.tmpdir, 'docbook-' + version + '.rng')
+                        if not os.path.exists(rngfile):
+                            urllib.request.urlretrieve(rnghttps, rngfile)
+                        command = ['xmllint', '--noout', '--xinclude',  '--noent',
+                                   '--relaxng', rngfile, infile.filename]
+            if command is not None:
+                try:
+                    subprocess.check_output(command,
+                                            cwd=infile.filepath,
+                                            stderr=subprocess.STDOUT,
+                                            text=True)
+                except subprocess.CalledProcessError as e:
+                    retcode = e.returncode
+                    print(e.output)
+            else:
+                retcode = 1
+
+        return retcode
+
+
+class CommentsChecker (Checker):
+    name = 'comments'
+    desc = 'Print the editorial comments in a document'
+    blurb = ('Print the editorial comments in the files FILES, using the\n' +
+             'comment element in Mallard and the remark element in DocBook.')
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            format = get_format(xml.getroot())
+            if format == 'mallard':
+                for el in xml.xpath('//mal:comment', namespaces=NAMESPACES):
+                    thisid = xml.getroot().get('id')
+                    par = el
+                    while par is not None:
+                        if par.tag == '{' + NAMESPACES['mal'] + '}section':
+                            sectid = par.get('id')
+                            if sectid is not None:
+                                thisid = thisid + '#' + sectid
+                                break
+                        par = par.getparent()
+                    print('Page:  ' + infile.sitedir + thisid)
+                    for ch in el.xpath('mal:cite[1]', namespaces=NAMESPACES):
+                        name = _stringify(ch).strip()
+                        href = ch.get('href')
+                        if href is not None and href.startswith('mailto:'):
+                            name = name + ' <' + href[7:] + '>'
+                        print('From:  ' + name)
+                        date = ch.get('date')
+                        if date is not None:
+                            print('Date:  ' + date)
+                    print('')
+                    for ch in el:
+                        if isinstance(ch, lxml.etree._ProcessingInstruction):
+                            continue
+                        elif ch.tag == '{' + NAMESPACES['mal'] + '}cite':
+                            continue
+                        elif ch.tag in ('{' + NAMESPACES['mal'] + '}p',
+                                        '{' + NAMESPACES['mal'] + '}title'):
+                            for s in _stringify(ch).strip().split('\n'):
+                                print('  ' + s.strip())
+                            print('')
+                        else:
+                            name = lxml.etree.QName(ch).localname
+                            print('  <' + name + '>...</' + name + '>\n')
+            elif format in ('docbook4', 'docbook5'):
+                if format == 'docbook4':
+                    dbxpath = '//remark'
+                else:
+                    dbxpath = '//db:remark'
+                for el in xml.xpath(dbxpath, namespaces=NAMESPACES):
+                    thisid = infile.filename
+                    par = el
+                    while par is not None:
+                        sectid = par.get('id')
+                        if sectid is None:
+                            sectid = par.get(XML_ID)
+                        if sectid is not None:
+                            thisid = thisid + '#' + sectid
+                            break
+                        par = par.getparent()
+                    print('Page:  ' + thisid)
+                    flag = el.get('revisionflag')
+                    if flag is not None:
+                        print('Flag:  ' + flag)
+                    print('')
+                    for s in _stringify(el).strip().split('\n'):
+                        print('  ' + s.strip())
+                    print('')
+
+        return 0
+
+
+class LicenseChecker (Checker):
+    name = 'license'
+    desc = 'Report the license of Mallard pages'
+    blurb = ('Report the license of the Mallard page files FILES. Each\n' +
+             'matching page is reporting along with its license, reported\n' +
+             'based on the href attribute of the license element. Common\n' +
+             'licenses use a shortened identifier. Pages with multiple\n' +
+             'licenses have the identifiers separated by spaces. Pages\n' +
+             'with no license element report \'none\'. Licenses with no\n' +
+             'href attribute are reported as \'unknown\'')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('only', None, 'LICENSES', 'Only show pages whose license is in LICENSES'),
+        ('except', None, 'LICENSES', 'Exclude pages whose license is in LICENSES'),
+        ('totals', None, None, 'Show total counts for each license')
+    ]
+    postblurb = 'LICENSES may be a comma- and/or space-separated list, or specified\nmultiple times.'
+
+    def get_license(self, href):
+        if href is None:
+            return 'unknown'
+        elif (href.startswith('http://creativecommons.org/licenses/') or
+              href.startswith('https://creativecommons.org/licenses/')):
+            return 'cc-' + '-'.join([x for x in href.split('/') if x][3:])
+        elif (href.startswith('http://www.gnu.org/licenses/') or
+              href.startswith('https://www.gnu.org/licenses/')):
+            return href.split('/')[-1].replace('.html', '')
+        else:
+            return 'unknown'
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        totals = {}
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            thisid = xml.getroot().get('id') or infile.filename
+            licenses = []
+            for el in xml.xpath('/mal:page/mal:info/mal:license',
+                                namespaces=NAMESPACES):
+                licenses.append(self.get_license(el.get('href')))
+            if len(licenses) == 0:
+                licenses.append('none')
+
+            only = self.get_option_list('only')
+            if only is not None:
+                skip = True
+                for lic in licenses:
+                    if lic in only:
+                        skip = False
+                if skip:
+                    continue
+            cept = self.get_option_list('except')
+            if cept is not None:
+                skip = False
+                for lic in licenses:
+                    if lic in cept:
+                        skip = True
+                if skip:
+                    continue
+
+            if self.get_option_bool('totals'):
+                for lic in licenses:
+                    totals.setdefault(lic, 0)
+                    totals[lic] += 1
+            else:
+                print(infile.sitedir + thisid + ': ' + ' '.join(licenses))
+
+        if self.get_option_bool('totals'):
+            for lic in sorted(totals):
+                print(lic + ': ' + str(totals[lic]))
+
+        return 0
+
+
+class StatusChecker (Checker):
+    name = 'status'
+    desc = 'Report the status of Mallard pages'
+    blurb = ('Report the status of the Mallard page files FILES. Each\n' +
+             'matching page is reporting along with its status.')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('version',    None, 'VER', 'Select revisions with the version attribute VER'),
+        ('docversion', None, 'VER', 'Select revisions with the docversion attribute VER'),
+        ('pkgversion', None, 'VER', 'Select revisions with the pkgversion attribute VER'),
+        ('older',  None, 'DATE', 'Only show pages older than DATE'),
+        ('newer',  None, 'DATE', 'Only show pages newer than DATE'),
+        ('only',   None, 'STATUSES', 'Only show pages whose status is in STATUSES'),
+        ('except', None, 'STATUSES', 'Exclude pages whose status is in STATUSES'),
+        ('totals', None, None, 'Show total counts for each status')
+    ]
+    postblurb = 'VER and STATUSES may be comma- and/or space-separated lists, or specified\nmultiple times.'
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        totals = {}
+
+        checks = []
+        ver = self.get_option_list('version')
+        if ver is not None:
+            checks.append(ver)
+        ver = self.get_option_list('docversion')
+        if ver is not None:
+            checks.append(['doc:' + v for v in ver])
+        ver = self.get_option_list('pkgversion')
+        if ver is not None:
+            checks.append(['pkg:' + v for v in ver])
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            pageid = xml.getroot().get('id')
+            bestrev = None
+            for rev in xml.xpath('/mal:page/mal:info/mal:revision', namespaces=NAMESPACES):
+                revversion = (rev.get('version') or '').split()
+                docversion = rev.get('docversion')
+                if docversion is not None:
+                    revversion.append('doc:' + docversion)
+                pkgversion = rev.get('pkgversion')
+                if pkgversion is not None:
+                    revversion.append('pkg:' + pkgversion)
+                revok = True
+                for check in checks:
+                    checkok = False
+                    for v in check:
+                        if v in revversion:
+                            checkok = True
+                            break
+                    if not checkok:
+                        revok = False
+                        break
+                if revok:
+                    if bestrev is None:
+                        bestrev = rev
+                        continue
+                    bestdate = bestrev.get('date')
+                    thisdate = rev.get('date')
+                    if bestdate is None:
+                        bestrev = rev
+                    elif thisdate is None:
+                        pass
+                    elif thisdate >= bestdate:
+                        bestrev = rev
+            if bestrev is not None:
+                status = bestrev.get('status') or 'none'
+                date = bestrev.get('date') or None
+            else:
+                status = 'none'
+                date = None
+            older = self.get_option_str('older')
+            if older is not None:
+                if date is None or date >= older:
+                    continue
+            newer = self.get_option_str('newer')
+            if newer is not None:
+                if date is None or date <= newer:
+                    continue
+            only = self.get_option_list('only')
+            if only is not None:
+                if status not in only:
+                    continue
+            cept = self.get_option_list('except')
+            if cept is not None:
+                if status in cept:
+                    continue
+            if self.get_option_bool('totals'):
+                totals.setdefault(status, 0)
+                totals[status] += 1
+            else:
+                print(infile.sitedir + pageid + ': ' + status)
+
+        if self.get_option_bool('totals'):
+            for st in sorted(totals):
+                print(st + ': ' + str(totals[st]))
+
+        return 0
+
+
+class StyleChecker (Checker):
+    name = 'style'
+    desc = 'Report the style attribute of Mallard pages'
+    blurb = ('Report the page style attribute of the Mallard page files\n' +
+             'FILES. Each matching page is reporting along with its status.')
+    formats = ['mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site'),
+        ('only',   None, 'STYLES', 'Only show pages whose style is in STATUSES'),
+        ('except', None, 'STYLES', 'Exclude pages whose style is in STATUSES'),
+        ('totals', None, None, 'Show total counts for each style')
+    ]
+    postblurb = 'STYLES may be comma- and/or space-separated lists, or specified\nmultiple times.'
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        totals = {}
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            thisid = xml.getroot().get('id')
+            style = xml.getroot().get('style')
+            if style is None:
+                style = 'none'
+            styles = style.split()
+            # We'll set style to None if it doesn't meat the criteria
+            only = self.get_option_list('only')
+            if only is not None:
+                if len(only) == 0:
+                    # We treat a blank --only as requesting pages with no style
+                    if style != 'none':
+                        style = None
+                else:
+                    allow = False
+                    for st in styles:
+                        if st in only:
+                            allow = True
+                            break
+                    if not allow:
+                        style = None
+            cept = self.get_option_list('except')
+            if cept is not None:
+                for st in styles:
+                    if st in cept:
+                        style = None
+                        break
+            if self.get_option_bool('totals'):
+                if style is not None:
+                    for st in styles:
+                        totals.setdefault(st, 0)
+                        totals[st] += 1
+            else:
+                if style is not None:
+                    print(infile.sitedir + thisid + ': ' + style)
+
+        if self.get_option_bool('totals'):
+            for st in sorted(totals):
+                print(st + ': ' + str(totals[st]))
+
+        return 0
+
+
+class CustomChecker(Checker):
+    formats = ['docbook4', 'docbook5', 'mallard']
+    arguments = [
+        ('help', '-h', None, 'Show this help and exit'),
+        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
+    ]
+
+    def __init__(self, name, yelpcheck):
+        super().__init__(yelpcheck)
+        self.name = name
+
+    def main(self, args):
+        if self.parse_args(args) != 0:
+            return 1
+
+        sect = 'check:' + self.name
+        if sect not in self.config.sections():
+            print('Unrecognized command: ' + self.name, file=sys.stderr)
+            return 1
+        self.blurb = self.config.get(sect, 'blurb', fallback=None)
+        if self.blurb is not None:
+            self.blurb = '\n'.join(textwrap.wrap(self.blurb))
+
+        if 'help' in self.options:
+            self.print_help()
+            return 0
+
+        assertexpr = self.config.get(sect, 'assert', fallback=None)
+        if assertexpr is not None:
+            return self.run_assert(assertexpr)
+
+        print('No action found for command: ' + self.name, file=sys.stderr)
+        return 1
+
+    def run_assert(self, assertexpr):
+        sect = 'check:' + self.name
+        selectexpr = self.config.get(sect, 'select', fallback='/')
+        message = self.config.get(sect, 'message', fallback='Assertion failed')
+        self.xinclude = self.config.get(sect, 'xinclude', fallback='true') != 'false'
+
+        namespaces = {}
+        if 'namespaces' in self.config.sections():
+            for ns in self.config.options('namespaces'):
+                namespaces[ns] = self.config.get('namespaces', ns)
+
+        for infile in self.iter_files():
+            xml = self.get_xml(infile)
+            thisid = xml.getroot().get('id') or infile.filename
+            for root in xml.xpath(selectexpr, namespaces=namespaces):
+                if not bool(root.xpath(assertexpr, namespaces=namespaces)):
+                    print(infile.sitedir + thisid + ': ' + message)
+        # check if self.config has section check:self.name
+        # check if section has select, assert, message
+
+
+class YelpCheck:
+    def __init__(self):
+        pass
+
+    def main(self):
+        if len(sys.argv) < 2:
+            self.print_usage()
+            return 1
+
+        checker = None
+        for cls in Checker.__subclasses__():
+            if sys.argv[1] == cls.name:
+                checker = cls(self)
+
+        if checker is None:
+            checker = CustomChecker(sys.argv[1], self)
+
+        return checker.main(sys.argv[2:])
+
+    def print_usage(self):
+        print('Usage: yelp-check <COMMAND> [OPTIONS] [FILES]')
+        namelen = 2
+        checks = []
+        reports = []
+        others = []
+        for cls in sorted(Checker.__subclasses__(), key=(lambda cls: cls.name or '')):
+            if cls is CustomChecker:
+                continue
+            namelen = max(namelen, len(cls.name) + 2)
+            if cls in (HrefsChecker, IdsChecker, LinksChecker,
+                       MediaChecker, OrphansChecker, ValidateChecker):
+                checks.append(cls)
+            elif cls in (CommentsChecker, LicenseChecker, StatusChecker,
+                         StyleChecker):
+                reports.append(cls)
+            else:
+                others.append(cls)
+        if len(checks) > 0:
+            print('\nCheck commands:')
+            for cls in checks:
+                print('  ' + cls.name.ljust(namelen) + cls.desc)
+        if len(reports) > 0:
+            print('\nReport commands:')
+            for cls in reports:
+                print('  ' + cls.name.ljust(namelen) + cls.desc)
+        if len(others) > 0:
+            print('\nOther commands:')
+            for cls in others:
+                print('  ' + cls.name.ljust(namelen) + cls.desc)
+        config = configparser.ConfigParser()
+        try:
+            config.read('.yelp-tools.cfg')
+        except:
+            return
+        customs = []
+        for sect in config.sections():
+            if sect.startswith('check:'):
+                name = sect[6:]
+                skip = False
+                for cls in Checker.__subclasses__():
+                    if name == cls.name:
+                        skip = True
+                        break
+                if skip:
+                    continue
+                if config.get(sect, 'assert', fallback=None) == None:
+                    continue
+                desc = config.get(sect, 'desc', fallback='')
+                namelen = max(namelen, len(name) + 2)
+                customs.append((name, desc))
+        if len(customs) > 0:
+            print('\nCustom commands:')
+            for name, desc in customs:
+                print('  ' + name.ljust(namelen) + desc)
+
+
+if __name__ == '__main__':
+    try:
+        sys.exit(YelpCheck().main())
+    except KeyboardInterrupt:
+        sys.exit(1)
diff --git a/yelp-tools.doap b/yelp-tools.doap
index 16c0987..e7e1b89 100644
--- a/yelp-tools.doap
+++ b/yelp-tools.doap
@@ -11,7 +11,7 @@
   <homepage rdf:resource="http://yelp.io"/>
   <mailing-list rdf:resource="http://mail.gnome.org/mailman/listinfo/gnome-doc-devel-list" />
   <download-page rdf:resource="http://download.gnome.org/sources/yelp-tools/" />
-  <bug-database rdf:resource="http://bugzilla.gnome.org/browse.cgi?product=yelp-tools" />
+  <bug-database rdf:resource="https://gitlab.gnome.org/GNOME/yelp-tools/issues/" />
 
   <category rdf:resource="http://api.gnome.org/doap-extensions#core" />
   <programming-language>sh</programming-language>
author	Shaun McCance <shaunm@redhat.com>	2020-12-26 12:33:16 -0500
committer	Shaun McCance <shaunm@redhat.com>	2020-12-26 12:33:16 -0500
commit	c08dcf356862dbac10dd8725dfa8359ee07cb643 (patch)
tree	90dcac6f523dbc3715385213be99144275214d5e
parent	36cac1db2806a6f50baaec01469f710061b42910 (diff)
parent	3eb7368983d5c538df4fa7cb181b7f262a1b115e (diff)
download	yelp-tools-c08dcf356862dbac10dd8725dfa8359ee07cb643.tar.gz