diff options
-rw-r--r-- | NEWS | 24 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | templates/Makefile.am | 10 | ||||
-rw-r--r-- | templates/concept.duck | 47 | ||||
-rw-r--r-- | templates/concept.page | 52 | ||||
-rw-r--r-- | templates/guide.duck | 49 | ||||
-rw-r--r-- | templates/guide.page | 55 | ||||
-rw-r--r-- | templates/reference.duck | 90 | ||||
-rw-r--r-- | templates/reference.page | 99 | ||||
-rw-r--r-- | templates/task.duck | 63 | ||||
-rw-r--r-- | templates/task.page | 54 | ||||
-rwxr-xr-x | tools/yelp-build.in | 21 | ||||
-rwxr-xr-x | tools/yelp-check.in | 22 | ||||
-rw-r--r-- | tools/yelp-check.py | 1235 | ||||
-rw-r--r-- | yelp-tools.doap | 2 |
15 files changed, 1805 insertions, 20 deletions
@@ -1,3 +1,27 @@ +3.38.0 +====== +* Stable release. No changes since 3.37.90 + +3.37.90 +======= +* yelp-new: Added new templates + +3.32.2 +====== +* yelp-build: Fix spurious warning about missing stack files + +3.32.1 +====== +* yelp-check: Fix validation for DocBook 5 with https redirects + +3.32.0 +====== +* Stable release. No changes since 3.31.90 + +3.31.90 +======= +* Initial support for Mallard 1.2 stack files + 3.28.0 ====== * Stable release. No changes since 3.27.90 diff --git a/configure.ac b/configure.ac index 8093c7a..67f8e7d 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([yelp-tools], [3.28.0], +AC_INIT([yelp-tools], [3.38.0], [http://bugzilla.gnome.org/enter_bug.cgi?product=yelp-tools]) AM_INIT_AUTOMAKE([1.9 no-dist-gzip dist-bzip2]) diff --git a/templates/Makefile.am b/templates/Makefile.am index d4bbb75..1535f3a 100644 --- a/templates/Makefile.am +++ b/templates/Makefile.am @@ -1,4 +1,12 @@ tmpldir = $(datadir)/yelp-tools/templates -tmpl_DATA = task.page +tmpl_DATA = \ + concept.duck \ + concept.page \ + guide.duck \ + guide.page \ + reference.duck \ + reference.page \ + task.duck \ + task.page EXTRA_DIST = $(tmpl_DATA) diff --git a/templates/concept.duck b/templates/concept.duck new file mode 100644 index 0000000..c349e88 --- /dev/null +++ b/templates/concept.duck @@ -0,0 +1,47 @@ +@ducktype/1.0 +[-] yelp-tmpl-desc Explanation of a concept or background information + += @TITLE@ + [topic .concept version=1.1] + +[-- + Recommended statuses: stub incomplete draft outdated review candidate final + Remove version attributes you don't use. +--] +@revision[version=0.1 docversion=0.1 pkgversion=0.1 date=@DATE@ status=stub] + +@credit[author copyright] + @name @NAME@ + @email @EMAIL@ + @years @YEAR@ + +[-- + This puts a link to this topic on the index page. + Change the xref to link it from another guide. +--] +@link[guide >index] + +[-- + Think about whether other pages should be in the seealso list. 
+ The target page will automatically get a seealso link back. +@link[seealso >someotherid] +--] + +[-- + Think about whether external resources should be in the seealso + list. These require a title. +@link[seealso >>http://someurl] + @title Link title +--] + +@desc Write a short page description here. + +@keywords comma-separated list, of keywords, for search + + +Provide as many paragraphs, lists, or media as necessary to explain. + +[list] +. Next steps +* Optionally, links to other things the user might do now. +* But consider using seealso and other info links instead. diff --git a/templates/concept.page b/templates/concept.page new file mode 100644 index 0000000..92fbbb8 --- /dev/null +++ b/templates/concept.page @@ -0,0 +1,52 @@ +<?yelp-tmpl-desc Explanation of a concept or background information?> +<page xmlns="http://projectmallard.org/1.0/" + type="topic" style="concept" version="1.1" + id="@ID@"> + <info> + <!-- + Recommended statuses: stub incomplete draft outdated review candidate final + Remove version attributes you don't use. + --> + <revision version="0.1" docversion="0.1" pkgversion="0.1" date="@DATE@" status="stub"/> + + <credit type="author copyright"> + <name>@NAME@</name> + <email>@EMAIL@</email> + <years>@YEAR@</years> + </credit> + + <!-- + This puts a link to this topic on the index page. + Change the xref to link it from another guide. + --> + <link type="guide" xref="index"/> + + <!-- + Think about whether other pages should be in the seealso list. + The target page will automatically get a seealso link back. + <link type="seealso" xref="someotherid"/> + --> + + <!-- + Think about whether external resources should be in the seealso + list. These require a title. 
+ <link type="seealso" href="http://someurl"> + <title>Link title</title> + </link> + --> + + <desc>Write a short page description here.</desc> + + <keywords>comma-separated list, of keywords, for search</keywords> + </info> + + <title>@TITLE@</title> + + <p>Provide as many paragraphs, lists, or media as necessary to explain.</p> + + <list> + <title>Next steps</title> + <item><p>Optionally, links to other things the user might do now.</p></item> + <item><p>But consider using seealso and other info links instead.</p></item> + </list> +</page> diff --git a/templates/guide.duck b/templates/guide.duck new file mode 100644 index 0000000..61cd746 --- /dev/null +++ b/templates/guide.duck @@ -0,0 +1,49 @@ +@ducktype/1.0 +[-] yelp-tmpl-desc Navigational glue for Mallard documents + += @TITLE@ + [guide version=1.1] + +[-- + Recommended statuses: stub incomplete draft outdated review candidate final + Remove version attributes you don't use. +--] +@revision[version=0.1 docversion=0.1 pkgversion=0.1 date=@DATE@ status=stub] + +@credit[author copyright] + @name @NAME@ + @email @EMAIL@ + @years @YEAR@ + +[-- + This puts a link to this topic on the index page. + Change the xref to link it from another guide. +--] +@link[guide >index] + +[-- + Think about whether other pages should be in the seealso list. + The target page will automatically get a seealso link back. +@link[seealso >someotherid] +--] + +[-- + Think about whether external resources should be in the seealso + list. These require a title. +@link[seealso >>http://someurl] + @title Link title +--] + +@desc Write a short page description here. + +@keywords comma-separated list, of keywords, for search + + +Optionally, an introductory paragraph. + +[-- + The links element is implicit, but you might want to add one + explicitly if you want to do link grouping or styles. +[links topic .STYLE groups="GROUPS"] + . 
Optional title +--] diff --git a/templates/guide.page b/templates/guide.page new file mode 100644 index 0000000..894b276 --- /dev/null +++ b/templates/guide.page @@ -0,0 +1,55 @@ +<?yelp-tmpl-desc Navigational glue for Mallard documents?> +<page xmlns="http://projectmallard.org/1.0/" + type="guide" version="1.1" + id="@ID@"> + <info> + <!-- + Recommended statuses: stub incomplete draft outdated review candidate final + Remove version attributes you don't use. + --> + <revision version="0.1" docversion="0.1" pkgversion="0.1" date="@DATE@" status="stub"/> + + <credit type="author copyright"> + <name>@NAME@</name> + <email>@EMAIL@</email> + <years>@YEAR@</years> + </credit> + + <!-- + This puts a link to this topic on the index page. + Change the xref to link it from another guide. + --> + <link type="guide" xref="index"/> + + <!-- + Think about whether other pages should be in the seealso list. + The target page will automatically get a seealso link back. + <link type="seealso" xref="someotherid"/> + --> + + <!-- + Think about whether external resources should be in the seealso + list. These require a title. + <link type="seealso" href="http://someurl"> + <title>Link title</title> + </link> + --> + + <desc>Write a short page description here.</desc> + + <keywords>comma-separated list, of keywords, for search</keywords> + </info> + + <title>@TITLE@</title> + + <p>Optionally, an introductory paragraph.</p> + + <!-- + The links element is implicit, but you might want to add one + explicitly if you want to do link grouping or styles. 
+ <links type="topic" style="STYLE" groups="GROUPS"> + <title>Optional title</title> + </links> + --> + +</page> diff --git a/templates/reference.duck b/templates/reference.duck new file mode 100644 index 0000000..0aae96b --- /dev/null +++ b/templates/reference.duck @@ -0,0 +1,90 @@ +@ducktype/1.0 +[-] yelp-tmpl-desc Lists or tables of information for quick lookup + += @TITLE@ + [topic .reference version=1.1] + +[-- + Recommended statuses: stub incomplete draft outdated review candidate final + Remove version attributes you don't use. +--] +@revision[version=0.1 docversion=0.1 pkgversion=0.1 date=@DATE@ status=stub] + +@credit[author copyright] + @name @NAME@ + @email @EMAIL@ + @years @YEAR@ + +[-- + This puts a link to this topic on the index page. + Change the xref to link it from another guide. +--] +@link[guide >index] + +[-- + Think about whether other pages should be in the seealso list. + The target page will automatically get a seealso link back. +@link[seealso >someotherid] +--] + +[-- + Think about whether external resources should be in the seealso + list. These require a title. +@link[seealso >>http://someurl] + @title Link title +--] + +@desc Write a short page description here. + +@keywords comma-separated list, of keywords, for search + + +Optionally provide introductory text. +Use terms lists, tables, and bullet lists for reference material. + + +[terms] +. Example terms list (title optional) + +- Term 1 +* Description + +- Term 2 +* Terms can have multiple paragraphs. + + [screen] + Or any other block element. + +- Term 3.1 +- Term 3.2 +* Terms can also have multiple titles. + + +[table] +. Example table (title optional) + +[thead] +[tr] +- Column 1 +- Column 2 + +[tbody] +[tr] +* Row 1, column 1 +* Row 1, column 2 + +[tr] +* Row 2, column 1 +* Row 2, column 2 + + +[list] +. 
Example list (title optional) + +* Item 1 + +* Item 2 + * Subitem 2.1 + * Subitem 2.2 + +* Item 3 diff --git a/templates/reference.page b/templates/reference.page new file mode 100644 index 0000000..c7364ee --- /dev/null +++ b/templates/reference.page @@ -0,0 +1,99 @@ +<?yelp-tmpl-desc Lists or tables of information for quick lookup?> +<page xmlns="http://projectmallard.org/1.0/" + type="topic" style="reference" version="1.1" + id="@ID@"> + <info> + <!-- + Recommended statuses: stub incomplete draft outdated review candidate final + Remove version attributes you don't use. + --> + <revision version="0.1" docversion="0.1" pkgversion="0.1" date="@DATE@" status="stub"/> + + <credit type="author copyright"> + <name>@NAME@</name> + <email>@EMAIL@</email> + <years>@YEAR@</years> + </credit> + + <!-- + This puts a link to this topic on the index page. + Change the xref to link it from another guide. + --> + <link type="guide" xref="index"/> + + <!-- + Think about whether other pages should be in the seealso list. + The target page will automatically get a seealso link back. + <link type="seealso" xref="someotherid"/> + --> + + <!-- + Think about whether external resources should be in the seealso + list. These require a title. + <link type="seealso" href="http://someurl"> + <title>Link title</title> + </link> + --> + + <desc>Write a short page description here.</desc> + + <keywords>comma-separated list, of keywords, for search</keywords> + </info> + + <title>@TITLE@</title> + + <p>Optionally provide introductory text. 
+ Use terms lists, tables, and bullet lists for reference material.</p> + + <terms> + <title>Example terms list (title optional)</title> + <item> + <title>Term 1</title> + <p>Description</p> + </item> + <item> + <title>Term 2</title> + <p>Terms can have multiple paragraphs.</p> + <screen>Or any other block element.</screen> + </item> + <item> + <title>Term 3.1</title> + <title>Term 3.2</title> + <p>Terms can also have multiple titles.</p> + </item> + </terms> + + <table> + <title>Example table (title optional)</title> + <thead> + <tr> + <th>Column 1</th> + <th>Column 2</th> + </tr> + </thead> + <tbody> + <tr> + <td>Row 1, column 1</td> + <td>Row 1, column 2</td> + </tr> + <tr> + <td>Row 2, column 1</td> + <td>Row 2, column 2</td> + </tr> + </tbody> + </table> + + <list> + <title>Example list (title optional)</title> + <item><p>Item 1</p></item> + <item> + <p>Item 2</p> + <list> + <item><p>Subitem 2.1</p></item> + <item><p>Subitem 2.2</p></item> + </list> + </item> + <item><p>Item 3</p></item> + </list> + +</page> diff --git a/templates/task.duck b/templates/task.duck new file mode 100644 index 0000000..75c8243 --- /dev/null +++ b/templates/task.duck @@ -0,0 +1,63 @@ +@ducktype/1.0 +[-] yelp-tmpl-desc Description of how to accomplish a user task + += @TITLE@ + [topic .task version=1.1] + +[-- + Recommended statuses: stub incomplete draft outdated review candidate final + Remove version attributes you don't use. +--] +@revision[version=0.1 docversion=0.1 pkgversion=0.1 date=@DATE@ status=stub] + +@credit[author copyright] + @name @NAME@ + @email @EMAIL@ + @years @YEAR@ + +[-- + This puts a link to this topic on the index page. + Change the xref to link it from another guide. +--] +@link[guide >index] + +[-- + Think about whether other pages should be in the seealso list. + The target page will automatically get a seealso link back. +@link[seealso >someotherid] +--] + +[-- + Think about whether external resources should be in the seealso + list. 
These require a title. +@link[seealso >>http://someurl] + @title Link title +--] + +@desc Write a short page description here. + +@keywords comma-separated list, of keywords, for search + + +Short introductory text: Write a couple sentences about what the +user is doing here, and why they might want to do it. + +[list] +. Prerequisites +* Optionally, list things the user needs to know or do first. +* Use links to other pages whenever they make sense. + +[steps] +. Optional title if different from page title or if prereqs present +* First step... +* Second step... +* Third step... + +Optionally, write expected results if non-obvious. + +[list] +. Next steps +* Optionally, links to other things the user might do now. +* But consider using seealso and other info links instead. + + diff --git a/templates/task.page b/templates/task.page index 225aefe..ba7be5f 100644 --- a/templates/task.page +++ b/templates/task.page @@ -1,9 +1,13 @@ <?yelp-tmpl-desc Description of how to accomplish a user task?> <page xmlns="http://projectmallard.org/1.0/" - type="topic" style="task" + type="topic" style="task" version="1.1" id="@ID@"> <info> - <revision version="0.1" date="@DATE@" status="stub"/> + <!-- + Recommended statuses: stub incomplete draft outdated review candidate final + Remove version attributes you don't use. + --> + <revision version="0.1" docversion="0.1" pkgversion="0.1" date="@DATE@" status="stub"/> <credit type="author copyright"> <name>@NAME@</name> @@ -11,23 +15,55 @@ <years>@YEAR@</years> </credit> - <desc></desc> + <!-- + This puts a link to this topic on the index page. + Change the xref to link it from another guide. + --> + <link type="guide" xref="index"/> + + <!-- + Think about whether other pages should be in the seealso list. + The target page will automatically get a seealso link back. + <link type="seealso" xref="someotherid"/> + --> + + <!-- + Think about whether external resources should be in the seealso + list. These require a title. 
+ <link type="seealso" href="http://someurl"> + <title>Link title</title> + </link> + --> + + <desc>Write a short page description here.</desc> + + <keywords>comma-separated list, of keywords, for search</keywords> </info> <title>@TITLE@</title> - <comment> - <cite date="@DATE@" href="mailto:@EMAIL@">@NAME@</cite> - <p>This assumes the reader knows how to.... By the end of this page, - the reader will be able to....</p> - </comment> + <p>Short introductory text: Write a couple sentences about what the + user is doing here, and why they might want to do it.</p> - <p>Short introductory text...</p> + <list> + <title>Prerequisites</title> + <item><p>Optionally, list things the user needs to know or do first.</p></item> + <item><p>Use links to other pages whenever they make sense.</p></item> + </list> <steps> + <title>Optional title if different from page title or if prereqs present</title> <item><p>First step...</p></item> <item><p>Second step...</p></item> <item><p>Third step...</p></item> </steps> + <p>Optionally, write expected results if non-obvious.</p> + + <list> + <title>Next steps</title> + <item><p>Optionally, links to other things the user might do now.</p></item> + <item><p>But consider using seealso and other info links instead.</p></item> + </list> + </page> diff --git a/tools/yelp-build.in b/tools/yelp-build.in index 898325b..f3e21d8 100755 --- a/tools/yelp-build.in +++ b/tools/yelp-build.in @@ -154,13 +154,18 @@ yelp_paths_normalize () { yelp_cache_in_page () { fbase=$(basename "$1") + ext=$(echo "$fbase" | sed -e 's/.*\.//') fdir=$( (cd $(dirname "$1") && pwd) ) sdir=${fdir##${cache_site_root}}/ url=file://$(echo "$fdir/$fbase" | urlencode) if [ "x$cache_site" = "x1" ]; then siteattr=' site:dir="'"$sdir"'"' fi - echo '<page cache:href="'"$url"'"'"$siteattr"'/>' + if [ "x$ext" = "xstack" ]; then + echo '<stack cache:href="'"$url"'"'"$siteattr"'/>' + else + echo '<page cache:href="'"$url"'"'"$siteattr"'/>' + fi } yelp_cache_in_site () { @@ -171,7 +176,7 
@@ yelp_cache_in_site () { fi fi done - for page in "$1"/*.page; do + for page in "$1"/*.page "$1"/*.stack; do if [ -e "$page" ]; then yelp_cache_in_page "$page" fi @@ -189,8 +194,10 @@ yelp_cache_in () { if [ "x$cache_site" = "x1" ]; then yelp_cache_in_site "$page" else - for sub in "$page"/*.page; do - yelp_cache_in_page "$sub" + for sub in "$page"/*.page "$page"/*.stack; do + if [ -e "$sub" ]; then + yelp_cache_in_page "$sub" + fi done fi else @@ -376,15 +383,17 @@ yelp_html_mal2html () { html_cache_url='file://'`echo "$html_cache_file" | urlencode` echo '<xsl:param name="mal.cache.file" select="'"'$html_cache_url'"'"/>' echo '<xsl:template match="/">' - echo '<xsl:for-each select="cache:cache/mal:page">' + echo '<xsl:for-each select="cache:cache/mal:page | cache:cache/mal:stack">' echo '<xsl:variable name="href" select="@cache:href"/>' - echo '<xsl:for-each select="document(@cache:href)/mal:page">' + echo '<xsl:for-each select="document(@cache:href)">' + echo '<xsl:for-each select="mal:page | mal:stack/mal:page">' echo '<xsl:call-template name="html.output"/>' echo '<xsl:call-template name="mal.files.copy">' echo ' <xsl:with-param name="href" select="substring-after($href, '\''file://'\'')"/>' echo '</xsl:call-template>' echo '</xsl:for-each>' echo '</xsl:for-each>' + echo '</xsl:for-each>' echo '</xsl:template>' echo '</xsl:stylesheet>' ) | (cd "$html_out" && xsltproc $html_profile \ diff --git a/tools/yelp-check.in b/tools/yelp-check.in index 526679d..d46e004 100755 --- a/tools/yelp-check.in +++ b/tools/yelp-check.in @@ -1060,11 +1060,29 @@ yelp_validate_db () { major=$(echo "$version" | cut -c1) if [ "x$major" = "x5" ]; then check_out_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` - rng_uri="http://docbook.org/xml/$version/rng/docbook.rng" + # Canonical URIs are http, but they 301 redirect to https. jing can handle + # https fine, but not the redirect. And jing doesn't look at catalogs. So + # just always feed jing an https URI. 
if [ "x$check_jing" = "x1" ]; then + rng_uri="https://docbook.org/xml/$version/rng/docbook.rng" jing -i "$rng_uri" "$1" > "$check_out_file" 2>&1 else - xmllint --noout --xinclude --noent --relaxng "$rng_uri" "$1" > "$check_out_file" 2>&1 + # xmllint, on the other hand, does support catalogs. It also doesn't + # do the redirect, but it wouldn't matter if it did because it doesn't + # do https. So if the schema is available locally in the catalog, hand + # xmllint the http URI so it can use the local copy. Otherwise, we have + # to get curl involved to do https. + rng_uri="http://docbook.org/xml/$version/rng/docbook.rng" + incat=$(xmlcatalog /etc/xml/catalog "$rng_uri" | grep -c '^file:') + if [ "x$incat" != "x0" ]; then + xmllint --noout --xinclude --noent --relaxng "$rng_uri" "$1" > "$check_out_file" 2>&1 + else + rng_uri="https://docbook.org/xml/$version/rng/docbook.rng" + check_rng_file=`mktemp "${TMPDIR:-/tmp}"/yelp-XXXXXXXX` + curl -sL -o "$check_rng_file" "$rng_uri" + xmllint --noout --xinclude --noent --relaxng "$check_rng_file" "$1" > "$check_out_file" 2>&1; check_rng_retval="$?" + rm "$check_rng_file"; (exit "$check_rng_retval") # re-raise xmllint's status so the $? read below is not rm's + fi fi yelp_check_retval="$?" cat "$check_out_file" | grep -v 'validates$' diff --git a/tools/yelp-check.py b/tools/yelp-check.py new file mode 100644 index 0000000..652062e --- /dev/null +++ b/tools/yelp-check.py @@ -0,0 +1,1235 @@ +#!/bin/python3 +# +# yelp-check +# Copyright (C) 2011-2020 Shaun McCance <shaunm@gnome.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +import configparser +import lxml.etree +import os +import sys +import urllib.request +import shutil +import subprocess +import tempfile +import textwrap + +# FIXME: don't hardcode this +DATADIR = '/usr/share/yelp-tools' + +XML_ID = '{http://www.w3.org/XML/1998/namespace}id' +NAMESPACES = { + 'mal': 'http://projectmallard.org/1.0/', + 'cache': 'http://projectmallard.org/cache/1.0/', + 'db': 'http://docbook.org/ns/docbook', + 'e': 'http://projectmallard.org/experimental/', + 'ui': 'http://projectmallard.org/ui/1.0/', + 'uix': 'http://projectmallard.org/experimental/ui/', + 'xlink': 'http://www.w3.org/1999/xlink' + } + +def _stringify(el): + ret = el.text or '' + for ch in el: + ret = ret + _stringify(ch) + if el.tail is not None: + ret = ret + el.tail + return ret + +def get_format(node): + ns = lxml.etree.QName(node).namespace + if ns in (NAMESPACES['mal'], NAMESPACES['cache']): + return 'mallard' + elif ns == NAMESPACES['db']: + return 'docbook5' + elif ns is None: + # For now, just assume no ns means docbook4 + return 'docbook4' + else: + return None + +class InputFile: + def __init__(self, filepath, filename, sitedir=None): + self.filepath = filepath + self.filename = filename + self.absfile = os.path.join(filepath, filename) + self.absdir = os.path.dirname(self.absfile) + self.sitedir = sitedir or '' + self.sitefilename = self.sitedir + self.filename + + +class Checker: + name = None + desc = None + blurb = None + formats = [] + arguments = [] + postblurb = None + xinclude = True + config = None + + def __init__(self, yelpcheck): + self.yelpcheck = yelpcheck + self.options = {} + self.fileargs = [] + self.tmpdir = None + + def __del__(self): + if self.tmpdir is not None: + shutil.rmtree(self.tmpdir) + self.tmpdir = None + + def parse_args(self, 
args): + while len(args) > 0: + argdef = None + if args[0].startswith('--'): + for arg_ in self.arguments: + if args[0] == '--' + arg_[0]: + argdef = arg_ + break + if argdef is None: + self.print_help() + return 1 + elif args[0].startswith('-'): + for arg_ in self.arguments: + if args[0] == arg_[1]: + argdef = arg_ + break + if argdef is None: + self.print_help() + return 1 + if argdef is not None: + takesarg = (argdef[2] is not None) + if takesarg: + if len(args) < 2: + self.print_help() + return 1 + self.options.setdefault(argdef[0], []) + self.options[argdef[0]].append(args[1]) + args = args[2:] + else: + self.options[argdef[0]] = True + args = args[1:] + else: + self.fileargs.append(args[0]) + args = args[1:] + cfgfile = None + if len(self.fileargs) > 0: + cfgfile = os.path.join(os.path.dirname(self.fileargs[0]), '.yelp-tools.cfg') + if not os.path.exists(cfgfile): + cfgfile = None + if cfgfile is None: + cfgfile = os.path.join(os.getcwd(), '.yelp-tools.cfg') + if os.path.exists(cfgfile): + self.config = configparser.ConfigParser() + try: + self.config.read(cfgfile) + except Exception as e: + print(e, file=sys.stderr) + sys.exit(1) + return 0 + + def get_option_bool(self, arg): + if arg in self.options: + return self.options[arg] == True + if self.config is not None: + val = self.config.get('check:' + self.name, arg, fallback=None) + if val is not None: + return (val == 'true') + val = self.config.get('check', arg, fallback=None) + if val is not None: + return (val == 'true') + return False + + def get_option_str(self, arg): + if arg in self.options: + if isinstance(self.options[arg], list): + return self.options[arg][-1] + if self.config is not None: + val = self.config.get('check:' + self.name, arg, fallback=None) + if val is not None: + return val + val = self.config.get('check', arg, fallback=None) + if val is not None: + return val + return None + + def get_option_list(self, arg): + if arg in self.options: + if isinstance(self.options[arg], list): + ret = 
[] + for opt in self.options[arg]: + ret.extend(opt.replace(',', ' ').split()) + return ret + if self.config is not None: + val = self.config.get('check:' + self.name, arg, fallback=None) + if val is not None: + return val.replace(',', ' ').split() + val = self.config.get('check', arg, fallback=None) + if val is not None: + return val.replace(',', ' ').split() + return None + + def iter_files(self, sitedir=None): + issite = self.get_option_bool('site') + if len(self.fileargs) == 0: + self.fileargs.append('.') + for filearg in self.fileargs: + if os.path.isdir(filearg): + if issite: + for infile in self.iter_site(filearg, '/'): + yield infile + else: + for fname in os.listdir(filearg): + if fname.endswith('.page'): + yield InputFile(filearg, fname) + else: + if issite: + # FIXME: should do some normalization here, I guess. + # It's hard to get this perfect without a defined start dir + yield InputFile(os.getcwd(), filearg, '/' + os.path.dirname(filearg)) + else: + yield InputFile(os.getcwd(), filearg) + + def iter_site(self, filepath, sitedir): + for fname in os.listdir(filepath): + newpath = os.path.join(filepath, fname) + if os.path.isdir(newpath): + if fname == '__pintail__': + continue + for infile in self.iter_site(newpath, sitedir + fname + '/'): + yield infile + elif fname.endswith('.page'): + yield InputFile(filepath, fname, sitedir) + + def get_xml(self, xmlfile): + # FIXME: we can cache these if we add a feature to run multiple + # checkers at once + tree = lxml.etree.parse(xmlfile.absfile) + if self.xinclude: + lxml.etree.XInclude()(tree.getroot()) + return tree + + def create_tmpdir(self): + if self.tmpdir is None: + self.tmpdir = tempfile.mkdtemp() + + def print_help(self): + print('Usage: yelp-check ' + self.name + ' [OPTIONS] [FILES]') + print('Formats: ' + ' '.join(self.formats) + '\n') + #FIXME: prettify names of formats + if self.blurb is not None: + print(self.blurb + '\n') + print('Options:') + maxarglen = 2 + args = [] + for arg in 
self.arguments: + argkey = '--' + arg[0] + if arg[1] is not None: + argkey = arg[1] + ', ' + argkey + if arg[2] is not None: + argkey = argkey + ' ' + arg[2] + args.append((argkey, arg[3])) + for arg in args: + maxarglen = max(maxarglen, len(arg[0]) + 1) + for arg in args: + print(' ' + (arg[0]).ljust(maxarglen) + ' ' + arg[1]) + if self.postblurb is not None: + print(self.postblurb) + + def main(self, args): + pass + + +class HrefsChecker (Checker): + name = 'hrefs' + desc = 'Find broken external links in a document' + blurb = ('Find broken href links in FILES in a Mallard document, or\n' + + 'broken ulink or XLink links in FILES in a DocBook document.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('allow', None, 'URL', 'Allow URL or list of URLs without checking') + ] + postblurb = 'URL may be a comma- and/or space-separated list, or specified\nmultiple times.' 
+ + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + # safelisting URLs that we use as identifiers + hrefs = { + 'http://creativecommons.org/licenses/by-sa/3.0/': True, + 'https://creativecommons.org/licenses/by-sa/3.0/': True, + 'http://creativecommons.org/licenses/by-sa/3.0/us/': True, + 'https://creativecommons.org/licenses/by-sa/3.0/us/': True + } + allow = self.get_option_list('allow') + if allow is not None: + for url in allow: + hrefs[url] = True + retcode = 0 + + for infile in self.iter_files(): + xml = self.get_xml(infile) + for el in xml.xpath('//*[@href | @xlink:href | self::ulink/@url]', + namespaces=NAMESPACES): + href = el.get('href', None) + if href is None: + href = el.get('{www.w3.org/1999/xlink}href') + if href is None: + href = el.get('url') + if href is None: + continue + if href.startswith('mailto:'): + continue + if href not in hrefs: + try: + req = urllib.request.urlopen(href) + hrefs[href] = (req.status == 200) + except Exception as e: + hrefs[href] = False + if not hrefs[href]: + retcode = 1 + print(infile.sitefilename + ': ' + href) + + return retcode + + +class IdsChecker (Checker): + name = 'ids' + desc = 'Find Mallard page IDs that do not match file names' + blurb = ('Find pages in a Mallard document whose page ID does not match\n' + + 'the base file name of the page file.') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site') + ] + + def main(self, args): + if self.parse_args(args) != 0: + return 1 + if 'help' in self.options: + self.print_help() + return 0 + + retcode = 0 + + for infile in self.iter_files(): + xml = self.get_xml(infile) + isok = False + pageid = None + if infile.filename.endswith('.page'): + try: + pageid = xml.getroot().get('id') + isok = (pageid == os.path.basename(infile.filename)[:-5]) + except: + isok = False + if not isok: + 
retcode = 1 + print(infile.sitefilename + ': ' + (pageid or '')) + + return retcode + + +class LinksChecker (Checker): + name = 'links' + desc = 'Find broken xref or linkend links in a document' + blurb = ('Find broken xref links in FILES in a Mallard document,\n' + + 'or broken linkend links in FILES in a DocBook document.') + formats = ['docbook4', 'docbook5', 'mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('cache', '-c', 'CACHE', 'Use the existing Mallard cache CACHE'), + ('ignore', '-i', None, 'Ignore xrefs where href is present') + ] + + def __init__(self, yelpcheck): + super().__init__(yelpcheck) + self.idstoxrefs = {} + self.idstolinkends = {} + + def _accumulate_mal(self, node, pageid, sectid, xrefs, sitedir=None): + thisid = node.get('id') + if thisid is not None: + if node.tag == '{' + NAMESPACES['mal'] + '}page': + pageid = thisid + else: + sectid = thisid + curid = pageid + ignore = self.get_option_bool('ignore') + if curid is not None: + if sectid is not None: + # id attrs in cache files are already fully formed + if '#' in sectid: + curid = sectid + else: + curid = curid + '#' + sectid + if sitedir is not None: + # id attrs in cache files already have sitedir prefixed + if curid[0] != '/': + curid = sitedir + curid + self.idstoxrefs.setdefault(curid, []) + if xrefs: + xref = node.get('xref') + if xref is not None: + if not (ignore and (node.get('href') is not None)): + self.idstoxrefs[curid].append(xref) + for child in node: + self._accumulate_mal(child, pageid, sectid, xrefs, sitedir) + + def _accumulate_db(self, node, nodeid): + thisid = node.get('id') + if thisid is None: + thisid = node.get(XML_ID) + if thisid is not None: + nodeid = thisid + self.idstolinkends.setdefault(nodeid, []) + if nodeid is not None: + linkend = node.get('linkend') + if linkend is not None: + self.idstolinkends[nodeid].append(linkend) + for child in node: + 
def main(self, args):
    """Report broken Mallard xrefs and DocBook linkends.

    Mallard ids and xrefs are accumulated across the optional cache file
    and all input files, then resolved together at the end. DocBook files
    are each checked on their own. Every broken link is printed as
    ``<source id>: <target>``.

    Returns 0 when no broken link was found (or help was shown), 1 when
    argument parsing fails or at least one link is broken.
    """
    if self.parse_args(args) != 0:
        return 1
    if 'help' in self.options:
        self.print_help()
        return 0

    retcode = 0

    cachefile = self.get_option_str('cache')
    if cachefile is not None:
        xml = self.get_xml(InputFile(os.getcwd(), cachefile))
        # Cache pages only contribute ids, not xrefs to be checked.
        self._accumulate_mal(xml.getroot(), None, None, False)

    for infile in self.iter_files():
        xml = self.get_xml(infile)
        fmt = get_format(xml.getroot())
        if fmt == 'mallard':
            self._accumulate_mal(xml.getroot(), None, None, True, infile.sitedir)
        elif fmt in ('docbook4', 'docbook5'):
            # For DocBook, we assume each filearg is its own document, so
            # we reset the dict each time and only check within the file.
            # Note that XInclude and SYSTEM includes DO happen first.
            self.idstolinkends = {}
            self._accumulate_db(xml.getroot(), None)
            for curid, linkends in self.idstolinkends.items():
                for linkend in linkends:
                    if linkend not in self.idstolinkends:
                        print(curid + ': ' + linkend)
                        retcode = 1

    for curid, xrefs in self.idstoxrefs.items():
        for xref in xrefs:
            checkref = xref
            # startswith() instead of [0] so an empty xref attribute is
            # reported as a broken link rather than raising IndexError.
            if checkref.startswith('#'):
                # Section-only xref: resolve against the current page.
                checkref = curid.split('#')[0] + checkref
            if curid.startswith('/') and not checkref.startswith('/'):
                # Site mode: relative xrefs live in the source's directory.
                checkref = curid[:curid.rfind('/')+1] + checkref
            if checkref not in self.idstoxrefs:
                print(curid + ': ' + xref)
                retcode = 1

    return retcode
def main(self, args):
    """Report media references that do not exist on disk.

    Collects ``src`` attributes (Mallard) or ``fileref`` attributes
    (DocBook) from each input file, resolves them against the file's
    directory and prints ``<sitefilename>: <reference>`` for every path
    that does not exist.

    Returns 0 when every reference exists (or help was shown), 1 when
    argument parsing fails or any reference is missing.
    """
    if self.parse_args(args) != 0:
        return 1
    if 'help' in self.options:
        self.print_help()
        return 0

    retcode = 0

    for infile in self.iter_files():
        xml = self.get_xml(infile)
        fmt = get_format(xml.getroot())
        if fmt == 'mallard':
            elements = xml.xpath('//mal:media[@src] | //uix:thumb | //ui:thumb | //e:mouseover',
                                 namespaces=NAMESPACES)
            attr = 'src'
        elif fmt == 'docbook5':
            # FIXME: do we care about entityref?
            elements = xml.xpath('//db:audiodata | //db:imagedata | //db:videodata',
                                 namespaces=NAMESPACES)
            attr = 'fileref'
        elif fmt == 'docbook4':
            elements = xml.xpath('//audiodata | //imagedata | //videodata')
            attr = 'fileref'
        else:
            elements = []
            attr = None
        for el in elements:
            src = el.get(attr)
            if src is None:
                # Element carries no file reference (e.g. a thumb without
                # src, or DocBook data using entityref); there is nothing
                # to look up, and os.path.join would raise on None.
                continue
            fsrc = os.path.join(infile.absdir, src)
            if not os.path.exists(fsrc):
                print(infile.sitefilename + ': ' + src)
                retcode = 1

    return retcode
def main(self, args):
    """Print the ids of pages that cannot be reached from the index page.

    Guide links are collected from the optional cache file and from every
    input file. Each input page is then followed upwards through its
    guides until a page already known to be reachable is found. The root
    is ``/index`` in site mode and ``index`` otherwise. Unreachable page
    ids are printed one per line.

    Returns 0 when no orphan was found (or help was shown), 1 when
    argument parsing fails or at least one page is orphaned.
    """
    if self.parse_args(args) != 0:
        return 1
    if 'help' in self.options:
        self.print_help()
        return 0

    retcode = 0

    cachefile = self.get_option_str('cache')
    if cachefile is not None:
        xml = self.get_xml(InputFile(os.getcwd(), cachefile))
        for page in xml.getroot():
            if page.tag == '{' + NAMESPACES['mal'] + '}page':
                if not page.get('id'):
                    continue
                self._collect_links(page, page.get('{http://projectmallard.org/site/1.0/}dir', ''))

    pageids = set()
    for infile in self.iter_files():
        xml = self.get_xml(infile)
        pageid = xml.getroot().get('id')
        # Skip missing *and* empty ids, matching the cache loop above;
        # _collect_links indexes the first character of the id.
        if not pageid:
            continue
        pageids.add(infile.sitedir + pageid)
        self._collect_links(xml.getroot(), infile.sitedir)

    # Map each direct subdirectory index page to the page that lists
    # site subdirectories, so it counts as one of that index's guides.
    siteupdirs = {}
    for pageid in self.sitesubdirs:
        dirname = pageid[:pageid.rfind('/')+1]
        for subid in self.guidelinks:
            if subid.startswith(dirname) and subid.endswith('/index'):
                # -6 strips the trailing '/index'.
                mid = subid[len(dirname):-6]
                if mid != '' and '/' not in mid:
                    siteupdirs[subid] = pageid

    if self.get_option_bool('site'):
        okpages = {'/index'}
    else:
        okpages = {'index'}
    for pageid in sorted(pageids):
        isok = pageid in okpages
        if not isok:
            # Breadth-first walk over guides; .get() tolerates a page id
            # that was never registered as a guidelinks key.
            guides = list(self.guidelinks.get(pageid, ()))
            if pageid in siteupdirs and siteupdirs[pageid] not in guides:
                guides.append(siteupdirs[pageid])
            queued = set(guides)
            cur = 0
            while cur < len(guides):
                if guides[cur] in okpages:
                    isok = True
                    break
                for guide in self.guidelinks.get(guides[cur], ()):
                    if guide not in queued:
                        queued.add(guide)
                        guides.append(guide)
                cur += 1
        if isok:
            okpages.add(pageid)
        else:
            print(pageid)
            retcode = 1

    return retcode
def main(self, args):
    """Validate each input file against its DTD or RELAX NG schema.

    Mallard files are validated against an RNG generated by mal-rng.xsl
    into the temporary directory, one per version string. DocBook 4
    files use their own DOCTYPE or the 4.5 DTD. DocBook 5 files use the
    docbook.org RNG for their version. Validator output is printed for
    each failing file.

    Returns 0 on success (or when help was shown), 1 when argument
    parsing fails or a file has no recognised format, otherwise the exit
    status of the validator for the most recent failing file.
    """
    if self.parse_args(args) != 0:
        return 1
    if 'help' in self.options:
        self.print_help()
        return 0

    retcode = 0
    xmllint = ['xmllint', '--noout', '--xinclude', '--noent']

    for infile in self.iter_files():
        xml = self.get_xml(infile)
        fmt = get_format(xml.getroot())
        command = None
        if fmt == 'mallard':
            version = xml.getroot().get('version')
            if version is None or version == '':
                tag = xml.getroot().tag
                if tag == '{' + NAMESPACES['mal'] + '}stack':
                    # 1.2 isn't final yet as of 2020-01-09. Stacks will
                    # likely be in 1.2, so we can assume at least that.
                    version = '1.2'
                elif tag == '{' + NAMESPACES['cache'] + '}cache':
                    version = 'cache/1.0'
                else:
                    version = '1.0'
            self.create_tmpdir()
            # One merged RNG per distinct version string, reused across files.
            rng = os.path.join(self.tmpdir,
                               version.replace('/', '__').replace(' ', '__'))
            if not os.path.exists(rng):
                strict = 'true()' if self.get_option_bool('strict') else 'false()'
                allow = self.get_option_list('allow')
                allow = '' if allow is None else ' '.join(allow)
                subprocess.call(['xsltproc', '-o', rng,
                                 '--param', 'rng.strict', strict,
                                 '--stringparam', 'rng.strict.allow', allow,
                                 os.path.join(DATADIR, 'xslt', 'mal-rng.xsl'),
                                 infile.absfile])
            if self.get_option_bool('jing'):
                command = ['jing', '-i', rng, infile.filename]
            else:
                command = xmllint + ['--relaxng', rng, infile.filename]
        elif fmt == 'docbook4':
            if xml.docinfo.doctype.startswith('<!DOCTYPE'):
                command = xmllint + ['--postvalid', infile.filename]
            else:
                command = xmllint + [
                    '--dtdvalid',
                    'http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd',
                    infile.filename]
        elif fmt == 'docbook5':
            version = xml.getroot().get('version')
            if version is None or version == '':
                version = '5.0'
            # Canonical URIs are http, but they 301 redirect to https. jing
            # can handle https fine, but not the redirect. And jing doesn't
            # look at catalogs. So just always feed jing an https URI.
            rnghttp = 'http://docbook.org/xml/' + version + '/rng/docbook.rng'
            rnghttps = 'https://docbook.org/xml/' + version + '/rng/docbook.rng'
            if self.get_option_bool('jing'):
                command = ['jing', '-i', rnghttps, infile.filename]
            else:
                # xmllint, on the other hand, does support catalogs. It also
                # doesn't do the redirect, but it wouldn't matter if it did
                # because it doesn't do https. So if the schema is available
                # locally in the catalog, hand xmllint the http URI so it
                # can use the local copy. Otherwise, we have to download
                # the schema over https ourselves.
                try:
                    catfile = subprocess.check_output(['xmlcatalog',
                                                       '/etc/xml/catalog',
                                                       rnghttp],
                                                      stderr=subprocess.DEVNULL,
                                                      text=True)
                    for catline in catfile.split('\n'):
                        if catline.startswith('file://'):
                            command = xmllint + ['--relaxng', rnghttp,
                                                 infile.filename]
                            break
                except (OSError, subprocess.SubprocessError, UnicodeError):
                    # xmlcatalog missing, no catalog entry, or undecodable
                    # output: fall through to the downloaded schema.
                    pass
                if command is None:
                    self.create_tmpdir()
                    rngfile = os.path.join(self.tmpdir, 'docbook-' + version + '.rng')
                    if not os.path.exists(rngfile):
                        urllib.request.urlretrieve(rnghttps, rngfile)
                    command = xmllint + ['--relaxng', rngfile, infile.filename]
        if command is not None:
            try:
                subprocess.check_output(command,
                                        cwd=infile.filepath,
                                        stderr=subprocess.STDOUT,
                                        text=True)
            except subprocess.CalledProcessError as e:
                retcode = e.returncode
                print(e.output)
        else:
            # Unrecognised format: nothing to validate against.
            retcode = 1

    return retcode
return 1 + if 'help' in self.options: + self.print_help() + return 0 + + for infile in self.iter_files(): + xml = self.get_xml(infile) + format = get_format(xml.getroot()) + if format == 'mallard': + for el in xml.xpath('//mal:comment', namespaces=NAMESPACES): + thisid = xml.getroot().get('id') + par = el + while par is not None: + if par.tag == '{' + NAMESPACES['mal'] + '}section': + sectid = par.get('id') + if sectid is not None: + thisid = thisid + '#' + sectid + break + par = par.getparent() + print('Page: ' + infile.sitedir + thisid) + for ch in el.xpath('mal:cite[1]', namespaces=NAMESPACES): + name = _stringify(ch).strip() + href = ch.get('href') + if href is not None and href.startswith('mailto:'): + name = name + ' <' + href[7:] + '>' + print('From: ' + name) + date = ch.get('date') + if date is not None: + print('Date: ' + date) + print('') + for ch in el: + if isinstance(ch, lxml.etree._ProcessingInstruction): + continue + elif ch.tag == '{' + NAMESPACES['mal'] + '}cite': + continue + elif ch.tag in ('{' + NAMESPACES['mal'] + '}p', + '{' + NAMESPACES['mal'] + '}title'): + for s in _stringify(ch).strip().split('\n'): + print(' ' + s.strip()) + print('') + else: + name = lxml.etree.QName(ch).localname + print(' <' + name + '>...</' + name + '>\n') + elif format in ('docbook4', 'docbook5'): + if format == 'docbook4': + dbxpath = '//remark' + else: + dbxpath = '//db:remark' + for el in xml.xpath(dbxpath, namespaces=NAMESPACES): + thisid = infile.filename + par = el + while par is not None: + sectid = par.get('id') + if sectid is None: + sectid = par.get(XML_ID) + if sectid is not None: + thisid = thisid + '#' + sectid + break + par = par.getparent() + print('Page: ' + thisid) + flag = el.get('revisionflag') + if flag is not None: + print('Flag: ' + flag) + print('') + for s in _stringify(el).strip().split('\n'): + print(' ' + s.strip()) + print('') + + return 0 + + +class LicenseChecker (Checker): + name = 'license' + desc = 'Report the license of Mallard 
def get_license(self, href):
    """Map a license ``href`` to a short identifier.

    Creative Commons URLs become ``cc-`` followed by the path components
    after ``licenses`` joined with dashes. GNU license URLs become the
    last path component with ``.html`` removed. A missing href or any
    other URL yields ``'unknown'``.
    """
    cc_prefixes = ('http://creativecommons.org/licenses/',
                   'https://creativecommons.org/licenses/')
    gnu_prefixes = ('http://www.gnu.org/licenses/',
                    'https://www.gnu.org/licenses/')

    if href is None:
        return 'unknown'
    if href.startswith(cc_prefixes):
        # Non-empty pieces are scheme, host, 'licenses', then the license
        # path; keep only what follows 'licenses'.
        pieces = [piece for piece in href.split('/') if piece]
        return 'cc-' + '-'.join(pieces[3:])
    if href.startswith(gnu_prefixes):
        last = href.split('/')[-1]
        return last.replace('.html', '')
    return 'unknown'
if skip: + continue + cept = self.get_option_list('except') + if cept is not None: + skip = False + for lic in licenses: + if lic in cept: + skip = True + if skip: + continue + + if self.get_option_bool('totals'): + for lic in licenses: + totals.setdefault(lic, 0) + totals[lic] += 1 + else: + print(infile.sitedir + thisid + ': ' + ' '.join(licenses)) + + if self.get_option_bool('totals'): + for lic in sorted(totals): + print(lic + ': ' + str(totals[lic])) + + return 0 + + +class StatusChecker (Checker): + name = 'status' + desc = 'Report the status of Mallard pages' + blurb = ('Report the status of the Mallard page files FILES. Each\n' + + 'matching page is reporting along with its status.') + formats = ['mallard'] + arguments = [ + ('help', '-h', None, 'Show this help and exit'), + ('site', '-s', None, 'Treat pages as belonging to a Mallard site'), + ('version', None, 'VER', 'Select revisions with the version attribute VER'), + ('docversion', None, 'VER', 'Select revisions with the docversion attribute VER'), + ('pkgversion', None, 'VER', 'Select revisions with the pkgversion attribute VER'), + ('older', None, 'DATE', 'Only show pages older than DATE'), + ('newer', None, 'DATE', 'Only show pages newer than DATE'), + ('only', None, 'STATUSES', 'Only show pages whose status is in STATUSES'), + ('except', None, 'STATUSES', 'Exclude pages whose status is in STATUSES'), + ('totals', None, None, 'Show total counts for each status') + ] + postblurb = 'VER and STATUSES may be comma- and/or space-separated lists, or specified\nmultiple times.' 
def main(self, args):
    """Print the status of each Mallard page, or per-status totals.

    For every input file the best ``revision`` element is chosen: it must
    satisfy every requested version/docversion/pkgversion filter, and of
    the matching revisions the one with the greatest ``date`` string wins
    (a later revision wins ties; a dated revision is never displaced by
    an undated one). The page's status is that revision's ``status``
    attribute, or ``'none'``. Pages are then filtered by the older/newer/
    only/except options.

    Returns 1 when argument parsing fails, otherwise 0.
    """
    if self.parse_args(args) != 0:
        return 1
    if 'help' in self.options:
        self.print_help()
        return 0

    totals = {}

    # Each entry is a list of acceptable tags; a revision must carry at
    # least one tag from every entry.
    checks = []
    for option, prefix in (('version', ''),
                           ('docversion', 'doc:'),
                           ('pkgversion', 'pkg:')):
        ver = self.get_option_list(option)
        if ver is not None:
            checks.append([prefix + v for v in ver])

    older = self.get_option_str('older')
    newer = self.get_option_str('newer')
    only = self.get_option_list('only')
    cept = self.get_option_list('except')
    show_totals = self.get_option_bool('totals')

    for infile in self.iter_files():
        xml = self.get_xml(infile)
        # Fall back to the file name so a page without an id does not
        # make the string concatenation below raise TypeError.
        pageid = xml.getroot().get('id') or infile.filename
        bestrev = None
        for rev in xml.xpath('/mal:page/mal:info/mal:revision', namespaces=NAMESPACES):
            revversion = (rev.get('version') or '').split()
            docversion = rev.get('docversion')
            if docversion is not None:
                revversion.append('doc:' + docversion)
            pkgversion = rev.get('pkgversion')
            if pkgversion is not None:
                revversion.append('pkg:' + pkgversion)
            if not all(any(v in revversion for v in check) for check in checks):
                continue
            if bestrev is None:
                bestrev = rev
                continue
            bestdate = bestrev.get('date')
            thisdate = rev.get('date')
            if bestdate is None or (thisdate is not None and thisdate >= bestdate):
                bestrev = rev
        if bestrev is not None:
            status = bestrev.get('status') or 'none'
            date = bestrev.get('date') or None
        else:
            status = 'none'
            date = None
        # Undated pages never satisfy a date filter.
        if older is not None and (date is None or date >= older):
            continue
        if newer is not None and (date is None or date <= newer):
            continue
        if only is not None and status not in only:
            continue
        if cept is not None and status in cept:
            continue
        if show_totals:
            totals[status] = totals.get(status, 0) + 1
        else:
            print(infile.sitedir + pageid + ': ' + status)

    if show_totals:
        for st in sorted(totals):
            print(st + ': ' + str(totals[st]))

    return 0
class CustomChecker(Checker):
    """Checker defined by a ``check:<name>`` section of the configuration."""

    formats = ['docbook4', 'docbook5', 'mallard']
    arguments = [
        ('help', '-h', None, 'Show this help and exit'),
        ('site', '-s', None, 'Treat pages as belonging to a Mallard site')
    ]

    def __init__(self, name, yelpcheck):
        super().__init__(yelpcheck)
        # The command name is supplied at run time, not as a class attribute.
        self.name = name

    def main(self, args):
        """Run the custom check named ``self.name``.

        Returns 0 when help was shown or every assertion held, 1 when
        argument parsing fails, the command is not configured, it has no
        ``assert`` option, or any assertion failed.
        """
        if self.parse_args(args) != 0:
            return 1

        sect = 'check:' + self.name
        if sect not in self.config.sections():
            print('Unrecognized command: ' + self.name, file=sys.stderr)
            return 1
        self.blurb = self.config.get(sect, 'blurb', fallback=None)
        if self.blurb is not None:
            self.blurb = '\n'.join(textwrap.wrap(self.blurb))

        if 'help' in self.options:
            self.print_help()
            return 0

        assertexpr = self.config.get(sect, 'assert', fallback=None)
        if assertexpr is not None:
            return self.run_assert(assertexpr)

        print('No action found for command: ' + self.name, file=sys.stderr)
        return 1

    def run_assert(self, assertexpr):
        """Evaluate the XPath assertion on every selected node.

        For each input file, the nodes matched by the section's ``select``
        expression (default ``/``) are tested with ``assertexpr``. Every
        node for which it is false prints ``<page>: <message>``.

        Returns 1 if any assertion failed and 0 otherwise, so that the
        process exit status reflects failures like the built-in checks.
        """
        sect = 'check:' + self.name
        selectexpr = self.config.get(sect, 'select', fallback='/')
        message = self.config.get(sect, 'message', fallback='Assertion failed')
        self.xinclude = self.config.get(sect, 'xinclude', fallback='true') != 'false'

        namespaces = {}
        if 'namespaces' in self.config.sections():
            namespaces = {ns: self.config.get('namespaces', ns)
                          for ns in self.config.options('namespaces')}

        retcode = 0
        for infile in self.iter_files():
            xml = self.get_xml(infile)
            thisid = xml.getroot().get('id') or infile.filename
            for root in xml.xpath(selectexpr, namespaces=namespaces):
                if not bool(root.xpath(assertexpr, namespaces=namespaces)):
                    print(infile.sitedir + thisid + ': ' + message)
                    retcode = 1
        return retcode
def print_usage(self):
    """Print the usage line followed by the available commands.

    Built-in checkers are grouped into check, report and other commands.
    Custom commands are read from ``.yelp-tools.cfg`` in the current
    directory: every ``check:<name>`` section that has an ``assert``
    option and does not reuse a built-in name is listed. If that file
    cannot be read or parsed, the custom list is silently omitted.
    """
    print('Usage: yelp-check <COMMAND> [OPTIONS] [FILES]')
    namelen = 2
    checks = []
    reports = []
    others = []
    for cls in sorted(Checker.__subclasses__(), key=(lambda cls: cls.name or '')):
        if cls is CustomChecker:
            continue
        namelen = max(namelen, len(cls.name) + 2)
        if cls in (HrefsChecker, IdsChecker, LinksChecker,
                   MediaChecker, OrphansChecker, ValidateChecker):
            checks.append(cls)
        elif cls in (CommentsChecker, LicenseChecker, StatusChecker,
                     StyleChecker):
            reports.append(cls)
        else:
            others.append(cls)
    for heading, group in (('\nCheck commands:', checks),
                           ('\nReport commands:', reports),
                           ('\nOther commands:', others)):
        if group:
            print(heading)
            for cls in group:
                print(' ' + cls.name.ljust(namelen) + cls.desc)

    config = configparser.ConfigParser()
    try:
        config.read('.yelp-tools.cfg')
    except (configparser.Error, OSError, UnicodeError):
        # A broken config must not prevent the built-in usage from
        # being shown; just skip the custom section.
        return

    builtin_names = {cls.name for cls in Checker.__subclasses__()}
    customs = []
    for sect in config.sections():
        if not sect.startswith('check:'):
            continue
        # Strip the 'check:' prefix.
        name = sect[6:]
        if name in builtin_names:
            continue
        if config.get(sect, 'assert', fallback=None) is None:
            continue
        desc = config.get(sect, 'desc', fallback='')
        namelen = max(namelen, len(name) + 2)
        customs.append((name, desc))
    if customs:
        print('\nCustom commands:')
        for name, desc in customs:
            print(' ' + name.ljust(namelen) + desc)
rdf:resource="http://yelp.io"/> <mailing-list rdf:resource="http://mail.gnome.org/mailman/listinfo/gnome-doc-devel-list" /> <download-page rdf:resource="http://download.gnome.org/sources/yelp-tools/" /> - <bug-database rdf:resource="http://bugzilla.gnome.org/browse.cgi?product=yelp-tools" /> + <bug-database rdf:resource="https://gitlab.gnome.org/GNOME/yelp-tools/issues/" /> <category rdf:resource="http://api.gnome.org/doap-extensions#core" /> <programming-language>sh</programming-language> |