1 files changed, 614 insertions, 0 deletions
diff --git a/lib/regexprops.c b/lib/regexprops.c
new file mode 100644
index 0000000..3409b4c
--- /dev/null
+++ b/lib/regexprops.c
@@ -0,0 +1,614 @@
+/* regexprops.c -- document the properties of the regular expressions
+   understood by gnulib.
+
+   Copyright 2005, 2007, 2010, 2011, 2015 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+
+/*
+  The output of this program is included in the GNU findutils source
+  distribution.  The copying conditions for that file are generated
+  by the copying() function below.
+*/
+
+/* Written by James Youngman, <jay@gnu.org>. */
+
+/* config.h must be included first. */
+#include <config.h>
+
+/* system headers */
+#include <errno.h>
+#include <regex.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+/* gnulib headers */
+#include "progname.h"
+
+/* find headers */
+#include "regextype.h"
+
+static void
+output (const char *s, int escape)  /* Write S to stdout unchanged.  */
+{
+  (void) escape;  /* ESCAPE is ignored; the cast marks it as unused.  */
+
+  fputs (s, stdout);
+}
+
+
+static void
+newline (void)  /* Emit a single line break.  */
+{
+  output ("\n", 0);
+}
+
+static void
+content (const char *s)  /* Emit S via output with the escape flag set.  */
+{
+  output (s, 1);
+}
+
+static void
+literal (const char *s)  /* Emit S via output with the escape flag clear.  */
+{
+  output (s, 0);
+}
+
+static void
+directive (const char *s)  /* Emit the Texinfo directive text S as is.  */
+{
+  output (s, 0);
+}
+
+static void
+comment (const char *s)  /* Emit S as one Texinfo comment line.  */
+{
+  directive ("@c ");  /* comment marker */
+  literal (s);  /* comment text, passed through unchanged */
+  newline ();  /* the line break ends the comment */
+}
+
+static void
+enum_item (const char *s)  /* Emit "@item S" surrounded by line breaks.  */
+{
+  newline ();  /* line break ahead of the directive */
+  directive ("@item ");
+  literal (s);
+  newline ();
+}
+
+/* Emit the @node and @subsection header lines for the regex type NAME.
+   NEXT, PREV and UP are accepted but not used.  */
+static void
+begin_subsection (const char *name,
+		  const char *next,
+		  const char *prev,
+		  const char *up)
+{
+  (void) next, (void) prev, (void) up;
+
+  /* "@node NAME regular expression syntax", preceded by a line break.  */
+  newline ();
+  directive ("@node ");
+  content (name);
+  content (" regular expression syntax");
+  newline ();
+  /* "@subsection @samp{NAME} regular expression syntax".  */
+  directive ("@subsection ");
+  literal ("@samp{");
+  content (name);
+  literal ("}");
+  content (" regular expression syntax");
+  newline ();
+}
+
+static void
+begintable_markup (char const *markup)  /* Open a "@table MARKUP" block.  */
+{
+  newline ();  /* line break ahead of the directive */
+  directive ("@table ");
+  literal (markup);
+  newline ();
+}
+
+static void
+endtable (void)  /* Emit "@end table" surrounded by line breaks.  */
+{
+  newline ();
+  directive ("@end table");
+  newline ();
+}
+
+static void
+beginenum (void)  /* Emit "@enumerate" surrounded by line breaks.  */
+{
+  newline ();
+  directive ("@enumerate");
+  newline ();
+}
+
+static void
+endenum (void)  /* Emit "@end enumerate" surrounded by line breaks.  */
+{
+  newline ();
+  directive ("@end enumerate");
+  newline ();
+}
+
+static void
+newpara (void)  /* Emit two line breaks, separating paragraphs.  */
+{
+  content ("\n\n");
+}
+
+
+/* Write a Texinfo description of the regular expression syntax selected
+   by OPTIONS, a mask of RE_* syntax bits.  The text is emitted one
+   feature at a time, and the wording of each part depends on which of
+   the bits are set.  */
+static void
+describe_regex_syntax (int options)
+{
+  newpara ();
+  content ("The character @samp{.} matches any single character");
+  if ( (options & RE_DOT_NEWLINE)  == 0 )
+    {
+      content (" except newline");
+    }
+  if (options & RE_DOT_NOT_NULL)
+    {
+      if ( (options & RE_DOT_NEWLINE)  == 0 )
+	content (" and");
+      else
+	content (" except");
+
+      content (" the null character");
+    }
+  content (".  ");
+  newpara ();
+
+  if (!(options & RE_LIMITED_OPS))
+    {
+      begintable_markup ("@samp");
+      if (options & RE_BK_PLUS_QM)
+	{
+	  enum_item ("\\+");
+	  content ("indicates that the regular expression should match one"
+		   " or more occurrences of the previous atom or regexp.  ");
+	  enum_item ("\\?");
+	  content ("indicates that the regular expression should match zero"
+		   " or one occurrence of the previous atom or regexp.  ");
+	  enum_item ("+ and ? ");
+	  content ("match themselves.  ");
+	}
+      else
+	{
+	  enum_item ("+");
+	  content ("indicates that the regular expression should match one"
+		   " or more occurrences of the previous atom or regexp.  ");
+	  enum_item ("?");
+	  content ("indicates that the regular expression should match zero"
+		   " or one occurrence of the previous atom or regexp.  ");
+	  enum_item ("\\+");
+	  literal ("matches a @samp{+}.  ");
+	  enum_item ("\\?");
+	  literal ("matches a @samp{?}.  ");
+	}
+      endtable ();
+    }
+
+  newpara ();
+
+  content ("Bracket expressions are used to match ranges of characters.  ");
+  literal ("Bracket expressions where the range is backward, for example @samp{[z-a]}, are ");
+  if (options & RE_NO_EMPTY_RANGES)
+    content ("invalid");
+  else
+    content ("ignored");
+  content (".  ");
+
+  if (options &  RE_BACKSLASH_ESCAPE_IN_LISTS)
+    literal ("Within square brackets, @samp{\\} can be used to quote "
+	     "the following character.  ");
+  else
+    literal ("Within square brackets, @samp{\\} is taken literally.  ");
+
+  if (options & RE_CHAR_CLASSES)
+    content ("Character classes are supported; for example "
+	     "@samp{[[:digit:]]} will match a single decimal digit.  ");
+  else
+    literal ("Character classes are not supported, so for example "
+	     "you would need to use @samp{[0-9]} "
+	     "instead of @samp{[[:digit:]]}.  ");
+
+  if (options & RE_HAT_LISTS_NOT_NEWLINE)
+    {
+      literal ("Non-matching lists @samp{[^@dots{}]} do not ever match newline.  ");
+    }
+  newpara ();
+  if (options & RE_NO_GNU_OPS)
+    {
+      content ("GNU extensions are not supported and so "
+	       "@samp{\\w}, @samp{\\W}, @samp{\\<}, @samp{\\>}, @samp{\\b}, @samp{\\B}, @samp{\\`}, and @samp{\\'} "
+	       "match "
+	       "@samp{w}, @samp{W}, @samp{<}, @samp{>}, @samp{b}, @samp{B}, @samp{`}, and @samp{'} respectively.  ");
+    }
+  else
+    {
+      content ("GNU extensions are supported:");
+      beginenum ();
+      enum_item ("@samp{\\w} matches a character within a word");
+      enum_item ("@samp{\\W} matches a character which is not within a word");
+      enum_item ("@samp{\\<} matches the beginning of a word");
+      enum_item ("@samp{\\>} matches the end of a word");
+      enum_item ("@samp{\\b} matches a word boundary");
+      enum_item ("@samp{\\B} matches characters which are not a word boundary");
+      enum_item ("@samp{\\`} matches the beginning of the whole input");
+      enum_item ("@samp{\\'} matches the end of the whole input");
+      endenum ();
+    }
+
+  newpara ();
+
+  if (options & RE_NO_BK_PARENS)
+    {
+      literal ("Grouping is performed with parentheses @samp{()}.  ");
+
+      if (options & RE_UNMATCHED_RIGHT_PAREN_ORD)
+	literal ("An unmatched @samp{)} matches just itself.  ");
+    }
+  else
+    {
+      literal ("Grouping is performed with backslashes followed by parentheses @samp{\\(}, @samp{\\)}.  ");
+    }
+
+  if (options & RE_NO_BK_REFS)
+    {
+      content ("A backslash followed by a digit matches that digit.  ");
+    }
+  else
+    {
+      literal ("A backslash followed by a digit acts as a back-reference and matches the same thing as the previous grouped expression indicated by that number.  For example @samp{\\2} matches the second group expression.  The order of group expressions is determined by the position of their opening parenthesis ");
+      if (options & RE_NO_BK_PARENS)
+	literal ("@samp{(}");
+      else
+	literal ("@samp{\\(}");
+      content (".  ");
+    }
+
+  newpara ();
+  if (!(options & RE_LIMITED_OPS))
+    {
+      if (options & RE_NO_BK_VBAR)
+	literal ("The alternation operator is @samp{|}.  ");
+      else
+	literal ("The alternation operator is @samp{\\|}. ");
+    }
+  newpara ();
+
+  if (options & RE_CONTEXT_INDEP_ANCHORS)
+    {
+      literal ("The characters @samp{^} and @samp{$} always represent the beginning and end of a string respectively, except within square brackets.  Within brackets, @samp{^} can be used to invert the membership of the character class being specified.  ");
+    }
+  else
+    {
+      literal ("The character @samp{^} only represents the beginning of a string when it appears:");
+      beginenum ();
+      enum_item ("\nAt the beginning of a regular expression");
+      enum_item ("After an open-group, signified by ");
+      if (options & RE_NO_BK_PARENS)
+	{
+	  literal ("@samp{(}");
+	}
+      else
+	{
+	  literal ("@samp{\\(}");
+	}
+      newline ();
+      if (!(options & RE_LIMITED_OPS))
+	{
+	  if (options & RE_NEWLINE_ALT)
+	    enum_item ("After a newline");
+
+	  if (options & RE_NO_BK_VBAR )
+	    enum_item ("After the alternation operator @samp{|}");
+	  else
+	    enum_item ("After the alternation operator @samp{\\|}");
+	}
+      endenum ();
+
+      newpara ();
+      literal ("The character @samp{$} only represents the end of a string when it appears:");
+      beginenum ();
+      enum_item ("At the end of a regular expression");
+      enum_item ("Before a close-group, signified by ");
+      if (options & RE_NO_BK_PARENS)
+	{
+	  literal ("@samp{)}");
+	}
+      else
+	{
+	  literal ("@samp{\\)}");
+	}
+      if (!(options & RE_LIMITED_OPS))
+	{
+	  if (options & RE_NEWLINE_ALT)
+	    enum_item ("Before a newline");
+
+	  if (options & RE_NO_BK_VBAR)
+	    enum_item ("Before the alternation operator @samp{|}");
+	  else
+	    enum_item ("Before the alternation operator @samp{\\|}");
+	}
+      endenum ();
+    }
+  newpara ();
+  if (!(options & RE_LIMITED_OPS) )
+    {
+      if ((options & RE_CONTEXT_INDEP_OPS)
+	  && !(options & RE_CONTEXT_INVALID_OPS))
+	{
+	  literal ("The characters @samp{*}, @samp{+} and @samp{?} are special anywhere in a regular expression.  ");
+	}
+      else
+	{
+	  /* RE_BK_PLUS_QM moves only + and ? behind a backslash; the
+	     repetition operator * is never written with one.  */
+	  if (options & RE_BK_PLUS_QM)
+	    literal ("@samp{*}, @samp{\\+} and @samp{\\?} ");
+	  else
+	    literal ("@samp{*}, @samp{+} and @samp{?} ");
+
+	  if (options & RE_CONTEXT_INVALID_OPS)
+	    {
+	      content ("are special at any point in a regular expression except the following places, where they are not allowed:");
+	    }
+	  else
+	    {
+	      content ("are special at any point in a regular expression except:");
+	    }
+	  beginenum ();
+	  enum_item ("At the beginning of a regular expression");
+	  enum_item ("After an open-group, signified by ");
+	  if (options & RE_NO_BK_PARENS)
+	    {
+	      literal ("@samp{(}");
+	    }
+	  else
+	    {
+	      literal ("@samp{\\(}");
+	    }
+	  /* RE_LIMITED_OPS is already known to be clear in this branch.  */
+	  if (options & RE_NEWLINE_ALT)
+	    enum_item ("After a newline");
+
+	  if (options & RE_NO_BK_VBAR)
+	    enum_item ("After the alternation operator @samp{|}");
+	  else
+	    enum_item ("After the alternation operator @samp{\\|}");
+	  endenum ();
+	}
+    }
+
+  newpara ();
+  if (options & RE_INTERVALS)
+    {
+      if (options & RE_NO_BK_BRACES)
+	{
+	  literal ("Intervals are specified by @samp{@{} and @samp{@}}.  ");
+	  if (options & RE_INVALID_INTERVAL_ORD)
+	    {
+	      literal ("Invalid intervals are treated as literals, for example @samp{a@{1} is treated as @samp{a\\@{1}.  ");
+	    }
+	  else
+	    {
+	      literal ("Invalid intervals such as @samp{a@{1z} are not accepted.  ");
+	    }
+	}
+      else
+	{
+	  literal ("Intervals are specified by @samp{\\@{} and @samp{\\@}}.  ");
+	  if (options & RE_INVALID_INTERVAL_ORD)
+	    {
+	      literal ("Invalid intervals are treated as literals, for example @samp{a\\@{1} is treated as @samp{a@{1}.  ");
+	    }
+	  else
+	    {
+	      literal ("Invalid intervals such as @samp{a\\@{1z} are not accepted.  ");
+	    }
+	}
+
+    }
+
+  newpara ();
+  if (options & RE_NO_POSIX_BACKTRACKING)
+    {
+      content ("Matching succeeds as soon as the whole pattern is matched, meaning that the result may not be the longest possible match.  ");
+    }
+  else
+    {
+      content ("The longest possible match is returned; this applies to the regular expression as a whole and (subject to this constraint) to subexpressions within groups.  ");
+    }
+  newpara ();
+}
+
+
+/* Emit the copying notice for the generated file as Texinfo comments.  */
+static void
+copying (void)
+{
+  static const char *const copy_para[] = {
+    "Copyright (C) 1994, 1996, 1998, 2000, 2001, 2003, 2004, 2005, 2006,",
+    "2007, 2009, 2010, 2011 Free Software Foundation, Inc.",
+    "",
+    "Permission is granted to copy, distribute and/or modify this document",
+    "under the terms of the GNU Free Documentation License, Version 1.3 or",
+    "any later version published by the Free Software Foundation; with no",
+    "Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.",
+    "A copy of the license is included in the ``GNU Free",
+    "Documentation License'' file as part of this distribution.",
+    "",  /* a separate entry, so the notice ends with an empty comment */
+    NULL  /* end-of-list marker for the loop below */
+  };
+  const char *const *s;
+  for (s = copy_para; *s; ++s)
+    comment (*s);
+}
+
+static int  /* 1 if regex type IX has no context bit in common with CONTEXT */
+ignore (int ix, const unsigned int context)
+{
+  return !(get_regex_type_context (ix) & context);
+}
+
+/* Emit an @menu block holding one entry for every regex type that is
+   not ignored in CONTEXT.  */
+static void
+menu (unsigned int context)
+{
+  const char *name;
+  int i;
+
+  output ("@menu\n", 0);
+  for (i = 0; (name = get_regex_type_name (i)) != NULL; ++i)
+    {
+      if (ignore (i, context))
+	continue;
+
+      /* Entry format: "* NAME regular expression syntax::".  */
+      output ("* ", 0);
+      output (name, 0);
+      content (" regular expression syntax");
+      output ("::", 0);
+      newline ();
+    }
+  output ("@end menu\n", 0);
+}
+
+
+
+/* Return the name of the first regex type, searching from index IX
+   upwards, that is not ignored in CONTEXT.  Return "" (never NULL) when
+   the list of type names ends before such a type is found.  */
+static const char *
+get_next (unsigned int ix, unsigned int context)
+{
+  const char *name;
+
+  /* A NAME that passes the loop test is non-null, so it can be
+     returned as it stands without a second lookup.  */
+  for (; (name = get_regex_type_name (ix)) != NULL; ++ix)
+    {
+      if (!ignore (ix, context))
+	return name;
+    }
+
+  return "";
+}
+
+
+/* Generate the whole document for the context called CONTEXTNAME: the
+   copying notice, a comment naming the context, a menu, and then one
+   subsection per regex type that is not ignored in CONTEXT.  Types
+   that are skipped are reported on stderr.  UP is passed through to
+   begin_subsection.  */
+static void
+describe_all (const char *contextname,
+	      unsigned int context,
+	      const char *up)
+{
+  const char *name;
+  const char *previous = "";
+  int i;
+
+  copying ();
+  newline ();
+  literal ("@c this regular expression description is for: ");
+  literal (contextname);
+  newline ();
+  newline ();
+  menu (context);
+
+  for (i = 0; (name = get_regex_type_name (i)) != NULL; ++i)
+    {
+      int parent;
+
+      if (ignore (i, context))
+	{
+	  fprintf (stderr,
+		   "Skipping regexp type %s for context %s\n",
+		   name, contextname);
+	  continue;
+	}
+
+      /* get_next yields "" rather than NULL when nothing follows.  */
+      begin_subsection (name, get_next (i + 1, context), previous, up);
+      parent = get_regex_type_synonym (i);
+      if (parent >= 0)
+	{
+	  content ("This is a synonym for ");
+	  content (get_regex_type_name (parent));
+	  content (".");
+	}
+      else
+	{
+	  /* Only non-synonym types have their syntax bits described.  */
+	  describe_regex_syntax (get_regex_type_flags (i));
+	}
+      previous = name;
+    }
+}
+
+
+
+/* Usage: regexprops [UP [CONTEXT]].  UP is passed on to describe_all;
+   CONTEXT is "findutils", "generic" or "all" (the default).  Return 1 if
+   CONTEXT is not recognised or stdout reports a write error, else 0.  */
+int
+main (int argc, char *argv[])
+{
+  const char *up = "";
+  unsigned int context = CONTEXT_ALL;
+  const char *contextname = "all";
+
+  set_program_name (argc ? argv[0] : "regexprops");
+
+  if (argc > 1)
+    up = argv[1];
+
+  if (argc > 2)
+    {
+      contextname = argv[2];
+      if (0 == strcmp (contextname, "findutils"))
+	context = CONTEXT_FINDUTILS;
+      else if (0 == strcmp (contextname, "generic"))
+	context = CONTEXT_GENERIC;
+      else if (0 == strcmp (contextname, "all"))
+	context = CONTEXT_ALL;
+      else
+	{
+	  /* Terminate the diagnostic so that it forms a complete line.  */
+	  fprintf (stderr, "Unexpected context %s\n", contextname);
+	  return 1;
+	}
+    }
+
+  describe_all (contextname, context, up);
+  /* Do not report success if the generated text could not be written.  */
+  return (fflush (stdout) != 0 || ferror (stdout)) ? 1 : 0;
+}