1 files changed, 677 insertions, 0 deletions
diff --git a/info/search.c b/info/search.c
new file mode 100644
index 0000000..44010bf
--- /dev/null
+++ b/info/search.c
@@ -0,0 +1,677 @@
+/* search.c -- searching large bodies of text.
+   $Id: search.c,v 1.9 2008/06/11 09:55:42 gray Exp $
+
+   Copyright (C) 1993, 1997, 1998, 2002, 2004, 2007, 2008
+   Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+   Originally written by Brian Fox (bfox@ai.mit.edu). */
+
+#include "info.h"
+#include <regex.h>
+
+#include "search.h"
+#include "nodes.h"
+
+/* The search functions take two arguments:
+
+     1) a string to search for, and
+
+     2) a pointer to a SEARCH_BINDING which contains the buffer, start,
+        and end of the search.
+
+   They return a long, which is the offset from the start of the buffer
+   at which the match was found.  An offset of -1 indicates failure. */
+
+/* A function which makes a binding with buffer and bounds. */
+SEARCH_BINDING *
+make_binding (char *buffer, long int start, long int end)
+{
+  SEARCH_BINDING *binding;
+
+  binding = xmalloc (sizeof (SEARCH_BINDING));
+  binding->buffer = buffer;
+  binding->start = start;
+  binding->end = end;
+  binding->flags = 0;
+
+  return binding;
+}
+
+/* Make a copy of BINDING without duplicating the data. */
+SEARCH_BINDING *
+copy_binding (SEARCH_BINDING *binding)
+{
+  SEARCH_BINDING *copy;
+
+  copy = make_binding (binding->buffer, binding->start, binding->end);
+  copy->flags = binding->flags;
+  return copy;
+}
+
+
+/* **************************************************************** */
+/*                                                                  */
+/*                 The Actual Searching Functions                   */
+/*                                                                  */
+/* **************************************************************** */
+
+/* Search forwards or backwards for the text delimited by BINDING.
+   The search is forwards if BINDING->start is greater than BINDING->end. */
+long
+search (char *string, SEARCH_BINDING *binding)
+{
+  long result;
+
+  /* If the search is backwards, then search backwards, otherwise forwards. */
+  if (binding->start > binding->end)
+    result = search_backward (string, binding);
+  else
+    result = search_forward (string, binding);
+
+  return result;
+}
+
+/* Search forwards or backwards for anything matching the regexp in the text
+   delimited by BINDING. The search is forwards if BINDING->start is greater
+   than BINDING->end.
+
+   If PRET is specified, it receives a copy of BINDING at the end of a
+   succeded search.  Its START and END fields contain bounds of the found
+   string instance. 
+*/
+long
+regexp_search (char *regexp, SEARCH_BINDING *binding, long length,
+	       SEARCH_BINDING *pret)
+{
+  static char *previous_regexp = NULL;
+  static char *previous_content = NULL;
+  static int was_insensitive = 0;
+  static regex_t preg;
+  static regmatch_t *matches;
+  static int match_alloc = 0;
+  static int match_count = 0;
+  regoff_t pos;
+
+  if (previous_regexp == NULL
+      || ((binding->flags & S_FoldCase) != was_insensitive)
+      || (strcmp (previous_regexp, regexp) != 0))
+    {
+      /* need to compile a new regexp */
+      int result;
+      char *unescaped_regexp;
+      char *p, *q;
+
+      previous_content = NULL;
+
+      if (previous_regexp != NULL)
+        {
+          free (previous_regexp);
+          previous_regexp = NULL;
+          regfree (&preg);
+        }
+
+      was_insensitive = binding->flags & S_FoldCase;
+
+      /* expand the \n and \t in regexp */
+      unescaped_regexp = xmalloc (1 + strlen (regexp));
+      for (p = regexp, q = unescaped_regexp; *p != '\0'; p++, q++)
+        {
+          if (*p == '\\')
+            switch(*++p)
+              {
+              case 'n':
+                *q = '\n';
+                break;
+              case 't':
+                *q = '\t';
+                break;
+              case '\0':
+                *q = '\\';
+                p--;
+                break;
+              default:
+                *q++ = '\\';
+                *q = *p;
+                break;
+              }
+          else
+            *q = *p;
+        }
+      *q = '\0';
+
+      result = regcomp (&preg, unescaped_regexp,
+                       REG_EXTENDED|
+                       REG_NEWLINE|
+                       (was_insensitive ? REG_ICASE : 0));
+      free (unescaped_regexp);
+
+      if (result != 0)
+        {
+          int size = regerror (result, &preg, NULL, 0);
+          char *buf = xmalloc (size);
+          regerror (result, &preg, buf, size);
+          info_error (_("regexp error: %s"), buf, NULL);
+          return -1;
+        }
+
+      previous_regexp = xstrdup(regexp);
+    }
+
+  if (previous_content != binding->buffer)
+    {
+      /* new buffer to search in, let's scan it */
+      regoff_t start = 0;
+      char saved_char;
+
+      previous_content = binding->buffer;
+      saved_char = previous_content[length-1];
+      previous_content[length-1] = '\0';
+
+      for (match_count = 0; start < length; )
+        {
+          int result = 0;
+          if (match_count >= match_alloc)
+            {
+              /* match list full. Initially allocate 256 entries, then double
+                 every time we fill it */
+              match_alloc = (match_alloc > 0 ? match_alloc * 2 : 256);
+              matches = xrealloc (matches,
+				  match_alloc * sizeof(regmatch_t));
+            }
+
+          result = regexec (&preg, &previous_content[start],
+                            1, &matches[match_count], 0);
+          if (result == 0)
+            {
+              if (matches[match_count].rm_eo == 0)
+                {
+                  /* ignore empty matches */
+                  start++;
+                }
+              else
+                {
+                  matches[match_count].rm_so += start;
+                  matches[match_count].rm_eo += start;
+                  start = matches[match_count++].rm_eo;
+                }
+            }
+          else
+            {
+              break;
+            }
+        }
+      previous_content[length-1] = saved_char;
+    }
+
+  pos = binding->start;
+  if (pos > binding->end)
+    {
+      /* searching backward */
+      int i;
+      for (i = match_count - 1; i >= 0; i--)
+        {
+          if (matches[i].rm_so <= pos)
+	    {
+	      if (pret)
+		{
+		  pret->buffer = binding->buffer;
+		  pret->flags = binding->flags;
+		  pret->start = matches[i].rm_so;
+		  pret->end = matches[i].rm_eo;
+		}
+	      return matches[i].rm_so;
+	    }
+        }
+    }
+  else
+    {
+      /* searching forward */
+      int i;
+      for (i = 0; i < match_count; i++)
+        {
+          if (matches[i].rm_so >= pos)
+            {
+	      if (pret)
+		{
+		  pret->buffer = binding->buffer;
+		  pret->flags = binding->flags;
+		  pret->start = matches[i].rm_so;
+		  pret->end = matches[i].rm_eo;
+		}
+              if (binding->flags & S_SkipDest)
+                return matches[i].rm_eo;
+              else
+                return matches[i].rm_so;
+            }
+        }
+    }
+
+  /* not found */
+  return -1;
+}
+
+/* Search forwards for STRING through the text delimited in BINDING. */
+long
+search_forward (char *string, SEARCH_BINDING *binding)
+{
+  register int c, i, len;
+  register char *buff, *end;
+  char *alternate = NULL;
+
+  len = strlen (string);
+
+  /* We match characters in the search buffer against STRING and ALTERNATE.
+     ALTERNATE is a case reversed version of STRING; this is cheaper than
+     case folding each character before comparison.   Alternate is only
+     used if the case folding bit is turned on in the passed BINDING. */
+
+  if (binding->flags & S_FoldCase)
+    {
+      alternate = xstrdup (string);
+
+      for (i = 0; i < len; i++)
+        {
+          if (islower (alternate[i]))
+            alternate[i] = toupper (alternate[i]);
+          else if (isupper (alternate[i]))
+            alternate[i] = tolower (alternate[i]);
+        }
+    }
+
+  buff = binding->buffer + binding->start;
+  end = binding->buffer + binding->end + 1;
+
+  while (buff < (end - len))
+    {
+      for (i = 0; i < len; i++)
+        {
+          c = buff[i];
+
+          if ((c != string[i]) && (!alternate || c != alternate[i]))
+            break;
+        }
+
+      if (!string[i])
+        {
+          if (alternate)
+            free (alternate);
+          if (binding->flags & S_SkipDest)
+            buff += len;
+          return buff - binding->buffer;
+        }
+
+      buff++;
+    }
+
+  if (alternate)
+    free (alternate);
+
+  return -1;
+}
+
+/* Search for STRING backwards through the text delimited in BINDING. */
+long
+search_backward (char *input_string, SEARCH_BINDING *binding)
+{
+  register int c, i, len;
+  register char *buff, *end;
+  char *string;
+  char *alternate = NULL;
+
+  len = strlen (input_string);
+
+  /* Reverse the characters in the search string. */
+  string = xmalloc (1 + len);
+  for (c = 0, i = len - 1; input_string[c]; c++, i--)
+    string[i] = input_string[c];
+
+  string[c] = '\0';
+
+  /* We match characters in the search buffer against STRING and ALTERNATE.
+     ALTERNATE is a case reversed version of STRING; this is cheaper than
+     case folding each character before comparison.   ALTERNATE is only
+     used if the case folding bit is turned on in the passed BINDING. */
+
+  if (binding->flags & S_FoldCase)
+    {
+      alternate = xstrdup (string);
+
+      for (i = 0; i < len; i++)
+        {
+          if (islower (alternate[i]))
+            alternate[i] = toupper (alternate[i]);
+          else if (isupper (alternate[i]))
+            alternate[i] = tolower (alternate[i]);
+        }
+    }
+
+  buff = binding->buffer + binding->start - 1;
+  end = binding->buffer + binding->end;
+
+  while (buff > (end + len))
+    {
+      for (i = 0; i < len; i++)
+        {
+          c = *(buff - i);
+
+          if (c != string[i] && (!alternate || c != alternate[i]))
+            break;
+        }
+
+      if (!string[i])
+        {
+          free (string);
+          if (alternate)
+            free (alternate);
+
+          if (binding->flags & S_SkipDest)
+            buff -= len;
+          return 1 + buff - binding->buffer;
+        }
+
+      buff--;
+    }
+
+  free (string);
+  if (alternate)
+    free (alternate);
+
+  return -1;
+}
+
+/* Find STRING in LINE, returning the offset of the end of the string.
+   Return an offset of -1 if STRING does not appear in LINE.  The search
+   is bound by the end of the line (i.e., either NEWLINE or 0). */
+int
+string_in_line (char *string, char *line)
+{
+  register int end;
+  SEARCH_BINDING binding;
+
+  /* Find the end of the line. */
+  for (end = 0; line[end] && line[end] != '\n'; end++);
+
+  /* Search for STRING within these confines. */
+  binding.buffer = line;
+  binding.start = 0;
+  binding.end = end;
+  binding.flags = S_FoldCase | S_SkipDest;
+
+  return search_forward (string, &binding);
+}
+
+/* Return non-zero if STRING is the first text to appear at BINDING. */
+int
+looking_at (char *string, SEARCH_BINDING *binding)
+{
+  long search_end;
+
+  search_end = search (string, binding);
+
+  /* If the string was not found, SEARCH_END is -1.  If the string was found,
+     but not right away, SEARCH_END is != binding->start.  Otherwise, the
+     string was found at binding->start. */
+  return search_end == binding->start;
+}
+
+/* **************************************************************** */
+/*                                                                  */
+/*                    Small String Searches                         */
+/*                                                                  */
+/* **************************************************************** */
+
+/* Function names that start with "skip" are passed a string, and return
+   an offset from the start of that string.  Function names that start
+   with "find" are passed a SEARCH_BINDING, and return an absolute position
+   marker of the item being searched for.  "Find" functions return a value
+   of -1 if the item being looked for couldn't be found. */
+
+/* Return the index of the first non-whitespace character in STRING. */
+int
+skip_whitespace (char *string)
+{
+  register int i;
+
+  for (i = 0; string && whitespace (string[i]); i++);
+  return i;
+}
+
+/* Return the index of the first non-whitespace or newline character in
+   STRING. */
+int
+skip_whitespace_and_newlines (char *string)
+{
+  register int i;
+
+  for (i = 0; string && whitespace_or_newline (string[i]); i++);
+  return i;
+}
+
+/* Return the index of the first whitespace character in STRING. */
+int
+skip_non_whitespace (char *string)
+{
+  register int i;
+
+  for (i = 0; string && string[i] && !whitespace (string[i]); i++);
+  return i;
+}
+
+/* Return the index of the first non-node character in STRING.  Note that
+   this function contains quite a bit of hair to ignore periods in some
+   special cases.  This is because we here at GNU ship some info files which
+   contain nodenames that contain periods.  No such nodename can start with
+   a period, or continue with whitespace, newline, or ')' immediately following
+   the period.  If second argument NEWLINES_OKAY is non-zero, newlines should
+   be skipped while parsing out the nodename specification. */
+int
+skip_node_characters (char *string, int newlines_okay)
+{
+  register int c, i = 0;
+  int paren_seen = 0;
+  int paren = 0;
+
+  /* Handle special case.  This is when another function has parsed out the
+     filename component of the node name, and we just want to parse out the
+     nodename proper.  In that case, a period at the start of the nodename
+     indicates an empty nodename. */
+  if (string && *string == '.')
+    return 0;
+
+  if (string && *string == '(')
+    {
+      paren++;
+      paren_seen++;
+      i++;
+    }
+
+  for (; string && (c = string[i]); i++)
+    {
+      if (paren)
+        {
+          if (c == '(')
+            paren++;
+          else if (c == ')')
+            paren--;
+
+          continue;
+        }
+      
+      /* If the character following the close paren is a space or period,
+         then this node name has no more characters associated with it. */
+      if (c == '\t' ||
+          c == ','  ||
+          c == INFO_TAGSEP ||
+          ((!newlines_okay) && (c == '\n')) ||
+          ((paren_seen && string[i - 1] == ')') &&
+           (c == ' ' || c == '.')) ||
+          (c == '.' &&
+           (
+#if 0
+/* This test causes a node name ending in a period, like `This.', not to
+   be found.  The trailing . is stripped.  This occurs in the jargon
+   file (`I see no X here.' is a node name).  */
+           (!string[i + 1]) ||
+#endif
+            (whitespace_or_newline (string[i + 1])) ||
+            (string[i + 1] == ')'))))
+        break;
+    }
+  return i;
+}
+
+
+/* **************************************************************** */
+/*                                                                  */
+/*                   Searching FILE_BUFFER's                        */
+/*                                                                  */
+/* **************************************************************** */
+
+/* Return the absolute position of the first occurence of a node separator in
+   BINDING-buffer.  The search starts at BINDING->start.  Return -1 if no node
+   separator was found. */
+long
+find_node_separator (SEARCH_BINDING *binding)
+{
+  register long i;
+  char *body;
+
+  body = binding->buffer;
+
+  /* A node is started by [^L]^_[^L]\n.  That is to say, the C-l's are
+     optional, but the DELETE and NEWLINE are not.  This separator holds
+     true for all separated elements in an Info file, including the tags
+     table (if present) and the indirect tags table (if present). */
+  for (i = binding->start; i < binding->end - 1; i++)
+    if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) &&
+         (body[i + 2] == '\n' ||
+          (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) ||
+        ((body[i] == INFO_COOKIE) &&
+         (body[i + 1] == '\n' ||
+          (body[i + 1] == INFO_FF && body[i + 2] == '\n'))))
+      return i;
+  return -1;
+}
+
+/* Return the length of the node separator characters that BODY is
+   currently pointing at. */
+int
+skip_node_separator (char *body)
+{
+  register int i;
+
+  i = 0;
+
+  if (body[i] == INFO_FF)
+    i++;
+
+  if (body[i++] != INFO_COOKIE)
+    return 0;
+
+  if (body[i] == INFO_FF)
+    i++;
+
+  if (body[i++] != '\n')
+    return 0;
+
+  return i;
+}
+
+/* Return the number of characters from STRING to the start of
+   the next line. */
+int
+skip_line (char *string)
+{
+  register int i;
+
+  for (i = 0; string && string[i] && string[i] != '\n'; i++);
+
+  if (string[i] == '\n')
+    i++;
+
+  return i;
+}
+
+/* Return the absolute position of the beginning of a tags table in this
+   binding starting the search at binding->start. */
+long
+find_tags_table (SEARCH_BINDING *binding)
+{
+  SEARCH_BINDING tmp_search;
+  long position;
+
+  tmp_search.buffer = binding->buffer;
+  tmp_search.start = binding->start;
+  tmp_search.end = binding->end;
+  tmp_search.flags = S_FoldCase;
+
+  while ((position = find_node_separator (&tmp_search)) != -1 )
+    {
+      tmp_search.start = position;
+      tmp_search.start += skip_node_separator (tmp_search.buffer
+          + tmp_search.start);
+
+      if (looking_at (TAGS_TABLE_BEG_LABEL, &tmp_search))
+        return position;
+    }
+  return -1;
+}
+
+/* Return the absolute position of the node named NODENAME in BINDING.
+   This is a brute force search, and we wish to avoid it when possible.
+   This function is called when a tag (indirect or otherwise) doesn't
+   really point to the right node.  It returns the absolute position of
+   the separator preceding the node. */
+long
+find_node_in_binding (char *nodename, SEARCH_BINDING *binding)
+{
+  long position;
+  int offset, namelen;
+  SEARCH_BINDING tmp_search;
+
+  namelen = strlen (nodename);
+
+  tmp_search.buffer = binding->buffer;
+  tmp_search.start = binding->start;
+  tmp_search.end = binding->end;
+  tmp_search.flags = 0;
+
+  while ((position = find_node_separator (&tmp_search)) != -1)
+    {
+      tmp_search.start = position;
+      tmp_search.start += skip_node_separator
+        (tmp_search.buffer + tmp_search.start);
+
+      offset = string_in_line
+        (INFO_NODE_LABEL, tmp_search.buffer + tmp_search.start);
+
+      if (offset == -1)
+        continue;
+
+      tmp_search.start += offset;
+      tmp_search.start += skip_whitespace (tmp_search.buffer + tmp_search.start);
+      offset = skip_node_characters
+        (tmp_search.buffer + tmp_search.start, DONT_SKIP_NEWLINES);
+
+      /* Notice that this is an exact match.  You cannot grovel through
+         the buffer with this function looking for random nodes. */
+       if ((offset == namelen) &&
+           (tmp_search.buffer[tmp_search.start] == nodename[0]) &&
+           (strncmp (tmp_search.buffer + tmp_search.start, nodename, offset) == 0))
+         return position;
+    }
+  return -1;
+}