diff options
Diffstat (limited to 'texinfo/info/search.c')
-rw-r--r-- | texinfo/info/search.c | 519 |
1 files changed, 519 insertions, 0 deletions
diff --git a/texinfo/info/search.c b/texinfo/info/search.c new file mode 100644 index 00000000000..c5fd47794b0 --- /dev/null +++ b/texinfo/info/search.c @@ -0,0 +1,519 @@ +/* search.c -- How to search large bodies of text. */ + +/* This file is part of GNU Info, a program for reading online documentation + stored in Info format. + + Copyright (C) 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Written by Brian Fox (bfox@ai.mit.edu). */ + +#include <ctype.h> +#include <sys/types.h> +#include <sys/stat.h> +#include "general.h" +#include "search.h" +#include "nodes.h" + +#if !defined (NULL) +# define NULL 0x0 +#endif /* !NULL */ + +/* The search functions take two arguments: + + 1) a string to search for, and + + 2) a pointer to a SEARCH_BINDING which contains the buffer, start, + and end of the search. + + They return a long, which is the offset from the start of the buffer + at which the match was found. An offset of -1 indicates failure. */ + +/* A function which makes a binding with buffer and bounds. */ +SEARCH_BINDING * +make_binding (buffer, start, end) + char *buffer; + long start, end; +{ + SEARCH_BINDING *binding; + + binding = (SEARCH_BINDING *)xmalloc (sizeof (SEARCH_BINDING)); + binding->buffer = buffer; + binding->start = start; + binding->end = end; + binding->flags = 0; + + return (binding); +} + +/* Make a copy of BINDING without duplicating the data. */ +SEARCH_BINDING * +copy_binding (binding) + SEARCH_BINDING *binding; +{ + SEARCH_BINDING *copy; + + copy = make_binding (binding->buffer, binding->start, binding->end); + copy->flags = binding->flags; + return (copy); +} + + +/* **************************************************************** */ +/* */ +/* The Actual Searching Functions */ +/* */ +/* **************************************************************** */ + +/* Search forwards or backwards for the text delimited by BINDING. + The search is forwards if BINDING->start is greater than BINDING->end. */ +long +search (string, binding) + char *string; + SEARCH_BINDING *binding; +{ + long result; + + /* If the search is backwards, then search backwards, otherwise forwards. */ + if (binding->start > binding->end) + result = search_backward (string, binding); + else + result = search_forward (string, binding); + + return (result); +} + +/* Search forwards for STRING through the text delimited in BINDING. */ +long +search_forward (string, binding) + char *string; + SEARCH_BINDING *binding; +{ + register int c, i, len; + register char *buff, *end; + char *alternate = (char *)NULL; + + len = strlen (string); + + /* We match characters in the search buffer against STRING and ALTERNATE. + ALTERNATE is a case reversed version of STRING; this is cheaper than + case folding each character before comparison. Alternate is only + used if the case folding bit is turned on in the passed BINDING. */ + + if (binding->flags & S_FoldCase) + { + alternate = strdup (string); + + for (i = 0; i < len; i++) + { + if (islower (alternate[i])) + alternate[i] = toupper (alternate[i]); + else if (isupper (alternate[i])) + alternate[i] = tolower (alternate[i]); + } + } + + buff = binding->buffer + binding->start; + end = binding->buffer + binding->end + 1; + + while (buff < (end - len)) + { + for (i = 0; i < len; i++) + { + c = buff[i]; + + if ((c != string[i]) && (!alternate || c != alternate[i])) + break; + } + + if (!string[i]) + { + if (alternate) + free (alternate); + if (binding->flags & S_SkipDest) + buff += len; + return ((long) (buff - binding->buffer)); + } + + buff++; + } + + if (alternate) + free (alternate); + + return ((long) -1); +} + +/* Search for STRING backwards through the text delimited in BINDING. */ +long +search_backward (input_string, binding) + char *input_string; + SEARCH_BINDING *binding; +{ + register int c, i, len; + register char *buff, *end; + char *string; + char *alternate = (char *)NULL; + + len = strlen (input_string); + + /* Reverse the characters in the search string. */ + string = (char *)xmalloc (1 + len); + for (c = 0, i = len - 1; input_string[c]; c++, i--) + string[i] = input_string[c]; + + string[c] = '\0'; + + /* We match characters in the search buffer against STRING and ALTERNATE. + ALTERNATE is a case reversed version of STRING; this is cheaper than + case folding each character before comparison. ALTERNATE is only + used if the case folding bit is turned on in the passed BINDING. */ + + if (binding->flags & S_FoldCase) + { + alternate = strdup (string); + + for (i = 0; i < len; i++) + { + if (islower (alternate[i])) + alternate[i] = toupper (alternate[i]); + else if (isupper (alternate[i])) + alternate[i] = tolower (alternate[i]); + } + } + + buff = binding->buffer + binding->start - 1; + end = binding->buffer + binding->end; + + while (buff > (end + len)) + { + for (i = 0; i < len; i++) + { + c = *(buff - i); + + if (c != string[i] && (alternate && c != alternate[i])) + break; + } + + if (!string[i]) + { + free (string); + if (alternate) + free (alternate); + + if (binding->flags & S_SkipDest) + buff -= len; + return ((long) (1 + (buff - binding->buffer))); + } + + buff--; + } + + free (string); + if (alternate) + free (alternate); + + return ((long) -1); +} + +/* Find STRING in LINE, returning the offset of the end of the string. + Return an offset of -1 if STRING does not appear in LINE. The search + is bound by the end of the line (i.e., either NEWLINE or 0). */ +int +string_in_line (string, line) + char *string, *line; +{ + register int end; + SEARCH_BINDING binding; + + /* Find the end of the line. */ + for (end = 0; line[end] && line[end] != '\n'; end++); + + /* Search for STRING within these confines. */ + binding.buffer = line; + binding.start = 0; + binding.end = end; + binding.flags = S_FoldCase | S_SkipDest; + + return (search_forward (string, &binding)); +} + +/* Return non-zero if STRING is the first text to appear at BINDING. */ +int +looking_at (string, binding) + char *string; + SEARCH_BINDING *binding; +{ + long search_end; + + search_end = search (string, binding); + + /* If the string was not found, SEARCH_END is -1. If the string was found, + but not right away, SEARCH_END is != binding->start. Otherwise, the + string was found at binding->start. */ + return (search_end == binding->start); +} + +/* **************************************************************** */ +/* */ +/* Small String Searches */ +/* */ +/* **************************************************************** */ + +/* Function names that start with "skip" are passed a string, and return + an offset from the start of that string. Function names that start + with "find" are passed a SEARCH_BINDING, and return an absolute position + marker of the item being searched for. "Find" functions return a value + of -1 if the item being looked for couldn't be found. */ + +/* Return the index of the first non-whitespace character in STRING. */ +int +skip_whitespace (string) + char *string; +{ + register int i; + + for (i = 0; string && whitespace (string[i]); i++); + return (i); +} + +/* Return the index of the first non-whitespace or newline character in + STRING. */ +int +skip_whitespace_and_newlines (string) + char *string; +{ + register int i; + + for (i = 0; string && (whitespace (string[i]) || string[i] == '\n'); i++); + return (i); +} + +/* Return the index of the first whitespace character in STRING. */ +int +skip_non_whitespace (string) + char *string; +{ + register int i; + + for (i = 0; string && !whitespace (string[i]); i++); + return (i); +} + +/* Return the index of the first non-node character in STRING. Note that + this function contains quite a bit of hair to ignore periods in some + special cases. This is because we here at GNU ship some info files which + contain nodenames that contain periods. No such nodename can start with + a period, or continue with whitespace, newline, or ')' immediately following + the period. If second argument NEWLINES_OKAY is non-zero, newlines should + be skipped while parsing out the nodename specification. */ +int +skip_node_characters (string, newlines_okay) + char *string; + int newlines_okay; +{ + register int c, i = 0; + int paren_seen = 0; + int paren = 0; + + /* Handle special case. This is when another function has parsed out the + filename component of the node name, and we just want to parse out the + nodename proper. In that case, a period at the start of the nodename + indicates an empty nodename. */ + if (string && *string == '.') + return (0); + + if (string && *string == '(') + { + paren++; + paren_seen++; + i++; + } + + for (; string && (c = string[i]); i++) + { + if (paren) + { + if (c == '(') + paren++; + else if (c == ')') + paren--; + + continue; + } + + /* If the character following the close paren is a space or period, + then this node name has no more characters associated with it. */ + if (c == '\t' || + c == ',' || + c == INFO_TAGSEP || + ((!newlines_okay) && (c == '\n')) || + ((paren_seen && string[i - 1] == ')') && + (c == ' ' || c == '.')) || + (c == '.' && + ((!string[i + 1]) || + (whitespace_or_newline (string[i + 1])) || + (string[i + 1] == ')')))) + break; + } + return (i); +} + + +/* **************************************************************** */ +/* */ +/* Searching FILE_BUFFER's */ +/* */ +/* **************************************************************** */ + +/* Return the absolute position of the first occurence of a node separator in + BINDING-buffer. The search starts at BINDING->start. Return -1 if no node + separator was found. */ +long +find_node_separator (binding) + SEARCH_BINDING *binding; +{ + register long i; + char *body; + + body = binding->buffer; + + /* A node is started by [^L]^_[^L]\n. That is to say, the C-l's are + optional, but the DELETE and NEWLINE are not. This separator holds + true for all separated elements in an Info file, including the tags + table (if present) and the indirect tags table (if present). */ + for (i = binding->start; i < binding->end - 1; i++) + if (((body[i] == INFO_FF && body[i + 1] == INFO_COOKIE) && + (body[i + 2] == '\n' || + (body[i + 2] == INFO_FF && body[i + 3] == '\n'))) || + ((body[i] == INFO_COOKIE) && + (body[i + 1] == '\n' || + (body[i + 1] == INFO_FF && body[i + 2] == '\n')))) + return (i); + return (-1); +} + +/* Return the length of the node separator characters that BODY is + currently pointing at. */ +int +skip_node_separator (body) + char *body; +{ + register int i; + + i = 0; + + if (body[i] == INFO_FF) + i++; + + if (body[i++] != INFO_COOKIE) + return (0); + + if (body[i] == INFO_FF) + i++; + + if (body[i++] != '\n') + return (0); + + return (i); +} + +/* Return the number of characters from STRING to the start of + the next line. */ +int +skip_line (string) + char *string; +{ + register int i; + + for (i = 0; string && string[i] && string[i] != '\n'; i++); + + if (string[i] == '\n') + i++; + + return (i); +} + +/* Return the absolute position of the beginning of a tags table in this + binding starting the search at binding->start. */ +long +find_tags_table (binding) + SEARCH_BINDING *binding; +{ + SEARCH_BINDING search; + long position; + + search.buffer = binding->buffer; + search.start = binding->start; + search.end = binding->end; + search.flags = S_FoldCase; + + while ((position = find_node_separator (&search)) != -1 ) + { + search.start = position; + search.start += skip_node_separator (search.buffer + search.start); + + if (looking_at (TAGS_TABLE_BEG_LABEL, &search)) + return (position); + } + return (-1); +} + +/* Return the absolute position of the node named NODENAME in BINDING. + This is a brute force search, and we wish to avoid it when possible. + This function is called when a tag (indirect or otherwise) doesn't + really point to the right node. It returns the absolute position of + the separator preceding the node. */ +long +find_node_in_binding (nodename, binding) + char *nodename; + SEARCH_BINDING *binding; +{ + register long position; + register int offset, namelen; + SEARCH_BINDING search; + + namelen = strlen (nodename); + + search.buffer = binding->buffer; + search.start = binding->start; + search.end = binding->end; + search.flags = 0; + + while ((position = find_node_separator (&search)) != -1) + { + search.start = position; + search.start += skip_node_separator (search.buffer + search.start); + + offset = string_in_line (INFO_NODE_LABEL, search.buffer + search.start); + + if (offset == -1) + continue; + + search.start += offset; + search.start += skip_whitespace (search.buffer + search.start); + offset = skip_node_characters + (search.buffer + search.start, DONT_SKIP_NEWLINES); + + /* Notice that this is an exact match. You cannot grovel through + the buffer with this function looking for random nodes. */ + if ((offset == namelen) && + (search.buffer[search.start] == nodename[0]) && + (strncmp (search.buffer + search.start, nodename, offset) == 0)) + return (position); + } + return (-1); +} |