1 files changed, 1156 insertions, 0 deletions
diff --git a/src/input.c b/src/input.c
new file mode 100644
index 0000000..579fadd
--- /dev/null
+++ b/src/input.c
@@ -0,0 +1,1156 @@
+/* GNU m4 -- A simple macro processor
+
+   Copyright (C) 1989-1994, 2004-2013 Free Software Foundation, Inc.
+
+   This file is part of GNU M4.
+
+   GNU M4 is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   GNU M4 is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* Handling of different input sources, and lexical analysis.  */
+
+#include "m4.h"
+
+#include "memchr2.h"
+
+/* Unread input can be either files, that should be read (eg. included
+   files), strings, which should be rescanned (eg. macro expansion text),
+   or quoted macro definitions (as returned by the builtin "defn").
+   Unread input is organised in a stack, implemented with an obstack.
+   Each input source is described by a "struct input_block".  The obstack
+   is "current_input".  The top of the input stack is "isp".
+
+   The macro "m4wrap" places the text to be saved on another input
+   stack, on the obstack "wrapup_stack", whose top is "wsp".  When EOF
+   is seen on normal input (eg, when "current_input" is empty), input is
+   switched over to "wrapup_stack", and the original "current_input" is
+   freed.  A new stack is allocated for "wrapup_stack", which will
+   accept any text produced by calls to "m4wrap" from within the
+   wrapped text.  This process of shuffling "wrapup_stack" to
+   "current_input" can continue indefinitely, even generating infinite
+   loops (e.g. "define(`f',`m4wrap(`f')')f"), without memory leaks.
+
+   Pushing new input on the input stack is done by push_file (),
+   push_string (), push_wrapup () (for wrapup text), and push_macro ()
+   (for macro definitions).  Because macro expansion needs direct access
+   to the current input obstack (for optimisation), push_string () is
+   split into two functions: push_string_init (), which returns a pointer
+   to the current input stack, and push_string_finish (), which returns a
+   pointer to the final text.  The input_block *next is used to manage
+   the coordination between the different push routines.
+
+   The current file and line number are stored in two global
+   variables, for use by the error handling functions in m4.c.  Macro
+   expansion wants to report the line where a macro name was detected,
+   rather than where it finished collecting arguments.  This also
+   applies to text resulting from macro expansions.  So each input
+   block maintains its own notion of the current file and line, and
+   swapping between input blocks updates the global variables
+   accordingly.  */
+
+#ifdef ENABLE_CHANGEWORD
+#include "regex.h"
+#endif
+
+/* The kinds of source that can sit on the input stack.  */
+typedef enum input_type
+{
+  INPUT_STRING,   /* Text to rescan, e.g. the result of a macro expansion.  */
+  INPUT_FILE,     /* A stream: a command-line file or an included file.  */
+  INPUT_MACRO     /* A builtin's function, as produced by defn.  */
+}
+input_type;
+
+struct input_block              /* One entry on an input stack.  */
+{
+  struct input_block *prev;     /* previous input_block on the input stack */
+  input_type type;              /* selects the active member of u */
+  const char *file;             /* file where this input is from */
+  int line;                     /* line where this input is from */
+  union
+    {
+      struct
+        {
+          char *string;         /* remaining (not yet read) string value */
+          char *end;            /* terminating NUL of string */
+        }
+        u_s;    /* INPUT_STRING */
+      struct
+        {
+          FILE *fp;                  /* input file handle */
+          bool_bitfield end : 1;     /* true once peek_input () has seen EOF */
+          bool_bitfield close : 1;   /* true if we should close file on pop */
+          bool_bitfield advance : 1; /* start_of_input_line saved at push time */
+        }
+        u_f;    /* INPUT_FILE */
+      builtin_func *func;       /* INPUT_MACRO: pointer to macro's function */
+    }
+  u;
+};
+
+typedef struct input_block input_block;
+
+
+/* Current input file name.  */
+const char *current_file;
+
+/* Current input line number.  */
+int current_line;
+
+/* Obstack holding the text of the token currently being built.  */
+static struct obstack token_stack;
+
+/* Obstack holding the file name copies made by push_file ().  */
+static struct obstack file_names;
+
+/* Obstack holding the blocks queued by push_wrapup ().  */
+static struct obstack *wrapup_stack;
+
+/* Obstack holding the active input stack, from input or wrapup.  */
+static struct obstack *current_input;
+
+/* Bottom of token_stack; next_token () frees back to this point.  */
+static void *token_bottom;
+
+/* Pointer to top of current_input, or NULL when it is empty.  */
+static input_block *isp;
+
+/* Pointer to top of wrapup_stack, or NULL when it is empty.  */
+static input_block *wsp;
+
+/* Block begun by push_string_init () and not yet finished, or NULL.  */
+static input_block *next;
+
+/* Flag for next_char_1 () to increment current_line on its next read.  */
+static bool start_of_input_line;
+
+/* Flag for next_char_1 () to reload current_file and current_line.  */
+static bool input_change;
+
+#define CHAR_EOF        256     /* returned at end of all input */
+#define CHAR_MACRO      257     /* returned when an INPUT_MACRO block is next */
+
+/* Current quote delimiters; see set_quotes ().  */
+STRING rquote;
+STRING lquote;
+
+/* Current comment delimiters; see set_comment ().  */
+STRING bcomm;
+STRING ecomm;
+
+#ifdef ENABLE_CHANGEWORD
+
+# define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
+
+static struct re_pattern_buffer word_regexp;   /* compiled user word syntax */
+static int default_word_regexp;  /* nonzero: built-in word syntax in force */
+static struct re_registers regs;               /* registers for word_regexp */
+
+#else /* ! ENABLE_CHANGEWORD */
+# define default_word_regexp 1
+#endif /* ! ENABLE_CHANGEWORD */
+
+#ifdef DEBUG_INPUT
+static const char *token_type_string (token_type);
+#endif
+
+
+/*-------------------------------------------------------------------.
+| push_file () pushes an input file on the input stack, saving the   |
+| current file name and line number.  If next is non-NULL, this push |
+| invalidates a call to push_string_init (), whose storage is        |
+| consequently released.  If CLOSE_WHEN_DONE, then close FP after    |
+| EOF is detected.                                                   |
+`-------------------------------------------------------------------*/
+
+void
+push_file (FILE *fp, const char *title, bool close_when_done)
+{
+  input_block *block;
+
+  /* A half-built push_string () block cannot survive this push.  */
+  if (next)
+    {
+      obstack_free (current_input, next);
+      next = NULL;
+    }
+
+  if (debug_level & DEBUG_TRACE_INPUT)
+    DEBUG_MESSAGE1 ("input read from %s", title);
+
+  block = (input_block *) obstack_alloc (current_input, sizeof *block);
+  block->prev = isp;
+  block->type = INPUT_FILE;
+  block->file = (char *) obstack_copy0 (&file_names, title, strlen (title));
+  block->line = 1;
+  block->u.u_f.fp = fp;
+  block->u.u_f.end = false;
+  block->u.u_f.close = close_when_done;
+  block->u.u_f.advance = start_of_input_line;
+
+  /* Publish the block; input_change makes next_char_1 () reload file/line.  */
+  isp = block;
+  input_change = true;
+  output_current_line = -1;
+}
+
+/*---------------------------------------------------------------.
+| push_macro () pushes a builtin macro's definition on the input |
+| stack.  If next is non-NULL, this push invalidates a call to   |
+| push_string_init (), whose storage is consequently released.   |
+`---------------------------------------------------------------*/
+
+void
+push_macro (builtin_func *func)
+{
+  input_block *block;
+
+  /* Discard any block left pending by push_string_init ().  */
+  if (next)
+    {
+      obstack_free (current_input, next);
+      next = NULL;
+    }
+
+  block = (input_block *) obstack_alloc (current_input, sizeof *block);
+  block->prev = isp;
+  block->type = INPUT_MACRO;
+  block->file = current_file;
+  block->line = current_line;
+  block->u.func = func;
+  /* Make the block the new top of the input stack.  */
+  isp = block;
+  input_change = true;
+}
+
+/*------------------------------------------------------------------.
+| First half of push_string ().  The pointer next points to the new |
+| input_block.                                                      |
+`------------------------------------------------------------------*/
+
+struct obstack *
+push_string_init (void)
+{
+  /* Only one string push may be in progress at a time.  */
+  if (next)
+    {
+      M4ERROR ((warning_status, 0,
+                "INTERNAL ERROR: recursive push_string!"));
+      abort ();
+    }
+
+  /* The caller grows the string on the obstack that is returned.  */
+  next = (input_block *) obstack_alloc (current_input, sizeof *next);
+  next->line = current_line;
+  next->file = current_file;
+  next->type = INPUT_STRING;
+  return current_input;
+}
+
+/*-------------------------------------------------------------------.
+| Last half of push_string ().  If next is now NULL, a call to       |
+| push_file () has invalidated the previous call to push_string_init |
+| (), so we just give up.  If the new object is void, we do not push |
+| it.  The function push_string_finish () returns a pointer to the   |
+| finished object.  This pointer is only for temporary use, since    |
+| reading the next token might release the memory used for the       |
+| object.                                                            |
+`-------------------------------------------------------------------*/
+
+const char *
+push_string_finish (void)
+{
+  char *text = NULL;
+  size_t len;
+  if (!next)                    /* an intervening push released the block */
+    return NULL;
+
+  len = obstack_object_size (current_input);
+  if (len == 0)                 /* nothing was added; discard the block */
+    obstack_free (current_input, next);
+  else
+    {
+      obstack_1grow (current_input, '\0');
+      text = (char *) obstack_finish (current_input);
+      next->u.u_s.string = text;
+      next->u.u_s.end = text + len;
+      next->prev = isp;
+      isp = next;
+      input_change = true;
+    }
+  next = NULL;
+  return text;                  /* valid only until the next token is read */
+}
+
+/*------------------------------------------------------------------.
+| The function push_wrapup () pushes a string on the wrapup stack.  |
+| When the normal input stack gets empty, the wrapup stack will     |
+| become the input stack, and push_string () and push_file () will  |
+| operate on wrapup_stack.  Push_wrapup should be done as           |
+| push_string (), but this will suffice, as long as arguments to    |
+| m4_m4wrap () are moderate in size.                                |
+`------------------------------------------------------------------*/
+
+void
+push_wrapup (const char *s)
+{
+  const size_t length = strlen (s);
+  input_block *block;
+  block = (input_block *) obstack_alloc (wrapup_stack, sizeof *block);
+  /* The text is copied (with a terminating NUL) onto the same obstack.  */
+  block->u.u_s.string = (char *) obstack_copy0 (wrapup_stack, s, length);
+  block->u.u_s.end = block->u.u_s.string + length;
+  block->type = INPUT_STRING;
+  block->file = current_file;
+  block->line = current_line;
+  block->prev = wsp;
+  wsp = block;
+}
+
+
+/*-------------------------------------------------------------------.
+| The function pop_input () pops one level of input sources.  If the |
+| popped input_block is a file, current_file and current_line are    |
+| reset to the saved values before the memory for the input_block is |
+| released.                                                          |
+`-------------------------------------------------------------------*/
+
+static void
+pop_input (void)
+{
+  input_block *below = isp->prev;
+
+  if (isp->type == INPUT_FILE)
+    {
+      FILE *fp = isp->u.u_f.fp;
+      bool must_close = isp->u.u_f.close;
+      if (debug_level & DEBUG_TRACE_INPUT)
+        {
+          if (below == NULL)
+            DEBUG_MESSAGE ("input exhausted");
+          else
+            DEBUG_MESSAGE2 ("input reverted to %s, line %d",
+                            below->file, below->line);
+        }
+
+      /* A pending read error takes precedence over a failing close.  */
+      if (ferror (fp))
+        {
+          M4ERROR ((warning_status, 0, "read error"));
+          if (must_close)
+            fclose (fp);
+          retcode = EXIT_FAILURE;
+        }
+      else if (must_close && fclose (fp) == EOF)
+        {
+          M4ERROR ((warning_status, errno, "error reading file"));
+          retcode = EXIT_FAILURE;
+        }
+      /* Restore the line state that was saved when the file was pushed.  */
+      start_of_input_line = isp->u.u_f.advance;
+      output_current_line = -1;
+    }
+  else if (isp->type != INPUT_STRING && isp->type != INPUT_MACRO)
+    {
+      M4ERROR ((warning_status, 0,
+                "INTERNAL ERROR: input stack botch in pop_input ()"));
+      abort ();
+    }
+
+  /* Releasing ISP also releases everything allocated after it.  */
+  obstack_free (current_input, isp);
+  next = NULL;
+  isp = below;
+  input_change = true;
+}
+
+/*-------------------------------------------------------------------.
+| To switch input over to the wrapup stack, main calls pop_wrapup    |
+| ().  Since wrapup text can install new wrapup text, pop_wrapup ()  |
+| returns false when there is no wrapup text on the stack, and true  |
+| otherwise.                                                         |
+`-------------------------------------------------------------------*/
+
+bool
+pop_wrapup (void)
+{
+  /* The exhausted input stack is discarded in either case.  */
+  next = NULL;
+  obstack_free (current_input, NULL);
+  free (current_input);
+  if (wsp != NULL)
+    {
+      /* Promote the wrapup text to be the input, and start a fresh
+         obstack to collect any m4wrap calls made by that text.  */
+      current_input = wrapup_stack;
+      wrapup_stack = (struct obstack *) xmalloc (sizeof *wrapup_stack);
+      obstack_init (wrapup_stack);
+      isp = wsp;
+      wsp = NULL;
+      input_change = true;
+      return true;
+    }
+
+  /* No wrapup text remains, so the program is ending.  Release every
+     obstack anyway, since it makes leak detection easier.  */
+  obstack_free (&token_stack, NULL);
+  obstack_free (&file_names, NULL);
+  obstack_free (wrapup_stack, NULL);
+  free (wrapup_stack);
+#ifdef ENABLE_CHANGEWORD
+  regfree (&word_regexp);
+#endif /* ENABLE_CHANGEWORD */
+  return false;
+}
+
+/*-------------------------------------------------------------------.
+| When a MACRO token is seen, next_token () uses init_macro_token () |
+| to retrieve the value of the function pointer.                     |
+`-------------------------------------------------------------------*/
+
+static void
+init_macro_token (token_data *td)
+{
+  if (isp->type == INPUT_MACRO) /* the only block type carrying a builtin */
+    {
+      TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
+      TOKEN_DATA_FUNC (td) = isp->u.func;
+      return;
+    }
+  M4ERROR ((warning_status, 0,
+            "INTERNAL ERROR: bad call to init_macro_token ()"));
+  abort ();
+}
+
+
+/*-----------------------------------------------------------------.
+| Low level input is done a character at a time.  The function     |
+| peek_input () is used to look at the next character in the input |
+| stream.  At any given time, it reads from the input_block on the |
+| top of the current input stack.                                  |
+`-----------------------------------------------------------------*/
+
+static int
+peek_input (void)
+{
+  int ch;
+  input_block *block;
+
+  /* Exhausted blocks are skipped, not popped; next_char_1 () pops them.  */
+  for (block = isp; block != NULL; block = block->prev)
+    switch (block->type)
+      {
+      case INPUT_STRING:
+        ch = to_uchar (block->u.u_s.string[0]);
+        if (ch != '\0')
+          return ch;
+        break;
+
+      case INPUT_FILE:
+        /* Do not read past a previously seen EOF: next_char_1 () will not
+           either, and on a tty that would require typing ^D twice.  */
+        if (block->u.u_f.end)
+          break;
+        ch = getc (block->u.u_f.fp);
+        if (ch != EOF)
+          {
+            ungetc (ch, block->u.u_f.fp);
+            return ch;
+          }
+        block->u.u_f.end = true;
+        break;
+
+      case INPUT_MACRO:
+        return CHAR_MACRO;
+
+      default:
+        M4ERROR ((warning_status, 0,
+                  "INTERNAL ERROR: input stack botch in peek_input ()"));
+        abort ();
+      }
+  return CHAR_EOF;
+}
+
+/*-------------------------------------------------------------------.
+| The function next_char () is used to read and advance the input to |
+| the next character.  It also manages line numbers for error        |
+| messages, so they do not get wrong, due to lookahead.  The token   |
+| consisting of a newline alone is taken as belonging to the line it |
+| ends, and the current line number is not incremented until the     |
+| next character is read.  99.9% of all calls will read from a       |
+| string, so factor that out into a macro for speed.                 |
+`-------------------------------------------------------------------*/
+
+#define next_char() \
+  (!input_change && isp && isp->type == INPUT_STRING            \
+   && isp->u.u_s.string[0] != '\0'                              \
+   ? to_uchar (*isp->u.u_s.string++)                            \
+   : next_char_1 ())
+
+static int
+next_char_1 (void)
+{
+  int c;
+
+  for (;;)
+    {
+      if (!isp)                 /* nothing left on the input stack */
+        {
+          current_file = "";
+          current_line = 0;
+          return CHAR_EOF;
+        }
+
+      if (input_change)         /* adopt the top block's file and line */
+        {
+          input_change = false;
+          current_file = isp->file;
+          current_line = isp->line;
+        }
+
+      switch (isp->type)
+        {
+        case INPUT_STRING:
+          c = to_uchar (*isp->u.u_s.string++);
+          if (c != '\0')
+            return c;
+          break;
+
+        case INPUT_FILE:
+          /* Count a line only when its first character is requested.  */
+          if (start_of_input_line)
+            {
+              isp->line++;
+              current_line = isp->line;
+              start_of_input_line = false;
+            }
+          /* Never call getc again once peek_input () hit EOF: on a
+             terminal the user would have to type ^D a second time.  */
+          if (!isp->u.u_f.end)
+            {
+              c = getc (isp->u.u_f.fp);
+              if (c != EOF)
+                {
+                  start_of_input_line = (c == '\n');
+                  return c;
+                }
+            }
+          break;
+
+        case INPUT_MACRO:
+          pop_input (); /* an INPUT_MACRO source holds only one token */
+          return CHAR_MACRO;
+
+        default:
+          M4ERROR ((warning_status, 0,
+                    "INTERNAL ERROR: input stack botch in next_char ()"));
+          abort ();
+        }
+      pop_input ();             /* source exhausted: resume the one below */
+    }
+}
+
+/*-------------------------------------------------------------------.
+| skip_line () simply discards all immediately following characters, |
+| up to the first newline.  It is only used from m4_dnl ().          |
+`-------------------------------------------------------------------*/
+
+void
+skip_line (void)
+{
+  const char *start_file = current_file;
+  int start_line = current_line;
+  int ch;
+
+  do
+    ch = next_char ();
+  while (ch != CHAR_EOF && ch != '\n');
+
+  /* Hitting CHAR_EOF resets current_file to "", so the diagnostic is
+     issued against the location remembered on entry.  */
+  if (ch == CHAR_EOF)
+    M4ERROR_AT_LINE ((warning_status, 0, start_file, start_line,
+                      "Warning: end of file treated as newline"));
+  /* Reading may have crossed into another input block and changed
+     current_file or current_line.  That update is undone as we return
+     to expand_macro, so ask next_char to resynchronize afterwards.  */
+  if (start_file != current_file || start_line != current_line)
+    input_change = true;
+}
+
+
+/*------------------------------------------------------------------.
+| This function is for matching a string against a prefix of the    |
+| input stream.  If the string matches the input and consume is     |
+| true, the input is discarded; otherwise any characters read are   |
+| pushed back again.  The function is used only when multicharacter |
+| quotes or comment delimiters are used.                            |
+`------------------------------------------------------------------*/
+
+static bool
+match_input (const char *s, bool consume)
+{
+  int n;                        /* number of characters matched */
+  int ch;                       /* input character */
+  const char *t;                /* start of S, kept for the push back */
+  bool result = false;
+
+  ch = peek_input ();
+  if (ch != to_uchar (*s))
+    return false;                       /* fail */
+
+  if (s[1] == '\0')
+    {
+      if (consume)
+        next_char ();
+      return true;                      /* short match */
+    }
+
+  next_char ();                 /* consume the first, already matched, byte */
+  for (n = 1, t = s++; peek_input () == to_uchar (*s++); )
+    {
+      next_char ();
+      n++;
+      if (*s == '\0')           /* long match */
+        {
+          if (consume)
+            return true;
+          result = true;
+          break;
+        }
+    }
+
+  /* Failed or shouldn't consume; push back the N bytes read, taken from S.  */
+  {
+    struct obstack *h = push_string_init ();
+
+    /* `obstack_grow' may be macro evaluating its arg 1 several times. */
+    obstack_grow (h, t, n);
+  }
+  push_string_finish ();
+  return result;
+}
+
+/*--------------------------------------------------------------------.
+| The macro MATCH() is used to match a string S against the input.    |
+| The first character is handled inline, for speed.  Hopefully, this  |
+| will not hurt efficiency too much when single character quotes and  |
+| comment delimiters are used.  If CONSUME, then CH is the result of  |
+| next_char, and a successful match will discard the matched string.  |
+| Otherwise, CH is the result of peek_input, and the input stream is  |
+| effectively unchanged.                                              |
+`--------------------------------------------------------------------*/
+
+#define MATCH(ch, s, consume)                                           \
+  (to_uchar ((s)[0]) == (ch)                                            \
+   && (ch) != '\0'                                                      \
+   && ((s)[1] == '\0' || (match_input ((s) + (consume), consume))))
+
+
+/*--------------------------------------------------------.
+| Initialize input stacks, and quote/comment characters.  |
+`--------------------------------------------------------*/
+
+void
+input_init (void)
+{
+  /* No input yet: empty stacks, no pending string, neutral location.  */
+  isp = wsp = NULL;
+  next = NULL;
+  current_file = "";
+  current_line = 0;
+  start_of_input_line = false;
+
+  /* Both input stacks are heap-allocated obstacks.  */
+  current_input = (struct obstack *) xmalloc (sizeof *current_input);
+  obstack_init (current_input);
+  wrapup_stack = (struct obstack *) xmalloc (sizeof *wrapup_stack);
+  obstack_init (wrapup_stack);
+
+  obstack_init (&file_names);
+
+  /* Park a one-byte object at the bottom of token_stack.  That keeps
+     obstack_free (&token_stack, token_bottom) working even when the
+     first token parsed spills into a new chunk.  */
+  obstack_init (&token_stack);
+  obstack_alloc (&token_stack, 1);
+  token_bottom = obstack_base (&token_stack);
+
+  /* Install the default quote and comment delimiters.  */
+  lquote.string = xstrdup (DEF_LQUOTE);
+  rquote.string = xstrdup (DEF_RQUOTE);
+  bcomm.string = xstrdup (DEF_BCOMM);
+  ecomm.string = xstrdup (DEF_ECOMM);
+  lquote.length = strlen (lquote.string);
+  rquote.length = strlen (rquote.string);
+  bcomm.length = strlen (bcomm.string);
+  ecomm.length = strlen (ecomm.string);
+
+#ifdef ENABLE_CHANGEWORD
+  set_word_regexp (user_word_regexp);
+#endif
+}
+
+
+/*------------------------------------------------------------------.
+| Functions for setting quotes and comment delimiters.  Used by     |
+| m4_changecom () and m4_changequote ().  Pass NULL if the argument |
+| was not present, to distinguish from an explicit empty string.    |
+`------------------------------------------------------------------*/
+
+void
+set_quotes (const char *lq, const char *rq)
+{
+  const char *left = lq ? lq : DEF_LQUOTE;
+  const char *right = lq ? rq : DEF_RQUOTE;
+
+  /* With no arguments POSIX asks for the default quotes, which the
+     initializers above supply.  POSIX XCU ERN 112 leaves a single
+     argument, or an empty string among two arguments, up to the
+     implementation.  Here an empty left quote disables quoting,
+     but a non-empty left quote never gets an empty right quote;
+     the default right quote is substituted instead.  The texinfo
+     manual describes what other implementations do.  */
+  if (right == NULL || (*left != '\0' && *right == '\0'))
+    right = DEF_RQUOTE;
+
+  /* Swap in private copies of the new delimiters.  */
+  free (lquote.string);
+  free (rquote.string);
+
+  lquote.string = xstrdup (left);
+  lquote.length = strlen (lquote.string);
+
+  rquote.string = xstrdup (right);
+  rquote.length = strlen (rquote.string);
+}
+
+void
+set_comment (const char *bc, const char *ec)
+{
+  const char *begin = bc ? bc : "";
+  const char *end = bc ? ec : "";
+
+  /* POSIX says no arguments disables comments, hence the empty
+     strings above.  It also says empty arguments are used as-is, but
+     a non-null begin with a null end could never be closed, so the
+     traditional behavior of using the default end is kept.  This
+     assumes approval of the aardvark filed at
+     http://www.opengroup.org/austin/mailarchives/ag-review/msg02168.html
+     The texinfo manual describes what other implementations do.  */
+  if (end == NULL || (*begin != '\0' && *end == '\0'))
+    end = DEF_ECOMM;
+
+  free (bcomm.string);
+  free (ecomm.string);
+  bcomm.string = xstrdup (begin);
+  bcomm.length = strlen (bcomm.string);
+  ecomm.string = xstrdup (end);
+  ecomm.length = strlen (ecomm.string);
+}
+
+#ifdef ENABLE_CHANGEWORD
+
+void
+set_word_regexp (const char *regexp)
+{
+  struct re_pattern_buffer trial;
+  const char *err;
+
+  /* An empty or default pattern selects the built-in word syntax.  */
+  if (*regexp == '\0' || STREQ (regexp, DEFAULT_WORD_REGEXP))
+    {
+      default_word_regexp = true;
+      return;
+    }
+
+  /* Compile into a scratch buffer first, so that a bad pattern
+     leaves the regexp currently in force untouched.  */
+  init_pattern_buffer (&trial, NULL);
+  err = re_compile_pattern (regexp, strlen (regexp), &trial);
+  regfree (&trial);
+  if (err != NULL)
+    {
+      M4ERROR ((warning_status, 0,
+                "bad regular expression `%s': %s", regexp, err));
+      return;
+    }
+
+  /* The pattern is known to be good; compile it for real.  Struct
+     assignment from the scratch buffer is not relied upon.  The
+     fastmap is allocated once, reused by later compilations, and
+     released by the final regfree.  */
+  if (word_regexp.fastmap == NULL)
+    word_regexp.fastmap = xcharalloc (UCHAR_MAX + 1);
+  err = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
+  assert (!err);
+  re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);
+  if (re_compile_fastmap (&word_regexp))
+    assert (false);
+  default_word_regexp = false;
+}
+
+#endif /* ENABLE_CHANGEWORD */
+
+
+/*--------------------------------------------------------------------.
+| Parse and return a single token from the input stream.  A token     |
+| can either be TOKEN_EOF, if the input_stack is empty; it can be     |
+| TOKEN_STRING for a quoted string; TOKEN_WORD for something that is  |
+| a potential macro name; and TOKEN_SIMPLE for any single character   |
+| that is not a part of any of the previous types.  If LINE is not    |
+| NULL, set *LINE to the line where the token starts.                 |
+|                                                                     |
+| Next_token () returns the token type, and passes back a pointer to  |
+| the token data through TD.  The token text is collected on the      |
+| obstack token_stack, which never contains more than one token text  |
+| at a time.  The storage pointed to by the fields in TD is           |
+| therefore subject to change the next time next_token () is called.  |
+`--------------------------------------------------------------------*/
+
+token_type
+next_token (token_data *td, int *line)
+{
+  int ch;
+  int quote_level;
+  token_type type;
+#ifdef ENABLE_CHANGEWORD
+  int startpos;
+  char *orig_text = NULL;
+#endif
+  const char *file;
+  int dummy;                    /* sink for *LINE when the caller passes NULL */
+
+  obstack_free (&token_stack, token_bottom);    /* drop the previous token */
+  if (!line)
+    line = &dummy;
+
+  /* Can't consume character until after CHAR_MACRO is handled.  */
+  ch = peek_input ();
+  if (ch == CHAR_EOF)
+    {
+#ifdef DEBUG_INPUT
+      xfprintf (stderr, "next_token -> EOF\n");
+#endif
+      next_char ();
+      return TOKEN_EOF;
+    }
+  if (ch == CHAR_MACRO)
+    {
+      init_macro_token (td);
+      next_char ();
+#ifdef DEBUG_INPUT
+      xfprintf (stderr, "next_token -> MACDEF (%s)\n",
+                find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
+#endif
+      return TOKEN_MACDEF;
+    }
+
+  next_char (); /* Consume character we already peeked at.  */
+  file = current_file;          /* remembered for the EOF diagnostics below */
+  *line = current_line;
+  if (MATCH (ch, bcomm.string, true))   /* comment, kept with its delimiters */
+    {
+      obstack_grow (&token_stack, bcomm.string, bcomm.length);
+      while ((ch = next_char ()) != CHAR_EOF
+             && !MATCH (ch, ecomm.string, true))
+        obstack_1grow (&token_stack, ch);
+      if (ch != CHAR_EOF)
+        obstack_grow (&token_stack, ecomm.string, ecomm.length);
+      else
+        /* current_file changed to "" if we see CHAR_EOF, use the
+           previous value we stored earlier.  */
+        M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, *line,
+                          "ERROR: end of file in comment"));
+
+      type = TOKEN_STRING;      /* comments share TOKEN_STRING with quotes */
+    }
+  else if (default_word_regexp && (isalpha (ch) || ch == '_'))
+    {
+      obstack_1grow (&token_stack, ch);
+      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
+        {
+          obstack_1grow (&token_stack, ch);
+          next_char ();
+        }
+      type = TOKEN_WORD;
+    }
+
+#ifdef ENABLE_CHANGEWORD
+
+  else if (!default_word_regexp && word_regexp.fastmap[ch])
+    {
+      obstack_1grow (&token_stack, ch);
+      while (1)
+        {
+          ch = peek_input ();
+          if (ch == CHAR_EOF)
+            break;
+          obstack_1grow (&token_stack, ch);     /* tentatively extend */
+          startpos = re_search (&word_regexp,
+                                (char *) obstack_base (&token_stack),
+                                obstack_object_size (&token_stack), 0, 0,
+                                &regs);
+          if (startpos ||
+              regs.end [0] != (regoff_t) obstack_object_size (&token_stack))
+            {
+              *(((char *) obstack_base (&token_stack)
+                 + obstack_object_size (&token_stack)) - 1) = '\0';
+              break;
+            }
+          next_char ();
+        }
+
+      obstack_1grow (&token_stack, '\0');
+      orig_text = (char *) obstack_finish (&token_stack);
+
+      if (regs.start[1] != -1)  /* group 1 matched: its text is the word */
+        obstack_grow (&token_stack,orig_text + regs.start[1],
+                      regs.end[1] - regs.start[1]);
+      else
+        obstack_grow (&token_stack, orig_text,regs.end[0]);
+
+      type = TOKEN_WORD;
+    }
+
+#endif /* ENABLE_CHANGEWORD */
+
+  else if (!MATCH (ch, lquote.string, true))    /* single-character token */
+    {
+      switch (ch)
+        {
+        case '(':
+          type = TOKEN_OPEN;
+          break;
+        case ',':
+          type = TOKEN_COMMA;
+          break;
+        case ')':
+          type = TOKEN_CLOSE;
+          break;
+        default:
+          type = TOKEN_SIMPLE;
+          break;
+        }
+      obstack_1grow (&token_stack, ch);
+    }
+  else                          /* quoted string; outer quotes are dropped */
+    {
+      bool fast = lquote.length == 1 && rquote.length == 1;
+      quote_level = 1;          /* the opening quote was just consumed */
+      while (1)
+        {
+          /* Scan the rest of a string block in one go when possible.  */
+          const char *buffer = (isp && isp->type == INPUT_STRING
+                                ? isp->u.u_s.string : NULL);
+          if (buffer && *buffer)
+            {
+              size_t len = isp->u.u_s.end - buffer;
+              const char *p = buffer;
+              do
+                {
+                  p = (char *) memchr2 (p, *lquote.string, *rquote.string,
+                                        buffer + len - p);
+                }
+              while (p && fast && (*p++ == *rquote.string
+                                   ? --quote_level : ++quote_level));
+              if (p)
+                {
+                  if (fast)
+                    {
+                      assert (!quote_level);
+                      obstack_grow (&token_stack, buffer, p - buffer - 1);
+                      isp->u.u_s.string += p - buffer;
+                      break;
+                    }
+                  obstack_grow (&token_stack, buffer, p - buffer);
+                  ch = to_uchar (*p);
+                  isp->u.u_s.string += p - buffer + 1;
+                }
+              else
+                {
+                  obstack_grow (&token_stack, buffer, len);
+                  isp->u.u_s.string += len;
+                  continue;
+                }
+            }
+          /* Otherwise fall back to reading one byte.  */
+          else
+            ch = next_char ();
+          if (ch == CHAR_EOF)
+            /* current_file changed to "" if we see CHAR_EOF, use
+               the previous value we stored earlier.  */
+            M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, *line,
+                              "ERROR: end of file in string"));
+
+          if (MATCH (ch, rquote.string, true))
+            {
+              if (--quote_level == 0)
+                break;
+              obstack_grow (&token_stack, rquote.string, rquote.length);
+            }
+          else if (MATCH (ch, lquote.string, true))
+            {
+              quote_level++;
+              obstack_grow (&token_stack, lquote.string, lquote.length);
+            }
+          else
+            obstack_1grow (&token_stack, ch);
+        }
+      type = TOKEN_STRING;
+    }
+
+  obstack_1grow (&token_stack, '\0');   /* NUL-terminate the token text */
+
+  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
+  TOKEN_DATA_TEXT (td) = (char *) obstack_finish (&token_stack);
+#ifdef ENABLE_CHANGEWORD
+  if (orig_text == NULL)
+    orig_text = TOKEN_DATA_TEXT (td);
+  TOKEN_DATA_ORIG_TEXT (td) = orig_text;
+#endif
+#ifdef DEBUG_INPUT
+  xfprintf (stderr, "next_token -> %s (%s)\n",
+            token_type_string (type), TOKEN_DATA_TEXT (td));
+#endif
+  return type;
+}
+
+/*------------------------------------------------------------------.
+| Peek at the next input character and report the type of the token |
+| that starts there.  The checks run in a fixed order: the special  |
+| CHAR_EOF and CHAR_MACRO values, the bcomm string, the start of a  |
+| word, the lquote string, and finally the one-character tokens.    |
+`------------------------------------------------------------------*/
+
+token_type
+peek_token (void)
+{
+  int c = peek_input ();
+  token_type kind;
+
+  /* Keep this chain in order: an earlier test wins whenever more
+     than one of them could accept the same character.  */
+  if (c == CHAR_EOF)
+    kind = TOKEN_EOF;
+  else if (c == CHAR_MACRO)
+    kind = TOKEN_MACDEF;
+  else if (MATCH (c, bcomm.string, false))
+    {
+      /* Input matching the bcomm string is reported as a string.  */
+      kind = TOKEN_STRING;
+    }
+  else if ((default_word_regexp && (c == '_' || isalpha (c)))
+#ifdef ENABLE_CHANGEWORD
+           || (!default_word_regexp && word_regexp.fastmap[c])
+#endif /* ENABLE_CHANGEWORD */
+           )
+    {
+      /* With the default word syntax a letter or an underscore starts
+         a word; otherwise the fastmap of word_regexp decides.  */
+      kind = TOKEN_WORD;
+    }
+  else if (MATCH (c, lquote.string, false))
+    {
+      /* Input matching the lquote string is reported as a string.  */
+      kind = TOKEN_STRING;
+    }
+  else if (c == '(')
+    kind = TOKEN_OPEN;
+  else if (c == ',')
+    kind = TOKEN_COMMA;
+  else if (c == ')')
+    kind = TOKEN_CLOSE;
+  else
+    /* Every other character is reported as TOKEN_SIMPLE.  */
+    kind = TOKEN_SIMPLE;
+
+#ifdef DEBUG_INPUT
+  /* Trace the decision when input debugging is compiled in.  */
+  xfprintf (stderr, "peek_token -> %s\n", token_type_string (kind));
+#endif /* DEBUG_INPUT */
+  return kind;
+}
+
+
+#ifdef DEBUG_INPUT
+
+/*-------------------------------------------------------------.
+| Return the printable name of token type T.  A value that is  |
+| not one of the token types listed below aborts the program.  |
+`-------------------------------------------------------------*/
+
+static const char *
+token_type_string (token_type t)
+{
+  const char *name = NULL;
+
+  switch (t)
+    { /* TOKSW */
+    case TOKEN_EOF:    name = "EOF";    break;
+    case TOKEN_STRING: name = "STRING"; break;
+    case TOKEN_WORD:   name = "WORD";   break;
+    case TOKEN_OPEN:   name = "OPEN";   break;
+    case TOKEN_COMMA:  name = "COMMA";  break;
+    case TOKEN_CLOSE:  name = "CLOSE";  break;
+    case TOKEN_SIMPLE: name = "SIMPLE"; break;
+    case TOKEN_MACDEF: name = "MACDEF"; break;
+    default:
+      abort ();
+    }
+  return name;
+}
+
+/* Print on stderr a trace line for a token of type T, prefixed by S.
+   Character, word and string tokens also show the text held in TD.  */
+static void
+print_token (const char *s, token_type t, token_data *td)
+{
+  xfprintf (stderr, "%s: ", s);
+  switch (t)
+    { /* TOKSW */
+    case TOKEN_OPEN:
+    case TOKEN_COMMA:
+    case TOKEN_CLOSE:
+    case TOKEN_SIMPLE:
+      xfprintf (stderr, "char:");
+      break;
+    case TOKEN_WORD:
+      xfprintf (stderr, "word:");
+      break;
+    case TOKEN_STRING:
+      xfprintf (stderr, "string:");
+      break;
+    case TOKEN_MACDEF:
+      /* TD is read through TOKEN_DATA_FUNC here and presumably holds
+         no text, so do not fall into the text output below.  */
+      xfprintf (stderr, "macro: %p\n", TOKEN_DATA_FUNC (td));
+      return;
+    case TOKEN_EOF:
+      xfprintf (stderr, "eof\n");
+      return;  /* Likewise: the line is already complete.  */
+    }
+  xfprintf (stderr, "\t\"%s\"\n", TOKEN_DATA_TEXT (td));
+}
+
+/* Print each token from next_token via print_token until TOKEN_EOF.  */
+static void M4_GNUC_UNUSED
+lex_debug (void)
+{
+  token_data data;
+  token_type kind = next_token (&data, NULL);
+  for (; kind != TOKEN_EOF; kind = next_token (&data, NULL))
+    print_token ("lex", kind, &data);
+}
+#endif /* DEBUG_INPUT */