summary refs log tree commit diff
path: root/gcc/java/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/java/lex.c')
-rw-r--r-- gcc/java/lex.c 2073
1 files changed, 0 insertions, 2073 deletions
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
deleted file mode 100644
index 730c1447fbd..00000000000
--- a/gcc/java/lex.c
+++ /dev/null
@@ -1,2073 +0,0 @@
-/* Language lexer for the GNU compiler for the Java(TM) language.
- Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
- Free Software Foundation, Inc.
- Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
-
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GCC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA.
-
-Java and all Java-based marks are trademarks or registered trademarks
-of Sun Microsystems, Inc. in the United States and other countries.
-The Free Software Foundation is independent of Sun Microsystems, Inc. */
-
-/* This file defines java_lex (yylex), which reads a Java ASCII source
- file possibly containing Unicode escape sequences or UTF-8 encoded
- characters and returns a token for everything found except comments,
- white space and line terminators. When necessary, it also fills
- the java_lval (yylval) union. It is implemented to be called by a
- re-entrant parser generated by Bison.
-
- The lexical analysis conforms to the Java grammar described in "The
- Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
- Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
-
-#include "keyword.h"
-#include "flags.h"
-#include "chartables.h"
-#ifndef JC1_LITE
-#include "timevar.h"
-#endif
-
-/* Function declarations. */
-static char *java_sprint_unicode (int);
-static void java_unicode_2_utf8 (unicode_t);
-static void java_lex_error (const char *, int);
-#ifndef JC1_LITE
-static int do_java_lex (YYSTYPE *);
-static int java_lex (YYSTYPE *);
-static int java_is_eol (FILE *, int);
-static tree build_wfl_node (tree);
-#endif
-static int java_parse_escape_sequence (void);
-static int java_start_char_p (unicode_t);
-static int java_part_char_p (unicode_t);
-static int java_space_char_p (unicode_t);
-static void java_parse_doc_section (int);
-static void java_parse_end_comment (int);
-static int java_read_char (java_lexer *);
-static int java_get_unicode (void);
-static int java_peek_unicode (void);
-static void java_next_unicode (void);
-static int java_read_unicode (java_lexer *, int *);
-#ifndef JC1_LITE
-static int utf8_cmp (const unsigned char *, int, const char *);
-#endif
-
-java_lexer *java_new_lexer (FILE *, const char *);
-#ifndef JC1_LITE
-static void error_if_numeric_overflow (tree);
-#endif
-
-#ifdef HAVE_ICONV
-/* This is nonzero if we have initialized `need_byteswap'. It is set
- by java_new_lexer the first time a lexer is created with a usable
- iconv handle, so the probe conversion there runs at most once. */
-static int byteswap_init = 0;
-
-/* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
- big-endian order -- not native endian order. We handle this by
- doing a conversion once at startup and seeing what happens. This
- flag holds the results of this determination: it is nonzero when
- converting the UTF-8 form of \ufeff did not yield 0xfeff. Each new
- iconv-based lexer copies it into its own byte_swap field. */
-static int need_byteswap = 0;
-#endif
-
-/* Prepare the current parser context (ctxp) for lexing FINPUT.
- ENCODING names the character encoding of FINPUT and is handed to
- java_new_lexer unchanged. Unless JC1_LITE is defined this also
- creates, on first use, the shared identifiers and WFL nodes used by
- the parser, pushes `java.lang' onto the context's import-on-demand
- list, and clears the per-context class and modifier state. */
-void
-java_init_lex (FILE *finput, const char *encoding)
-{
-#ifndef JC1_LITE
- /* NOTE(review): this is an automatic variable initialized to zero
- on every call, so the `!java_lang_imported' guard below is always
- true -- confirm that a per-call import is what is intended. */
- int java_lang_imported = 0;
-
- /* Global identifiers are created only if they are still unset. */
- if (!java_lang_id)
- java_lang_id = get_identifier ("java.lang");
- if (!inst_id)
- inst_id = get_identifier ("inst$");
- if (!wpv_id)
- wpv_id = get_identifier ("write_parm_value$");
-
- if (!java_lang_imported)
- {
- /* Read the java.lang import directory and prepend the node to
- this context's import-on-demand list. */
- tree node = build_tree_list (build_unknown_wfl (java_lang_id),
- NULL_TREE);
- read_import_dir (TREE_PURPOSE (node));
- TREE_CHAIN (node) = ctxp->import_demand_list;
- ctxp->import_demand_list = node;
- java_lang_imported = 1;
- }
-
- if (!wfl_operator)
- {
- /* This inner JC1_LITE test is redundant: we are already inside
- an `#ifndef JC1_LITE' region. */
-#ifndef JC1_LITE
-#ifdef USE_MAPPED_LOCATION
- wfl_operator = build_expr_wfl (NULL_TREE, input_location);
-#else
- wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
-#endif
-#endif
- }
- if (!label_id)
- label_id = get_identifier ("$L");
- if (!wfl_append)
- wfl_append = build_unknown_wfl (get_identifier ("append"));
- /* The string buffer class name depends on flag_emit_class_files. */
- if (!wfl_string_buffer)
- wfl_string_buffer =
- build_unknown_wfl (get_identifier (flag_emit_class_files
- ? "java.lang.StringBuffer"
- : "gnu.gcj.runtime.StringBuffer"));
- if (!wfl_to_string)
- wfl_to_string = build_unknown_wfl (get_identifier ("toString"));
-
- /* Reset the three initializer lists of the context. */
- CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
- CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
-
- memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
- ctxp->current_parsed_class = NULL;
- ctxp->package = NULL_TREE;
-#endif
-
-#ifndef JC1_LITE
- ctxp->save_location = input_location;
-#endif
- /* These two steps happen in every configuration. */
- ctxp->java_error_flag = 0;
- ctxp->lexer = java_new_lexer (finput, encoding);
-}
-
-/* Return a printable spelling of the character C.  A value from ' '
-   up to and including 126 is returned as a one-character string;
-   anything else is spelled as `\u' followed by at least four
-   lowercase hexadecimal digits.  The result lives in a static buffer
-   that is overwritten by the next call.  */
-static char *
-java_sprint_unicode (int c)
-{
-  /* Room for "\u", every hexadecimal digit an unsigned int can
-     produce, and the terminating NUL.  C is a plain int, so a
-     negative value (or one above 0xfffffff) prints more than seven
-     digits; a ten-byte buffer is one byte too small for that.  */
-  static char buffer [2 + 2 * sizeof (unsigned int) + 1];
-
-  if (c < ' ' || c >= 127)
-    /* `%x' expects an unsigned int, so convert explicitly.  */
-    sprintf (buffer, "\\u%04x", (unsigned int) c);
-  else
-    {
-      buffer [0] = c;
-      buffer [1] = '\0';
-    }
-  return buffer;
-}
-
-/* Create a new lexer object reading from FINPUT, whose bytes are in
- ENCODING. The lexer starts at line 1, column 1 with no pending
- characters. When HAVE_ICONV is defined and iconv_open accepts
- ENCODING, characters are converted with iconv; otherwise the
- built-in UTF-8 decoder is used, but only if ENCODING is
- DEFAULT_ENCODING or "646". Any other encoding is a fatal error.
- The result is allocated with XNEW; java_destroy_lexer releases it. */
-
-java_lexer *
-java_new_lexer (FILE *finput, const char *encoding)
-{
- java_lexer *lex = XNEW (java_lexer);
- int enc_error = 0;
-
- lex->finput = finput;
- lex->bs_count = 0;
- lex->unget_value = 0;
- lex->next_unicode = 0;
- lex->avail_unicode = 0;
- lex->next_columns = 1;
- lex->encoding = encoding;
- lex->position.line = 1;
- lex->position.col = 1;
-#ifndef JC1_LITE
-#ifdef USE_MAPPED_LOCATION
- input_location
- = linemap_line_start (&line_table, 1, 120);
-#else
- input_line = 1;
-#endif
-#endif
-
-#ifdef HAVE_ICONV
- lex->handle = iconv_open ("UCS-2", encoding);
- if (lex->handle != (iconv_t) -1)
- {
- /* -1 marks the input and output buffers as not yet used. */
- lex->first = -1;
- lex->last = -1;
- lex->out_first = -1;
- lex->out_last = -1;
- lex->read_anything = 0;
- lex->use_fallback = 0;
-
- /* Work around broken iconv() implementations by doing checking at
- runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
- then all UCS-2 encoders will be broken. Perhaps not a valid
- assumption. */
- if (! byteswap_init)
- {
- iconv_t handle;
-
- /* Set the flag first so the probe is attempted only once,
- whether or not it succeeds. */
- byteswap_init = 1;
-
- handle = iconv_open ("UCS-2", "UTF-8");
- if (handle != (iconv_t) -1)
- {
- unicode_t result;
- unsigned char in[3];
- char *inp, *outp;
- size_t inc, outc, r;
-
- /* This is the UTF-8 encoding of \ufeff. */
- in[0] = 0xef;
- in[1] = 0xbb;
- in[2] = 0xbf;
-
- inp = (char *) in;
- inc = 3;
- outp = (char *) &result;
- outc = 2;
-
- r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
- &outp, &outc);
- iconv_close (handle);
- /* Conversion must be complete for us to use the result. */
- if (r != (size_t) -1 && inc == 0 && outc == 0)
- need_byteswap = (result != 0xfeff);
- }
- }
-
- lex->byte_swap = need_byteswap;
- }
- else
-#endif /* HAVE_ICONV */
- {
- /* If iconv failed, use the internal decoder if the default
- encoding was requested. This code is used on platforms where
- iconv exists but is insufficient for our needs. For
- instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
-
- On Solaris the default encoding, as returned by nl_langinfo(),
- is `646' (aka ASCII), but the Solaris iconv_open() doesn't
- understand that. We work around that by pretending
- `646' to be the same as UTF-8. */
- /* strcmp returns nonzero on a mismatch, so this is true only when
- ENCODING matches neither name. */
- if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
- enc_error = 1;
-#ifdef HAVE_ICONV
- else
- {
- lex->use_fallback = 1;
- lex->encoding = "UTF-8";
- }
-#endif /* HAVE_ICONV */
- }
-
- if (enc_error)
- fatal_error ("unknown encoding: %qs\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n%<--encoding=UTF-8%> option", encoding);
-
- return lex;
-}
-
-/* Release LEX: close its iconv handle if it has one in use, then
-   free the lexer itself.  A null LEX is ignored in every
-   configuration, as free would ignore it; previously a null LEX was
-   dereferenced whenever HAVE_ICONV was defined.  */
-void
-java_destroy_lexer (java_lexer *lex)
-{
-  if (lex == NULL)
-    return;
-
-#ifdef HAVE_ICONV
-  /* The handle is only valid when the iconv path was selected.  */
-  if (! lex->use_fallback)
-    iconv_close (lex->handle);
-#endif
-  free (lex);
-}
-
-/* Read and return the next character of LEX's input, before any
-   `\u' escape processing.  When HAVE_ICONV is defined and the lexer
-   is not using the fallback decoder, bytes read from LEX->finput are
-   converted through LEX->handle into LEX->out_buffer and handed out
-   two bytes at a time.  Otherwise a built-in decoder accepts 1-, 2-
-   and 3-byte UTF-8 sequences.  Returns UEOF at end of input, and
-   also after an error has been reported with java_lex_error.  */
-static int
-java_read_char (java_lexer *lex)
-{
-#ifdef HAVE_ICONV
-  if (! lex->use_fallback)
-    {
-      size_t ir, inbytesleft, in_save, out_count, out_save;
-      char *inp, *outp;
-      unicode_t result;
-
-      /* If there is data which has already been converted, use it.  */
-      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
-        {
-          lex->out_first = 0;
-          lex->out_last = 0;
-
-          while (1)
-            {
-              /* See if we need to read more data.  If FIRST == 0 then
-                 the previous conversion attempt ended in the middle of
-                 a character at the end of the buffer.  Otherwise we
-                 only have to read if the buffer is empty.  */
-              if (lex->first == 0 || lex->first >= lex->last)
-                {
-                  int r;
-
-                  if (lex->first >= lex->last)
-                    {
-                      lex->first = 0;
-                      lex->last = 0;
-                    }
-                  if (feof (lex->finput))
-                    return UEOF;
-                  r = fread (&lex->buffer[lex->last], 1,
-                             sizeof (lex->buffer) - lex->last,
-                             lex->finput);
-                  lex->last += r;
-                }
-
-              inbytesleft = lex->last - lex->first;
-              out_count = sizeof (lex->out_buffer) - lex->out_last;
-
-              if (inbytesleft == 0)
-                {
-                  /* We've tried to read and there is nothing left.  */
-                  return UEOF;
-                }
-
-              /* Remember the counts so we can tell afterwards how much
-                 iconv consumed and produced.  */
-              in_save = inbytesleft;
-              out_save = out_count;
-              inp = &lex->buffer[lex->first];
-              outp = (char *) &lex->out_buffer[lex->out_last];
-              ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
-                          &inbytesleft, &outp, &out_count);
-
-              /* If we haven't read any bytes, then look to see if we
-                 have read a BOM.  */
-              if (! lex->read_anything && out_save - out_count >= 2)
-                {
-                  unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
-                  if (uc == 0xfeff)
-                    {
-                      lex->byte_swap = 0;
-                      lex->out_first += 2;
-                    }
-                  else if (uc == 0xfffe)
-                    {
-                      lex->byte_swap = 1;
-                      lex->out_first += 2;
-                    }
-                  lex->read_anything = 1;
-                }
-
-              /* Swap each pair of freshly converted bytes in place.  */
-              if (lex->byte_swap)
-                {
-                  unsigned int i;
-                  for (i = 0; i < out_save - out_count; i += 2)
-                    {
-                      char t = lex->out_buffer[lex->out_last + i];
-                      lex->out_buffer[lex->out_last + i]
-                        = lex->out_buffer[lex->out_last + i + 1];
-                      lex->out_buffer[lex->out_last + i + 1] = t;
-                    }
-                }
-
-              lex->first += in_save - inbytesleft;
-              lex->out_last += out_save - out_count;
-
-              /* If we converted anything at all, move along.  */
-              if (out_count != out_save)
-                break;
-
-              if (ir == (size_t) -1)
-                {
-                  if (errno == EINVAL)
-                    {
-                      /* This is ok.  This means that the end of our buffer
-                         is in the middle of a character sequence.  We just
-                         move the valid part of the buffer to the beginning
-                         to force a read.  */
-                      memmove (&lex->buffer[0], &lex->buffer[lex->first],
-                               lex->last - lex->first);
-                      lex->last -= lex->first;
-                      lex->first = 0;
-                    }
-                  else
-                    {
-                      /* A more serious error.  The encoding name is
-                         not ours to bound, so limit how much of it is
-                         copied: 38 fixed characters plus at most 80
-                         of the name always fit in BUFFER.  */
-                      char buffer[128];
-                      sprintf (buffer,
-                               "Unrecognized character for encoding '%.80s'",
-                               lex->encoding);
-                      java_lex_error (buffer, 0);
-                      return UEOF;
-                    }
-                }
-            }
-        }
-
-      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
-        {
-          /* Don't have any data.  */
-          return UEOF;
-        }
-
-      /* Success.  */
-      result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
-      lex->out_first += 2;
-      return result;
-    }
-  else
-#endif /* HAVE_ICONV */
-    {
-      int c, c1, c2;
-      c = getc (lex->finput);
-
-      if (c == EOF)
-        return UEOF;
-      if (c < 128)
-        return (unicode_t) c;
-      else
-        {
-          /* 110xxxxx: lead byte of a 2-byte sequence.  */
-          if ((c & 0xe0) == 0xc0)
-            {
-              c1 = getc (lex->finput);
-              if ((c1 & 0xc0) == 0x80)
-                {
-                  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
-                  /* Check for valid 2-byte characters.  We explicitly
-                     allow \0 because this encoding is common in the
-                     Java world.  */
-                  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
-                    return r;
-                }
-            }
-          /* 1110xxxx: lead byte of a 3-byte sequence.  */
-          else if ((c & 0xf0) == 0xe0)
-            {
-              c1 = getc (lex->finput);
-              if ((c1 & 0xc0) == 0x80)
-                {
-                  c2 = getc (lex->finput);
-                  if ((c2 & 0xc0) == 0x80)
-                    {
-                      unicode_t r = (unicode_t)(((c & 0xf) << 12) +
-                                                (( c1 & 0x3f) << 6)
-                                                + (c2 & 0x3f));
-                      /* Check for valid 3-byte characters.
-                         Don't allow surrogate, \ufffe or \uffff.  */
-                      if (IN_RANGE (r, 0x800, 0xffff)
-                          && ! IN_RANGE (r, 0xd800, 0xdfff)
-                          && r != 0xfffe && r != 0xffff)
-                        return r;
-                    }
-                }
-            }
-
-          /* We simply don't support invalid characters.  We also
-             don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
-             cannot be valid Java characters.  */
-          java_lex_error ("malformed UTF-8 character", 0);
-        }
-    }
-
-  /* We only get here on error.  */
-  return UEOF;
-}
-
-/* Return the next character of LEX with `\uXXXX' escapes translated.
-   A character pushed back in LEX->unget_value is consumed before the
-   raw input.  *UNICODE_ESCAPE_P is set to 1 when the returned value
-   came from an escape and to 0 otherwise.  Returns UEOF if the input
-   ends inside an escape.  */
-static int
-java_read_unicode (java_lexer *lex, int *unicode_escape_p)
-{
-  int ch;
-  int shift;
-  unicode_t escaped = 0;
-
-  if (! lex->unget_value)
-    ch = java_read_char (lex);
-  else
-    {
-      ch = lex->unget_value;
-      lex->unget_value = 0;
-    }
-
-  *unicode_escape_p = 0;
-
-  /* Anything but a backslash ends the current run of backslashes.  */
-  if (ch != '\\')
-    {
-      lex->bs_count = 0;
-      return ch;
-    }
-
-  /* Only a backslash at an odd position in its run can start an
-     escape; the others are ordinary backslashes.  */
-  ++lex->bs_count;
-  if ((lex->bs_count) % 2 != 1)
-    return (unicode_t) '\\';
-
-  ch = java_read_char (lex);
-  if (ch != 'u')
-    {
-      /* Not an escape after all: keep the character for later.  */
-      lex->unget_value = ch;
-      return (unicode_t) '\\';
-    }
-
-  /* Recognize any number of `u's in \u.  */
-  do
-    ch = java_read_char (lex);
-  while (ch == 'u');
-
-  /* Fold up to four hex digits in, most significant first.  CH is
-     always one character ahead of the digit just folded in.  */
-  for (shift = 12; shift >= 0; shift -= 4)
-    {
-      if (ch == UEOF)
-        {
-          java_lex_error ("prematurely terminated \\u sequence", 0);
-          return UEOF;
-        }
-
-      if (! hex_p (ch))
-        {
-          java_lex_error ("non-hex digit in \\u sequence", 0);
-          break;
-        }
-
-      escaped |= (unicode_t)(hex_value (ch) << shift);
-      ch = java_read_char (lex);
-    }
-
-  /* Push the look-ahead character back unless it is end of input.  */
-  if (ch != UEOF)
-    lex->unget_value = ch;
-
-  lex->bs_count = 0;
-  *unicode_escape_p = 1;
-  return escaped;
-}
-
-/* Consume and return the next Unicode character (after Unicode
-   escape handling): peek at it, then step the current position past
-   it.  */
-
-static int
-java_get_unicode (void)
-{
-  const int ch = java_peek_unicode ();
-
-  java_next_unicode ();
-  return ch;
-}
-
-/* Return the next Unicode character (after Unicode escape handling)
-   without consuming it.  The character is cached in the lexer, along
-   with the column adjustment that java_next_unicode will apply when
-   the character is finally consumed.  A `\r', alone or followed by
-   `\n', is reported as a single `\n'.  */
-
-static int
-java_peek_unicode (void)
-{
-  java_lexer *lex = ctxp->lexer;
-  int from_escape;
-  int ch;
-
-  /* A character is already cached: hand it out again.  */
-  if (lex->avail_unicode)
-    return lex->next_unicode;
-
-  ch = java_read_unicode (lex, &from_escape);
-
-  if (ch == '\r')
-    {
-      /* Look one character ahead to fold \r\n into one terminator;
-         anything else (except end of input) is pushed back.  */
-      int ignored;
-      int after_cr = java_read_unicode (lex, &ignored);
-
-      if (after_cr != '\n' && after_cr != UEOF)
-        lex->unget_value = after_cr;
-      ch = '\n';
-    }
-
-  lex->next_unicode = ch;
-  lex->avail_unicode = 1;
-
-  if (ch == UEOF)
-    {
-      lex->next_columns = 0;
-      return ch;
-    }
-
-  /* Work out how far the column moves once CH is consumed.  */
-  if (from_escape)
-    /* An escape takes priority over the cases below.  */
-    lex->next_columns = 6;
-  else if (ch == '\n')
-    /* Moves the column back to 1.  */
-    lex->next_columns = 1 - lex->position.col;
-  else if (ch == '\t')
-    {
-      /* Advance to the next tab stop, eight columns apart.  */
-      int here = lex->position.col;
-      lex->next_columns = ((here + 7) & ~7) + 1 - here;
-    }
-  else
-    lex->next_columns = 1;
-
-  return ch;
-}
-
-/* Consume the character most recently returned by java_peek_unicode:
-   apply its column adjustment, start a new line if it was a newline,
-   and drop it from the cache.  Must only be called after
-   java_peek_unicode; the pair is equivalent to java_get_unicode.  */
-
-static void
-java_next_unicode (void)
-{
-  struct java_lexer *lexer = ctxp->lexer;
-
-  lexer->position.col += lexer->next_columns;
-
-  if (lexer->next_unicode != '\n')
-    {
-      lexer->avail_unicode = 0;
-      return;
-    }
-
-  /* A newline was consumed: bump the line and tell the compiler.  */
-  lexer->position.line += 1;
-#if !defined (JC1_LITE) && defined (USE_MAPPED_LOCATION)
-  input_location
-    = linemap_line_start (&line_table, lexer->position.line, 120);
-#elif !defined (JC1_LITE)
-  input_line = lexer->position.line;
-#endif
-  lexer->avail_unicode = 0;
-}
-
-#if 0
-/* The inverse of java_next_unicode.
- Not currently used, but could be if it would be cleaner or faster.
- java_peek_unicode == java_get_unicode + java_unget_unicode.
- java_get_unicode == java_peek_unicode + java_next_unicode.
-
- This function is compiled out by the surrounding `#if 0'. It marks
- the cached character as available again and undoes the column
- adjustment; it is a fatal internal error to call it while a
- character is still available. */
-static void java_unget_unicode ()
-{
- struct java_lexer *lex = ctxp->lexer;
- if (lex->avail_unicode)
- fatal_error ("internal error - bad unget");
- lex->avail_unicode = 1;
- /* NOTE(review): only the column is restored here; the line counter
- advanced by java_next_unicode after a newline is not -- confirm
- before enabling this code. */
- lex->position.col -= lex->next_columns;
-}
-#endif
-
-/* Skip the remainder of a C style comment.  C is the first character
-   following the opening '/' and '*'.  Returns once the closing
-   '*' '/' pair has been consumed, or after reporting an error if the
-   input ends first.  */
-static void
-java_parse_end_comment (int c)
-{
-  while (1)
-    {
-      if (c == UEOF)
-        {
-          java_lex_error ("Comment not terminated at end of input", 0);
-          return;
-        }
-
-      if (c == '*')
-        {
-          /* Peek only, so that a following '*' is examined again as
-             a potential start of the terminator.  */
-          int ahead = java_peek_unicode ();
-
-          if (ahead == UEOF)
-            {
-              java_lex_error ("Comment not terminated at end of input", 0);
-              return;
-            }
-          if (ahead == '/')
-            {
-              java_next_unicode ();
-              return;
-            }
-        }
-
-      c = java_get_unicode ();
-    }
-}
-
-/* Parse the documentation section. Keywords must be at the beginning
- of a documentation comment line (ignoring white space and any `*'
- character). Parsed keyword(s): @deprecated (matched in lower case).
-
- C is the first character to examine. On return ctxp->deprecated is
- 1 if the tag was found and 0 otherwise. The function returns once
- the closing `*' `/' has been consumed, or after reporting an error
- if the input ends first. */
-
-static void
-java_parse_doc_section (int c)
-{
- /* Nonzero when the character read just before C was a `*'; it is
- always assigned before it is tested. */
- int last_was_star;
-
- /* We reset this here, because only the most recent doc comment
- applies to the following declaration. */
- ctxp->deprecated = 0;
-
- /* We loop over all the lines of the comment. We'll eventually exit
- if we hit EOF prematurely, or when we see the comment
- terminator. */
- while (1)
- {
- /* These first steps need only be done if we're still looking
- for the deprecated tag. If we've already seen it, we might
- as well skip looking for it again. */
- if (! ctxp->deprecated)
- {
- /* Skip whitespace and '*'s. We must also check for the end
- of the comment here. */
- while (JAVA_WHITE_SPACE_P (c) || c == '*')
- {
- last_was_star = (c == '*');
- c = java_get_unicode ();
- if (last_was_star && c == '/')
- {
- /* We just saw the comment terminator. */
- return;
- }
- }
-
- if (c == UEOF)
- goto eof;
-
- if (c == '@')
- {
- const char *deprecated = "@deprecated";
- int i;
-
- /* On exit I indexes the first unmatched character of the
- tag, or its terminating NUL if the whole tag matched. */
- for (i = 0; deprecated[i]; ++i)
- {
- if (c != deprecated[i])
- break;
- /* We write the code in this way, with the
- update at the end, so that after the loop
- we're left with the next character in C. */
- c = java_get_unicode ();
- }
-
- if (c == UEOF)
- goto eof;
-
- /* @deprecated must be followed by a space or newline.
- We also allow a '*' in case it appears just before
- the end of a comment. In this position only we also
- must allow any Unicode space character. */
- if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
- {
- if (! deprecated[i])
- ctxp->deprecated = 1;
- }
- }
- }
-
- /* We've examined the relevant content from this line. Now we
- skip the remaining characters and start over with the next
- line. We also check for end of comment here. */
- while (c != '\n' && c != UEOF)
- {
- last_was_star = (c == '*');
- c = java_get_unicode ();
- if (last_was_star && c == '/')
- return;
- }
-
- if (c == UEOF)
- goto eof;
- /* We have to advance past the \n. */
- c = java_get_unicode ();
- if (c == UEOF)
- goto eof;
- }
-
- eof:
- java_lex_error ("Comment not terminated at end of input", 0);
-}
-
-/* Return nonzero if C may start a Java identifier.  Callers only
-   use this for C >= 128 because smaller values are handled inline,
-   but every value is handled here.  */
-static int
-java_start_char_p (unicode_t c)
-{
-  const char *const entry = type_table[c / 256];
-  unsigned long bits = (unsigned long) entry;
-
-  /* An entry with no bits outside LETTER_MASK is itself the flag
-     word for the whole page; any other entry points to a table that
-     is indexed by the low byte of C.  */
-  int attrs = ((bits & ~ LETTER_MASK) == 0
-               ? (int) bits
-               : (int) entry[c & 255]);
-
-  return attrs & LETTER_START;
-}
-
-/* Return nonzero if C may appear after the first character of a
-   Java identifier.  Callers only use this for C >= 128 because
-   smaller values are handled inline, but every value is handled
-   here.  */
-static int
-java_part_char_p (unicode_t c)
-{
-  const char *const entry = type_table[c / 256];
-  unsigned long bits = (unsigned long) entry;
-
-  /* An entry with no bits outside LETTER_MASK is itself the flag
-     word for the whole page; any other entry points to a table that
-     is indexed by the low byte of C.  */
-  int attrs = ((bits & ~ LETTER_MASK) == 0
-               ? (int) bits
-               : (int) entry[c & 255]);
-
-  return attrs & LETTER_PART;
-}
-
-/* Return nonzero if the character table marks C as whitespace.  */
-static int
-java_space_char_p (unicode_t c)
-{
-  const char *const entry = type_table[c / 256];
-  unsigned long bits = (unsigned long) entry;
-
-  /* An entry with no bits outside LETTER_MASK is itself the flag
-     word for the whole page; any other entry points to a table that
-     is indexed by the low byte of C.  */
-  int attrs = ((bits & ~ LETTER_MASK) == 0
-               ? (int) bits
-               : (int) entry[c & 255]);
-
-  return attrs & LETTER_SPACE;
-}
-
-/* Read the character(s) that follow a backslash and return the
-   value of the escape sequence they form.  Handles the one-letter
-   escapes and octal escapes of one to three digits.  For any other
-   character an error is reported and JAVA_CHAR_ERROR is returned.  */
-static int
-java_parse_escape_sequence (void)
-{
-  int c = java_get_unicode ();
-  int budget;
-  unicode_t value;
-
-  switch (c)
-    {
-    case 'b':  return (unicode_t)0x8;
-    case 't':  return (unicode_t)0x9;
-    case 'n':  return (unicode_t)0xa;
-    case 'f':  return (unicode_t)0xc;
-    case 'r':  return (unicode_t)0xd;
-    case '"':  return (unicode_t)0x22;
-    case '\'': return (unicode_t)0x27;
-    case '\\': return (unicode_t)0x5c;
-    default:
-      break;
-    }
-
-  if (c < '0' || c > '7')
-    {
-      java_lex_error ("Invalid character in escape sequence", -1);
-      return JAVA_CHAR_ERROR;
-    }
-
-  /* Octal escape.  According to the grammar, `\477' has a
-     well-defined meaning -- it is `\47' followed by `7' -- so a
-     leading digit above '3' allows only two digits in total.  */
-  budget = (c > '3') ? 2 : 3;
-  value = c - '0';
-
-  while (--budget > 0)
-    {
-      c = java_peek_unicode ();
-      if (! RANGE (c, '0', '7'))
-        break;
-      java_next_unicode ();
-      value = 8 * value + c - '0';
-    }
-
-  return value;
-}
-
-#ifndef JC1_LITE
-/* True if the real value X compares equal to dconst0. */
-#define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
-
-/* Subroutine of java_lex: converts floating-point literals to tree
- nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
- store the result. FFLAG indicates whether the literal was tagged
- with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
- is the column at which the literal starts, used as the position
- when an underflow error is reported. */
-
-static void java_perform_atof (YYSTYPE *, char *, int, int);
-
-static void
-java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
- int number_beginning)
-{
- REAL_VALUE_TYPE value;
- tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
-
- SET_REAL_VALUE_ATOF (value,
- REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
-
- /* An infinite or NaN result is reported as a range error and the
- value is replaced by zero. */
- if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
- {
- JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
- value = DCONST0;
- }
- else if (IS_ZERO (value))
- {
- /* We check to see if the value is really 0 or if we've found an
- underflow. We do this in the most primitive imaginable way. */
- int really_zero = 1;
- char *p = literal_token;
- if (*p == '-')
- ++p;
- /* Scan the mantissa only: stop at the exponent marker. Any
- character other than '0' or '.' means the literal was not
- written as zero. */
- while (*p && *p != 'e' && *p != 'E')
- {
- if (*p != '0' && *p != '.')
- {
- really_zero = 0;
- break;
- }
- ++p;
- }
- if (! really_zero)
- {
- /* Temporarily move the column back to the start of the
- literal while the error is reported. */
- int save_col = ctxp->lexer->position.col;
- ctxp->lexer->position.col = number_beginning;
- java_lex_error ("Floating point literal underflow", 0);
- ctxp->lexer->position.col = save_col;
- }
- }
-
- SET_LVAL_NODE (build_real (type, value));
-}
-#endif
-
-static int yylex (YYSTYPE *);
-
-static int
-#ifdef JC1_LITE
-yylex (YYSTYPE *java_lval)
-#else
-do_java_lex (YYSTYPE *java_lval)
-#endif
-{
- int c;
- char *string;
-
- /* Translation of the Unicode escape in the raw stream of Unicode
- characters. Takes care of line terminator. */
- step1:
- /* Skip white spaces: SP, TAB and FF or ULT. */
- for (;;)
- {
- c = java_peek_unicode ();
- if (c != '\n' && ! JAVA_WHITE_SPACE_P (c))
- break;
- java_next_unicode ();
- }
-
- /* Handle EOF here. */
- if (c == UEOF) /* Should probably do something here... */
- return 0;
-
-#ifndef JC1_LITE
-#ifdef USE_MAPPED_LOCATION
- LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table,
- ctxp->lexer->position.col);
-#else
- ctxp->lexer->token_start = ctxp->lexer->position;
-#endif
-#endif
-
- /* Numeric literals. */
- if (JAVA_ASCII_DIGIT (c) || (c == '.'))
- {
- /* This section of code is borrowed from gcc/c-lex.c. */
-#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
- int parts[TOTAL_PARTS];
- HOST_WIDE_INT high, low;
- /* End borrowed section. */
-
-#define MAX_TOKEN_LEN 256
- char literal_token [MAX_TOKEN_LEN + 1];
- int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
- int found_hex_digits = 0, found_non_octal_digits = -1;
- int i;
-#ifndef JC1_LITE
- int number_beginning = ctxp->lexer->position.col;
- tree value;
-#endif
-
- for (i = 0; i < TOTAL_PARTS; i++)
- parts [i] = 0;
-
- if (c == '0')
- {
- java_next_unicode ();
- c = java_peek_unicode ();
- if (c == 'x' || c == 'X')
- {
- radix = 16;
- java_next_unicode ();
- c = java_peek_unicode ();
- }
- else if (JAVA_ASCII_DIGIT (c))
- {
- literal_token [literal_index++] = '0';
- radix = 8;
- }
- else if (c == '.' || c == 'e' || c =='E')
- {
- literal_token [literal_index++] = '0';
- /* Handle C during floating-point parsing. */
- }
- else
- {
- /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
- switch (c)
- {
- case 'L': case 'l':
- java_next_unicode ();
- SET_LVAL_NODE (long_zero_node);
- return (INT_LIT_TK);
- case 'f': case 'F':
- java_next_unicode ();
- SET_LVAL_NODE (float_zero_node);
- return (FP_LIT_TK);
- case 'd': case 'D':
- java_next_unicode ();
- SET_LVAL_NODE (double_zero_node);
- return (FP_LIT_TK);
- default:
- SET_LVAL_NODE (integer_zero_node);
- return (INT_LIT_TK);
- }
- }
- }
-
- /* Terminate LITERAL_TOKEN in case we bail out on large tokens. */
- literal_token [MAX_TOKEN_LEN] = '\0';
-
- /* Parse the first part of the literal, until we find something
- which is not a number. */
- while ((radix == 16 ? JAVA_ASCII_HEXDIGIT (c) : JAVA_ASCII_DIGIT (c))
- && literal_index < MAX_TOKEN_LEN)
- {
- /* We store in a string (in case it turns out to be a FP) and in
- PARTS if we have to process a integer literal. */
- int numeric = hex_value (c);
- int count;
-
- /* Remember when we find a valid hexadecimal digit. */
- if (radix == 16)
- found_hex_digits = 1;
- /* Remember when we find an invalid octal digit. */
- else if (radix == 8 && numeric >= 8 && found_non_octal_digits < 0)
- found_non_octal_digits = literal_index;
-
- literal_token [literal_index++] = c;
- /* This section of code if borrowed from gcc/c-lex.c. */
- for (count = 0; count < TOTAL_PARTS; count++)
- {
- parts[count] *= radix;
- if (count)
- {
- parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
- parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
- }
- else
- parts[0] += numeric;
- }
- if (parts [TOTAL_PARTS-1] != 0)
- overflow = 1;
- /* End borrowed section. */
- java_next_unicode ();
- c = java_peek_unicode ();
- }
-
- /* If we have something from the FP char set but not a digit, parse
- a FP literal. */
- if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
- {
- /* stage==0: seen digits only
- * stage==1: seen '.'
- * stage==2: seen 'e' or 'E'.
- * stage==3: seen '+' or '-' after 'e' or 'E'.
- * stage==4: seen type suffix ('f'/'F'/'d'/'D')
- */
- int stage = 0;
- int seen_digit = (literal_index ? 1 : 0);
- int seen_exponent = 0;
- int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
- double unless specified. */
-
- /* It is ok if the radix is 8 because this just means we've
- seen a leading `0'. However, radix==16 is invalid. */
- if (radix == 16)
- java_lex_error ("Can't express non-decimal FP literal", 0);
- radix = 10;
-
- for (; literal_index < MAX_TOKEN_LEN;)
- {
- if (c == '.')
- {
- if (stage < 1)
- {
- stage = 1;
- literal_token [literal_index++ ] = c;
- java_next_unicode ();
- c = java_peek_unicode ();
- if (literal_index == 1 && !JAVA_ASCII_DIGIT (c))
- BUILD_OPERATOR (DOT_TK);
- }
- else
- java_lex_error ("Invalid character in FP literal", 0);
- }
-
- if ((c == 'e' || c == 'E') && literal_index < MAX_TOKEN_LEN)
- {
- if (stage < 2)
- {
- /* {E,e} must have seen at least a digit. */
- if (!seen_digit)
- java_lex_error
- ("Invalid FP literal, mantissa must have digit", 0);
- seen_digit = 0;
- seen_exponent = 1;
- stage = 2;
- literal_token [literal_index++] = c;
- java_next_unicode ();
- c = java_peek_unicode ();
- }
- else
- java_lex_error ("Invalid character in FP literal", 0);
- }
- if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
- {
- fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
- stage = 4; /* So we fall through. */
- }
-
- if ((c=='-' || c =='+') && stage == 2
- && literal_index < MAX_TOKEN_LEN)
- {
- stage = 3;
- literal_token [literal_index++] = c;
- java_next_unicode ();
- c = java_peek_unicode ();
- }
-
- if (((stage == 0 && JAVA_ASCII_FPCHAR (c))
- || (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.'))
- || (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c)))
- || (stage == 3 && JAVA_ASCII_DIGIT (c)))
- && literal_index < MAX_TOKEN_LEN)
- {
- if (JAVA_ASCII_DIGIT (c))
- seen_digit = 1;
- if (stage == 2)
- stage = 3;
- literal_token [literal_index++ ] = c;
- java_next_unicode ();
- c = java_peek_unicode ();
- }
- else if (literal_index < MAX_TOKEN_LEN)
- {
- if (stage == 4) /* Don't push back fF/dD. */
- java_next_unicode ();
-
- /* An exponent (if any) must have seen a digit. */
- if (seen_exponent && !seen_digit)
- java_lex_error
- ("Invalid FP literal, exponent must have digit", 0);
-
- literal_token [literal_index] = '\0';
-
-#ifndef JC1_LITE
- java_perform_atof (java_lval, literal_token,
- fflag, number_beginning);
-#endif
- return FP_LIT_TK;
- }
- }
- } /* JAVA_ASCII_FPCHAR (c) */
-
- /* Here we get back to converting the integral literal. */
- if (radix == 16 && ! found_hex_digits)
- java_lex_error
- ("0x must be followed by at least one hexadecimal digit", 0);
- else if (radix == 8 && found_non_octal_digits >= 0)
- {
- int back = literal_index - found_non_octal_digits;
- ctxp->lexer->position.col -= back;
- java_lex_error ("Octal literal contains digit out of range", 0);
- ctxp->lexer->position.col += back;
- }
- else if (c == 'L' || c == 'l')
- {
- java_next_unicode ();
- long_suffix = 1;
- }
-
- /* This section of code is borrowed from gcc/c-lex.c. */
- if (!overflow)
- {
- bytes = GET_TYPE_PRECISION (long_type_node);
- for (i = bytes; i < TOTAL_PARTS; i++)
- if (parts [i])
- {
- overflow = 1;
- break;
- }
- }
- high = low = 0;
- for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
- {
- high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
- / HOST_BITS_PER_CHAR)]
- << (i * HOST_BITS_PER_CHAR));
- low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
- }
- /* End borrowed section. */
-
-#ifndef JC1_LITE
- /* Range checking. */
- /* Temporarily set type to unsigned. */
- value = build_int_cst_wide (long_suffix
- ? unsigned_long_type_node
- : unsigned_int_type_node, low, high);
- SET_LVAL_NODE (value);
-
- /* For base 10 numbers, only values up to the highest value
- (plus one) can be written. For instance, only ints up to
- 2147483648 can be written. The special case of the largest
- negative value is handled elsewhere. For other bases, any
- number can be represented. */
- if (overflow || (radix == 10
- && tree_int_cst_lt (long_suffix
- ? decimal_long_max
- : decimal_int_max,
- value)))
- {
- if (long_suffix)
- JAVA_RANGE_ERROR ("Numeric overflow for 'long' literal");
- else
- JAVA_RANGE_ERROR ("Numeric overflow for 'int' literal");
- }
-
- /* Sign extend the value. */
- value = build_int_cst_wide_type (long_suffix ? long_type_node
- : int_type_node, low, high);
-
- if (radix != 10)
- {
- value = copy_node (value);
- JAVA_NOT_RADIX10_FLAG (value) = 1;
- }
-
- SET_LVAL_NODE (value);
-#endif
- return INT_LIT_TK;
- }
-
- /* We may have an ID here. */
- if (JAVA_START_CHAR_P (c))
- {
- int ascii_index = 0, all_ascii = 1;
-
- /* Keyword, boolean literal or null literal. */
- while (c != UEOF && JAVA_PART_CHAR_P (c))
- {
- java_unicode_2_utf8 (c);
- if (c >= 128)
- all_ascii = 0;
- java_next_unicode ();
- ascii_index++;
- c = java_peek_unicode ();
- }
-
- obstack_1grow (&temporary_obstack, '\0');
- string = obstack_finish (&temporary_obstack);
-
- /* If we have something all ascii, we consider a keyword, a boolean
- literal, a null literal or an all ASCII identifier. Otherwise,
- this is an identifier (possibly not respecting formation rule). */
- if (all_ascii)
- {
- const struct java_keyword *kw;
- if ((kw=java_keyword (string, ascii_index)))
- {
- switch (kw->token)
- {
- case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
- case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
- case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
- case PRIVATE_TK: case STRICT_TK:
- SET_MODIFIER_CTX (kw->token);
- return MODIFIER_TK;
- case FLOAT_TK:
- SET_LVAL_NODE (float_type_node);
- return FP_TK;
- case DOUBLE_TK:
- SET_LVAL_NODE (double_type_node);
- return FP_TK;
- case BOOLEAN_TK:
- SET_LVAL_NODE (boolean_type_node);
- return BOOLEAN_TK;
- case BYTE_TK:
- SET_LVAL_NODE (byte_type_node);
- return INTEGRAL_TK;
- case SHORT_TK:
- SET_LVAL_NODE (short_type_node);
- return INTEGRAL_TK;
- case INT_TK:
- SET_LVAL_NODE (int_type_node);
- return INTEGRAL_TK;
- case LONG_TK:
- SET_LVAL_NODE (long_type_node);
- return INTEGRAL_TK;
- case CHAR_TK:
- SET_LVAL_NODE (char_type_node);
- return INTEGRAL_TK;
-
- /* Keyword based literals. */
- case TRUE_TK:
- case FALSE_TK:
- SET_LVAL_NODE ((kw->token == TRUE_TK ?
- boolean_true_node : boolean_false_node));
- return BOOL_LIT_TK;
- case NULL_TK:
- SET_LVAL_NODE (null_pointer_node);
- return NULL_TK;
-
- case ASSERT_TK:
- if (flag_assert)
- {
- BUILD_OPERATOR (kw->token);
- return kw->token;
- }
- else
- break;
-
- /* For some keywords we want to retain information on the
- location where they were found. */
- case CASE_TK:
- case DEFAULT_TK:
- case SUPER_TK:
- case THIS_TK:
- case RETURN_TK:
- case BREAK_TK:
- case CONTINUE_TK:
- case TRY_TK:
- case CATCH_TK:
- case THROW_TK:
- case INSTANCEOF_TK:
- BUILD_OPERATOR (kw->token);
-
- default:
- return kw->token;
- }
- }
- }
-
- java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
- return ID_TK;
- }
-
- java_next_unicode ();
-
- /* Character literals. */
- if (c == '\'')
- {
- int char_lit;
-
- if ((c = java_get_unicode ()) == '\\')
- char_lit = java_parse_escape_sequence ();
- else
- {
- if (c == '\n' || c == '\'')
- java_lex_error ("Invalid character literal", 0);
- char_lit = c;
- }
-
- c = java_get_unicode ();
-
- if ((c == '\n') || (c == UEOF))
- java_lex_error ("Character literal not terminated at end of line", 0);
- if (c != '\'')
- java_lex_error ("Syntax error in character literal", 0);
-
- if (char_lit == JAVA_CHAR_ERROR)
- char_lit = 0; /* We silently convert it to zero. */
-
- SET_LVAL_NODE (build_int_cst (char_type_node, char_lit));
- return CHAR_LIT_TK;
- }
-
- /* String literals. */
- if (c == '"')
- {
- int no_error = 1;
- char *string;
-
- for (;;)
- {
- c = java_peek_unicode ();
- if (c == '\n' || c == UEOF) /* ULT. */
- {
- java_lex_error ("String not terminated at end of line", 0);
- break;
- }
- java_next_unicode ();
- if (c == '"')
- break;
- if (c == '\\')
- c = java_parse_escape_sequence ();
- if (c == JAVA_CHAR_ERROR)
- {
- no_error = 0;
- c = 0; /* We silently convert it to zero. */
- }
- java_unicode_2_utf8 (c);
- }
-
- obstack_1grow (&temporary_obstack, '\0');
- string = obstack_finish (&temporary_obstack);
-#ifndef JC1_LITE
- if (!no_error || (c != '"'))
- java_lval->node = error_mark_node; /* FIXME: Requires further
- testing. */
- else
- java_lval->node = build_string (strlen (string), string);
-#endif
- obstack_free (&temporary_obstack, string);
- return STRING_LIT_TK;
- }
-
- switch (c)
- {
- case '/':
- /* Check for comment. */
- switch (c = java_peek_unicode ())
- {
- case '/':
- java_next_unicode ();
- for (;;)
- {
- c = java_get_unicode ();
- if (c == UEOF)
- {
- /* It is ok to end a `//' comment with EOF, unless
- we're being pedantic. */
- if (pedantic)
- java_lex_error ("Comment not terminated at end of input",
- 0);
- return 0;
- }
- if (c == '\n') /* ULT */
- goto step1;
- }
- break;
-
- case '*':
- java_next_unicode ();
- if ((c = java_get_unicode ()) == '*')
- {
- c = java_get_unicode ();
- if (c == '/')
- {
- /* Empty documentation comment. We have to reset
- the deprecation marker as only the most recent
- doc comment applies. */
- ctxp->deprecated = 0;
- }
- else
- java_parse_doc_section (c);
- }
- else
- java_parse_end_comment ((c = java_get_unicode ()));
- goto step1;
- break;
-
- case '=':
- java_next_unicode ();
- BUILD_OPERATOR2 (DIV_ASSIGN_TK);
-
- default:
- BUILD_OPERATOR (DIV_TK);
- }
-
- case '(':
- BUILD_OPERATOR (OP_TK);
- case ')':
- return CP_TK;
- case '{':
-#ifndef JC1_LITE
- java_lval->operator.token = OCB_TK;
- java_lval->operator.location = BUILD_LOCATION();
-#ifdef USE_MAPPED_LOCATION
- if (ctxp->ccb_indent == 1)
- ctxp->first_ccb_indent1 = input_location;
-#else
- if (ctxp->ccb_indent == 1)
- ctxp->first_ccb_indent1 = input_line;
-#endif
-#endif
- ctxp->ccb_indent++;
- return OCB_TK;
- case '}':
- ctxp->ccb_indent--;
-#ifndef JC1_LITE
- java_lval->operator.token = CCB_TK;
- java_lval->operator.location = BUILD_LOCATION();
-#ifdef USE_MAPPED_LOCATION
- if (ctxp->ccb_indent == 1)
- ctxp->last_ccb_indent1 = input_location;
-#else
- if (ctxp->ccb_indent == 1)
- ctxp->last_ccb_indent1 = input_line;
-#endif
-#endif
- return CCB_TK;
- case '[':
- BUILD_OPERATOR (OSB_TK);
- case ']':
- return CSB_TK;
- case ';':
- return SC_TK;
- case ',':
- return C_TK;
- case '.':
- BUILD_OPERATOR (DOT_TK);
-
- /* Operators. */
- case '=':
- c = java_peek_unicode ();
- if (c == '=')
- {
- java_next_unicode ();
- BUILD_OPERATOR (EQ_TK);
- }
- else
- {
- /* Equals is used in two different locations. In the
- variable_declarator: rule, it has to be seen as '=' as opposed
- to being seen as an ordinary assignment operator in
- assignment_operators: rule. */
- BUILD_OPERATOR (ASSIGN_TK);
- }
-
- case '>':
- switch ((c = java_peek_unicode ()))
- {
- case '=':
- java_next_unicode ();
- BUILD_OPERATOR (GTE_TK);
- case '>':
- java_next_unicode ();
- switch ((c = java_peek_unicode ()))
- {
- case '>':
- java_next_unicode ();
- c = java_peek_unicode ();
- if (c == '=')
- {
- java_next_unicode ();
- BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
- }
- else
- {
- BUILD_OPERATOR (ZRS_TK);
- }
- case '=':
- java_next_unicode ();
- BUILD_OPERATOR2 (SRS_ASSIGN_TK);
- default:
- BUILD_OPERATOR (SRS_TK);
- }
- default:
- BUILD_OPERATOR (GT_TK);
- }
-
- case '<':
- switch ((c = java_peek_unicode ()))
- {
- case '=':
- java_next_unicode ();
- BUILD_OPERATOR (LTE_TK);
- case '<':
- java_next_unicode ();
- if ((c = java_peek_unicode ()) == '=')
- {
- java_next_unicode ();
- BUILD_OPERATOR2 (LS_ASSIGN_TK);
- }
- else
- {
- BUILD_OPERATOR (LS_TK);
- }
- default:
- BUILD_OPERATOR (LT_TK);
- }
-
- case '&':
- switch ((c = java_peek_unicode ()))
- {
- case '&':
- java_next_unicode ();
- BUILD_OPERATOR (BOOL_AND_TK);
- case '=':
- java_next_unicode ();
- BUILD_OPERATOR2 (AND_ASSIGN_TK);
- default:
- BUILD_OPERATOR (AND_TK);
- }
-
- case '|':
- switch ((c = java_peek_unicode ()))
- {
- case '|':
- java_next_unicode ();
- BUILD_OPERATOR (BOOL_OR_TK);
- case '=':
- java_next_unicode ();
- BUILD_OPERATOR2 (OR_ASSIGN_TK);
- default:
- BUILD_OPERATOR (OR_TK);
- }
-
- case '+':
- switch ((c = java_peek_unicode ()))
- {
- case '+':
- java_next_unicode ();
- BUILD_OPERATOR (INCR_TK);
- case '=':
- java_next_unicode ();
- BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
- default:
- BUILD_OPERATOR (PLUS_TK);
- }
-
- case '-':
- switch ((c = java_peek_unicode ()))
- {
- case '-':
- java_next_unicode ();
- BUILD_OPERATOR (DECR_TK);
- case '=':
- java_next_unicode ();
- BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
- default:
- BUILD_OPERATOR (MINUS_TK);
- }
-
- case '*':
- if ((c = java_peek_unicode ()) == '=')
- {
- java_next_unicode ();
- BUILD_OPERATOR2 (MULT_ASSIGN_TK);
- }
- else
- {
- BUILD_OPERATOR (MULT_TK);
- }
-
- case '^':
- if ((c = java_peek_unicode ()) == '=')
- {
- java_next_unicode ();
- BUILD_OPERATOR2 (XOR_ASSIGN_TK);
- }
- else
- {
- BUILD_OPERATOR (XOR_TK);
- }
-
- case '%':
- if ((c = java_peek_unicode ()) == '=')
- {
- java_next_unicode ();
- BUILD_OPERATOR2 (REM_ASSIGN_TK);
- }
- else
- {
- BUILD_OPERATOR (REM_TK);
- }
-
- case '!':
- if ((c = java_peek_unicode()) == '=')
- {
- java_next_unicode ();
- BUILD_OPERATOR (NEQ_TK);
- }
- else
- {
- BUILD_OPERATOR (NEG_TK);
- }
-
- case '?':
- BUILD_OPERATOR (REL_QM_TK);
- case ':':
- BUILD_OPERATOR (REL_CL_TK);
- case '~':
- BUILD_OPERATOR (NOT_TK);
- }
-
- if (c == 0x1a) /* CTRL-Z. */
- {
- if ((c = java_peek_unicode ()) == UEOF)
- return 0; /* Ok here. */
- }
-
- /* Everything else is an invalid character in the input. */
- {
- char lex_error_buffer [128];
- sprintf (lex_error_buffer, "Invalid character '%s' in input",
- java_sprint_unicode (c));
- java_lex_error (lex_error_buffer, -1);
- }
- return 0;
-}
-
-#ifndef JC1_LITE
-
-/* The exported interface to the lexer.  Runs do_java_lex on JAVA_LVAL
-   with the TV_LEX timer pushed for the duration of the call and
-   returns whatever token code do_java_lex produced.  */
-static int
-java_lex (YYSTYPE *java_lval)
-{
-  int token;
-
-  timevar_push (TV_LEX);
-  token = do_java_lex (java_lval);
-  timevar_pop (TV_LEX);
-
-  return token;
-}
-
-/* Called by the parser to decide whether a numeric overflow error is
-   due.  Only the particular case of the largest negative value is
-   handled here, and the parser only calls this when that value is
-   not preceded by `-'.
-
-   An error is reported when VALUE is an INTEGER_CST that is not
-   flagged as non-radix-10 and whose sign is negative.  The message
-   names 'long' when VALUE's type is long_type_node and 'int'
-   otherwise.  */
-static void
-error_if_numeric_overflow (tree value)
-{
-  const char *msg;
-
-  if (TREE_CODE (value) != INTEGER_CST)
-    return;
-  if (JAVA_NOT_RADIX10_FLAG (value))
-    return;
-  if (tree_int_cst_sgn (value) >= 0)
-    return;
-
-  if (TREE_TYPE (value) == long_type_node)
-    msg = "Numeric overflow for 'long' literal";
-  else
-    msg = "Numeric overflow for 'int' literal";
-
-  java_lex_error (msg, 0);
-}
-
-#endif /* JC1_LITE */
-
-/* Append the UTF-8 encoding of UNICODE to temporary_obstack.
-   Values 0x01-0x7f are written as a single byte, values 0x80-0x7ff
-   as two bytes, and every other non-zero value as three bytes.
-   Zero is written in the two-byte form (0xc0 0x80) rather than as a
-   single zero byte -- presumably so the buffer can still be treated
-   as a NUL-terminated string.  */
-static void
-java_unicode_2_utf8 (unicode_t unicode)
-{
-  unsigned char lead, middle, tail;
-
-  /* Plain 7-bit characters go out unchanged.  */
-  if (RANGE (unicode, 0x01, 0x7f))
-    {
-      obstack_1grow (&temporary_obstack, (char)unicode);
-      return;
-    }
-
-  /* Both multi-byte forms end with the low six bits.  */
-  tail = (unsigned char)(0x80 | (unicode & 0x3f));
-
-  if (unicode == 0 || RANGE (unicode, 0x80, 0x7ff))
-    {
-      lead = (unsigned char)(0xc0 | ((unicode & 0x7c0) >> 6));
-      obstack_1grow (&temporary_obstack, lead);
-      obstack_1grow (&temporary_obstack, tail);
-      return;
-    }
-
-  /* Range 0x800-0xffff. */
-  lead = (unsigned char)(0xe0 | ((unicode & 0xf000) >> 12));
-  middle = (unsigned char)(0x80 | ((unicode & 0x0fc0) >> 6));
-  obstack_1grow (&temporary_obstack, lead);
-  obstack_1grow (&temporary_obstack, middle);
-  obstack_1grow (&temporary_obstack, tail);
-}
-
-#ifndef JC1_LITE
-/* Wrap NODE with build_expr_wfl and return the wrapper.  With
-   USE_MAPPED_LOCATION the wrapper is positioned at input_location;
-   otherwise it takes ctxp->filename and the line/column recorded in
-   the lexer's token_start.  The wrapper's type is cleared before it
-   is returned.  */
-static tree
-build_wfl_node (tree node)
-{
-  tree wfl;
-
-#ifdef USE_MAPPED_LOCATION
-  wfl = build_expr_wfl (node, input_location);
-#else
-  wfl = build_expr_wfl (node, ctxp->filename,
-                        ctxp->lexer->token_start.line,
-                        ctxp->lexer->token_start.col);
-#endif
-
-  /* A null type prevents java_complete_lhs from short-circuiting the
-     node (if constant). */
-  TREE_TYPE (wfl) = NULL_TREE;
-
-  return wfl;
-}
-#endif
-
-/* Report the lexer error MSG through java_error.
-
-   The column reported is the lexer's current position column plus
-   FORWARD times ctxp->lexer->next_columns, so 0 reports at the
-   current column and -1 (used in this file for an invalid input
-   character) steps back by next_columns.  The location that was in
-   effect on entry (input_location, or the lexer's token_start when
-   mapped locations are not in use) is saved first and restored
-   before returning.
-
-   Under JC1_LITE this function does nothing, which is why both
-   parameters are marked ATTRIBUTE_UNUSED.  */
-static void
-java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
-{
-#ifndef JC1_LITE
-  int col = (ctxp->lexer->position.col
-             + forward * ctxp->lexer->next_columns);
-
-  /* Temporarily move the current location to the error position.
-     USE_MAPPED_LOCATION is tested with #ifdef, as everywhere else in
-     this file.  */
-#ifdef USE_MAPPED_LOCATION
-  source_location save_location = input_location;
-  LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table, col);
-#else
-  java_lc save = ctxp->lexer->token_start;
-  ctxp->lexer->token_start.line = ctxp->lexer->position.line;
-  ctxp->lexer->token_start.col = col;
-#endif
-
-  /* Might be caught in the middle of some error report. */
-  ctxp->java_error_flag = 0;
-  java_error (NULL);
-  java_error (msg);
-
-  /* Put the saved location back.  */
-#ifdef USE_MAPPED_LOCATION
-  input_location = save_location;
-#else
-  ctxp->lexer->token_start = save;
-#endif
-#endif
-}
-
-#ifndef JC1_LITE
-/* Tell whether C ends a line.  Returns 1 for '\n' and for '\r', and 0
-   for any other character.  For '\r' one further character is read
-   from FP: a following '\n' is consumed as part of the same line
-   terminator, any other character is pushed back onto FP, and EOF is
-   left alone.  */
-static int
-java_is_eol (FILE *fp, int c)
-{
-  if (c == '\n')
-    return 1;
-
-  if (c == '\r')
-    {
-      int lookahead = getc (fp);
-
-      /* Only a '\n' right after the '\r' belongs to this terminator.  */
-      if (lookahead != '\n' && lookahead != EOF)
-        ungetc (lookahead, fp);
-      return 1;
-    }
-
-  return 0;
-}
-#endif
-
-/* Build a diagnostic excerpt for position (LINE, COL) of FILENAME:
-   the text of the source line, a newline, then padding followed by a
-   '^' marker.  The string is built on temporary_obstack and the
-   finished object is returned.
-
-   First line of the file is line 1, first column is 1.
-   COL == -1 means, at the CR/LF in LINE.
-   COL == -2 means, at the first non space char in LINE.
-
-   If the file ends before LINE is reached, a placeholder message
-   takes the place of the source line.  A file that cannot be opened
-   is reported through fatal_error.  Under JC1_LITE this function
-   always returns 0.  */
-char *
-java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
-                   int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
-{
-#ifdef JC1_LITE
-  return 0;
-#else
-  /* Dumb implementation. Doesn't try to cache or optimize things. */
-
-  FILE *fp;
-  int c, ccol, cline = 1;
-  int current_line_col = 0;
-  int first_non_space = 0;
-
-  if (!(fp = fopen (filename, "r")))
-    fatal_error ("can't open %s: %m", filename);
-
-  /* Skip forward to the requested line. */
-  while (cline != line)
-    {
-      c = getc (fp);
-      if (c == EOF)
-        {
-          static const char msg[] = "<<file too short - unexpected EOF>>";
-          obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
-          goto have_line;
-        }
-      if (java_is_eol (fp, c))
-        cline++;
-    }
-
-  /* Gather the chars of the current line in a buffer. */
-  for (;;)
-    {
-      c = getc (fp);
-      if (c < 0 || java_is_eol (fp, c))
-        break;
-      if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
-        first_non_space = current_line_col;
-      obstack_1grow (&temporary_obstack, c);
-      current_line_col++;
-    }
- have_line:
-
-  obstack_1grow (&temporary_obstack, '\n');
-
-  if (col == -1)
-    {
-      col = current_line_col;
-      first_non_space = 0;
-    }
-  else if (col == -2)
-    col = first_non_space;
-  else
-    first_non_space = 0;
-
-  /* Place the '^' at the right position. */
-  for (col += 2, ccol = 0; ccol < col; ccol++)
-    {
-      /* Growing the obstack may move the object under construction,
-         so its base address is fetched afresh on every iteration
-         instead of being cached across obstack_1grow calls.  */
-      const char *base = obstack_base (&temporary_obstack);
-      /* Compute \t when reaching first_non_space. */
-      char pad = (first_non_space
-                  ? (base [ccol] == '\t' ? '\t' : ' ') : ' ');
-      obstack_1grow (&temporary_obstack, pad);
-    }
-  obstack_grow0 (&temporary_obstack, "^", 1);
-
-  fclose (fp);
-  return obstack_finish (&temporary_obstack);
-#endif
-}
-
-#ifndef JC1_LITE
-/* Compare the LENGTH bytes of UTF-8 text at STR with the
-   NUL-terminated string NAME.  Characters are taken from STR one at
-   a time with UTF8_GET and matched against successive chars of NAME.
-   At the first mismatch the difference between the two is returned.
-   If all of NAME matched, the result is 0 when STR was consumed
-   exactly and 1 when STR still has input left.  */
-static int
-utf8_cmp (const unsigned char *str, int length, const char *name)
-{
-  const unsigned char *end = str + length;
-  const char *p;
-
-  for (p = name; *p != '\0'; p++)
-    {
-      int ch = UTF8_GET (str, end);
-      int diff = ch - *p;
-
-      if (diff != 0)
-        return diff;
-    }
-
-  if (str == end)
-    return 0;
-  return 1;
-}
-
-/* A sorted list of all C++ keywords, together with the
-   underscore-prefixed extension spellings (such as __asm__ and
-   __typeof__) listed at the top.  cxx_keyword_p searches this table
-   by repeated bisection using utf8_cmp, so new entries must be
-   inserted at the position that keeps the table in ascending
-   order.  */
-
-static const char *const cxx_keywords[] =
-{
- "_Complex",
- "__alignof",
- "__alignof__",
- "__asm",
- "__asm__",
- "__attribute",
- "__attribute__",
- "__builtin_va_arg",
- "__complex",
- "__complex__",
- "__const",
- "__const__",
- "__extension__",
- "__imag",
- "__imag__",
- "__inline",
- "__inline__",
- "__label__",
- "__null",
- "__real",
- "__real__",
- "__restrict",
- "__restrict__",
- "__signed",
- "__signed__",
- "__typeof",
- "__typeof__",
- "__volatile",
- "__volatile__",
- "and",
- "and_eq",
- "asm",
- "auto",
- "bitand",
- "bitor",
- "bool",
- "break",
- "case",
- "catch",
- "char",
- "class",
- "compl",
- "const",
- "const_cast",
- "continue",
- "default",
- "delete",
- "do",
- "double",
- "dynamic_cast",
- "else",
- "enum",
- "explicit",
- "export",
- "extern",
- "false",
- "float",
- "for",
- "friend",
- "goto",
- "if",
- "inline",
- "int",
- "long",
- "mutable",
- "namespace",
- "new",
- "not",
- "not_eq",
- "operator",
- "or",
- "or_eq",
- "private",
- "protected",
- "public",
- "register",
- "reinterpret_cast",
- "return",
- "short",
- "signed",
- "sizeof",
- "static",
- "static_cast",
- "struct",
- "switch",
- "template",
- "this",
- "throw",
- "true",
- "try",
- "typedef",
- "typeid",
- "typename",
- "typeof",
- "union",
- "unsigned",
- "using",
- "virtual",
- "void",
- "volatile",
- "wchar_t",
- "while",
- "xor",
- "xor_eq"
-};
-
-/* Return 1 if the LENGTH bytes at NAME spell an entry of
-   cxx_keywords, optionally followed by nothing but `$' characters;
-   return 0 otherwise.  The table is searched by bisection over the
-   half-open index range [lo, hi).  */
-
-int
-cxx_keyword_p (const char *name, int length)
-{
-  int lo = 0;
-  int hi = ARRAY_SIZE (cxx_keywords);
-  int prev = -1;
-  int probe = (lo + hi) / 2;
-
-  /* Stop once the midpoint no longer moves.  */
-  while (probe != prev)
-    {
-      const char *kw = cxx_keywords[probe];
-      int kw_len = strlen (kw);
-      int cmp_len = length < kw_len ? length : kw_len;
-      int order = utf8_cmp ((const unsigned char *) name, cmp_len, kw);
-
-      if (order == 0)
-        {
-          /* We've found a match if all the remaining characters are `$'. */
-          int k = cmp_len;
-
-          while (k < length && name[k] == '$')
-            k++;
-          if (k == length)
-            return 1;
-
-          /* Otherwise NAME merely starts with this keyword; carry on
-             as if it sorted after it.  */
-          order = 1;
-        }
-
-      if (order < 0)
-        hi = probe;
-      else
-        lo = probe;
-
-      prev = probe;
-      probe = (lo + hi) / 2;
-    }
-
-  return 0;
-}
-#endif /* JC1_LITE */