diff options
Diffstat (limited to 'gcc/java/lex.c')
-rw-r--r-- | gcc/java/lex.c | 2073 |
1 files changed, 0 insertions, 2073 deletions
diff --git a/gcc/java/lex.c b/gcc/java/lex.c deleted file mode 100644 index 730c1447fbd..00000000000 --- a/gcc/java/lex.c +++ /dev/null @@ -1,2073 +0,0 @@ -/* Language lexer for the GNU compiler for the Java(TM) language. - Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 - Free Software Foundation, Inc. - Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com) - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to -the Free Software Foundation, 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. - -Java and all Java-based marks are trademarks or registered trademarks -of Sun Microsystems, Inc. in the United States and other countries. -The Free Software Foundation is independent of Sun Microsystems, Inc. */ - -/* It defines java_lex (yylex) that reads a Java ASCII source file - possibly containing Unicode escape sequence or utf8 encoded - characters and returns a token for everything found but comments, - white spaces and line terminators. When necessary, it also fills - the java_lval (yylval) union. It's implemented to be called by a - re-entrant parser generated by Bison. - - The lexical analysis conforms to the Java grammar described in "The - Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele. - Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */ - -#include "keyword.h" -#include "flags.h" -#include "chartables.h" -#ifndef JC1_LITE -#include "timevar.h" -#endif - -/* Function declarations. */ -static char *java_sprint_unicode (int); -static void java_unicode_2_utf8 (unicode_t); -static void java_lex_error (const char *, int); -#ifndef JC1_LITE -static int do_java_lex (YYSTYPE *); -static int java_lex (YYSTYPE *); -static int java_is_eol (FILE *, int); -static tree build_wfl_node (tree); -#endif -static int java_parse_escape_sequence (void); -static int java_start_char_p (unicode_t); -static int java_part_char_p (unicode_t); -static int java_space_char_p (unicode_t); -static void java_parse_doc_section (int); -static void java_parse_end_comment (int); -static int java_read_char (java_lexer *); -static int java_get_unicode (void); -static int java_peek_unicode (void); -static void java_next_unicode (void); -static int java_read_unicode (java_lexer *, int *); -#ifndef JC1_LITE -static int utf8_cmp (const unsigned char *, int, const char *); -#endif - -java_lexer *java_new_lexer (FILE *, const char *); -#ifndef JC1_LITE -static void error_if_numeric_overflow (tree); -#endif - -#ifdef HAVE_ICONV -/* This is nonzero if we have initialized `need_byteswap'. */ -static int byteswap_init = 0; - -/* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in - big-endian order -- not native endian order. We handle this by - doing a conversion once at startup and seeing what happens. This - flag holds the results of this determination. */ -static int need_byteswap = 0; -#endif - -void -java_init_lex (FILE *finput, const char *encoding) -{ -#ifndef JC1_LITE - int java_lang_imported = 0; - - if (!java_lang_id) - java_lang_id = get_identifier ("java.lang"); - if (!inst_id) - inst_id = get_identifier ("inst$"); - if (!wpv_id) - wpv_id = get_identifier ("write_parm_value$"); - - if (!java_lang_imported) - { - tree node = build_tree_list (build_unknown_wfl (java_lang_id), - NULL_TREE); - read_import_dir (TREE_PURPOSE (node)); - TREE_CHAIN (node) = ctxp->import_demand_list; - ctxp->import_demand_list = node; - java_lang_imported = 1; - } - - if (!wfl_operator) - { -#ifndef JC1_LITE -#ifdef USE_MAPPED_LOCATION - wfl_operator = build_expr_wfl (NULL_TREE, input_location); -#else - wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0); -#endif -#endif - } - if (!label_id) - label_id = get_identifier ("$L"); - if (!wfl_append) - wfl_append = build_unknown_wfl (get_identifier ("append")); - if (!wfl_string_buffer) - wfl_string_buffer = - build_unknown_wfl (get_identifier (flag_emit_class_files - ? "java.lang.StringBuffer" - : "gnu.gcj.runtime.StringBuffer")); - if (!wfl_to_string) - wfl_to_string = build_unknown_wfl (get_identifier ("toString")); - - CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) = - CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE; - - memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx)); - ctxp->current_parsed_class = NULL; - ctxp->package = NULL_TREE; -#endif - -#ifndef JC1_LITE - ctxp->save_location = input_location; -#endif - ctxp->java_error_flag = 0; - ctxp->lexer = java_new_lexer (finput, encoding); -} - -static char * -java_sprint_unicode (int c) -{ - static char buffer [10]; - if (c < ' ' || c >= 127) - sprintf (buffer, "\\u%04x", c); - else - { - buffer [0] = c; - buffer [1] = '\0'; - } - return buffer; -} - -/* Create a new lexer object. */ - -java_lexer * -java_new_lexer (FILE *finput, const char *encoding) -{ - java_lexer *lex = XNEW (java_lexer); - int enc_error = 0; - - lex->finput = finput; - lex->bs_count = 0; - lex->unget_value = 0; - lex->next_unicode = 0; - lex->avail_unicode = 0; - lex->next_columns = 1; - lex->encoding = encoding; - lex->position.line = 1; - lex->position.col = 1; -#ifndef JC1_LITE -#ifdef USE_MAPPED_LOCATION - input_location - = linemap_line_start (&line_table, 1, 120); -#else - input_line = 1; -#endif -#endif - -#ifdef HAVE_ICONV - lex->handle = iconv_open ("UCS-2", encoding); - if (lex->handle != (iconv_t) -1) - { - lex->first = -1; - lex->last = -1; - lex->out_first = -1; - lex->out_last = -1; - lex->read_anything = 0; - lex->use_fallback = 0; - - /* Work around broken iconv() implementations by doing checking at - runtime. We assume that if the UTF-8 => UCS-2 encoder is broken, - then all UCS-2 encoders will be broken. Perhaps not a valid - assumption. */ - if (! byteswap_init) - { - iconv_t handle; - - byteswap_init = 1; - - handle = iconv_open ("UCS-2", "UTF-8"); - if (handle != (iconv_t) -1) - { - unicode_t result; - unsigned char in[3]; - char *inp, *outp; - size_t inc, outc, r; - - /* This is the UTF-8 encoding of \ufeff. */ - in[0] = 0xef; - in[1] = 0xbb; - in[2] = 0xbf; - - inp = (char *) in; - inc = 3; - outp = (char *) &result; - outc = 2; - - r = iconv (handle, (ICONV_CONST char **) &inp, &inc, - &outp, &outc); - iconv_close (handle); - /* Conversion must be complete for us to use the result. */ - if (r != (size_t) -1 && inc == 0 && outc == 0) - need_byteswap = (result != 0xfeff); - } - } - - lex->byte_swap = need_byteswap; - } - else -#endif /* HAVE_ICONV */ - { - /* If iconv failed, use the internal decoder if the default - encoding was requested. This code is used on platforms where - iconv exists but is insufficient for our needs. For - instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2. - - On Solaris the default encoding, as returned by nl_langinfo(), - is `646' (aka ASCII), but the Solaris iconv_open() doesn't - understand that. We work around that by pretending - `646' to be the same as UTF-8. */ - if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646")) - enc_error = 1; -#ifdef HAVE_ICONV - else - { - lex->use_fallback = 1; - lex->encoding = "UTF-8"; - } -#endif /* HAVE_ICONV */ - } - - if (enc_error) - fatal_error ("unknown encoding: %qs\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n%<--encoding=UTF-8%> option", encoding); - - return lex; -} - -void -java_destroy_lexer (java_lexer *lex) -{ -#ifdef HAVE_ICONV - if (! lex->use_fallback) - iconv_close (lex->handle); -#endif - free (lex); -} - -static int -java_read_char (java_lexer *lex) -{ -#ifdef HAVE_ICONV - if (! lex->use_fallback) - { - size_t ir, inbytesleft, in_save, out_count, out_save; - char *inp, *outp; - unicode_t result; - - /* If there is data which has already been converted, use it. */ - if (lex->out_first == -1 || lex->out_first >= lex->out_last) - { - lex->out_first = 0; - lex->out_last = 0; - - while (1) - { - /* See if we need to read more data. If FIRST == 0 then - the previous conversion attempt ended in the middle of - a character at the end of the buffer. Otherwise we - only have to read if the buffer is empty. */ - if (lex->first == 0 || lex->first >= lex->last) - { - int r; - - if (lex->first >= lex->last) - { - lex->first = 0; - lex->last = 0; - } - if (feof (lex->finput)) - return UEOF; - r = fread (&lex->buffer[lex->last], 1, - sizeof (lex->buffer) - lex->last, - lex->finput); - lex->last += r; - } - - inbytesleft = lex->last - lex->first; - out_count = sizeof (lex->out_buffer) - lex->out_last; - - if (inbytesleft == 0) - { - /* We've tried to read and there is nothing left. */ - return UEOF; - } - - in_save = inbytesleft; - out_save = out_count; - inp = &lex->buffer[lex->first]; - outp = (char *) &lex->out_buffer[lex->out_last]; - ir = iconv (lex->handle, (ICONV_CONST char **) &inp, - &inbytesleft, &outp, &out_count); - - /* If we haven't read any bytes, then look to see if we - have read a BOM. */ - if (! lex->read_anything && out_save - out_count >= 2) - { - unicode_t uc = * (unicode_t *) &lex->out_buffer[0]; - if (uc == 0xfeff) - { - lex->byte_swap = 0; - lex->out_first += 2; - } - else if (uc == 0xfffe) - { - lex->byte_swap = 1; - lex->out_first += 2; - } - lex->read_anything = 1; - } - - if (lex->byte_swap) - { - unsigned int i; - for (i = 0; i < out_save - out_count; i += 2) - { - char t = lex->out_buffer[lex->out_last + i]; - lex->out_buffer[lex->out_last + i] - = lex->out_buffer[lex->out_last + i + 1]; - lex->out_buffer[lex->out_last + i + 1] = t; - } - } - - lex->first += in_save - inbytesleft; - lex->out_last += out_save - out_count; - - /* If we converted anything at all, move along. */ - if (out_count != out_save) - break; - - if (ir == (size_t) -1) - { - if (errno == EINVAL) - { - /* This is ok. This means that the end of our buffer - is in the middle of a character sequence. We just - move the valid part of the buffer to the beginning - to force a read. */ - memmove (&lex->buffer[0], &lex->buffer[lex->first], - lex->last - lex->first); - lex->last -= lex->first; - lex->first = 0; - } - else - { - /* A more serious error. */ - char buffer[128]; - sprintf (buffer, - "Unrecognized character for encoding '%s'", - lex->encoding); - java_lex_error (buffer, 0); - return UEOF; - } - } - } - } - - if (lex->out_first == -1 || lex->out_first >= lex->out_last) - { - /* Don't have any data. */ - return UEOF; - } - - /* Success. */ - result = * ((unicode_t *) &lex->out_buffer[lex->out_first]); - lex->out_first += 2; - return result; - } - else -#endif /* HAVE_ICONV */ - { - int c, c1, c2; - c = getc (lex->finput); - - if (c == EOF) - return UEOF; - if (c < 128) - return (unicode_t) c; - else - { - if ((c & 0xe0) == 0xc0) - { - c1 = getc (lex->finput); - if ((c1 & 0xc0) == 0x80) - { - unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f)); - /* Check for valid 2-byte characters. We explicitly - allow \0 because this encoding is common in the - Java world. */ - if (r == 0 || (r >= 0x80 && r <= 0x7ff)) - return r; - } - } - else if ((c & 0xf0) == 0xe0) - { - c1 = getc (lex->finput); - if ((c1 & 0xc0) == 0x80) - { - c2 = getc (lex->finput); - if ((c2 & 0xc0) == 0x80) - { - unicode_t r = (unicode_t)(((c & 0xf) << 12) + - (( c1 & 0x3f) << 6) - + (c2 & 0x3f)); - /* Check for valid 3-byte characters. - Don't allow surrogate, \ufffe or \uffff. */ - if (IN_RANGE (r, 0x800, 0xffff) - && ! IN_RANGE (r, 0xd800, 0xdfff) - && r != 0xfffe && r != 0xffff) - return r; - } - } - } - - /* We simply don't support invalid characters. We also - don't support 4-, 5-, or 6-byte UTF-8 sequences, as these - cannot be valid Java characters. */ - java_lex_error ("malformed UTF-8 character", 0); - } - } - - /* We only get here on error. */ - return UEOF; -} - -static int -java_read_unicode (java_lexer *lex, int *unicode_escape_p) -{ - int c; - - if (lex->unget_value) - { - c = lex->unget_value; - lex->unget_value = 0; - } - else - c = java_read_char (lex); - - *unicode_escape_p = 0; - - if (c != '\\') - { - lex->bs_count = 0; - return c; - } - - ++lex->bs_count; - if ((lex->bs_count) % 2 == 1) - { - /* Odd number of \ seen. */ - c = java_read_char (lex); - if (c == 'u') - { - unicode_t unicode = 0; - int shift = 12; - - /* Recognize any number of `u's in \u. */ - while ((c = java_read_char (lex)) == 'u') - ; - - shift = 12; - do - { - if (c == UEOF) - { - java_lex_error ("prematurely terminated \\u sequence", 0); - return UEOF; - } - - if (hex_p (c)) - unicode |= (unicode_t)(hex_value (c) << shift); - else - { - java_lex_error ("non-hex digit in \\u sequence", 0); - break; - } - - c = java_read_char (lex); - shift -= 4; - } - while (shift >= 0); - - if (c != UEOF) - lex->unget_value = c; - - lex->bs_count = 0; - *unicode_escape_p = 1; - return unicode; - } - lex->unget_value = c; - } - return (unicode_t) '\\'; -} - -/* Get the next Unicode character (post-Unicode-escape-handling). - Move the current position to just after returned character. */ - -static int -java_get_unicode (void) -{ - int next = java_peek_unicode (); - java_next_unicode (); - return next; -} - -/* Return the next Unicode character (post-Unicode-escape-handling). - Do not move the current position, which remains just before - the returned character. */ - -static int -java_peek_unicode (void) -{ - int unicode_escape_p; - java_lexer *lex = ctxp->lexer; - int next; - - if (lex->avail_unicode) - return lex->next_unicode; - - next = java_read_unicode (lex, &unicode_escape_p); - - if (next == '\r') - { - /* We have to read ahead to see if we got \r\n. - In that case we return a single line terminator. */ - int dummy; - next = java_read_unicode (lex, &dummy); - if (next != '\n' && next != UEOF) - lex->unget_value = next; - /* In either case we must return a newline. */ - next = '\n'; - } - - lex->next_unicode = next; - lex->avail_unicode = 1; - - if (next == UEOF) - { - lex->next_columns = 0; - return next; - } - - if (next == '\n') - { - lex->next_columns = 1 - lex->position.col; - } - else if (next == '\t') - { - int cur_col = lex->position.col; - lex->next_columns = ((cur_col + 7) & ~7) + 1 - cur_col; - - } - else - { - lex->next_columns = 1; - } - if (unicode_escape_p) - lex->next_columns = 6; - return next; -} - -/* Move forward one Unicode character (post-Unicode-escape-handling). - Only allowed after java_peek_unicode. The combination java_peek_unicode - followed by java_next_unicode is equivalent to java_get_unicode. */ - -static void java_next_unicode (void) -{ - struct java_lexer *lex = ctxp->lexer; - lex->position.col += lex->next_columns; - if (lex->next_unicode == '\n') - { - lex->position.line++; -#ifndef JC1_LITE -#ifdef USE_MAPPED_LOCATION - input_location - = linemap_line_start (&line_table, lex->position.line, 120); -#else - input_line = lex->position.line; -#endif -#endif - } - lex->avail_unicode = 0; -} - -#if 0 -/* The inverse of java_next_unicode. - Not currently used, but could be if it would be cleaner or faster. - java_peek_unicode == java_get_unicode + java_unget_unicode. - java_get_unicode == java_peek_unicode + java_next_unicode. -*/ -static void java_unget_unicode () -{ - struct java_lexer *lex = ctxp->lexer; - if (lex->avail_unicode) - fatal_error ("internal error - bad unget"); - lex->avail_unicode = 1; - lex->position.col -= lex->next_columns; -} -#endif - -/* Parse the end of a C style comment. - * C is the first character following the '/' and '*'. */ -static void -java_parse_end_comment (int c) -{ - for ( ;; c = java_get_unicode ()) - { - switch (c) - { - case UEOF: - java_lex_error ("Comment not terminated at end of input", 0); - return; - case '*': - switch (c = java_peek_unicode ()) - { - case UEOF: - java_lex_error ("Comment not terminated at end of input", 0); - return; - case '/': - java_next_unicode (); - return; - case '*': /* Reparse only '*'. */ - ; - } - } - } -} - -/* Parse the documentation section. Keywords must be at the beginning - of a documentation comment line (ignoring white space and any `*' - character). Parsed keyword(s): @DEPRECATED. */ - -static void -java_parse_doc_section (int c) -{ - int last_was_star; - - /* We reset this here, because only the most recent doc comment - applies to the following declaration. */ - ctxp->deprecated = 0; - - /* We loop over all the lines of the comment. We'll eventually exit - if we hit EOF prematurely, or when we see the comment - terminator. */ - while (1) - { - /* These first steps need only be done if we're still looking - for the deprecated tag. If we've already seen it, we might - as well skip looking for it again. */ - if (! ctxp->deprecated) - { - /* Skip whitespace and '*'s. We must also check for the end - of the comment here. */ - while (JAVA_WHITE_SPACE_P (c) || c == '*') - { - last_was_star = (c == '*'); - c = java_get_unicode (); - if (last_was_star && c == '/') - { - /* We just saw the comment terminator. */ - return; - } - } - - if (c == UEOF) - goto eof; - - if (c == '@') - { - const char *deprecated = "@deprecated"; - int i; - - for (i = 0; deprecated[i]; ++i) - { - if (c != deprecated[i]) - break; - /* We write the code in this way, with the - update at the end, so that after the loop - we're left with the next character in C. */ - c = java_get_unicode (); - } - - if (c == UEOF) - goto eof; - - /* @deprecated must be followed by a space or newline. - We also allow a '*' in case it appears just before - the end of a comment. In this position only we also - must allow any Unicode space character. */ - if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c)) - { - if (! deprecated[i]) - ctxp->deprecated = 1; - } - } - } - - /* We've examined the relevant content from this line. Now we - skip the remaining characters and start over with the next - line. We also check for end of comment here. */ - while (c != '\n' && c != UEOF) - { - last_was_star = (c == '*'); - c = java_get_unicode (); - if (last_was_star && c == '/') - return; - } - - if (c == UEOF) - goto eof; - /* We have to advance past the \n. */ - c = java_get_unicode (); - if (c == UEOF) - goto eof; - } - - eof: - java_lex_error ("Comment not terminated at end of input", 0); -} - -/* Return true if C is a valid start character for a Java identifier. - This is only called if C >= 128 -- smaller values are handled - inline. However, this function handles all values anyway. */ -static int -java_start_char_p (unicode_t c) -{ - unsigned int hi = c / 256; - const char *const page = type_table[hi]; - unsigned long val = (unsigned long) page; - int flags; - - if ((val & ~ LETTER_MASK) != 0) - flags = page[c & 255]; - else - flags = val; - - return flags & LETTER_START; -} - -/* Return true if C is a valid part character for a Java identifier. - This is only called if C >= 128 -- smaller values are handled - inline. However, this function handles all values anyway. */ -static int -java_part_char_p (unicode_t c) -{ - unsigned int hi = c / 256; - const char *const page = type_table[hi]; - unsigned long val = (unsigned long) page; - int flags; - - if ((val & ~ LETTER_MASK) != 0) - flags = page[c & 255]; - else - flags = val; - - return flags & LETTER_PART; -} - -/* Return true if C is whitespace. */ -static int -java_space_char_p (unicode_t c) -{ - unsigned int hi = c / 256; - const char *const page = type_table[hi]; - unsigned long val = (unsigned long) page; - int flags; - - if ((val & ~ LETTER_MASK) != 0) - flags = page[c & 255]; - else - flags = val; - - return flags & LETTER_SPACE; -} - -static int -java_parse_escape_sequence (void) -{ - int c; - - switch (c = java_get_unicode ()) - { - case 'b': - return (unicode_t)0x8; - case 't': - return (unicode_t)0x9; - case 'n': - return (unicode_t)0xa; - case 'f': - return (unicode_t)0xc; - case 'r': - return (unicode_t)0xd; - case '"': - return (unicode_t)0x22; - case '\'': - return (unicode_t)0x27; - case '\\': - return (unicode_t)0x5c; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': - { - int more = 3; - unicode_t char_lit = 0; - - if (c > '3') - { - /* According to the grammar, `\477' has a well-defined - meaning -- it is `\47' followed by `7'. */ - --more; - } - char_lit = 0; - for (;;) - { - char_lit = 8 * char_lit + c - '0'; - if (--more == 0) - break; - c = java_peek_unicode (); - if (! RANGE (c, '0', '7')) - break; - java_next_unicode (); - } - - return char_lit; - } - default: - java_lex_error ("Invalid character in escape sequence", -1); - return JAVA_CHAR_ERROR; - } -} - -#ifndef JC1_LITE -#define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0) - -/* Subroutine of java_lex: converts floating-point literals to tree - nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to - store the result. FFLAG indicates whether the literal was tagged - with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING - is the line number on which to report any error. */ - -static void java_perform_atof (YYSTYPE *, char *, int, int); - -static void -java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag, - int number_beginning) -{ - REAL_VALUE_TYPE value; - tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE); - - SET_REAL_VALUE_ATOF (value, - REAL_VALUE_ATOF (literal_token, TYPE_MODE (type))); - - if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value)) - { - JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double"); - value = DCONST0; - } - else if (IS_ZERO (value)) - { - /* We check to see if the value is really 0 or if we've found an - underflow. We do this in the most primitive imaginable way. */ - int really_zero = 1; - char *p = literal_token; - if (*p == '-') - ++p; - while (*p && *p != 'e' && *p != 'E') - { - if (*p != '0' && *p != '.') - { - really_zero = 0; - break; - } - ++p; - } - if (! really_zero) - { - int save_col = ctxp->lexer->position.col; - ctxp->lexer->position.col = number_beginning; - java_lex_error ("Floating point literal underflow", 0); - ctxp->lexer->position.col = save_col; - } - } - - SET_LVAL_NODE (build_real (type, value)); -} -#endif - -static int yylex (YYSTYPE *); - -static int -#ifdef JC1_LITE -yylex (YYSTYPE *java_lval) -#else -do_java_lex (YYSTYPE *java_lval) -#endif -{ - int c; - char *string; - - /* Translation of the Unicode escape in the raw stream of Unicode - characters. Takes care of line terminator. */ - step1: - /* Skip white spaces: SP, TAB and FF or ULT. */ - for (;;) - { - c = java_peek_unicode (); - if (c != '\n' && ! JAVA_WHITE_SPACE_P (c)) - break; - java_next_unicode (); - } - - /* Handle EOF here. */ - if (c == UEOF) /* Should probably do something here... */ - return 0; - -#ifndef JC1_LITE -#ifdef USE_MAPPED_LOCATION - LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table, - ctxp->lexer->position.col); -#else - ctxp->lexer->token_start = ctxp->lexer->position; -#endif -#endif - - /* Numeric literals. */ - if (JAVA_ASCII_DIGIT (c) || (c == '.')) - { - /* This section of code is borrowed from gcc/c-lex.c. */ -#define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2) - int parts[TOTAL_PARTS]; - HOST_WIDE_INT high, low; - /* End borrowed section. */ - -#define MAX_TOKEN_LEN 256 - char literal_token [MAX_TOKEN_LEN + 1]; - int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes; - int found_hex_digits = 0, found_non_octal_digits = -1; - int i; -#ifndef JC1_LITE - int number_beginning = ctxp->lexer->position.col; - tree value; -#endif - - for (i = 0; i < TOTAL_PARTS; i++) - parts [i] = 0; - - if (c == '0') - { - java_next_unicode (); - c = java_peek_unicode (); - if (c == 'x' || c == 'X') - { - radix = 16; - java_next_unicode (); - c = java_peek_unicode (); - } - else if (JAVA_ASCII_DIGIT (c)) - { - literal_token [literal_index++] = '0'; - radix = 8; - } - else if (c == '.' || c == 'e' || c =='E') - { - literal_token [literal_index++] = '0'; - /* Handle C during floating-point parsing. */ - } - else - { - /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */ - switch (c) - { - case 'L': case 'l': - java_next_unicode (); - SET_LVAL_NODE (long_zero_node); - return (INT_LIT_TK); - case 'f': case 'F': - java_next_unicode (); - SET_LVAL_NODE (float_zero_node); - return (FP_LIT_TK); - case 'd': case 'D': - java_next_unicode (); - SET_LVAL_NODE (double_zero_node); - return (FP_LIT_TK); - default: - SET_LVAL_NODE (integer_zero_node); - return (INT_LIT_TK); - } - } - } - - /* Terminate LITERAL_TOKEN in case we bail out on large tokens. */ - literal_token [MAX_TOKEN_LEN] = '\0'; - - /* Parse the first part of the literal, until we find something - which is not a number. */ - while ((radix == 16 ? JAVA_ASCII_HEXDIGIT (c) : JAVA_ASCII_DIGIT (c)) - && literal_index < MAX_TOKEN_LEN) - { - /* We store in a string (in case it turns out to be a FP) and in - PARTS if we have to process a integer literal. */ - int numeric = hex_value (c); - int count; - - /* Remember when we find a valid hexadecimal digit. */ - if (radix == 16) - found_hex_digits = 1; - /* Remember when we find an invalid octal digit. */ - else if (radix == 8 && numeric >= 8 && found_non_octal_digits < 0) - found_non_octal_digits = literal_index; - - literal_token [literal_index++] = c; - /* This section of code if borrowed from gcc/c-lex.c. */ - for (count = 0; count < TOTAL_PARTS; count++) - { - parts[count] *= radix; - if (count) - { - parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR); - parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1; - } - else - parts[0] += numeric; - } - if (parts [TOTAL_PARTS-1] != 0) - overflow = 1; - /* End borrowed section. */ - java_next_unicode (); - c = java_peek_unicode (); - } - - /* If we have something from the FP char set but not a digit, parse - a FP literal. */ - if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c)) - { - /* stage==0: seen digits only - * stage==1: seen '.' - * stage==2: seen 'e' or 'E'. - * stage==3: seen '+' or '-' after 'e' or 'E'. - * stage==4: seen type suffix ('f'/'F'/'d'/'D') - */ - int stage = 0; - int seen_digit = (literal_index ? 1 : 0); - int seen_exponent = 0; - int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are - double unless specified. */ - - /* It is ok if the radix is 8 because this just means we've - seen a leading `0'. However, radix==16 is invalid. */ - if (radix == 16) - java_lex_error ("Can't express non-decimal FP literal", 0); - radix = 10; - - for (; literal_index < MAX_TOKEN_LEN;) - { - if (c == '.') - { - if (stage < 1) - { - stage = 1; - literal_token [literal_index++ ] = c; - java_next_unicode (); - c = java_peek_unicode (); - if (literal_index == 1 && !JAVA_ASCII_DIGIT (c)) - BUILD_OPERATOR (DOT_TK); - } - else - java_lex_error ("Invalid character in FP literal", 0); - } - - if ((c == 'e' || c == 'E') && literal_index < MAX_TOKEN_LEN) - { - if (stage < 2) - { - /* {E,e} must have seen at least a digit. */ - if (!seen_digit) - java_lex_error - ("Invalid FP literal, mantissa must have digit", 0); - seen_digit = 0; - seen_exponent = 1; - stage = 2; - literal_token [literal_index++] = c; - java_next_unicode (); - c = java_peek_unicode (); - } - else - java_lex_error ("Invalid character in FP literal", 0); - } - if ( c == 'f' || c == 'F' || c == 'd' || c == 'D') - { - fflag = ((c == 'd') || (c == 'D')) ? 0 : 1; - stage = 4; /* So we fall through. */ - } - - if ((c=='-' || c =='+') && stage == 2 - && literal_index < MAX_TOKEN_LEN) - { - stage = 3; - literal_token [literal_index++] = c; - java_next_unicode (); - c = java_peek_unicode (); - } - - if (((stage == 0 && JAVA_ASCII_FPCHAR (c)) - || (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) - || (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) - || (stage == 3 && JAVA_ASCII_DIGIT (c))) - && literal_index < MAX_TOKEN_LEN) - { - if (JAVA_ASCII_DIGIT (c)) - seen_digit = 1; - if (stage == 2) - stage = 3; - literal_token [literal_index++ ] = c; - java_next_unicode (); - c = java_peek_unicode (); - } - else if (literal_index < MAX_TOKEN_LEN) - { - if (stage == 4) /* Don't push back fF/dD. */ - java_next_unicode (); - - /* An exponent (if any) must have seen a digit. */ - if (seen_exponent && !seen_digit) - java_lex_error - ("Invalid FP literal, exponent must have digit", 0); - - literal_token [literal_index] = '\0'; - -#ifndef JC1_LITE - java_perform_atof (java_lval, literal_token, - fflag, number_beginning); -#endif - return FP_LIT_TK; - } - } - } /* JAVA_ASCII_FPCHAR (c) */ - - /* Here we get back to converting the integral literal. */ - if (radix == 16 && ! found_hex_digits) - java_lex_error - ("0x must be followed by at least one hexadecimal digit", 0); - else if (radix == 8 && found_non_octal_digits >= 0) - { - int back = literal_index - found_non_octal_digits; - ctxp->lexer->position.col -= back; - java_lex_error ("Octal literal contains digit out of range", 0); - ctxp->lexer->position.col += back; - } - else if (c == 'L' || c == 'l') - { - java_next_unicode (); - long_suffix = 1; - } - - /* This section of code is borrowed from gcc/c-lex.c. */ - if (!overflow) - { - bytes = GET_TYPE_PRECISION (long_type_node); - for (i = bytes; i < TOTAL_PARTS; i++) - if (parts [i]) - { - overflow = 1; - break; - } - } - high = low = 0; - for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++) - { - high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT - / HOST_BITS_PER_CHAR)] - << (i * HOST_BITS_PER_CHAR)); - low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR); - } - /* End borrowed section. */ - -#ifndef JC1_LITE - /* Range checking. */ - /* Temporarily set type to unsigned. */ - value = build_int_cst_wide (long_suffix - ? unsigned_long_type_node - : unsigned_int_type_node, low, high); - SET_LVAL_NODE (value); - - /* For base 10 numbers, only values up to the highest value - (plus one) can be written. For instance, only ints up to - 2147483648 can be written. The special case of the largest - negative value is handled elsewhere. For other bases, any - number can be represented. */ - if (overflow || (radix == 10 - && tree_int_cst_lt (long_suffix - ? decimal_long_max - : decimal_int_max, - value))) - { - if (long_suffix) - JAVA_RANGE_ERROR ("Numeric overflow for 'long' literal"); - else - JAVA_RANGE_ERROR ("Numeric overflow for 'int' literal"); - } - - /* Sign extend the value. */ - value = build_int_cst_wide_type (long_suffix ? long_type_node - : int_type_node, low, high); - - if (radix != 10) - { - value = copy_node (value); - JAVA_NOT_RADIX10_FLAG (value) = 1; - } - - SET_LVAL_NODE (value); -#endif - return INT_LIT_TK; - } - - /* We may have an ID here. */ - if (JAVA_START_CHAR_P (c)) - { - int ascii_index = 0, all_ascii = 1; - - /* Keyword, boolean literal or null literal. */ - while (c != UEOF && JAVA_PART_CHAR_P (c)) - { - java_unicode_2_utf8 (c); - if (c >= 128) - all_ascii = 0; - java_next_unicode (); - ascii_index++; - c = java_peek_unicode (); - } - - obstack_1grow (&temporary_obstack, '\0'); - string = obstack_finish (&temporary_obstack); - - /* If we have something all ascii, we consider a keyword, a boolean - literal, a null literal or an all ASCII identifier. Otherwise, - this is an identifier (possibly not respecting formation rule). */ - if (all_ascii) - { - const struct java_keyword *kw; - if ((kw=java_keyword (string, ascii_index))) - { - switch (kw->token) - { - case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK: - case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK: - case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK: - case PRIVATE_TK: case STRICT_TK: - SET_MODIFIER_CTX (kw->token); - return MODIFIER_TK; - case FLOAT_TK: - SET_LVAL_NODE (float_type_node); - return FP_TK; - case DOUBLE_TK: - SET_LVAL_NODE (double_type_node); - return FP_TK; - case BOOLEAN_TK: - SET_LVAL_NODE (boolean_type_node); - return BOOLEAN_TK; - case BYTE_TK: - SET_LVAL_NODE (byte_type_node); - return INTEGRAL_TK; - case SHORT_TK: - SET_LVAL_NODE (short_type_node); - return INTEGRAL_TK; - case INT_TK: - SET_LVAL_NODE (int_type_node); - return INTEGRAL_TK; - case LONG_TK: - SET_LVAL_NODE (long_type_node); - return INTEGRAL_TK; - case CHAR_TK: - SET_LVAL_NODE (char_type_node); - return INTEGRAL_TK; - - /* Keyword based literals. */ - case TRUE_TK: - case FALSE_TK: - SET_LVAL_NODE ((kw->token == TRUE_TK ? - boolean_true_node : boolean_false_node)); - return BOOL_LIT_TK; - case NULL_TK: - SET_LVAL_NODE (null_pointer_node); - return NULL_TK; - - case ASSERT_TK: - if (flag_assert) - { - BUILD_OPERATOR (kw->token); - return kw->token; - } - else - break; - - /* Some keyword we want to retain information on the location - they where found. */ - case CASE_TK: - case DEFAULT_TK: - case SUPER_TK: - case THIS_TK: - case RETURN_TK: - case BREAK_TK: - case CONTINUE_TK: - case TRY_TK: - case CATCH_TK: - case THROW_TK: - case INSTANCEOF_TK: - BUILD_OPERATOR (kw->token); - - default: - return kw->token; - } - } - } - - java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string)); - return ID_TK; - } - - java_next_unicode (); - - /* Character literals. */ - if (c == '\'') - { - int char_lit; - - if ((c = java_get_unicode ()) == '\\') - char_lit = java_parse_escape_sequence (); - else - { - if (c == '\n' || c == '\'') - java_lex_error ("Invalid character literal", 0); - char_lit = c; - } - - c = java_get_unicode (); - - if ((c == '\n') || (c == UEOF)) - java_lex_error ("Character literal not terminated at end of line", 0); - if (c != '\'') - java_lex_error ("Syntax error in character literal", 0); - - if (char_lit == JAVA_CHAR_ERROR) - char_lit = 0; /* We silently convert it to zero. */ - - SET_LVAL_NODE (build_int_cst (char_type_node, char_lit)); - return CHAR_LIT_TK; - } - - /* String literals. */ - if (c == '"') - { - int no_error = 1; - char *string; - - for (;;) - { - c = java_peek_unicode (); - if (c == '\n' || c == UEOF) /* ULT. */ - { - java_lex_error ("String not terminated at end of line", 0); - break; - } - java_next_unicode (); - if (c == '"') - break; - if (c == '\\') - c = java_parse_escape_sequence (); - if (c == JAVA_CHAR_ERROR) - { - no_error = 0; - c = 0; /* We silently convert it to zero. */ - } - java_unicode_2_utf8 (c); - } - - obstack_1grow (&temporary_obstack, '\0'); - string = obstack_finish (&temporary_obstack); -#ifndef JC1_LITE - if (!no_error || (c != '"')) - java_lval->node = error_mark_node; /* FIXME: Requires further - testing. */ - else - java_lval->node = build_string (strlen (string), string); -#endif - obstack_free (&temporary_obstack, string); - return STRING_LIT_TK; - } - - switch (c) - { - case '/': - /* Check for comment. */ - switch (c = java_peek_unicode ()) - { - case '/': - java_next_unicode (); - for (;;) - { - c = java_get_unicode (); - if (c == UEOF) - { - /* It is ok to end a `//' comment with EOF, unless - we're being pedantic. */ - if (pedantic) - java_lex_error ("Comment not terminated at end of input", - 0); - return 0; - } - if (c == '\n') /* ULT */ - goto step1; - } - break; - - case '*': - java_next_unicode (); - if ((c = java_get_unicode ()) == '*') - { - c = java_get_unicode (); - if (c == '/') - { - /* Empty documentation comment. We have to reset - the deprecation marker as only the most recent - doc comment applies. */ - ctxp->deprecated = 0; - } - else - java_parse_doc_section (c); - } - else - java_parse_end_comment ((c = java_get_unicode ())); - goto step1; - break; - - case '=': - java_next_unicode (); - BUILD_OPERATOR2 (DIV_ASSIGN_TK); - - default: - BUILD_OPERATOR (DIV_TK); - } - - case '(': - BUILD_OPERATOR (OP_TK); - case ')': - return CP_TK; - case '{': -#ifndef JC1_LITE - java_lval->operator.token = OCB_TK; - java_lval->operator.location = BUILD_LOCATION(); -#ifdef USE_MAPPED_LOCATION - if (ctxp->ccb_indent == 1) - ctxp->first_ccb_indent1 = input_location; -#else - if (ctxp->ccb_indent == 1) - ctxp->first_ccb_indent1 = input_line; -#endif -#endif - ctxp->ccb_indent++; - return OCB_TK; - case '}': - ctxp->ccb_indent--; -#ifndef JC1_LITE - java_lval->operator.token = CCB_TK; - java_lval->operator.location = BUILD_LOCATION(); -#ifdef USE_MAPPED_LOCATION - if (ctxp->ccb_indent == 1) - ctxp->last_ccb_indent1 = input_location; -#else - if (ctxp->ccb_indent == 1) - ctxp->last_ccb_indent1 = input_line; -#endif -#endif - return CCB_TK; - case '[': - BUILD_OPERATOR (OSB_TK); - case ']': - return CSB_TK; - case ';': - return SC_TK; - case ',': - return C_TK; - case '.': - BUILD_OPERATOR (DOT_TK); - - /* Operators. */ - case '=': - c = java_peek_unicode (); - if (c == '=') - { - java_next_unicode (); - BUILD_OPERATOR (EQ_TK); - } - else - { - /* Equals is used in two different locations. In the - variable_declarator: rule, it has to be seen as '=' as opposed - to being seen as an ordinary assignment operator in - assignment_operators: rule. */ - BUILD_OPERATOR (ASSIGN_TK); - } - - case '>': - switch ((c = java_peek_unicode ())) - { - case '=': - java_next_unicode (); - BUILD_OPERATOR (GTE_TK); - case '>': - java_next_unicode (); - switch ((c = java_peek_unicode ())) - { - case '>': - java_next_unicode (); - c = java_peek_unicode (); - if (c == '=') - { - java_next_unicode (); - BUILD_OPERATOR2 (ZRS_ASSIGN_TK); - } - else - { - BUILD_OPERATOR (ZRS_TK); - } - case '=': - java_next_unicode (); - BUILD_OPERATOR2 (SRS_ASSIGN_TK); - default: - BUILD_OPERATOR (SRS_TK); - } - default: - BUILD_OPERATOR (GT_TK); - } - - case '<': - switch ((c = java_peek_unicode ())) - { - case '=': - java_next_unicode (); - BUILD_OPERATOR (LTE_TK); - case '<': - java_next_unicode (); - if ((c = java_peek_unicode ()) == '=') - { - java_next_unicode (); - BUILD_OPERATOR2 (LS_ASSIGN_TK); - } - else - { - BUILD_OPERATOR (LS_TK); - } - default: - BUILD_OPERATOR (LT_TK); - } - - case '&': - switch ((c = java_peek_unicode ())) - { - case '&': - java_next_unicode (); - BUILD_OPERATOR (BOOL_AND_TK); - case '=': - java_next_unicode (); - BUILD_OPERATOR2 (AND_ASSIGN_TK); - default: - BUILD_OPERATOR (AND_TK); - } - - case '|': - switch ((c = java_peek_unicode ())) - { - case '|': - java_next_unicode (); - BUILD_OPERATOR (BOOL_OR_TK); - case '=': - java_next_unicode (); - BUILD_OPERATOR2 (OR_ASSIGN_TK); - default: - BUILD_OPERATOR (OR_TK); - } - - case '+': - switch ((c = java_peek_unicode ())) - { - case '+': - java_next_unicode (); - BUILD_OPERATOR (INCR_TK); - case '=': - java_next_unicode (); - BUILD_OPERATOR2 (PLUS_ASSIGN_TK); - default: - BUILD_OPERATOR (PLUS_TK); - } - - case '-': - switch ((c = java_peek_unicode ())) - { - case '-': - java_next_unicode (); - BUILD_OPERATOR (DECR_TK); - case '=': - java_next_unicode (); - BUILD_OPERATOR2 (MINUS_ASSIGN_TK); - default: - BUILD_OPERATOR (MINUS_TK); - } - - case '*': - if ((c = java_peek_unicode ()) == '=') - { - java_next_unicode (); - BUILD_OPERATOR2 (MULT_ASSIGN_TK); - } - else - { - BUILD_OPERATOR (MULT_TK); - } - - case '^': - if ((c = java_peek_unicode ()) == '=') - { - java_next_unicode (); - BUILD_OPERATOR2 (XOR_ASSIGN_TK); - } - else - { - BUILD_OPERATOR (XOR_TK); - } - - case '%': - if ((c = java_peek_unicode ()) == '=') - { - java_next_unicode (); - BUILD_OPERATOR2 (REM_ASSIGN_TK); - } - else - { - BUILD_OPERATOR (REM_TK); - } - - case '!': - if ((c = java_peek_unicode()) == '=') - { - java_next_unicode (); - BUILD_OPERATOR (NEQ_TK); - } - else - { - BUILD_OPERATOR (NEG_TK); - } - - case '?': - BUILD_OPERATOR (REL_QM_TK); - case ':': - BUILD_OPERATOR (REL_CL_TK); - case '~': - BUILD_OPERATOR (NOT_TK); - } - - if (c == 0x1a) /* CTRL-Z. */ - { - if ((c = java_peek_unicode ()) == UEOF) - return 0; /* Ok here. */ - } - - /* Everything else is an invalid character in the input. */ - { - char lex_error_buffer [128]; - sprintf (lex_error_buffer, "Invalid character '%s' in input", - java_sprint_unicode (c)); - java_lex_error (lex_error_buffer, -1); - } - return 0; -} - -#ifndef JC1_LITE - -/* The exported interface to the lexer. */ -static int -java_lex (YYSTYPE *java_lval) -{ - int r; - - timevar_push (TV_LEX); - r = do_java_lex (java_lval); - timevar_pop (TV_LEX); - return r; -} - -/* This is called by the parser to see if an error should be generated - due to numeric overflow. This function only handles the particular - case of the largest negative value, and is only called in the case - where this value is not preceded by `-'. */ -static void -error_if_numeric_overflow (tree value) -{ - if (TREE_CODE (value) == INTEGER_CST - && !JAVA_NOT_RADIX10_FLAG (value) - && tree_int_cst_sgn (value) < 0) - { - if (TREE_TYPE (value) == long_type_node) - java_lex_error ("Numeric overflow for 'long' literal", 0); - else - java_lex_error ("Numeric overflow for 'int' literal", 0); - } -} - -#endif /* JC1_LITE */ - -static void -java_unicode_2_utf8 (unicode_t unicode) -{ - if (RANGE (unicode, 0x01, 0x7f)) - obstack_1grow (&temporary_obstack, (char)unicode); - else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0) - { - obstack_1grow (&temporary_obstack, - (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6))); - obstack_1grow (&temporary_obstack, - (unsigned char)(0x80 | (unicode & 0x3f))); - } - else /* Range 0x800-0xffff. */ - { - obstack_1grow (&temporary_obstack, - (unsigned char)(0xe0 | (unicode & 0xf000) >> 12)); - obstack_1grow (&temporary_obstack, - (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6)); - obstack_1grow (&temporary_obstack, - (unsigned char)(0x80 | (unicode & 0x003f))); - } -} - -#ifndef JC1_LITE -static tree -build_wfl_node (tree node) -{ -#ifdef USE_MAPPED_LOCATION - node = build_expr_wfl (node, input_location); -#else - node = build_expr_wfl (node, ctxp->filename, - ctxp->lexer->token_start.line, - ctxp->lexer->token_start.col); -#endif - /* Prevent java_complete_lhs from short-circuiting node (if constant). */ - TREE_TYPE (node) = NULL_TREE; - return node; -} -#endif - -static void -java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED) -{ -#ifndef JC1_LITE - int col = (ctxp->lexer->position.col - + forward * ctxp->lexer->next_columns); -#if USE_MAPPED_LOCATION - source_location save_location = input_location; - LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table, col); - - /* Might be caught in the middle of some error report. */ - ctxp->java_error_flag = 0; - java_error (NULL); - java_error (msg); - input_location = save_location; -#else - java_lc save = ctxp->lexer->token_start; - ctxp->lexer->token_start.line = ctxp->lexer->position.line; - ctxp->lexer->token_start.col = col; - - /* Might be caught in the middle of some error report. */ - ctxp->java_error_flag = 0; - java_error (NULL); - java_error (msg); - ctxp->lexer->token_start = save; -#endif -#endif -} - -#ifndef JC1_LITE -static int -java_is_eol (FILE *fp, int c) -{ - int next; - switch (c) - { - case '\r': - next = getc (fp); - if (next != '\n' && next != EOF) - ungetc (next, fp); - return 1; - case '\n': - return 1; - default: - return 0; - } -} -#endif - -char * -java_get_line_col (const char *filename ATTRIBUTE_UNUSED, - int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED) -{ -#ifdef JC1_LITE - return 0; -#else - /* Dumb implementation. Doesn't try to cache or optimize things. */ - /* First line of the file is line 1, first column is 1. */ - - /* COL == -1 means, at the CR/LF in LINE. */ - /* COL == -2 means, at the first non space char in LINE. */ - - FILE *fp; - int c, ccol, cline = 1; - int current_line_col = 0; - int first_non_space = 0; - char *base; - - if (!(fp = fopen (filename, "r"))) - fatal_error ("can't open %s: %m", filename); - - while (cline != line) - { - c = getc (fp); - if (c == EOF) - { - static const char msg[] = "<<file too short - unexpected EOF>>"; - obstack_grow (&temporary_obstack, msg, sizeof(msg)-1); - goto have_line; - } - if (java_is_eol (fp, c)) - cline++; - } - - /* Gather the chars of the current line in a buffer. */ - for (;;) - { - c = getc (fp); - if (c < 0 || java_is_eol (fp, c)) - break; - if (!first_non_space && !JAVA_WHITE_SPACE_P (c)) - first_non_space = current_line_col; - obstack_1grow (&temporary_obstack, c); - current_line_col++; - } - have_line: - - obstack_1grow (&temporary_obstack, '\n'); - - if (col == -1) - { - col = current_line_col; - first_non_space = 0; - } - else if (col == -2) - col = first_non_space; - else - first_non_space = 0; - - /* Place the '^' a the right position. */ - base = obstack_base (&temporary_obstack); - for (col += 2, ccol = 0; ccol < col; ccol++) - { - /* Compute \t when reaching first_non_space. */ - char c = (first_non_space ? - (base [ccol] == '\t' ? '\t' : ' ') : ' '); - obstack_1grow (&temporary_obstack, c); - } - obstack_grow0 (&temporary_obstack, "^", 1); - - fclose (fp); - return obstack_finish (&temporary_obstack); -#endif -} - -#ifndef JC1_LITE -static int -utf8_cmp (const unsigned char *str, int length, const char *name) -{ - const unsigned char *limit = str + length; - int i; - - for (i = 0; name[i]; ++i) - { - int ch = UTF8_GET (str, limit); - if (ch != name[i]) - return ch - name[i]; - } - - return str == limit ? 0 : 1; -} - -/* A sorted list of all C++ keywords. */ - -static const char *const cxx_keywords[] = -{ - "_Complex", - "__alignof", - "__alignof__", - "__asm", - "__asm__", - "__attribute", - "__attribute__", - "__builtin_va_arg", - "__complex", - "__complex__", - "__const", - "__const__", - "__extension__", - "__imag", - "__imag__", - "__inline", - "__inline__", - "__label__", - "__null", - "__real", - "__real__", - "__restrict", - "__restrict__", - "__signed", - "__signed__", - "__typeof", - "__typeof__", - "__volatile", - "__volatile__", - "and", - "and_eq", - "asm", - "auto", - "bitand", - "bitor", - "bool", - "break", - "case", - "catch", - "char", - "class", - "compl", - "const", - "const_cast", - "continue", - "default", - "delete", - "do", - "double", - "dynamic_cast", - "else", - "enum", - "explicit", - "export", - "extern", - "false", - "float", - "for", - "friend", - "goto", - "if", - "inline", - "int", - "long", - "mutable", - "namespace", - "new", - "not", - "not_eq", - "operator", - "or", - "or_eq", - "private", - "protected", - "public", - "register", - "reinterpret_cast", - "return", - "short", - "signed", - "sizeof", - "static", - "static_cast", - "struct", - "switch", - "template", - "this", - "throw", - "true", - "try", - "typedef", - "typeid", - "typename", - "typeof", - "union", - "unsigned", - "using", - "virtual", - "void", - "volatile", - "wchar_t", - "while", - "xor", - "xor_eq" -}; - -/* Return true if NAME is a C++ keyword. */ - -int -cxx_keyword_p (const char *name, int length) -{ - int last = ARRAY_SIZE (cxx_keywords); - int first = 0; - int mid = (last + first) / 2; - int old = -1; - - for (mid = (last + first) / 2; - mid != old; - old = mid, mid = (last + first) / 2) - { - int kwl = strlen (cxx_keywords[mid]); - int min_length = kwl > length ? length : kwl; - int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]); - - if (r == 0) - { - int i; - /* We've found a match if all the remaining characters are `$'. */ - for (i = min_length; i < length && name[i] == '$'; ++i) - ; - if (i == length) - return 1; - r = 1; - } - - if (r < 0) - last = mid; - else - first = mid; - } - return 0; -} -#endif /* JC1_LITE */ |