diff options
Diffstat (limited to 'libcpp')
-rw-r--r-- | libcpp/ChangeLog | 37 | ||||
-rw-r--r-- | libcpp/charset.c | 115 | ||||
-rw-r--r-- | libcpp/directives.c | 13 | ||||
-rw-r--r-- | libcpp/expr.c | 4 | ||||
-rw-r--r-- | libcpp/include/cpp-id-data.h | 2 | ||||
-rw-r--r-- | libcpp/include/cpplib.h | 11 | ||||
-rw-r--r-- | libcpp/init.c | 24 | ||||
-rw-r--r-- | libcpp/internal.h | 9 | ||||
-rw-r--r-- | libcpp/lex.c | 37 | ||||
-rw-r--r-- | libcpp/macro.c | 12 |
10 files changed, 187 insertions, 77 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 0bd2aad74c5..9eef6efb3e9 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,40 @@ +2008-04-18 Kris Van Hees <kris.van.hees@oracle.com> + + * include/cpp-id-data.h (UC): Was U, conflicts with U"..." literal. + * include/cpplib.h (CHAR16, CHAR32, STRING16, STRING32): New tokens. + (struct cpp_options): Added uliterals. + (cpp_interpret_string): Update prototype. + (cpp_interpret_string_notranslate): Idem. + * charset.c (init_iconv_desc): New width member in cset_converter. + (cpp_init_iconv): Add support for char{16,32}_cset_desc. + (convert_ucn): Idem. + (emit_numeric_escape): Idem. + (convert_hex): Idem. + (convert_oct): Idem. + (convert_escape): Idem. + (converter_for_type): New function. + (cpp_interpret_string): Use converter_for_type, support u and U prefix. + (cpp_interpret_string_notranslate): Match changed prototype. + (wide_str_to_charconst): Use converter_for_type. + (cpp_interpret_charconst): Add support for CPP_CHAR{16,32}. + * directives.c (linemarker_dir): Macro U changed to UC. + (parse_include): Idem. + (register_pragma_1): Idem. + (restore_registered_pragmas): Idem. + (get__Pragma_string): Support CPP_STRING{16,32}. + * expr.c (eval_token): Support CPP_CHAR{16,32}. + * init.c (struct lang_flags): Added uliterals. + (lang_defaults): Idem. + * internal.h (struct cset_converter) <width>: New field. + (struct cpp_reader) <char16_cset_desc>: Idem. + (struct cpp_reader) <char32_cset_desc>: Idem. + * lex.c (digraph_spellings): Macro U changed to UC. + (OP, TK): Idem. + (lex_string): Add support for u'...', U'...', u"..." and U"...". + (_cpp_lex_direct): Idem. + * macro.c (_cpp_builtin_macro_text): Macro U changed to UC. + (stringify_arg): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. 
+ 2008-04-18 Paolo Bonzini <bonzini@gnu.org> PR bootstrap/35457 diff --git a/libcpp/charset.c b/libcpp/charset.c index 5db8fc13430..225cdb4915e 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -642,6 +642,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) { ret.func = convert_no_conversion; ret.cd = (iconv_t) -1; + ret.width = -1; return ret; } @@ -655,6 +656,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) { ret.func = conversion_tab[i].func; ret.cd = conversion_tab[i].fake_cd; + ret.width = -1; return ret; } @@ -663,6 +665,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) { ret.func = convert_using_iconv; ret.cd = iconv_open (to, from); + ret.width = -1; if (ret.cd == (iconv_t) -1) { @@ -683,6 +686,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) from, to); ret.func = convert_no_conversion; ret.cd = (iconv_t) -1; + ret.width = -1; } return ret; } @@ -716,7 +720,17 @@ cpp_init_iconv (cpp_reader *pfile) wcset = default_wcset; pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET); + pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision); + pfile->char16_cset_desc = init_iconv_desc (pfile, + be ? "UTF-16BE" : "UTF-16LE", + SOURCE_CHARSET); + pfile->char16_cset_desc.width = 16; + pfile->char32_cset_desc = init_iconv_desc (pfile, + be ? "UTF-32BE" : "UTF-32LE", + SOURCE_CHARSET); + pfile->char32_cset_desc.width = 32; pfile->wide_cset_desc = init_iconv_desc (pfile, wcset, SOURCE_CHARSET); + pfile->wide_cset_desc.width = CPP_OPTION (pfile, wchar_precision); } /* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary. */ @@ -1051,15 +1065,13 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, An advanced pointer is returned. Issues all relevant diagnostics. 
*/ static const uchar * convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { cppchar_t ucn; uchar buf[6]; uchar *bufp = buf; size_t bytesleft = 6; int rval; - struct cset_converter cvt - = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc; struct normalize_state nst = INITIAL_NORMALIZE_STATE; from++; /* Skip u/U. */ @@ -1086,14 +1098,15 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit, function issues no diagnostics and never fails. */ static void emit_numeric_escape (cpp_reader *pfile, cppchar_t n, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { - if (wide) + size_t width = cvt.width; + + if (width != CPP_OPTION (pfile, char_precision)) { /* We have to render this into the target byte order, which may not be our byte order. */ bool bigend = CPP_OPTION (pfile, bytes_big_endian); - size_t width = CPP_OPTION (pfile, wchar_precision); size_t cwidth = CPP_OPTION (pfile, char_precision); size_t cmask = width_to_mask (cwidth); size_t nbwc = width / cwidth; @@ -1136,12 +1149,11 @@ emit_numeric_escape (cpp_reader *pfile, cppchar_t n, number. You can, e.g. generate surrogate pairs this way. */ static const uchar * convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { cppchar_t c, n = 0, overflow = 0; int digits_found = 0; - size_t width = (wide ? 
CPP_OPTION (pfile, wchar_precision) - : CPP_OPTION (pfile, char_precision)); + size_t width = cvt.width; size_t mask = width_to_mask (width); if (CPP_WTRADITIONAL (pfile)) @@ -1174,7 +1186,7 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit, n &= mask; } - emit_numeric_escape (pfile, n, tbuf, wide); + emit_numeric_escape (pfile, n, tbuf, cvt); return from; } @@ -1187,12 +1199,11 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit, number. */ static const uchar * convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { size_t count = 0; cppchar_t c, n = 0; - size_t width = (wide ? CPP_OPTION (pfile, wchar_precision) - : CPP_OPTION (pfile, char_precision)); + size_t width = cvt.width; size_t mask = width_to_mask (width); bool overflow = false; @@ -1213,7 +1224,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit, n &= mask; } - emit_numeric_escape (pfile, n, tbuf, wide); + emit_numeric_escape (pfile, n, tbuf, cvt); return from; } @@ -1224,7 +1235,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit, pointer. Handles all relevant diagnostics. */ static const uchar * convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { /* Values of \a \b \e \f \n \r \t \v respectively. */ #if HOST_CHARSET == HOST_CHARSET_ASCII @@ -1236,23 +1247,21 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, #endif uchar c; - struct cset_converter cvt - = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc; c = *from; switch (c) { /* UCNs, hex escapes, and octal escapes are processed separately. 
*/ case 'u': case 'U': - return convert_ucn (pfile, from, limit, tbuf, wide); + return convert_ucn (pfile, from, limit, tbuf, cvt); case 'x': - return convert_hex (pfile, from, limit, tbuf, wide); + return convert_hex (pfile, from, limit, tbuf, cvt); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - return convert_oct (pfile, from, limit, tbuf, wide); + return convert_oct (pfile, from, limit, tbuf, cvt); /* Various letter escapes. Get the appropriate host-charset value into C. */ @@ -1312,6 +1321,27 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, return from + 1; } +/* TYPE is a token type. The return value is the conversion needed to + convert from source to execution character set for the given type. */ +static struct cset_converter +converter_for_type (cpp_reader *pfile, enum cpp_ttype type) +{ + switch (type) + { + default: + return pfile->narrow_cset_desc; + case CPP_CHAR16: + case CPP_STRING16: + return pfile->char16_cset_desc; + case CPP_CHAR32: + case CPP_STRING32: + return pfile->char32_cset_desc; + case CPP_WCHAR: + case CPP_WSTRING: + return pfile->wide_cset_desc; + } +} + /* FROM is an array of cpp_string structures of length COUNT. These are to be converted from the source to the execution character set, escape sequences translated, and finally all are to be @@ -1320,13 +1350,12 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, false for failure. */ bool cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, - cpp_string *to, bool wide) + cpp_string *to, enum cpp_ttype type) { struct _cpp_strbuf tbuf; const uchar *p, *base, *limit; size_t i; - struct cset_converter cvt - = wide ? 
pfile->wide_cset_desc : pfile->narrow_cset_desc; + struct cset_converter cvt = converter_for_type (pfile, type); tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len); tbuf.text = XNEWVEC (uchar, tbuf.asize); @@ -1335,7 +1364,7 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, for (i = 0; i < count; i++) { p = from[i].text; - if (*p == 'L') p++; + if (*p == 'L' || *p == 'u' || *p == 'U') p++; p++; /* Skip leading quote. */ limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */ @@ -1354,12 +1383,12 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, if (p == limit) break; - p = convert_escape (pfile, p + 1, limit, &tbuf, wide); + p = convert_escape (pfile, p + 1, limit, &tbuf, cvt); } } /* NUL-terminate the 'to' buffer and translate it to a cpp_string structure. */ - emit_numeric_escape (pfile, 0, &tbuf, wide); + emit_numeric_escape (pfile, 0, &tbuf, cvt); tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len); to->text = tbuf.text; to->len = tbuf.len; @@ -1375,7 +1404,8 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, in a string, but do not perform character set conversion. 
*/ bool cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from, - size_t count, cpp_string *to, bool wide) + size_t count, cpp_string *to, + enum cpp_ttype type ATTRIBUTE_UNUSED) { struct cset_converter save_narrow_cset_desc = pfile->narrow_cset_desc; bool retval; @@ -1383,7 +1413,7 @@ cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from, pfile->narrow_cset_desc.func = convert_no_conversion; pfile->narrow_cset_desc.cd = (iconv_t) -1; - retval = cpp_interpret_string (pfile, from, count, to, wide); + retval = cpp_interpret_string (pfile, from, count, to, CPP_STRING); pfile->narrow_cset_desc = save_narrow_cset_desc; return retval; @@ -1462,13 +1492,14 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str, /* Subroutine of cpp_interpret_charconst which performs the conversion to a number, for wide strings. STR is the string structure returned by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for - cpp_interpret_charconst. */ + cpp_interpret_charconst. TYPE is the token type. */ static cppchar_t wide_str_to_charconst (cpp_reader *pfile, cpp_string str, - unsigned int *pchars_seen, int *unsignedp) + unsigned int *pchars_seen, int *unsignedp, + enum cpp_ttype type) { bool bigend = CPP_OPTION (pfile, bytes_big_endian); - size_t width = CPP_OPTION (pfile, wchar_precision); + size_t width = converter_for_type (pfile, type).width; size_t cwidth = CPP_OPTION (pfile, char_precision); size_t mask = width_to_mask (width); size_t cmask = width_to_mask (cwidth); @@ -1490,7 +1521,7 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str, /* Wide character constants have type wchar_t, and a single character exactly fills a wchar_t, so a multi-character wide character constant is guaranteed to overflow. 
*/ - if (off > 0) + if (str.len > nbwc * 2) cpp_error (pfile, CPP_DL_WARNING, "character constant too long for its type"); @@ -1498,13 +1529,20 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str, sign- or zero-extend to the full width of cppchar_t. */ if (width < BITS_PER_CPPCHAR_T) { - if (CPP_OPTION (pfile, unsigned_wchar) || !(result & (1 << (width - 1)))) + if (type == CPP_CHAR16 || type == CPP_CHAR32 + || CPP_OPTION (pfile, unsigned_wchar) + || !(result & (1 << (width - 1)))) result &= mask; else result |= ~mask; } - *unsignedp = CPP_OPTION (pfile, unsigned_wchar); + if (type == CPP_CHAR16 || type == CPP_CHAR32 + || CPP_OPTION (pfile, unsigned_wchar)) + *unsignedp = 1; + else + *unsignedp = 0; + *pchars_seen = 1; return result; } @@ -1518,20 +1556,21 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, unsigned int *pchars_seen, int *unsignedp) { cpp_string str = { 0, 0 }; - bool wide = (token->type == CPP_WCHAR); + bool wide = (token->type != CPP_CHAR); cppchar_t result; - /* an empty constant will appear as L'' or '' */ + /* an empty constant will appear as L'', u'', U'' or '' */ if (token->val.str.len == (size_t) (2 + wide)) { cpp_error (pfile, CPP_DL_ERROR, "empty character constant"); return 0; } - else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, wide)) + else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, token->type)) return 0; if (wide) - result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp); + result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp, + token->type); else result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp); diff --git a/libcpp/directives.c b/libcpp/directives.c index 0ca1117c19a..3478cd5047a 100644 --- a/libcpp/directives.c +++ b/libcpp/directives.c @@ -188,7 +188,7 @@ DIRECTIVE_TABLE did use this notation in its preprocessed output. 
*/ static const directive linemarker_dir = { - do_linemarker, U"#", 1, KANDR, IN_I + do_linemarker, UC"#", 1, KANDR, IN_I }; #define SEEN_EOL() (pfile->cur_token[-1].type == CPP_EOF) @@ -697,7 +697,7 @@ parse_include (cpp_reader *pfile, int *pangle_brackets, const unsigned char *dir; if (pfile->directive == &dtable[T_PRAGMA]) - dir = U"pragma dependency"; + dir = UC"pragma dependency"; else dir = pfile->directive->name; cpp_error (pfile, CPP_DL_ERROR, "#%s expects \"FILENAME\" or <FILENAME>", @@ -1085,7 +1085,7 @@ register_pragma_1 (cpp_reader *pfile, const char *space, const char *name, if (space) { - node = cpp_lookup (pfile, U space, strlen (space)); + node = cpp_lookup (pfile, UC space, strlen (space)); entry = lookup_pragma_entry (*chain, node); if (!entry) { @@ -1114,7 +1114,7 @@ register_pragma_1 (cpp_reader *pfile, const char *space, const char *name, } /* Check for duplicates. */ - node = cpp_lookup (pfile, U name, strlen (name)); + node = cpp_lookup (pfile, UC name, strlen (name)); entry = lookup_pragma_entry (*chain, node); if (entry == NULL) { @@ -1262,7 +1262,7 @@ restore_registered_pragmas (cpp_reader *pfile, struct pragma_entry *pe, { if (pe->is_nspace) sd = restore_registered_pragmas (pfile, pe->u.space, sd); - pe->pragma = cpp_lookup (pfile, U *sd, strlen (*sd)); + pe->pragma = cpp_lookup (pfile, UC *sd, strlen (*sd)); free (*sd); sd++; } @@ -1491,7 +1491,8 @@ get__Pragma_string (cpp_reader *pfile) string = get_token_no_padding (pfile); if (string->type == CPP_EOF) _cpp_backup_tokens (pfile, 1); - if (string->type != CPP_STRING && string->type != CPP_WSTRING) + if (string->type != CPP_STRING && string->type != CPP_WSTRING + && string->type != CPP_STRING32 && string->type != CPP_STRING16) return NULL; paren = get_token_no_padding (pfile); diff --git a/libcpp/expr.c b/libcpp/expr.c index 9e89dd9574a..00149b2422d 100644 --- a/libcpp/expr.c +++ b/libcpp/expr.c @@ -705,6 +705,8 @@ eval_token (cpp_reader *pfile, const cpp_token *token) case CPP_WCHAR: 
case CPP_CHAR: + case CPP_CHAR16: + case CPP_CHAR32: { cppchar_t cc = cpp_interpret_charconst (pfile, token, &temp, &unsignedp); @@ -863,6 +865,8 @@ _cpp_parse_expr (cpp_reader *pfile) case CPP_NUMBER: case CPP_CHAR: case CPP_WCHAR: + case CPP_CHAR16: + case CPP_CHAR32: case CPP_NAME: case CPP_HASH: if (!want_value) diff --git a/libcpp/include/cpp-id-data.h b/libcpp/include/cpp-id-data.h index 2445186c228..db37c2beccc 100644 --- a/libcpp/include/cpp-id-data.h +++ b/libcpp/include/cpp-id-data.h @@ -22,7 +22,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ typedef unsigned char uchar; #endif -#define U (const unsigned char *) /* Intended use: U"string" */ +#define UC (const unsigned char *) /* Intended use: UC"string" */ /* Chained list of answers to an assertion. */ struct answer GTY(()) diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 84de0e09975..483c54331fb 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -123,10 +123,14 @@ struct _cpp_file; \ TK(CHAR, LITERAL) /* 'char' */ \ TK(WCHAR, LITERAL) /* L'char' */ \ + TK(CHAR16, LITERAL) /* u'char' */ \ + TK(CHAR32, LITERAL) /* U'char' */ \ TK(OTHER, LITERAL) /* stray punctuation */ \ \ TK(STRING, LITERAL) /* "string" */ \ TK(WSTRING, LITERAL) /* L"string" */ \ + TK(STRING16, LITERAL) /* u"string" */ \ + TK(STRING32, LITERAL) /* U"string" */ \ TK(OBJC_STRING, LITERAL) /* @"string" - Objective-C */ \ TK(HEADER_NAME, LITERAL) /* <stdio.h> in #include */ \ \ @@ -291,6 +295,9 @@ struct cpp_options /* Nonzero means to allow hexadecimal floats and LL suffixes. */ unsigned char extended_numbers; + /* Nonzero means process u/U prefix literals (UTF-16/32). */ + unsigned char uliterals; + /* Nonzero means print names of header files (-H). */ unsigned char print_include_names; @@ -712,10 +719,10 @@ extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *, /* Evaluate a vector of CPP_STRING or CPP_WSTRING tokens. 
*/ extern bool cpp_interpret_string (cpp_reader *, const cpp_string *, size_t, - cpp_string *, bool); + cpp_string *, enum cpp_ttype); extern bool cpp_interpret_string_notranslate (cpp_reader *, const cpp_string *, size_t, - cpp_string *, bool); + cpp_string *, enum cpp_ttype); /* Convert a host character constant to the execution character set. */ extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t); diff --git a/libcpp/init.c b/libcpp/init.c index aa0c0b10e3d..040bf2a0489 100644 --- a/libcpp/init.c +++ b/libcpp/init.c @@ -76,20 +76,21 @@ struct lang_flags char std; char cplusplus_comments; char digraphs; + char uliterals; }; static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid std // digr */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 1, 1 }, - /* GNUC99 */ { 1, 0, 1, 0, 0, 1, 1 }, - /* STDC89 */ { 0, 0, 0, 0, 1, 0, 0 }, - /* STDC94 */ { 0, 0, 0, 0, 1, 0, 1 }, - /* STDC99 */ { 1, 0, 1, 0, 1, 1, 1 }, - /* GNUCXX */ { 0, 1, 1, 0, 0, 1, 1 }, - /* CXX98 */ { 0, 1, 1, 0, 1, 1, 1 }, - /* GNUCXX0X */ { 1, 1, 1, 0, 0, 1, 1 }, - /* CXX0X */ { 1, 1, 1, 0, 1, 1, 1 }, - /* ASM */ { 0, 0, 1, 0, 0, 1, 0 } +{ /* c99 c++ xnum xid std // digr ulit */ + /* GNUC89 */ { 0, 0, 1, 0, 0, 1, 1, 0 }, + /* GNUC99 */ { 1, 0, 1, 0, 0, 1, 1, 1 }, + /* STDC89 */ { 0, 0, 0, 0, 1, 0, 0, 0 }, + /* STDC94 */ { 0, 0, 0, 0, 1, 0, 1, 0 }, + /* STDC99 */ { 1, 0, 1, 0, 1, 1, 1, 0 }, + /* GNUCXX */ { 0, 1, 1, 0, 0, 1, 1, 0 }, + /* CXX98 */ { 0, 1, 1, 0, 1, 1, 1, 0 }, + /* GNUCXX0X */ { 1, 1, 1, 0, 0, 1, 1, 1 }, + /* CXX0X */ { 1, 1, 1, 0, 1, 1, 1, 1 }, + /* ASM */ { 0, 0, 1, 0, 0, 1, 0, 0 } /* xid should be 1 for GNUC99, STDC99, GNUCXX, CXX98, GNUCXX0X, and CXX0X when no longer experimental (when all uses of identifiers in the compiler have been audited for correct handling of @@ -112,6 +113,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang) CPP_OPTION (pfile, trigraphs) = l->std; CPP_OPTION (pfile, cplusplus_comments) = l->cplusplus_comments; CPP_OPTION (pfile, digraphs) = 
l->digraphs; + CPP_OPTION (pfile, uliterals) = l->uliterals; } /* Initialize library global state. */ diff --git a/libcpp/internal.h b/libcpp/internal.h index 6110e5cdb08..bf6c5f8c8d2 100644 --- a/libcpp/internal.h +++ b/libcpp/internal.h @@ -48,6 +48,7 @@ struct cset_converter { convert_f func; iconv_t cd; + int width; }; #define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t)) @@ -399,6 +400,14 @@ struct cpp_reader struct cset_converter narrow_cset_desc; /* Descriptor for converting from the source character set to the + UTF-16 execution character set. */ + struct cset_converter char16_cset_desc; + + /* Descriptor for converting from the source character set to the + UTF-32 execution character set. */ + struct cset_converter char32_cset_desc; + + /* Descriptor for converting from the source character set to the wide execution character set. */ struct cset_converter wide_cset_desc; diff --git a/libcpp/lex.c b/libcpp/lex.c index 2eaf6105922..772a8701654 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -39,10 +39,10 @@ struct token_spelling }; static const unsigned char *const digraph_spellings[] = -{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" }; +{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" }; -#define OP(e, s) { SPELL_OPERATOR, U s }, -#define TK(e, s) { SPELL_ ## s, U #e }, +#define OP(e, s) { SPELL_OPERATOR, UC s }, +#define TK(e, s) { SPELL_ ## s, UC #e }, static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE }; #undef OP #undef TK @@ -611,8 +611,8 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base, /* Lexes a string, character constant, or angle-bracketed header file name. The stored string contains the spelling, including opening - quote and leading any leading 'L'. It returns the type of the - literal, or CPP_OTHER if it was not properly terminated. + quote and any leading 'L', 'u' or 'U'. It returns the type + of the literal, or CPP_OTHER if it was not properly terminated. 
The spelling is NUL-terminated, but it is not guaranteed that this is the first NUL since embedded NULs are preserved. */ @@ -626,12 +626,16 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) cur = base; terminator = *cur++; - if (terminator == 'L') + if (terminator == 'L' || terminator == 'u' || terminator == 'U') terminator = *cur++; if (terminator == '\"') - type = *base == 'L' ? CPP_WSTRING: CPP_STRING; + type = (*base == 'L' ? CPP_WSTRING : + *base == 'U' ? CPP_STRING32 : + *base == 'u' ? CPP_STRING16 : CPP_STRING); else if (terminator == '\'') - type = *base == 'L' ? CPP_WCHAR: CPP_CHAR; + type = (*base == 'L' ? CPP_WCHAR : + *base == 'U' ? CPP_CHAR32 : + *base == 'u' ? CPP_CHAR16 : CPP_CHAR); else terminator = '>', type = CPP_HEADER_NAME; @@ -965,11 +969,16 @@ _cpp_lex_direct (cpp_reader *pfile) } case 'L': - /* 'L' may introduce wide characters or strings. */ - if (*buffer->cur == '\'' || *buffer->cur == '"') + case 'u': + case 'U': + /* 'L', 'u' or 'U' may introduce wide characters or strings. */ + if (c == 'L' || CPP_OPTION (pfile, uliterals)) { - lex_string (pfile, result, buffer->cur - 1); - break; + if (*buffer->cur == '\'' || *buffer->cur == '"') + { + lex_string (pfile, result, buffer->cur - 1); + break; + } } /* Fall through. 
*/ @@ -977,12 +986,12 @@ _cpp_lex_direct (cpp_reader *pfile) case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 's': case 't': case 'v': case 'w': case 'x': case 'y': case 'z': case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'S': case 'T': case 'V': case 'W': case 'X': case 'Y': case 'Z': result->type = CPP_NAME; { diff --git a/libcpp/macro.c b/libcpp/macro.c index 587b94814cc..016754bc952 100644 --- a/libcpp/macro.c +++ b/libcpp/macro.c @@ -158,7 +158,7 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node) { cpp_errno (pfile, CPP_DL_WARNING, "could not determine file timestamp"); - pbuffer->timestamp = U"\"??? ??? ?? ??:??:?? ????\""; + pbuffer->timestamp = UC"\"??? ??? ?? ??:??:?? ????\""; } } } @@ -256,8 +256,8 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node) cpp_errno (pfile, CPP_DL_WARNING, "could not determine date and time"); - pfile->date = U"\"??? ?? ????\""; - pfile->time = U"\"??:??:??\""; + pfile->date = UC"\"??? ?? ????\""; + pfile->time = UC"\"??:??:??\""; } } @@ -375,8 +375,10 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg) continue; } - escape_it = (token->type == CPP_STRING || token->type == CPP_WSTRING - || token->type == CPP_CHAR || token->type == CPP_WCHAR); + escape_it = (token->type == CPP_STRING || token->type == CPP_CHAR + || token->type == CPP_WSTRING || token->type == CPP_WCHAR + || token->type == CPP_STRING32 || token->type == CPP_CHAR32 + || token->type == CPP_STRING16 || token->type == CPP_CHAR16); /* Room for each char being written in octal, initial space and final quote and NUL. */ |