From 924bbf0237ffce7bd66e35cba2afbbe96bea0202 Mon Sep 17 00:00:00 2001 From: jason Date: Fri, 18 Apr 2008 13:58:08 +0000 Subject: libcpp/ChangeLog: 2008-04-14 Kris Van Hees * include/cpp-id-data.h (UC): Was U, conflicts with U... literal. * include/cpplib.h (CHAR16, CHAR32, STRING16, STRING32): New tokens. (struct cpp_options): Added uliterals. (cpp_interpret_string): Update prototype. (cpp_interpret_string_notranslate): Idem. * charset.c (init_iconv_desc): New width member in cset_converter. (cpp_init_iconv): Add support for char{16,32}_cset_desc. (convert_ucn): Idem. (emit_numeric_escape): Idem. (convert_hex): Idem. (convert_oct): Idem. (convert_escape): Idem. (converter_for_type): New function. (cpp_interpret_string): Use converter_for_type, support u and U prefix. (cpp_interpret_string_notranslate): Match changed prototype. (wide_str_to_charconst): Use converter_for_type. (cpp_interpret_charconst): Add support for CPP_CHAR{16,32}. * directives.c (linemarker_dir): Macro U changed to UC. (parse_include): Idem. (register_pragma_1): Idem. (restore_registered_pragmas): Idem. (get__Pragma_string): Support CPP_STRING{16,32}. * expr.c (eval_token): Support CPP_CHAR{16,32}. * init.c (struct lang_flags): Added uliterals. (lang_defaults): Idem. * internal.h (struct cset_converter) : New field. (struct cpp_reader) : Idem. (struct cpp_reader) : Idem. * lex.c (digraph_spellings): Macro U changed to UC. (OP, TK): Idem. (lex_string): Add support for u'...', U'...', u... and U.... (_cpp_lex_direct): Idem. * macro.c (_cpp_builtin_macro_text): Macro U changed to UC. (stringify_arg): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. gcc/ChangeLog: 2008-04-14 Kris Van Hees * c-common.c (CHAR16_TYPE, CHAR32_TYPE): New macros. (fname_as_string): Match updated cpp_interpret_string prototype. (fix_string_type): Support char16_t* and char32_t*. (c_common_nodes_and_builtins): Add char16_t and char32_t (and derivative) nodes. Register as builtin if C++0x. (c_parse_error): Support CPP_CHAR{16,32}. * c-common.h (RID_CHAR16, RID_CHAR32): New elements. (enum c_tree_index) : New elements. (char16_type_node, signed_char16_type_node, unsigned_char16_type_node, char32_type_node, signed_char32_type_node, char16_array_type_node, char32_array_type_node): New defines. * c-lex.c (cb_ident): Match updated cpp_interpret_string prototype. (c_lex_with_flags): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. (lex_string): Support CPP_STRING{16,32}, match updated cpp_interpret_string and cpp_interpret_string_notranslate prototypes. (lex_charconst): Support CPP_CHAR{16,32}. * c-parser.c (c_parser_postfix_expression): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. gcc/cp/ChangeLog: 2008-04-14 Kris Van Hees * cvt.c (type_promotes_to): Support char16_t and char32_t. * decl.c (grokdeclarator): Disallow signed/unsigned/short/long on char16_t and char32_t. * lex.c (reswords): Add char16_t and char32_t (for c++0x). * mangle.c (write_builtin_type): Mangle char16_t/char32_t as vendor extended builtin type u8char32_t. * parser.c (cp_lexer_next_token_is_decl_specifier_keyword): Support RID_CHAR{16,32}. (cp_lexer_print_token): Support CPP_STRING{16,32}. (cp_parser_is_string_literal): Idem. (cp_parser_string_literal): Idem. (cp_parser_primary_expression): Support CPP_CHAR{16,32} and CPP_STRING{16,32}. (cp_parser_simple_type_specifier): Support RID_CHAR{16,32}. * tree.c (char_type_p): Support char16_t and char32_t as char types. * typeck.c (string_conv_p): Support char16_t and char32_t. gcc/testsuite/ChangeLog: 2008-04-14 Kris Van Hees Tests for char16_t and char32_t support. * g++.dg/ext/utf-cvt.C: New * g++.dg/ext/utf-cxx0x.C: New * g++.dg/ext/utf-cxx98.C: New * g++.dg/ext/utf-dflt.C: New * g++.dg/ext/utf-gnuxx0x.C: New * g++.dg/ext/utf-gnuxx98.C: New * g++.dg/ext/utf-mangle.C: New * g++.dg/ext/utf-typedef-cxx0x.C: New * g++.dg/ext/utf-typedef- * g++.dg/ext/utf-typespec.C: New * g++.dg/ext/utf16-1.C: New * g++.dg/ext/utf16-2.C: New * g++.dg/ext/utf16-3.C: New * g++.dg/ext/utf16-4.C: New * g++.dg/ext/utf32-1.C: New * g++.dg/ext/utf32-2.C: New * g++.dg/ext/utf32-3.C: New * g++.dg/ext/utf32-4.C: New * gcc.dg/utf-cvt.c: New * gcc.dg/utf-dflt.c: New * gcc.dg/utf16-1.c: New * gcc.dg/utf16-2.c: New * gcc.dg/utf16-3.c: New * gcc.dg/utf16-4.c: New * gcc.dg/utf32-1.c: New * gcc.dg/utf32-2.c: New * gcc.dg/utf32-3.c: New * gcc.dg/utf32-4.c: New libiberty/ChangeLog: 2008-04-14 Kris Van Hees * testsuite/demangle-expected: Added tests for char16_t and char32_t. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@134438 138bc75d-0d04-0410-961f-82ee72b054a4 --- libcpp/charset.c | 115 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 77 insertions(+), 38 deletions(-) (limited to 'libcpp/charset.c') diff --git a/libcpp/charset.c b/libcpp/charset.c index 5db8fc13430..225cdb4915e 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -642,6 +642,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) { ret.func = convert_no_conversion; ret.cd = (iconv_t) -1; + ret.width = -1; return ret; } @@ -655,6 +656,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) { ret.func = conversion_tab[i].func; ret.cd = conversion_tab[i].fake_cd; + ret.width = -1; return ret; } @@ -663,6 +665,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) { ret.func = convert_using_iconv; ret.cd = iconv_open (to, from); + ret.width = -1; if (ret.cd == (iconv_t) -1) { @@ -683,6 +686,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from) from, to); ret.func = convert_no_conversion; ret.cd = (iconv_t) -1; + ret.width = -1; } return ret; } @@ -716,7 +720,17 @@ cpp_init_iconv (cpp_reader *pfile) wcset = default_wcset; pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET); + pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision); + pfile->char16_cset_desc = init_iconv_desc (pfile, + be ? "UTF-16BE" : "UTF-16LE", + SOURCE_CHARSET); + pfile->char16_cset_desc.width = 16; + pfile->char32_cset_desc = init_iconv_desc (pfile, + be ? "UTF-32BE" : "UTF-32LE", + SOURCE_CHARSET); + pfile->char32_cset_desc.width = 32; pfile->wide_cset_desc = init_iconv_desc (pfile, wcset, SOURCE_CHARSET); + pfile->wide_cset_desc.width = CPP_OPTION (pfile, wchar_precision); } /* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary. */ @@ -1051,15 +1065,13 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr, An advanced pointer is returned. Issues all relevant diagnostics. */ static const uchar * convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { cppchar_t ucn; uchar buf[6]; uchar *bufp = buf; size_t bytesleft = 6; int rval; - struct cset_converter cvt - = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc; struct normalize_state nst = INITIAL_NORMALIZE_STATE; from++; /* Skip u/U. */ @@ -1086,14 +1098,15 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit, function issues no diagnostics and never fails. */ static void emit_numeric_escape (cpp_reader *pfile, cppchar_t n, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { - if (wide) + size_t width = cvt.width; + + if (width != CPP_OPTION (pfile, char_precision)) { /* We have to render this into the target byte order, which may not be our byte order. */ bool bigend = CPP_OPTION (pfile, bytes_big_endian); - size_t width = CPP_OPTION (pfile, wchar_precision); size_t cwidth = CPP_OPTION (pfile, char_precision); size_t cmask = width_to_mask (cwidth); size_t nbwc = width / cwidth; @@ -1136,12 +1149,11 @@ emit_numeric_escape (cpp_reader *pfile, cppchar_t n, number. You can, e.g. generate surrogate pairs this way. */ static const uchar * convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { cppchar_t c, n = 0, overflow = 0; int digits_found = 0; - size_t width = (wide ? CPP_OPTION (pfile, wchar_precision) - : CPP_OPTION (pfile, char_precision)); + size_t width = cvt.width; size_t mask = width_to_mask (width); if (CPP_WTRADITIONAL (pfile)) @@ -1174,7 +1186,7 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit, n &= mask; } - emit_numeric_escape (pfile, n, tbuf, wide); + emit_numeric_escape (pfile, n, tbuf, cvt); return from; } @@ -1187,12 +1199,11 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit, number. */ static const uchar * convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { size_t count = 0; cppchar_t c, n = 0; - size_t width = (wide ? CPP_OPTION (pfile, wchar_precision) - : CPP_OPTION (pfile, char_precision)); + size_t width = cvt.width; size_t mask = width_to_mask (width); bool overflow = false; @@ -1213,7 +1224,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit, n &= mask; } - emit_numeric_escape (pfile, n, tbuf, wide); + emit_numeric_escape (pfile, n, tbuf, cvt); return from; } @@ -1224,7 +1235,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit, pointer. Handles all relevant diagnostics. */ static const uchar * convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, - struct _cpp_strbuf *tbuf, bool wide) + struct _cpp_strbuf *tbuf, struct cset_converter cvt) { /* Values of \a \b \e \f \n \r \t \v respectively. */ #if HOST_CHARSET == HOST_CHARSET_ASCII @@ -1236,23 +1247,21 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, #endif uchar c; - struct cset_converter cvt - = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc; c = *from; switch (c) { /* UCNs, hex escapes, and octal escapes are processed separately. */ case 'u': case 'U': - return convert_ucn (pfile, from, limit, tbuf, wide); + return convert_ucn (pfile, from, limit, tbuf, cvt); case 'x': - return convert_hex (pfile, from, limit, tbuf, wide); + return convert_hex (pfile, from, limit, tbuf, cvt); break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - return convert_oct (pfile, from, limit, tbuf, wide); + return convert_oct (pfile, from, limit, tbuf, cvt); /* Various letter escapes. Get the appropriate host-charset value into C. */ @@ -1312,6 +1321,27 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, return from + 1; } +/* TYPE is a token type. The return value is the conversion needed to + convert from source to execution character set for the given type. */ +static struct cset_converter +converter_for_type (cpp_reader *pfile, enum cpp_ttype type) +{ + switch (type) + { + default: + return pfile->narrow_cset_desc; + case CPP_CHAR16: + case CPP_STRING16: + return pfile->char16_cset_desc; + case CPP_CHAR32: + case CPP_STRING32: + return pfile->char32_cset_desc; + case CPP_WCHAR: + case CPP_WSTRING: + return pfile->wide_cset_desc; + } +} + /* FROM is an array of cpp_string structures of length COUNT. These are to be converted from the source to the execution character set, escape sequences translated, and finally all are to be @@ -1320,13 +1350,12 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit, false for failure. */ bool cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, - cpp_string *to, bool wide) + cpp_string *to, enum cpp_ttype type) { struct _cpp_strbuf tbuf; const uchar *p, *base, *limit; size_t i; - struct cset_converter cvt - = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc; + struct cset_converter cvt = converter_for_type (pfile, type); tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len); tbuf.text = XNEWVEC (uchar, tbuf.asize); @@ -1335,7 +1364,7 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, for (i = 0; i < count; i++) { p = from[i].text; - if (*p == 'L') p++; + if (*p == 'L' || *p == 'u' || *p == 'U') p++; p++; /* Skip leading quote. */ limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */ @@ -1354,12 +1383,12 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, if (p == limit) break; - p = convert_escape (pfile, p + 1, limit, &tbuf, wide); + p = convert_escape (pfile, p + 1, limit, &tbuf, cvt); } } /* NUL-terminate the 'to' buffer and translate it to a cpp_string structure. */ - emit_numeric_escape (pfile, 0, &tbuf, wide); + emit_numeric_escape (pfile, 0, &tbuf, cvt); tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len); to->text = tbuf.text; to->len = tbuf.len; @@ -1375,7 +1404,8 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count, in a string, but do not perform character set conversion. */ bool cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from, - size_t count, cpp_string *to, bool wide) + size_t count, cpp_string *to, + enum cpp_ttype type ATTRIBUTE_UNUSED) { struct cset_converter save_narrow_cset_desc = pfile->narrow_cset_desc; bool retval; @@ -1383,7 +1413,7 @@ cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from, pfile->narrow_cset_desc.func = convert_no_conversion; pfile->narrow_cset_desc.cd = (iconv_t) -1; - retval = cpp_interpret_string (pfile, from, count, to, wide); + retval = cpp_interpret_string (pfile, from, count, to, CPP_STRING); pfile->narrow_cset_desc = save_narrow_cset_desc; return retval; @@ -1462,13 +1492,14 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str, /* Subroutine of cpp_interpret_charconst which performs the conversion to a number, for wide strings. STR is the string structure returned by cpp_interpret_string. PCHARS_SEEN and UNSIGNEDP are as for - cpp_interpret_charconst. */ + cpp_interpret_charconst. TYPE is the token type. */ static cppchar_t wide_str_to_charconst (cpp_reader *pfile, cpp_string str, - unsigned int *pchars_seen, int *unsignedp) + unsigned int *pchars_seen, int *unsignedp, + enum cpp_ttype type) { bool bigend = CPP_OPTION (pfile, bytes_big_endian); - size_t width = CPP_OPTION (pfile, wchar_precision); + size_t width = converter_for_type (pfile, type).width; size_t cwidth = CPP_OPTION (pfile, char_precision); size_t mask = width_to_mask (width); size_t cmask = width_to_mask (cwidth); @@ -1490,7 +1521,7 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str, /* Wide character constants have type wchar_t, and a single character exactly fills a wchar_t, so a multi-character wide character constant is guaranteed to overflow. */ - if (off > 0) + if (str.len > nbwc * 2) cpp_error (pfile, CPP_DL_WARNING, "character constant too long for its type"); @@ -1498,13 +1529,20 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str, sign- or zero-extend to the full width of cppchar_t. */ if (width < BITS_PER_CPPCHAR_T) { - if (CPP_OPTION (pfile, unsigned_wchar) || !(result & (1 << (width - 1)))) + if (type == CPP_CHAR16 || type == CPP_CHAR32 + || CPP_OPTION (pfile, unsigned_wchar) + || !(result & (1 << (width - 1)))) result &= mask; else result |= ~mask; } - *unsignedp = CPP_OPTION (pfile, unsigned_wchar); + if (type == CPP_CHAR16 || type == CPP_CHAR32 + || CPP_OPTION (pfile, unsigned_wchar)) + *unsignedp = 1; + else + *unsignedp = 0; + *pchars_seen = 1; return result; } @@ -1518,20 +1556,21 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, unsigned int *pchars_seen, int *unsignedp) { cpp_string str = { 0, 0 }; - bool wide = (token->type == CPP_WCHAR); + bool wide = (token->type != CPP_CHAR); cppchar_t result; - /* an empty constant will appear as L'' or '' */ + /* an empty constant will appear as L'', u'', U'' or '' */ if (token->val.str.len == (size_t) (2 + wide)) { cpp_error (pfile, CPP_DL_ERROR, "empty character constant"); return 0; } - else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, wide)) + else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, token->type)) return 0; if (wide) - result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp); + result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp, + token->type); else result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp); -- cgit v1.2.1