diff options
author | Joseph Myers <joseph@codesourcery.com> | 2009-05-04 13:23:50 +0100 |
---|---|---|
committer | Joseph Myers <jsm28@gcc.gnu.org> | 2009-05-04 13:23:50 +0100 |
commit | a3af5087d9b0e120764cb3852da73149be17dfac (patch) | |
tree | f7e08408fe3883d28eb96bdfd6745551e07b82a0 /gcc/pretty-print.c | |
parent | ea5cd5f17f80c6f6d6cf8b2a0868675e7c8ea391 (diff) | |
download | gcc-a3af5087d9b0e120764cb3852da73149be17dfac.tar.gz |
intl.c (locale_encoding, [...]): New.
* intl.c (locale_encoding, locale_utf8): New.
(gcc_init_libintl): Initialize locale_encoding and locale_utf8.
* intl.h (locale_encoding, locale_utf8): Declare.
* pretty-print.c: Include ggc.h. Include iconv.h if HAVE_ICONV.
(pp_base_tree_identifier, decode_utf8_char, identifier_to_locale):
New.
* pretty-print.h (pp_identifier): Call identifier_to_locale on ID
argument.
(pp_tree_identifier): Define to call pp_base_tree_identifier.
(pp_base_tree_identifier): Declare as function.
(identifier_to_locale): Declare.
* Makefile.in (pretty-print.o): Update dependencies.
* varasm.c (finish_aliases_1): Use %qE for identifiers in
diagnostics.
testsuite:
* gcc.dg/attr-alias-5.c, gcc.dg/ucnid-7.c: New tests.
From-SVN: r147096
Diffstat (limited to 'gcc/pretty-print.c')
-rw-r--r-- | gcc/pretty-print.c | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/gcc/pretty-print.c b/gcc/pretty-print.c
index b611ec98516..d531075c933 100644
--- a/gcc/pretty-print.c
+++ b/gcc/pretty-print.c
@@ -26,6 +26,11 @@ along with GCC; see the file COPYING3. If not see
 #include "intl.h"
 #include "pretty-print.h"
 #include "tree.h"
+#include "ggc.h"
+
+#if HAVE_ICONV
+#include <iconv.h>
+#endif
 
 #define obstack_chunk_alloc xmalloc
 #define obstack_chunk_free free
@@ -844,3 +849,228 @@ pp_base_maybe_space (pretty_printer *pp)
       pp_base (pp)->padding = pp_none;
     }
 }
+
+/* Print the identifier ID to PRETTY-PRINTER.  The text of ID is
+   first passed through identifier_to_locale, and the whole of the
+   returned string (up to its terminating NUL) is appended to PP.  */
+
+void
+pp_base_tree_identifier (pretty_printer *pp, tree id)
+{
+  const char *text = identifier_to_locale (IDENTIFIER_POINTER (id));
+  pp_append_text (pp, text, text + strlen (text));
+}
+
+/* The string starting at P has LEN (at least 1) bytes left; if they
+   start with a valid UTF-8 sequence, return the length of that
+   sequence and set *VALUE to the value of that sequence, and
+   otherwise return 0 and set *VALUE to (unsigned int) -1.
+
+   A first byte with the top bit clear is a one-byte sequence whose
+   value is the byte itself.  Otherwise the number of leading one
+   bits in the first byte is taken as the sequence length, which
+   must be between 2 and 6 and must not exceed LEN, and every
+   following byte must have the form 10xxxxxx.  Values encoded in
+   more bytes than necessary (overlong forms) and values in the
+   range 0xD800-0xDFFF are rejected.  A call with LEN == 0 aborts.
+
+   NOTE(review): *P is read to initialize T before the LEN == 0
+   check is reached, so the check does not guard that first read.  */
+
+static int
+decode_utf8_char (const unsigned char *p, size_t len, unsigned int *value)
+{
+  unsigned int t = *p;
+
+  if (len == 0)
+    abort ();
+  if (t & 0x80)
+    {
+      size_t utf8_len = 0;
+      unsigned int ch;
+      size_t i;
+      for (t = *p; t & 0x80; t <<= 1)
+        utf8_len++;  /* One count per leading one bit of the first byte.  */
+
+      if (utf8_len > len || utf8_len < 2 || utf8_len > 6)
+        {
+          *value = (unsigned int) -1;
+          return 0;
+        }
+      ch = *p & ((1 << (7 - utf8_len)) - 1);  /* Payload bits of the first byte.  */
+      for (i = 1; i < utf8_len; i++)
+        {
+          unsigned int u = p[i];
+          if ((u & 0xC0) != 0x80)
+            {
+              *value = (unsigned int) -1;
+              return 0;
+            }
+          ch = (ch << 6) | (u & 0x3F);  /* Append six payload bits.  */
+        }
+      if ( (ch <= 0x7F && utf8_len > 1)  /* Overlong forms...  */
+          || (ch <= 0x7FF && utf8_len > 2)
+          || (ch <= 0xFFFF && utf8_len > 3)
+          || (ch <= 0x1FFFFF && utf8_len > 4)
+          || (ch <= 0x3FFFFFF && utf8_len > 5)
+          || (ch >= 0xD800 && ch <= 0xDFFF))  /* ...and 0xD800-0xDFFF.  */
+        {
+          *value = (unsigned int) -1;
+          return 0;
+        }
+      *value = ch;
+      return utf8_len;
+    }
+  else
+    {
+      *value = t;
+      return 1;
+    }
+}
+
+/* Given IDENT, an identifier in the internal encoding, return a
+   version of IDENT suitable for diagnostics in the locale character
+   set: either IDENT itself, or a garbage-collected string converted
+   to the locale character set and using escape sequences if not
+   representable in the locale character set or containing control
+   characters or invalid byte sequences.  Existing backslashes in
+   IDENT are not doubled, so the result may not uniquely specify the
+   contents of an arbitrary byte sequence identifier.
+
+   The cases are tried in this order:
+   1. IDENT does not decode as UTF-8, or decodes to a character
+      that is <= 0x1F or in the range 0x7F-0x9F: return a new
+      string in which every byte outside 0x20-0x7E is written as a
+      backslash followed by three octal digits.
+   2. IDENT is entirely single-byte sequences, or locale_utf8 is
+      set: return IDENT itself.
+   3. When iconv support is compiled in and locale_encoding is not
+      NULL: return IDENT converted from UTF-8 to locale_encoding,
+      provided the conversion succeeds with no non-reversible
+      conversions reported.
+   4. Otherwise: return a new string in which each multi-byte
+      character is written as a backslash, 'U' and eight hex
+      digits, with single-byte characters copied unchanged.  */
+
+const char *
+identifier_to_locale (const char *ident)
+{
+  const unsigned char *uid = (const unsigned char *) ident;
+  size_t idlen = strlen (ident);
+  bool valid_printable_utf8 = true;
+  bool all_ascii = true;
+  size_t i;
+  /* Classify IDENT: stop at the first invalid sequence or control
+     character, and note whether any multi-byte sequence is seen.  */
+  for (i = 0; i < idlen;)
+    {
+      unsigned int c;
+      size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
+      if (utf8_len == 0 || c <= 0x1F || (c >= 0x7F && c <= 0x9F))
+        {
+          valid_printable_utf8 = false;
+          break;
+        }
+      if (utf8_len > 1)
+        all_ascii = false;
+      i += utf8_len;
+    }
+
+  /* If IDENT contains invalid UTF-8 sequences (which may occur with
+     attributes putting arbitrary byte sequences in identifiers), or
+     control characters, we use octal escape sequences for all bytes
+     outside printable ASCII.  Each input byte produces at most four
+     output characters, hence the 4 * IDLEN + 1 allocation.  */
+  if (!valid_printable_utf8)
+    {
+      char *ret = GGC_NEWVEC (char, 4 * idlen + 1);
+      char *p = ret;
+      for (i = 0; i < idlen; i++)
+        {
+          if (uid[i] > 0x1F && uid[i] < 0x7F)
+            *p++ = uid[i];
+          else
+            {
+              sprintf (p, "\\%03o", uid[i]);
+              p += 4;
+            }
+        }
+      *p = 0;
+      return ret;
+    }
+
+  /* Otherwise, if it is valid printable ASCII, or printable UTF-8
+     with the locale character set being UTF-8, IDENT is used
+     unchanged (no copy is made).  */
+  if (all_ascii || locale_utf8)
+    return ident;
+
+  /* Otherwise IDENT is converted to the locale character set if
+     possible.  If the converter cannot be opened or the conversion
+     fails, control falls through to the UCN fallback after the
+     conditional section.  */
+#if defined ENABLE_NLS && defined HAVE_LANGINFO_CODESET && HAVE_ICONV
+  if (locale_encoding != NULL)
+    {
+      iconv_t cd = iconv_open (locale_encoding, "UTF-8");
+      bool conversion_ok = true;
+      char *ret = NULL;
+      if (cd != (iconv_t) -1)
+        {
+          size_t ret_alloc = 4 * idlen + 1;
+          for (;;)
+            {
+              /* Repeat the whole conversion process as needed with
+                 larger buffers so non-reversible transformations can
+                 always be detected.  Each pass restarts from the
+                 beginning of IDENT with a freshly allocated buffer;
+                 one byte of the buffer is held back for the
+                 terminating NUL.  */
+              ICONV_CONST char *inbuf = CONST_CAST (char *, ident);
+              char *outbuf;
+              size_t inbytesleft = idlen;
+              size_t outbytesleft = ret_alloc - 1;
+              size_t iconv_ret;
+
+              ret = GGC_NEWVEC (char, ret_alloc);
+              outbuf = ret;
+
+              if (iconv (cd, 0, 0, 0, 0) == (size_t) -1)  /* Reset CD before each pass.  */
+                {
+                  conversion_ok = false;
+                  break;
+                }
+
+              iconv_ret = iconv (cd, &inbuf, &inbytesleft,
+                                 &outbuf, &outbytesleft);
+              if (iconv_ret == (size_t) -1 || inbytesleft != 0)
+                {
+                  /* NOTE(review): if iconv did not return -1 but left
+                     input unconsumed, errno here may be stale --
+                     confirm this cannot happen in practice.  */
+                  if (errno == E2BIG)
+                    {
+                      ret_alloc *= 2;
+                      ggc_free (ret);
+                      ret = NULL;
+                      continue;
+                    }
+                  else
+                    {
+                      conversion_ok = false;
+                      break;
+                    }
+                }
+              else if (iconv_ret != 0)  /* Non-reversible conversions were made.  */
+                {
+                  conversion_ok = false;
+                  break;
+                }
+              /* Return to initial shift state, writing any bytes
+                 needed for that into the output buffer; if they do
+                 not fit, retry with a buffer twice the size.  */
+              if (iconv (cd, 0, 0, &outbuf, &outbytesleft) == (size_t) -1)
+                {
+                  if (errno == E2BIG)
+                    {
+                      ret_alloc *= 2;
+                      ggc_free (ret);
+                      ret = NULL;
+                      continue;
+                    }
+                  else
+                    {
+                      conversion_ok = false;
+                      break;
+                    }
+                }
+              *outbuf = 0;
+              break;
+            }
+          iconv_close (cd);
+          if (conversion_ok)
+            return ret;
+        }
+    }
+#endif
+
+  /* Otherwise, convert non-ASCII characters in IDENT to UCNs.  Each
+     multi-byte character becomes ten output characters (backslash,
+     'U' and eight hex digits); IDENT is known to be valid UTF-8 at
+     this point, so decode_utf8_char is not expected to fail here.  */
+  {
+    char *ret = GGC_NEWVEC (char, 10 * idlen + 1);
+    char *p = ret;
+    for (i = 0; i < idlen;)
+      {
+        unsigned int c;
+        size_t utf8_len = decode_utf8_char (&uid[i], idlen - i, &c);
+        if (utf8_len == 1)
+          *p++ = uid[i];
+        else
+          {
+            sprintf (p, "\\U%08x", c);
+            p += 10;
+          }
+        i += utf8_len;
+      }
+    *p = 0;
+    return ret;
+  }
+} |