diff options
Diffstat (limited to 'libcpp/charset.c')
-rw-r--r-- | libcpp/charset.c | 61 |
1 files changed, 59 insertions, 2 deletions
diff --git a/libcpp/charset.c b/libcpp/charset.c index 37859c52a31..6b6c360f73d 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -81,8 +81,10 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #if HOST_CHARSET == HOST_CHARSET_ASCII #define SOURCE_CHARSET "UTF-8" +#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0x7e #elif HOST_CHARSET == HOST_CHARSET_EBCDIC #define SOURCE_CHARSET "UTF-EBCDIC" +#define LAST_POSSIBLY_BASIC_SOURCE_CHAR 0xFF #else #error "Unrecognized basic host character set" #endif @@ -714,6 +716,63 @@ _cpp_destroy_iconv (cpp_reader *pfile) } } +/* Utility routine for use by a full compiler. C is a character taken + from the *basic* source character set, encoded in the host's + execution encoding. Convert it to (the target's) execution + encoding, and return that value. + + Issues an internal error if C's representation in the narrow + execution character set fails to be a single-byte value (C99 + 5.2.1p3: "The representation of each member of the source and + execution character sets shall fit in a byte.") May also issue an + internal error if C fails to be a member of the basic source + character set (testing this exactly is too hard, especially when + the host character set is EBCDIC). */ +cppchar_t +cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c) +{ + uchar sbuf[1]; + struct _cpp_strbuf tbuf; + + /* This test is merely an approximation, but it suffices to catch + the most important thing, which is that we don't get handed a + character outside the unibyte range of the host character set. */ + if (c > LAST_POSSIBLY_BASIC_SOURCE_CHAR) + { + cpp_error (pfile, CPP_DL_ICE, + "character 0x%lx is not in the basic source character set\n", + (unsigned long)c); + return 0; + } + + /* Being a character in the unibyte range of the host character set, + we can safely splat it into a one-byte buffer and trust that that + is a well-formed string. */ + sbuf[0] = c; + + /* This should never need to reallocate, but just in case... */ + tbuf.asize = 1; + tbuf.text = xmalloc (tbuf.asize); + tbuf.len = 0; + + if (!APPLY_CONVERSION (pfile->narrow_cset_desc, sbuf, 1, &tbuf)) + { + cpp_errno (pfile, CPP_DL_ICE, "converting to execution character set"); + return 0; + } + if (tbuf.len != 1) + { + cpp_error (pfile, CPP_DL_ICE, + "character 0x%lx is not unibyte in execution character set", + (unsigned long)c); + return 0; + } + c = tbuf.text[0]; + free(tbuf.text); + return c; +} + + /* Utility routine that computes a mask of the form 0000...111... with WIDTH 1-bits. */ @@ -727,8 +786,6 @@ width_to_mask (size_t width) return ((size_t) 1 << width) - 1; } - - /* Returns 1 if C is valid in an identifier, 2 if C is valid except at the start of an identifier, and 0 if C is not valid in an identifier. We assume C has already gone through the checks of |