10 files changed, 187 insertions, 77 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 0bd2aad74c5..9eef6efb3e9 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,40 @@
+2008-04-18  Kris Van Hees <kris.van.hees@oracle.com>
+
+	* include/cpp-id-data.h (UC): Was U, conflicts with U"..." literal.
+	* include/cpplib.h (CHAR16, CHAR32, STRING16, STRING32): New tokens.
+	(struct cpp_options): Added uliterals.
+	(cpp_interpret_string): Update prototype.
+	(cpp_interpret_string_notranslate): Idem.
+	* charset.c (init_iconv_desc): New width member in cset_converter.
+	(cpp_init_iconv): Add support for char{16,32}_cset_desc.
+	(convert_ucn): Idem.
+	(emit_numeric_escape): Idem.
+	(convert_hex): Idem.
+	(convert_oct): Idem.
+	(convert_escape): Idem.
+	(converter_for_type): New function.
+	(cpp_interpret_string): Use converter_for_type, support u and U prefix.
+	(cpp_interpret_string_notranslate): Match changed prototype.
+	(wide_str_to_charconst): Use converter_for_type.
+	(cpp_interpret_charconst): Add support for CPP_CHAR{16,32}.
+	* directives.c (linemarker_dir): Macro U changed to UC.
+	(parse_include): Idem.
+	(register_pragma_1): Idem.
+	(restore_registered_pragmas): Idem.
+	(get__Pragma_string): Support CPP_STRING{16,32}.
+	* expr.c (eval_token): Support CPP_CHAR{16,32}.
+	* init.c (struct lang_flags): Added uliterals.
+	(lang_defaults): Idem.
+	* internal.h (struct cset_converter) <width>: New field.
+	(struct cpp_reader) <char16_cset_desc>: Idem.
+	(struct cpp_reader) <char32_cset_desc>: Idem.
+	* lex.c (digraph_spellings): Macro U changed to UC.
+	(OP, TK): Idem.
+	(lex_string): Add support for u'...', U'...', u"..." and U"...".
+	(_cpp_lex_direct): Idem.
+	* macro.c (_cpp_builtin_macro_text): Macro U changed to UC.
+	(stringify_arg): Support CPP_CHAR{16,32} and CPP_STRING{16,32}.
+
 2008-04-18  Paolo Bonzini  <bonzini@gnu.org>
 
 	PR bootstrap/35457
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 5db8fc13430..225cdb4915e 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -642,6 +642,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
     {
       ret.func = convert_no_conversion;
       ret.cd = (iconv_t) -1;
+      ret.width = -1;
       return ret;
     }
 
@@ -655,6 +656,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
       {
 	ret.func = conversion_tab[i].func;
 	ret.cd = conversion_tab[i].fake_cd;
+	ret.width = -1;
 	return ret;
       }
 
@@ -663,6 +665,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
     {
       ret.func = convert_using_iconv;
       ret.cd = iconv_open (to, from);
+      ret.width = -1;
 
       if (ret.cd == (iconv_t) -1)
 	{
@@ -683,6 +686,7 @@ init_iconv_desc (cpp_reader *pfile, const char *to, const char *from)
 		 from, to);
       ret.func = convert_no_conversion;
       ret.cd = (iconv_t) -1;
+      ret.width = -1;
     }
   return ret;
 }
@@ -716,7 +720,17 @@ cpp_init_iconv (cpp_reader *pfile)
     wcset = default_wcset;
 
   pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
+  pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
+  pfile->char16_cset_desc = init_iconv_desc (pfile,
+					     be ? "UTF-16BE" : "UTF-16LE",
+					     SOURCE_CHARSET);
+  pfile->char16_cset_desc.width = 16;
+  pfile->char32_cset_desc = init_iconv_desc (pfile,
+					     be ? "UTF-32BE" : "UTF-32LE",
+					     SOURCE_CHARSET);
+  pfile->char32_cset_desc.width = 32;
   pfile->wide_cset_desc = init_iconv_desc (pfile, wcset, SOURCE_CHARSET);
+  pfile->wide_cset_desc.width = CPP_OPTION (pfile, wchar_precision);
 }
 
 /* Destroy iconv(3) descriptors set up by cpp_init_iconv, if necessary.  */
@@ -1051,15 +1065,13 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
    An advanced pointer is returned.  Issues all relevant diagnostics.  */
 static const uchar *
 convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
-	     struct _cpp_strbuf *tbuf, bool wide)
+	     struct _cpp_strbuf *tbuf, struct cset_converter cvt)
 {
   cppchar_t ucn;
   uchar buf[6];
   uchar *bufp = buf;
   size_t bytesleft = 6;
   int rval;
-  struct cset_converter cvt
-    = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
   struct normalize_state nst = INITIAL_NORMALIZE_STATE;
 
   from++;  /* Skip u/U.  */
@@ -1086,14 +1098,15 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
    function issues no diagnostics and never fails.  */
 static void
 emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
-		     struct _cpp_strbuf *tbuf, bool wide)
+		     struct _cpp_strbuf *tbuf, struct cset_converter cvt)
 {
-  if (wide)
+  size_t width = cvt.width;
+
+  if (width != CPP_OPTION (pfile, char_precision))
     {
       /* We have to render this into the target byte order, which may not
 	 be our byte order.  */
       bool bigend = CPP_OPTION (pfile, bytes_big_endian);
-      size_t width = CPP_OPTION (pfile, wchar_precision);
       size_t cwidth = CPP_OPTION (pfile, char_precision);
       size_t cmask = width_to_mask (cwidth);
       size_t nbwc = width / cwidth;
@@ -1136,12 +1149,11 @@ emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
    number.  You can, e.g. generate surrogate pairs this way.  */
 static const uchar *
 convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
-	     struct _cpp_strbuf *tbuf, bool wide)
+	     struct _cpp_strbuf *tbuf, struct cset_converter cvt)
 {
   cppchar_t c, n = 0, overflow = 0;
   int digits_found = 0;
-  size_t width = (wide ? CPP_OPTION (pfile, wchar_precision)
-		  : CPP_OPTION (pfile, char_precision));
+  size_t width = cvt.width;
   size_t mask = width_to_mask (width);
 
   if (CPP_WTRADITIONAL (pfile))
@@ -1174,7 +1186,7 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
       n &= mask;
     }
 
-  emit_numeric_escape (pfile, n, tbuf, wide);
+  emit_numeric_escape (pfile, n, tbuf, cvt);
 
   return from;
 }
@@ -1187,12 +1199,11 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
    number.  */
 static const uchar *
 convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
-	     struct _cpp_strbuf *tbuf, bool wide)
+	     struct _cpp_strbuf *tbuf, struct cset_converter cvt)
 {
   size_t count = 0;
   cppchar_t c, n = 0;
-  size_t width = (wide ? CPP_OPTION (pfile, wchar_precision)
-		  : CPP_OPTION (pfile, char_precision));
+  size_t width = cvt.width;
   size_t mask = width_to_mask (width);
   bool overflow = false;
 
@@ -1213,7 +1224,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
       n &= mask;
     }
 
-  emit_numeric_escape (pfile, n, tbuf, wide);
+  emit_numeric_escape (pfile, n, tbuf, cvt);
 
   return from;
 }
@@ -1224,7 +1235,7 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
    pointer.  Handles all relevant diagnostics.  */
 static const uchar *
 convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
-		struct _cpp_strbuf *tbuf, bool wide)
+		struct _cpp_strbuf *tbuf, struct cset_converter cvt)
 {
   /* Values of \a \b \e \f \n \r \t \v respectively.  */
 #if HOST_CHARSET == HOST_CHARSET_ASCII
@@ -1236,23 +1247,21 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
 #endif
 
   uchar c;
-  struct cset_converter cvt
-    = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
 
   c = *from;
   switch (c)
     {
       /* UCNs, hex escapes, and octal escapes are processed separately.  */
     case 'u': case 'U':
-      return convert_ucn (pfile, from, limit, tbuf, wide);
+      return convert_ucn (pfile, from, limit, tbuf, cvt);
 
     case 'x':
-      return convert_hex (pfile, from, limit, tbuf, wide);
+      return convert_hex (pfile, from, limit, tbuf, cvt);
       break;
 
     case '0':  case '1':  case '2':  case '3':
     case '4':  case '5':  case '6':  case '7':
-      return convert_oct (pfile, from, limit, tbuf, wide);
+      return convert_oct (pfile, from, limit, tbuf, cvt);
 
       /* Various letter escapes.  Get the appropriate host-charset
 	 value into C.  */
@@ -1312,6 +1321,27 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
   return from + 1;
 }
 
+/* TYPE is a token type.  The return value is the conversion needed to
+   convert from source to execution character set for the given type. */
+static struct cset_converter
+converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
+{
+  switch (type)
+    {
+    default:
+	return pfile->narrow_cset_desc;
+    case CPP_CHAR16:
+    case CPP_STRING16:
+	return pfile->char16_cset_desc;
+    case CPP_CHAR32:
+    case CPP_STRING32:
+	return pfile->char32_cset_desc;
+    case CPP_WCHAR:
+    case CPP_WSTRING:
+	return pfile->wide_cset_desc;
+    }
+}
+
 /* FROM is an array of cpp_string structures of length COUNT.  These
    are to be converted from the source to the execution character set,
    escape sequences translated, and finally all are to be
@@ -1320,13 +1350,12 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
    false for failure.  */
 bool
 cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
-		      cpp_string *to, bool wide)
+		      cpp_string *to,  enum cpp_ttype type)
 {
   struct _cpp_strbuf tbuf;
   const uchar *p, *base, *limit;
   size_t i;
-  struct cset_converter cvt
-    = wide ? pfile->wide_cset_desc : pfile->narrow_cset_desc;
+  struct cset_converter cvt = converter_for_type (pfile, type);
 
   tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
   tbuf.text = XNEWVEC (uchar, tbuf.asize);
@@ -1335,7 +1364,7 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
   for (i = 0; i < count; i++)
     {
       p = from[i].text;
-      if (*p == 'L') p++;
+      if (*p == 'L' || *p == 'u' || *p == 'U') p++;
       p++; /* Skip leading quote.  */
       limit = from[i].text + from[i].len - 1; /* Skip trailing quote.  */
 
@@ -1354,12 +1383,12 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
 	  if (p == limit)
 	    break;
 
-	  p = convert_escape (pfile, p + 1, limit, &tbuf, wide);
+	  p = convert_escape (pfile, p + 1, limit, &tbuf, cvt);
 	}
     }
   /* NUL-terminate the 'to' buffer and translate it to a cpp_string
      structure.  */
-  emit_numeric_escape (pfile, 0, &tbuf, wide);
+  emit_numeric_escape (pfile, 0, &tbuf, cvt);
   tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
   to->text = tbuf.text;
   to->len = tbuf.len;
@@ -1375,7 +1404,8 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
    in a string, but do not perform character set conversion.  */
 bool
 cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from,
-				  size_t count,	cpp_string *to, bool wide)
+				  size_t count,	cpp_string *to,
+				  enum cpp_ttype type ATTRIBUTE_UNUSED)
 {
   struct cset_converter save_narrow_cset_desc = pfile->narrow_cset_desc;
   bool retval;
@@ -1383,7 +1413,7 @@ cpp_interpret_string_notranslate (cpp_reader *pfile, const cpp_string *from,
   pfile->narrow_cset_desc.func = convert_no_conversion;
   pfile->narrow_cset_desc.cd = (iconv_t) -1;
 
-  retval = cpp_interpret_string (pfile, from, count, to, wide);
+  retval = cpp_interpret_string (pfile, from, count, to, CPP_STRING);
 
   pfile->narrow_cset_desc = save_narrow_cset_desc;
   return retval;
@@ -1462,13 +1492,14 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str,
 /* Subroutine of cpp_interpret_charconst which performs the conversion
    to a number, for wide strings.  STR is the string structure returned
    by cpp_interpret_string.  PCHARS_SEEN and UNSIGNEDP are as for
-   cpp_interpret_charconst.  */
+   cpp_interpret_charconst.  TYPE is the token type.  */
 static cppchar_t
 wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
-		       unsigned int *pchars_seen, int *unsignedp)
+		       unsigned int *pchars_seen, int *unsignedp,
+		       enum cpp_ttype type)
 {
   bool bigend = CPP_OPTION (pfile, bytes_big_endian);
-  size_t width = CPP_OPTION (pfile, wchar_precision);
+  size_t width = converter_for_type (pfile, type).width;
   size_t cwidth = CPP_OPTION (pfile, char_precision);
   size_t mask = width_to_mask (width);
   size_t cmask = width_to_mask (cwidth);
@@ -1490,7 +1521,7 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
   /* Wide character constants have type wchar_t, and a single
      character exactly fills a wchar_t, so a multi-character wide
      character constant is guaranteed to overflow.  */
-  if (off > 0)
+  if (str.len > nbwc * 2)
     cpp_error (pfile, CPP_DL_WARNING,
 	       "character constant too long for its type");
 
@@ -1498,13 +1529,20 @@ wide_str_to_charconst (cpp_reader *pfile, cpp_string str,
      sign- or zero-extend to the full width of cppchar_t.  */
   if (width < BITS_PER_CPPCHAR_T)
     {
-      if (CPP_OPTION (pfile, unsigned_wchar) || !(result & (1 << (width - 1))))
+      if (type == CPP_CHAR16 || type == CPP_CHAR32
+	  || CPP_OPTION (pfile, unsigned_wchar)
+	  || !(result & (1 << (width - 1))))
 	result &= mask;
       else
 	result |= ~mask;
     }
 
-  *unsignedp = CPP_OPTION (pfile, unsigned_wchar);
+  if (type == CPP_CHAR16 || type == CPP_CHAR32
+      || CPP_OPTION (pfile, unsigned_wchar))
+    *unsignedp = 1;
+  else
+    *unsignedp = 0;
+
   *pchars_seen = 1;
   return result;
 }
@@ -1518,20 +1556,21 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
 			 unsigned int *pchars_seen, int *unsignedp)
 {
   cpp_string str = { 0, 0 };
-  bool wide = (token->type == CPP_WCHAR);
+  bool wide = (token->type != CPP_CHAR);
   cppchar_t result;
 
-  /* an empty constant will appear as L'' or '' */
+  /* an empty constant will appear as L'', u'', U'' or '' */
   if (token->val.str.len == (size_t) (2 + wide))
     {
       cpp_error (pfile, CPP_DL_ERROR, "empty character constant");
       return 0;
     }
-  else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, wide))
+  else if (!cpp_interpret_string (pfile, &token->val.str, 1, &str, token->type))
     return 0;
 
   if (wide)
-    result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp);
+    result = wide_str_to_charconst (pfile, str, pchars_seen, unsignedp,
+				    token->type);
   else
     result = narrow_str_to_charconst (pfile, str, pchars_seen, unsignedp);
 
diff --git a/libcpp/directives.c b/libcpp/directives.c
index 0ca1117c19a..3478cd5047a 100644
--- a/libcpp/directives.c
+++ b/libcpp/directives.c
@@ -188,7 +188,7 @@ DIRECTIVE_TABLE
    did use this notation in its preprocessed output.  */
 static const directive linemarker_dir =
 {
-  do_linemarker, U"#", 1, KANDR, IN_I
+  do_linemarker, UC"#", 1, KANDR, IN_I
 };
 
 #define SEEN_EOL() (pfile->cur_token[-1].type == CPP_EOF)
@@ -697,7 +697,7 @@ parse_include (cpp_reader *pfile, int *pangle_brackets,
       const unsigned char *dir;
 
       if (pfile->directive == &dtable[T_PRAGMA])
-	dir = U"pragma dependency";
+	dir = UC"pragma dependency";
       else
 	dir = pfile->directive->name;
       cpp_error (pfile, CPP_DL_ERROR, "#%s expects \"FILENAME\" or <FILENAME>",
@@ -1085,7 +1085,7 @@ register_pragma_1 (cpp_reader *pfile, const char *space, const char *name,
 
   if (space)
     {
-      node = cpp_lookup (pfile, U space, strlen (space));
+      node = cpp_lookup (pfile, UC space, strlen (space));
       entry = lookup_pragma_entry (*chain, node);
       if (!entry)
 	{
@@ -1114,7 +1114,7 @@ register_pragma_1 (cpp_reader *pfile, const char *space, const char *name,
     }
 
   /* Check for duplicates.  */
-  node = cpp_lookup (pfile, U name, strlen (name));
+  node = cpp_lookup (pfile, UC name, strlen (name));
   entry = lookup_pragma_entry (*chain, node);
   if (entry == NULL)
     {
@@ -1262,7 +1262,7 @@ restore_registered_pragmas (cpp_reader *pfile, struct pragma_entry *pe,
     {
       if (pe->is_nspace)
 	sd = restore_registered_pragmas (pfile, pe->u.space, sd);
-      pe->pragma = cpp_lookup (pfile, U *sd, strlen (*sd));
+      pe->pragma = cpp_lookup (pfile, UC *sd, strlen (*sd));
       free (*sd);
       sd++;
     }
@@ -1491,7 +1491,8 @@ get__Pragma_string (cpp_reader *pfile)
   string = get_token_no_padding (pfile);
   if (string->type == CPP_EOF)
     _cpp_backup_tokens (pfile, 1);
-  if (string->type != CPP_STRING && string->type != CPP_WSTRING)
+  if (string->type != CPP_STRING && string->type != CPP_WSTRING
+      && string->type != CPP_STRING32 && string->type != CPP_STRING16)
     return NULL;
 
   paren = get_token_no_padding (pfile);
diff --git a/libcpp/expr.c b/libcpp/expr.c
index 9e89dd9574a..00149b2422d 100644
--- a/libcpp/expr.c
+++ b/libcpp/expr.c
@@ -705,6 +705,8 @@ eval_token (cpp_reader *pfile, const cpp_token *token)
 
     case CPP_WCHAR:
     case CPP_CHAR:
+    case CPP_CHAR16:
+    case CPP_CHAR32:
       {
 	cppchar_t cc = cpp_interpret_charconst (pfile, token,
 						&temp, &unsignedp);
@@ -863,6 +865,8 @@ _cpp_parse_expr (cpp_reader *pfile)
 	case CPP_NUMBER:
 	case CPP_CHAR:
 	case CPP_WCHAR:
+	case CPP_CHAR16:
+	case CPP_CHAR32:
 	case CPP_NAME:
 	case CPP_HASH:
 	  if (!want_value)
diff --git a/libcpp/include/cpp-id-data.h b/libcpp/include/cpp-id-data.h
index 2445186c228..db37c2beccc 100644
--- a/libcpp/include/cpp-id-data.h
+++ b/libcpp/include/cpp-id-data.h
@@ -22,7 +22,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
 typedef unsigned char uchar;
 #endif
 
-#define U (const unsigned char *)  /* Intended use: U"string" */
+#define UC (const unsigned char *)  /* Intended use: UC"string" */
 
 /* Chained list of answers to an assertion.  */
 struct answer GTY(())
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 84de0e09975..483c54331fb 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -123,10 +123,14 @@ struct _cpp_file;
 									\
   TK(CHAR,		LITERAL) /* 'char' */				\
   TK(WCHAR,		LITERAL) /* L'char' */				\
+  TK(CHAR16,		LITERAL) /* u'char' */				\
+  TK(CHAR32,		LITERAL) /* U'char' */				\
   TK(OTHER,		LITERAL) /* stray punctuation */		\
 									\
   TK(STRING,		LITERAL) /* "string" */				\
   TK(WSTRING,		LITERAL) /* L"string" */			\
+  TK(STRING16,		LITERAL) /* u"string" */			\
+  TK(STRING32,		LITERAL) /* U"string" */			\
   TK(OBJC_STRING,	LITERAL) /* @"string" - Objective-C */		\
   TK(HEADER_NAME,	LITERAL) /* <stdio.h> in #include */		\
 									\
@@ -291,6 +295,9 @@ struct cpp_options
   /* Nonzero means to allow hexadecimal floats and LL suffixes.  */
   unsigned char extended_numbers;
 
+  /* Nonzero means process u/U prefix literals (UTF-16/32).  */
+  unsigned char uliterals;
+
   /* Nonzero means print names of header files (-H).  */
   unsigned char print_include_names;
 
@@ -712,10 +719,10 @@ extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
 /* Evaluate a vector of CPP_STRING or CPP_WSTRING tokens.  */
 extern bool cpp_interpret_string (cpp_reader *,
 				  const cpp_string *, size_t,
-				  cpp_string *, bool);
+				  cpp_string *, enum cpp_ttype);
 extern bool cpp_interpret_string_notranslate (cpp_reader *,
 					      const cpp_string *, size_t,
-					      cpp_string *, bool);
+					      cpp_string *, enum cpp_ttype);
 
 /* Convert a host character constant to the execution character set.  */
 extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t);
diff --git a/libcpp/init.c b/libcpp/init.c
index aa0c0b10e3d..040bf2a0489 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -76,20 +76,21 @@ struct lang_flags
   char std;
   char cplusplus_comments;
   char digraphs;
+  char uliterals;
 };
 
 static const struct lang_flags lang_defaults[] =
-{ /*              c99 c++ xnum xid std  //   digr  */
-  /* GNUC89   */  { 0,  0,  1,   0,  0,   1,   1     },
-  /* GNUC99   */  { 1,  0,  1,   0,  0,   1,   1     },
-  /* STDC89   */  { 0,  0,  0,   0,  1,   0,   0     },
-  /* STDC94   */  { 0,  0,  0,   0,  1,   0,   1     },
-  /* STDC99   */  { 1,  0,  1,   0,  1,   1,   1     },
-  /* GNUCXX   */  { 0,  1,  1,   0,  0,   1,   1     },
-  /* CXX98    */  { 0,  1,  1,   0,  1,   1,   1     },
-  /* GNUCXX0X */  { 1,  1,  1,   0,  0,   1,   1     },
-  /* CXX0X    */  { 1,  1,  1,   0,  1,   1,   1     },
-  /* ASM      */  { 0,  0,  1,   0,  0,   1,   0     }
+{ /*              c99 c++ xnum xid std  //   digr ulit */
+  /* GNUC89   */  { 0,  0,  1,   0,  0,   1,   1,   0 },
+  /* GNUC99   */  { 1,  0,  1,   0,  0,   1,   1,   1 },
+  /* STDC89   */  { 0,  0,  0,   0,  1,   0,   0,   0 },
+  /* STDC94   */  { 0,  0,  0,   0,  1,   0,   1,   0 },
+  /* STDC99   */  { 1,  0,  1,   0,  1,   1,   1,   0 },
+  /* GNUCXX   */  { 0,  1,  1,   0,  0,   1,   1,   0 },
+  /* CXX98    */  { 0,  1,  1,   0,  1,   1,   1,   0 },
+  /* GNUCXX0X */  { 1,  1,  1,   0,  0,   1,   1,   1 },
+  /* CXX0X    */  { 1,  1,  1,   0,  1,   1,   1,   1 },
+  /* ASM      */  { 0,  0,  1,   0,  0,   1,   0,   0 }
   /* xid should be 1 for GNUC99, STDC99, GNUCXX, CXX98, GNUCXX0X, and
      CXX0X when no longer experimental (when all uses of identifiers
      in the compiler have been audited for correct handling of
@@ -112,6 +113,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
   CPP_OPTION (pfile, trigraphs)			 = l->std;
   CPP_OPTION (pfile, cplusplus_comments)	 = l->cplusplus_comments;
   CPP_OPTION (pfile, digraphs)			 = l->digraphs;
+  CPP_OPTION (pfile, uliterals)			 = l->uliterals;
 }
 
 /* Initialize library global state.  */
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 6110e5cdb08..bf6c5f8c8d2 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -48,6 +48,7 @@ struct cset_converter
 {
   convert_f func;
   iconv_t cd;
+  int width;
 };
 
 #define BITS_PER_CPPCHAR_T (CHAR_BIT * sizeof (cppchar_t))
@@ -399,6 +400,14 @@ struct cpp_reader
   struct cset_converter narrow_cset_desc;
 
   /* Descriptor for converting from the source character set to the
+     UTF-16 execution character set.  */
+  struct cset_converter char16_cset_desc;
+
+  /* Descriptor for converting from the source character set to the
+     UTF-32 execution character set.  */
+  struct cset_converter char32_cset_desc;
+
+  /* Descriptor for converting from the source character set to the
      wide execution character set.  */
   struct cset_converter wide_cset_desc;
 
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 2eaf6105922..772a8701654 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -39,10 +39,10 @@ struct token_spelling
 };
 
 static const unsigned char *const digraph_spellings[] =
-{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
+{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
 
-#define OP(e, s) { SPELL_OPERATOR, U s  },
-#define TK(e, s) { SPELL_ ## s,    U #e },
+#define OP(e, s) { SPELL_OPERATOR, UC s  },
+#define TK(e, s) { SPELL_ ## s,    UC #e },
 static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
 #undef OP
 #undef TK
@@ -611,8 +611,8 @@ create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
 
 /* Lexes a string, character constant, or angle-bracketed header file
    name.  The stored string contains the spelling, including opening
-   quote and leading any leading 'L'.  It returns the type of the
-   literal, or CPP_OTHER if it was not properly terminated.
+   quote and leading any leading 'L', 'u' or 'U'.  It returns the type
+   of the literal, or CPP_OTHER if it was not properly terminated.
 
    The spelling is NUL-terminated, but it is not guaranteed that this
    is the first NUL since embedded NULs are preserved.  */
@@ -626,12 +626,16 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
 
   cur = base;
   terminator = *cur++;
-  if (terminator == 'L')
+  if (terminator == 'L' || terminator == 'u' || terminator == 'U')
     terminator = *cur++;
   if (terminator == '\"')
-    type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
+    type = (*base == 'L' ? CPP_WSTRING :
+	    *base == 'U' ? CPP_STRING32 :
+	    *base == 'u' ? CPP_STRING16 : CPP_STRING);
   else if (terminator == '\'')
-    type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
+    type = (*base == 'L' ? CPP_WCHAR :
+	    *base == 'U' ? CPP_CHAR32 :
+	    *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
   else
     terminator = '>', type = CPP_HEADER_NAME;
 
@@ -965,11 +969,16 @@ _cpp_lex_direct (cpp_reader *pfile)
       }
 
     case 'L':
-      /* 'L' may introduce wide characters or strings.  */
-      if (*buffer->cur == '\'' || *buffer->cur == '"')
+    case 'u':
+    case 'U':
+      /* 'L', 'u' or 'U' may introduce wide characters or strings.  */
+      if (c == 'L' || CPP_OPTION (pfile, uliterals))
 	{
-	  lex_string (pfile, result, buffer->cur - 1);
-	  break;
+	  if (*buffer->cur == '\'' || *buffer->cur == '"')
+	    {
+	      lex_string (pfile, result, buffer->cur - 1);
+	      break;
+	    }
 	}
       /* Fall through.  */
 
@@ -977,12 +986,12 @@ _cpp_lex_direct (cpp_reader *pfile)
     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
-    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+    case 's': case 't':           case 'v': case 'w': case 'x':
     case 'y': case 'z':
     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
     case 'G': case 'H': case 'I': case 'J': case 'K':
     case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
-    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+    case 'S': case 'T':           case 'V': case 'W': case 'X':
     case 'Y': case 'Z':
       result->type = CPP_NAME;
       {
diff --git a/libcpp/macro.c b/libcpp/macro.c
index 587b94814cc..016754bc952 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -158,7 +158,7 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node)
 		  {
 		    cpp_errno (pfile, CPP_DL_WARNING,
 			"could not determine file timestamp");
-		    pbuffer->timestamp = U"\"??? ??? ?? ??:??:?? ????\"";
+		    pbuffer->timestamp = UC"\"??? ??? ?? ??:??:?? ????\"";
 		  }
 	      }
 	  }
@@ -256,8 +256,8 @@ _cpp_builtin_macro_text (cpp_reader *pfile, cpp_hashnode *node)
 	      cpp_errno (pfile, CPP_DL_WARNING,
 			 "could not determine date and time");
 		
-	      pfile->date = U"\"??? ?? ????\"";
-	      pfile->time = U"\"??:??:??\"";
+	      pfile->date = UC"\"??? ?? ????\"";
+	      pfile->time = UC"\"??:??:??\"";
 	    }
 	}
 
@@ -375,8 +375,10 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg)
 	  continue;
 	}
 
-      escape_it = (token->type == CPP_STRING || token->type == CPP_WSTRING
-		   || token->type == CPP_CHAR || token->type == CPP_WCHAR);
+      escape_it = (token->type == CPP_STRING || token->type == CPP_CHAR
+		   || token->type == CPP_WSTRING || token->type == CPP_STRING
+		   || token->type == CPP_STRING32 || token->type == CPP_CHAR32
+		   || token->type == CPP_STRING16 || token->type == CPP_CHAR16);
 
       /* Room for each char being written in octal, initial space and
 	 final quote and NUL.  */