11 files changed, 55 insertions, 2 deletions
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index 095dd6a1ef..38733eee82 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1292,6 +1292,7 @@ public:
   enum CharacterKind {
     Ascii,
     Wide,
+    UTF8, // u8'c' (C++1z); has type char, like Ascii.
     UTF16,
     UTF32
   };
diff --git a/include/clang/AST/Stmt.h b/include/clang/AST/Stmt.h
index e48b7dcc28..d3950e92cf 100644
--- a/include/clang/AST/Stmt.h
+++ b/include/clang/AST/Stmt.h
@@ -130,7 +130,7 @@ protected:
     friend class CharacterLiteral;
     unsigned : NumExprBits;
 
-    unsigned Kind : 2;
+    unsigned Kind : 3; // Wide enough for all five CharacterKind values.
   };
 
   enum APFloatSemantics {
diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h
index 5210e3f2e1..d568614e2a 100644
--- a/include/clang/Lex/LiteralSupport.h
+++ b/include/clang/Lex/LiteralSupport.h
@@ -166,6 +166,7 @@ public:
   bool hadError() const { return HadError; }
   bool isAscii() const { return Kind == tok::char_constant; }
   bool isWide() const { return Kind == tok::wide_char_constant; }
+  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
   bool isUTF16() const { return Kind == tok::utf16_char_constant; }
   bool isUTF32() const { return Kind == tok::utf32_char_constant; }
   bool isMultiChar() const { return IsMultiChar; }
diff --git a/lib/AST/StmtPrinter.cpp b/lib/AST/StmtPrinter.cpp
index e55b2fc19a..69f52f52b6 100644
--- a/lib/AST/StmtPrinter.cpp
+++ b/lib/AST/StmtPrinter.cpp
@@ -1165,6 +1165,7 @@ void StmtPrinter::VisitCharacterLiteral(CharacterLiteral *Node) {
   switch (Node->getKind()) {
   case CharacterLiteral::Ascii: break; // no prefix.
   case CharacterLiteral::Wide:  OS << 'L'; break;
+  case CharacterLiteral::UTF8:  OS << "u8"; break;
   case CharacterLiteral::UTF16: OS << 'u'; break;
   case CharacterLiteral::UTF32: OS << 'U'; break;
   }
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 1e7858af89..5b1c49344e 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -983,6 +983,7 @@ NumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
 ///         u' c-char-sequence '
 ///         U' c-char-sequence '
 ///         L' c-char-sequence '
+///         u8' c-char-sequence ' [C++1z lex.ccon]
 ///       c-char-sequence:
 ///         c-char
 ///         c-char-sequence c-char
diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp
index 67d5db15cf..76d0ca56c0 100644
--- a/lib/Sema/SemaExpr.cpp
+++ b/lib/Sema/SemaExpr.cpp
@@ -3084,6 +3084,8 @@ ExprResult Sema::ActOnCharacterConstant(const Token &Tok, Scope *UDLScope) {
     Kind = CharacterLiteral::UTF16;
   else if (Literal.isUTF32())
     Kind = CharacterLiteral::UTF32;
+  else if (Literal.isUTF8())
+    Kind = CharacterLiteral::UTF8;
 
   Expr *Lit = new (Context) CharacterLiteral(Literal.getValue(), Kind, Ty,
                                              Tok.getLocation());
diff --git a/lib/Sema/SemaExprObjC.cpp b/lib/Sema/SemaExprObjC.cpp
index 57a08b94f5..1d86ca3541 100644
--- a/lib/Sema/SemaExprObjC.cpp
+++ b/lib/Sema/SemaExprObjC.cpp
@@ -319,6 +319,7 @@ ExprResult Sema::BuildObjCNumericLiteral(SourceLocation AtLoc, Expr *Number) {
     // to use to determine the Objective-c literal kind.
     switch (Char->getKind()) {
     case CharacterLiteral::Ascii:
+    case CharacterLiteral::UTF8:
       NumberType = Context.CharTy;
       break;
       
@@ -577,6 +578,7 @@ ExprResult Sema::BuildObjCBoxedExpr(SourceRange SR, Expr *ValueExpr) {
       // to use to determine the Objective-c literal kind.
       switch (Char->getKind()) {
       case CharacterLiteral::Ascii:
+      case CharacterLiteral::UTF8:
         ValueType = Context.CharTy;
         break;
         
diff --git a/lib/Sema/SemaTemplate.cpp b/lib/Sema/SemaTemplate.cpp
index 6cc8588334..9775e4d940 100644
--- a/lib/Sema/SemaTemplate.cpp
+++ b/lib/Sema/SemaTemplate.cpp
@@ -5503,6 +5503,8 @@ Sema::BuildExpressionFromIntegralTemplateArgument(const TemplateArgument &Arg,
 
   Expr *E;
   if (T->isAnyCharacterType()) {
+    // u8 character literals need no case here: they have type char, so a
+    // plain (CharacterLiteral::Ascii) literal already covers them.
     CharacterLiteral::CharacterKind Kind;
     if (T->isWideCharType())
       Kind = CharacterLiteral::Wide;
diff --git a/lib/Serialization/ASTWriterDecl.cpp b/lib/Serialization/ASTWriterDecl.cpp
index 20ca6d6fd5..54bba282ab 100644
--- a/lib/Serialization/ASTWriterDecl.cpp
+++ b/lib/Serialization/ASTWriterDecl.cpp
@@ -2033,7 +2033,7 @@ void ASTWriter::WriteDeclAbbrevs() {
   //Character Literal
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getValue
   Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location
-  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 2)); // getKind
+  Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // getKind
   CharacterLiteralAbbrev = Stream.EmitAbbrev(Abv);
 
   // Abbreviation for EXPR_IMPLICIT_CAST
diff --git a/test/Misc/ast-print-char-literal.cpp b/test/Misc/ast-print-char-literal.cpp
new file mode 100644
index 0000000000..bb5daa2444
--- /dev/null
+++ b/test/Misc/ast-print-char-literal.cpp
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -ast-print -std=c++1z %s -o - | FileCheck %s
+
+char c = u8'1';
+char d = '1';
+char e = U'1';
+char f = L'1';
+char g = u'1';
+
+template <char c = u8'1'>
+void h();
+
+void i() {
+  h<u8'2'>();
+}
+
+// CHECK: char c = u8'1';
+// CHECK-NEXT: char d = '1';
+// CHECK-NEXT: char e = U'1';
+// CHECK-NEXT: char f = L'1';
+// CHECK-NEXT: char g = u'1';
+
+// CHECK: template <char c = u8'1'>
+
+// CHECK: h<u8'2'>();
diff --git a/test/PCH/cxx-char-literal.cpp b/test/PCH/cxx-char-literal.cpp
new file mode 100644
index 0000000000..0990517c55
--- /dev/null
+++ b/test/PCH/cxx-char-literal.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -emit-pch -std=c++1z -o %t %s
+// RUN: %clang_cc1 -std=c++1z -x ast -ast-print %t | FileCheck %s
+
+// Ensure that each character literal's prefix survives a PCH round trip.
+
+char a = '0';
+// CHECK: char a = '0';
+
+char b = L'1';
+// CHECK: char b = L'1';
+
+char c = u8'2';
+// CHECK: char c = u8'2';
+
+char d = U'3';
+// CHECK: char d = U'3';
+
+char e = u'4';
+// CHECK: char e = u'4';