From b39c40c58951f18b5812ec839cdff5d9f41fe40e Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Tue, 8 Oct 2019 22:42:44 +0000 Subject: [clang-scan-deps] Improve string/character literal skipping The existing string/character literal skipping code in the dependency directives source minimizer has two issues: - It doesn't stop the scanning when a newline is reached before the terminating character, unlike the lexer which considers the token to be done (even if it's invalid) at the end of the line. - It doesn't support whitespace between '\' and the newline when looking if the '\' is used as a line continuation character. This commit fixes both issues. Differential Revision: https://reviews.llvm.org/D68436 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@374127 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Lex/DependencyDirectivesSourceMinimizer.cpp | 40 ++++++++++++++------ .../DependencyDirectivesSourceMinimizerTest.cpp | 44 ++++++++++++++++++++++ 2 files changed, 73 insertions(+), 11 deletions(-) diff --git a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp index 2ffbcfb7e6..f063ed711c 100644 --- a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -185,17 +185,6 @@ static void skipRawString(const char *&First, const char *const End) { } } -static void skipString(const char *&First, const char *const End) { - assert(*First == '\'' || *First == '"' || *First == '<'); - const char Terminator = *First == '<' ? '>' : *First; - for (++First; First != End && *First != Terminator; ++First) - if (*First == '\\') - if (++First == End) - return; - if (First != End) - ++First; // Finish off the string. -} - // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n) static unsigned isEOL(const char *First, const char *const End) { if (First == End) @@ -206,6 +195,35 @@ static unsigned isEOL(const char *First, const char *const End) { return !!isVerticalWhitespace(First[0]); } +static void skipString(const char *&First, const char *const End) { + assert(*First == '\'' || *First == '"' || *First == '<'); + const char Terminator = *First == '<' ? '>' : *First; + for (++First; First != End && *First != Terminator; ++First) { + // String and character literals don't extend past the end of the line. + if (isVerticalWhitespace(*First)) + return; + if (*First != '\\') + continue; + // Skip past backslash to the next character. This ensures that the + // character right after it is skipped as well, which matters if it's + // the terminator. + if (++First == End) + return; + if (!isWhitespace(*First)) + continue; + // Whitespace after the backslash might indicate a line continuation. + const char *FirstAfterBackslashPastSpace = First; + skipOverSpaces(FirstAfterBackslashPastSpace, End); + if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) { + // Advance the character pointer to the next line for the next + // iteration. + First = FirstAfterBackslashPastSpace + NLSize - 1; + } + } + if (First != End) + ++First; // Finish off the string. +} + // Returns the length of the skipped newline static unsigned skipNewline(const char *&First, const char *End) { if (First == End) diff --git a/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp b/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp index 5eb7d256a3..ed44cd86b3 100644 --- a/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ b/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -594,6 +594,50 @@ TEST(MinimizeSourceToDependencyDirectivesTest, PragmaOnce) { EXPECT_STREQ("#pragma once\n#include \n", Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, + SkipLineStringCharLiteralsUntilNewline) { + SmallVector Out; + + StringRef Source = R"(#if NEVER_ENABLED + #define why(fmt, ...) #error don't try me + #endif + + void foo(); +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ( + "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n", + Out.data()); + + Source = R"(#if NEVER_ENABLED + #define why(fmt, ...) "quote dropped + #endif + + void foo(); + )"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ( + "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n", + Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, + SupportWhitespaceBeforeLineContinuationInStringSkipping) { + SmallVector Out; + + StringRef Source = "#define X '\\ \t\nx'\nvoid foo() {}"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#define X '\\ \t\nx'\n", Out.data()); + + Source = "#define X \"\\ \r\nx\"\nvoid foo() {}"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#define X \"\\ \r\nx\"\n", Out.data()); + + Source = "#define X \"\\ \r\nx\n#include \n"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#define X \"\\ \r\nx\n#include \n", Out.data()); +} + TEST(MinimizeSourceToDependencyDirectivesTest, CxxModules) { SmallVector Out; SmallVector Tokens; -- cgit v1.2.1