C++: Fix cplusplus-keywordgen to generate Keywords.cpp

Fix keywordgen to generate the current Keywords.cpp with a minimal diff, while staying compatible with the old usage and existing kwgen files.

Add new parameters:
  %no-namespace-for-tokens - do not use the namespace for tokens
  %pre-check-argument - set the type and name of an additional variable to use
  %function-name - name of the function ("classify" by default)
  %pre-check - name of a variable to check for a keyword (e.g. cxxEnabled)

Now "%%" is also used to end and restart keyword parsing, to be able to generate additional keywords for classifyOperator().
Automatically add checks for LanguageFeatures to the generated code.
Pass input and output files as command line arguments; print usage.
Update Keywords.kwgen to include all keywords currently used.
Add missing aliases to Token.h.

Change-Id: I6cc84e150e0d797277204032fc05ce9cfbd01f58
Reviewed-by: Nikolai Kosjar <nikolai.kosjar@qt.io>
author: Volodymyr Zibarov <gogan419@gmail.com> 2020-05-26 22:05:11 +0300
committer: Volodymyr Zibarov <gogan419@gmail.com> 2020-05-28 11:12:38 +0000
commit: df6d95c3e10678288c39f0ee135d3f4d30aa0a36 (patch)
tree: 78737844e437734bb5fda1a8988ece180015725f /src/tools/3rdparty
parent: 3563c457a55fc80384957a41eee0d9ebcf3cc125 (diff)
download: qt-creator-df6d95c3e10678288c39f0ee135d3f4d30aa0a36.tar.gz
1 files changed, 300 insertions, 99 deletions
diff --git a/src/tools/3rdparty/cplusplus-keywordgen/cplusplus-keywordgen.cpp b/src/tools/3rdparty/cplusplus-keywordgen/cplusplus-keywordgen.cpp
index 45300661e6..51c72978d4 100644
--- a/src/tools/3rdparty/cplusplus-keywordgen/cplusplus-keywordgen.cpp
+++ b/src/tools/3rdparty/cplusplus-keywordgen/cplusplus-keywordgen.cpp
@@ -17,23 +17,29 @@
 // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
-// ### TODO: Rewrite me.
-
 #include <algorithm>
 #include <cctype>
 #include <cstdlib>
+#include <fstream>
 #include <functional>
 #include <iostream>
 #include <list>
 #include <map>
 #include <set>
+#include <sstream>
 #include <string>
 #include <vector>
 
 class State;
 class DottedItem;
 
-typedef std::list<std::string> RuleList;
+struct Rule
+{
+    std::string keyword;
+    std::string preCheck;
+};
+
+typedef std::list<Rule> RuleList;
 typedef RuleList::iterator RulePtr;
 typedef std::list<State> StateList;
 typedef StateList::iterator StatePtr;
@@ -60,7 +66,7 @@ public:
 
     bool operator!=(const DottedItem &other) const { return !operator==(other); }
 
-    bool terminal() const { return dot == rule->end(); }
+    bool terminal() const { return dot == rule->keyword.end(); }
 
     DottedItem next() const
     {
@@ -101,14 +107,52 @@ public:
         return intern(State(n.begin(), n.end()));
     }
 
-    std::set<char> firsts()
+    std::vector<char> firsts()
     {
-        std::set<char> s;
+        std::set<char> charsSet;
         for (DottedItemPtr it = first_item(); it != last_item(); ++it) {
             if (!it->terminal())
-                s.insert(*it->dot);
+                charsSet.insert(*it->dot);
         }
-        return s;
+        std::vector<char> charsOrderedUpperToBack; // to minimize Keywords.cpp diff
+        charsOrderedUpperToBack.reserve(charsSet.size());
+        for (char c : charsSet) {
+            charsOrderedUpperToBack.push_back(c);
+        }
+        std::stable_partition(charsOrderedUpperToBack.begin(),
+                              charsOrderedUpperToBack.end(),
+                              [](char c) {
+                                  return !std::isupper(static_cast<unsigned char>(c));
+                              });
+        return charsOrderedUpperToBack;
+    }
+
+    bool hasPreChecks()
+    {
+        for (DottedItemPtr it = first_item(); it != last_item(); ++it) {
+            if (!it->rule->preCheck.empty()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    std::string commonPreCheck(char ch)
+    {
+        std::string result;
+        for (DottedItemPtr it = first_item(); it != last_item(); ++it) {
+            if (!it->terminal() && *it->dot == ch) {
+                if (result.empty()) {
+                    if (it->rule->preCheck.empty()) {
+                        return "";
+                    }
+                    result = (it->rule->preCheck);
+                } else if (result != it->rule->preCheck) {
+                    return "";
+                }
+            }
+        }
+        return result;
     }
 
     size_t item_count() const { return _items.size(); }
@@ -127,7 +171,7 @@ public:
     {
         std::vector<DottedItem> items;
         for (; first != last; ++first)
-            items.push_back(DottedItem(first, first->begin()));
+            items.push_back(DottedItem(first, first->keyword.begin()));
         return intern(State(items.begin(), items.end()));
     }
 
@@ -151,6 +195,15 @@ static std::string option_token_prefix = "Token_";
 static std::string option_char_type = "char";
 static std::string option_unicode_function = "";
 
+static std::string option_preCheck_arg_type;
+static std::string option_preCheck_arg_name;
+
+static std::string option_tokens_namespace;
+static std::string option_function_name = "classify";
+
+std::stringstream input;
+std::stringstream output;
+
 std::string token_id(const std::string &id)
 {
     std::string token = option_token_prefix;
@@ -167,7 +220,7 @@ std::string token_id(const std::string &id)
 
 bool starts_with(const std::string &line, const std::string &text)
 {
-    if (text.length() < line.length()) {
+    if (text.length() <= line.length()) {
         return std::equal(line.begin(), line.begin() + text.size(), text.begin());
     }
     return false;
@@ -176,16 +229,27 @@ bool starts_with(const std::string &line, const std::string &text)
 void doit(State &state)
 {
     static int depth{0};
+    static int preCheckDepth{0};
 
     ++depth;
 
     std::string indent(depth * 2, ' ');
 
-    std::set<char> firsts = state.firsts();
-    for (std::set<char>::iterator it = firsts.begin(); it != firsts.end(); ++it) {
+    std::vector<char> firsts = state.firsts();
+    for (std::vector<char>::iterator it = firsts.begin(); it != firsts.end(); ++it) {
         std::string _else = it == firsts.begin() ? "" : "else ";
-        std::cout << indent << _else << "if (s[" << (depth - 1) << "]" << option_unicode_function
-                  << " == '" << *it << "') {" << std::endl;
+        output << indent << _else << "if (";
+        if (preCheckDepth == 0) {
+            std::string commonPreCheck = state.commonPreCheck(*it);
+            if (!commonPreCheck.empty()) {
+                output << commonPreCheck << " && ";
+                preCheckDepth++;
+            }
+        } else if (preCheckDepth > 0) {
+            preCheckDepth++;
+        }
+        output << "s[" << (depth - 1) << "]" << option_unicode_function << " == '" << *it << "'";
+        output << ") {" << std::endl;
         State &next_state = state.next(*it);
 
         bool found = false;
@@ -196,58 +260,76 @@ void doit(State &state)
                     exit(EXIT_FAILURE);
                 }
                 found = true;
-                std::cout << indent << "  return " << option_namespace_name << token_id(*item->rule)
-                          << ";" << std::endl;
+                output << indent << "  return " << option_tokens_namespace
+                       << token_id(item->rule->keyword) << ";" << std::endl;
             }
         }
 
         if (!found)
             doit(next_state);
 
-        std::cout << indent << "}" << std::endl;
+        if (preCheckDepth > 0)
+            preCheckDepth--;
+
+        output << indent << "}" << std::endl;
     }
 
     --depth;
 }
 
-void gen_classify_n(State &start_state, int N)
+void gen_classify_n(State &start_state, size_t N)
 {
-    std::cout << "static inline int classify" << N << "(const " << option_char_type << " *s) {"
-              << std::endl;
+    output << "static inline int " << option_function_name << N << "(const " << option_char_type
+           << " *s";
+    if (!option_preCheck_arg_type.empty()) {
+        output << ", " << option_preCheck_arg_type;
+        if (start_state.hasPreChecks()) {
+            output << " " << option_preCheck_arg_name;
+        }
+    }
+    output << ")" << std::endl << "{" << std::endl;
     doit(start_state);
-    std::cout << "  return " << option_namespace_name << token_id("identifier") << ";" << std::endl
-              << "}" << std::endl
-              << std::endl;
+    output << "  return " << option_tokens_namespace << token_id("identifier") << ";" << std::endl
+           << "}" << std::endl
+           << std::endl;
 }
 
-void gen_classify(const std::multimap<size_t, std::string> &keywords)
+void gen_classify(const std::multimap<size_t, Rule> &keywords)
 {
-    std::cout << "int " << option_namespace_name << "classify(const " << option_char_type
-              << " *s, int n) {" << std::endl
-              << "  switch (n) {" << std::endl;
-    std::multimap<size_t, std::string>::const_iterator it = keywords.begin();
+    output << "int " << option_namespace_name << option_function_name << "(const "
+           << option_char_type << " *s, int n";
+    if (!option_preCheck_arg_type.empty()) {
+        output << ", " << option_preCheck_arg_type << " " << option_preCheck_arg_name;
+    }
+    output << ")" << std::endl;
+    output << "{" << std::endl << "  switch (n) {" << std::endl;
+    std::multimap<size_t, Rule>::const_iterator it = keywords.begin();
     while (it != keywords.end()) {
         size_t size = it->first;
-        std::cout << "    case " << size << ": return classify" << size << "(s);" << std::endl;
+        output << "    case " << size << ": return " << option_function_name << size << "(s";
+        if (!option_preCheck_arg_type.empty()) {
+            output << ", " << option_preCheck_arg_name;
+        }
+        output << ");" << std::endl;
         do {
             ++it;
         } while (it != keywords.end() && it->first == size);
     }
-    std::cout << "    default: return " << option_namespace_name << token_id("identifier") << ";"
-              << std::endl
-              << "  } // switch" << std::endl
-              << "}" << std::endl
-              << std::endl;
+    output << "    default: return " << option_tokens_namespace << token_id("identifier") << ";"
+           << std::endl
+           << "  } // switch" << std::endl
+           << "}" << std::endl
+           << std::endl;
 }
 
-void gen_enums(const std::multimap<size_t, std::string> &keywords)
+void gen_enums(const std::multimap<size_t, Rule> &keywords)
 {
-    std::cout << "enum {" << std::endl;
-    std::multimap<size_t, std::string>::const_iterator it = keywords.begin();
+    output << "enum {" << std::endl;
+    std::multimap<size_t, Rule>::const_iterator it = keywords.begin();
     for (; it != keywords.end(); ++it) {
-        std::cout << "  " << token_id(it->second) << "," << std::endl;
+        output << "  " << token_id(it->second.keyword) << "," << std::endl;
     }
-    std::cout << "  " << token_id("identifier") << std::endl << "};" << std::endl << std::endl;
+    output << "  " << token_id("identifier") << std::endl << "};" << std::endl << std::endl;
 }
 
 inline bool not_whitespace_p(char ch)
@@ -258,6 +340,11 @@ inline bool not_whitespace_p(char ch)
 int main(int argc, char *argv[])
 {
     const std::string ns = "--namespace=";
+    const std::string inputFileOpt = "--input";
+    const std::string outputFileOpt = "--output";
+
+    std::string inputFilename;
+    std::string outputFilename;
 
     for (int i = 0; i < argc; ++i) {
         const std::string arg(argv[i]);
@@ -266,13 +353,33 @@ int main(int argc, char *argv[])
         else if (starts_with(arg, ns)) {
             option_namespace_name.assign(arg.begin() + ns.size(), arg.end());
             option_namespace_name += "::";
+        } else if (arg == inputFileOpt && i + 1 < argc) {
+            inputFilename = argv[i + 1];
+            ++i;
+        } else if (arg == outputFileOpt && i + 1 < argc) {
+            outputFilename = argv[i + 1];
+            ++i;
+        }else if (arg == "--help" || arg == "-h") {
+            std::cout << "usage: cplusplus-keywordgen [--input <kwgen file>] [--output <cpp file>]"
+                      << std::endl;
+            std::cout << "\t If no input or output specified: std::cin/cout will be used"
+                      << std::endl;
+            exit(EXIT_SUCCESS);
         }
     }
 
-    std::multimap<size_t, std::string> keywords;
-    std::string textline;
-
-    bool readKeywords = false;
+    if (inputFilename.empty()) {
+        std::string textline;
+        while (getline(std::cin, textline)) {
+            input << textline << std::endl;
+        }
+    } else {
+        std::ifstream fileInput(inputFilename, std::ios_base::in);
+        std::string textline;
+        while (getline(fileInput, textline)) {
+            input << textline << std::endl;
+        }
+    }
 
     const std::string opt_no_enums = "%no-enums";
     const std::string opt_toupper = "%toupper";
@@ -281,73 +388,167 @@ int main(int argc, char *argv[])
     const std::string opt_char_type = "%char-type=";
     const std::string opt_unicode_function = "%unicode-function=";
 
-    while (getline(std::cin, textline)) {
-        // remove trailing spaces
-        textline.assign(textline.begin(),
+    const std::string opt_preCheck_arg = "%pre-check-argument=";
+    const std::string opt_function_name = "%function-name=";
+
+    const std::string opt_no_namespace_for_tokens = "%no-namespace-for-tokens";
+
+    // this may be only in keywords section
+    const std::string preCheckOpt = "%pre-check=";
+
+    bool useNamespaceForTokens = true;
+
+    bool finished = false;
+    while (!finished) {
+        finished = true;
+
+        bool readKeywords = false;
+        std::string preCheckValue;
+
+        std::multimap<size_t, Rule> keywords;
+        std::string textline;
+
+        while (getline(input, textline)) {
+            // remove trailing spaces
+            textline
+                .assign(textline.begin(),
                         std::find_if(textline.rbegin(), textline.rend(), not_whitespace_p).base());
 
-        if (!readKeywords) {
-            if (textline.size() >= 2 && textline[0] == '%') {
-                if (textline[1] == '%') {
-                    readKeywords = true;
-                } else if (textline == opt_no_enums) {
-                    option_no_enums = true;
-                } else if (textline == opt_toupper) {
-                    option_toupper = true;
-                } else if (starts_with(textline, opt_tok_prefix)) {
-                    option_token_prefix.assign(textline.begin() + opt_tok_prefix.size(),
-                                               textline.end());
-                } else if (starts_with(textline, opt_char_type)) {
-                    option_char_type.assign(textline.begin() + opt_char_type.size(), textline.end());
-                } else if (starts_with(textline, opt_unicode_function)) {
-                    option_unicode_function.assign(textline.begin() + opt_unicode_function.size(),
+            if (!readKeywords) {
+                if (textline.size() >= 2 && textline[0] == '%') {
+                    if (textline[1] == '%') {
+                        readKeywords = true;
+
+                        static bool generatedMessageAdded=false;
+                        if(!generatedMessageAdded){
+                            generatedMessageAdded=true;
+                            output
+                                << "// === following code is generated with cplusplus-keywordgen tool"
+                                << std::endl;
+                            for (auto it = inputFilename.rbegin(); it != inputFilename.rend(); ++it) {
+                                if (*it == '\\' || *it == '/') {
+                                    output
+                                        << "// === from source file: "
+                                        << inputFilename.substr(std::distance(it, inputFilename.rend()))
+                                        << std::endl;
+                                    break;
+                                }
+                            }
+                            output << std::endl;
+                        }
+                        output << "// === keywords begin" << std::endl;
+                        output << std::endl;
+                    } else if (textline == opt_no_enums) {
+                        option_no_enums = true;
+                    } else if (textline == opt_toupper) {
+                        option_toupper = true;
+                    } else if (starts_with(textline, opt_tok_prefix)) {
+                        option_token_prefix.assign(textline.begin() + opt_tok_prefix.size(),
                                                    textline.end());
-                } else if (starts_with(textline, opt_ns)) {
-                    option_namespace_name.assign(textline.begin() + opt_ns.size(), textline.end());
-                    option_namespace_name += "::";
+                    } else if (starts_with(textline, opt_char_type)) {
+                        option_char_type.assign(textline.begin() + opt_char_type.size(),
+                                                textline.end());
+                    } else if (starts_with(textline, opt_unicode_function)) {
+                        option_unicode_function.assign(textline.begin()
+                                                           + opt_unicode_function.size(),
+                                                       textline.end());
+                    } else if (starts_with(textline, opt_ns)) {
+                        option_namespace_name.assign(textline.begin() + opt_ns.size(),
+                                                     textline.end());
+                        option_namespace_name += "::";
+                        if (useNamespaceForTokens) {
+                            option_tokens_namespace = option_namespace_name;
+                        }
+                    } else if (starts_with(textline, opt_preCheck_arg)) {
+                        std::string::size_type spacePos = textline.find(' ',
+                                                                        opt_preCheck_arg.size());
+                        if (spacePos == std::string::npos) {
+                            option_preCheck_arg_type.clear();
+                            option_preCheck_arg_name.clear();
+                        } else {
+                            option_preCheck_arg_type
+                                = textline.substr(opt_preCheck_arg.size(),
+                                                  spacePos - opt_preCheck_arg.size());
+                            option_preCheck_arg_name = textline.substr(spacePos + 1);
+                        }
+                    } else if (starts_with(textline, opt_function_name)) {
+                        option_function_name.assign(textline.begin() + opt_function_name.size(),
+                                                    textline.end());
+                    } else if (textline == opt_no_namespace_for_tokens) {
+                        useNamespaceForTokens = false;
+                        option_tokens_namespace.clear();
+                    }
+
+                    continue;
+                }
+                output << textline << std::endl;
+            } else {
+                if (textline.empty())
+                    continue;
+
+                if (textline == "%%") {
+                    finished = false;
+                    break;
                 }
 
-                continue;
-            }
-            std::cout << textline << std::endl;
-        } else {
-            if (textline.empty())
-                continue;
-
-            std::string::iterator start = textline.begin();
-            while (start != textline.end() && std::isspace(*start))
-                ++start;
-
-            std::string::iterator stop = start;
-            while (stop != textline.end() && (std::isalnum(*stop) || *stop == '_'))
-                ++stop;
-
-            if (start != stop) {
-                std::string keyword(start, stop);
-                if (keyword == "identifier") {
-                    std::cerr << "*** Error. `identifier' is reserved" << std::endl;
-                    exit(EXIT_FAILURE);
+                if (starts_with(textline, preCheckOpt)) {
+                    preCheckValue = textline.substr(preCheckOpt.size());
                 }
 
-                keywords.insert(std::make_pair(keyword.size(), keyword));
+                std::string::iterator start = textline.begin();
+                while (start != textline.end() && std::isspace(*start))
+                    ++start;
+
+                std::string::iterator stop = start;
+                while (stop != textline.end() && (std::isalnum(*stop) || *stop == '_'))
+                    ++stop;
+
+                if (start != stop) {
+                    Rule rule;
+                    rule.keyword.assign(start, stop);
+                    if (rule.keyword == "identifier") {
+                        std::cerr << "*** Error. `identifier' is reserved" << std::endl;
+                        exit(EXIT_FAILURE);
+                    }
+                    rule.preCheck = preCheckValue;
+                    keywords.insert(std::make_pair(rule.keyword.size(), rule));
+                }
             }
         }
-    }
 
-    if (!option_no_enums)
-        gen_enums(keywords);
+        if (readKeywords) {
+            if (!option_no_enums)
+                gen_enums(keywords);
+
+            std::multimap<size_t, Rule>::iterator it = keywords.begin();
+            while (it != keywords.end()) {
+                size_t size = it->first;
+                RuleList rules;
+                do {
+                    rules.push_back(it->second);
+                    ++it;
+                } while (it != keywords.end() && it->first == size);
+                gen_classify_n(State::start(rules.begin(), rules.end()), size);
+                State::reset();
+            }
 
-    std::multimap<size_t, std::string>::iterator it = keywords.begin();
-    while (it != keywords.end()) {
-        size_t size = it->first;
-        RuleList rules;
-        do {
-            rules.push_back(it->second);
-            ++it;
-        } while (it != keywords.end() && it->first == size);
-        gen_classify_n(State::start(rules.begin(), rules.end()), size);
-        State::reset();
+            gen_classify(keywords);
+
+            output << "// === keywords end" << std::endl;
+        }
     }
 
-    gen_classify(keywords);
+    if (outputFilename.empty()) {
+        std::string textline;
+        while (getline(output, textline)) {
+            std::cout << textline << std::endl;
+        }
+    } else {
+        std::ofstream outFile(outputFilename, std::ios_base::out);
+        std::string textline;
+        while (getline(output, textline)) {
+            outFile << textline << std::endl;
+        }
+        std::cout << "Generated: " << outputFilename << std::endl;
+    }
 }
author	Volodymyr Zibarov <gogan419@gmail.com>	2020-05-26 22:05:11 +0300
committer	Volodymyr Zibarov <gogan419@gmail.com>	2020-05-28 11:12:38 +0000
commit	df6d95c3e10678288c39f0ee135d3f4d30aa0a36 (patch)
tree	78737844e437734bb5fda1a8988ece180015725f /src/tools/3rdparty
parent	3563c457a55fc80384957a41eee0d9ebcf3cc125 (diff)
download	qt-creator-df6d95c3e10678288c39f0ee135d3f4d30aa0a36.tar.gz