diff options
Diffstat (limited to 'chromium/tools/gn/parser.cc')
-rw-r--r-- | chromium/tools/gn/parser.cc | 884 |
1 files changed, 0 insertions, 884 deletions
diff --git a/chromium/tools/gn/parser.cc b/chromium/tools/gn/parser.cc deleted file mode 100644 index c8dd6547dc4..00000000000 --- a/chromium/tools/gn/parser.cc +++ /dev/null @@ -1,884 +0,0 @@ -// Copyright (c) 2013 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "tools/gn/parser.h" - -#include <memory> -#include <utility> - -#include "base/logging.h" -#include "tools/gn/functions.h" -#include "tools/gn/operators.h" -#include "tools/gn/token.h" - -const char kGrammar_Help[] = - R"*(Language and grammar for GN build files - -Tokens - - GN build files are read as sequences of tokens. While splitting the file - into tokens, the next token is the longest sequence of characters that form a - valid token. - -White space and comments - - White space is comprised of spaces (U+0020), horizontal tabs (U+0009), - carriage returns (U+000D), and newlines (U+000A). - - Comments start at the character "#" and stop at the next newline. - - White space and comments are ignored except that they may separate tokens - that would otherwise combine into a single token. - -Identifiers - - Identifiers name variables and functions. - - identifier = letter { letter | digit } . - letter = "A" ... "Z" | "a" ... "z" | "_" . - digit = "0" ... "9" . - -Keywords - - The following keywords are reserved and may not be used as identifiers: - - else false if true - -Integer literals - - An integer literal represents a decimal integer value. - - integer = [ "-" ] digit { digit } . - - Leading zeros and negative zero are disallowed. - -String literals - - A string literal represents a string value consisting of the quoted - characters with possible escape sequences and variable expansions. - - string = `"` { char | escape | expansion } `"` . - escape = `\` ( "$" | `"` | char ) . - BracketExpansion = "{" ( identifier | ArrayAccess | ScopeAccess " - ") "}" . - Hex = "0x" [0-9A-Fa-f][0-9A-Fa-f] - expansion = "$" ( identifier | BracketExpansion | Hex ) . - char = /* any character except "$", `"`, or newline " - "*/ . - - After a backslash, certain sequences represent special characters: - - \" U+0022 quotation mark - \$ U+0024 dollar sign - \\ U+005C backslash - - All other backslashes represent themselves. - - To insert an arbitrary byte value, use $0xFF. For example, to insert a - newline character: "Line one$0x0ALine two". - - An expansion will evaluate the variable following the '$' and insert a - stringified version of it into the result. For example, to concat two path - components with a slash separating them: - "$var_one/$var_two" - Use the "${var_one}" format to be explicitly deliniate the variable for - otherwise-ambiguous cases. - -Punctuation - - The following character sequences represent punctuation: - - + += == != ( ) - - -= < <= [ ] - ! = > >= { } - && || . , - -Grammar - - The input tokens form a syntax tree following a context-free grammar: - - File = StatementList . - - Statement = Assignment | Call | Condition . - LValue = identifier | ArrayAccess | ScopeAccess . - Assignment = LValue AssignOp Expr . - Call = identifier "(" [ ExprList ] ")" [ Block ] . - Condition = "if" "(" Expr ")" Block - [ "else" ( Condition | Block ) ] . - Block = "{" StatementList "}" . - StatementList = { Statement } . - - ArrayAccess = identifier "[" Expr "]" . - ScopeAccess = identifier "." identifier . - Expr = UnaryExpr | Expr BinaryOp Expr . - UnaryExpr = PrimaryExpr | UnaryOp UnaryExpr . - PrimaryExpr = identifier | integer | string | Call - | ArrayAccess | ScopeAccess | Block - | "(" Expr ")" - | "[" [ ExprList [ "," ] ] "]" . - ExprList = Expr { "," Expr } . - - AssignOp = "=" | "+=" | "-=" . - UnaryOp = "!" . - BinaryOp = "+" | "-" // highest priority - | "<" | "<=" | ">" | ">=" - | "==" | "!=" - | "&&" - | "||" . // lowest priority - - All binary operators are left-associative. - -Types - - The GN language is dynamically typed. The following types are used: - - - Boolean: Uses the keywords "true" and "false". There is no implicit - conversion between booleans and integers. - - - Integers: All numbers in GN are signed 64-bit integers. - - - Strings: Strings are 8-bit with no enforced encoding. When a string is - used to interact with other systems with particular encodings (like the - Windows and Mac filesystems) it is assumed to be UTF-8. See "String - literals" above for more. - - - Lists: Lists are arbitrary-length ordered lists of values. See "Lists" - below for more. - - - Scopes: Scopes are like dictionaries that use variable names for keys. See - "Scopes" below for more. - -Lists - - Lists are created with [] and using commas to separate items: - - mylist = [ 0, 1, 2, "some string" ] - - A comma after the last item is optional. Lists are dereferenced using 0-based - indexing: - - mylist[0] += 1 - var = mylist[2] - - Lists can be concatenated using the '+' and '+=' operators. Bare values can - not be concatenated with lists, to add a single item, it must be put into a - list of length one. - - Items can be removed from lists using the '-' and '-=' operators. This will - remove all occurrences of every item in the right-hand list from the - left-hand list. It is an error to remove an item not in the list. This is to - prevent common typos and to detect dead code that is removing things that no - longer apply. - - It is an error to use '=' to replace a nonempty list with another nonempty - list. This is to prevent accidentally overwriting data when in most cases - '+=' was intended. To overwrite a list on purpose, first assign it to the - empty list: - - mylist = [] - mylist = otherlist - - When assigning to a list named 'sources' using '=' or '+=', list items may be - automatically filtered out. See "gn help set_sources_assignment_filter" for - more. - -Scopes - - All execution happens in the context of a scope which holds the current state - (like variables). With the exception of loops and conditions, '{' introduces - a new scope that has a parent reference to the old scope. - - Variable reads recursively search all nested scopes until the variable is - found or there are no more scopes. Variable writes always go into the current - scope. This means that after the closing '}' (again excepting loops and - conditions), all local variables will be restored to the previous values. - This also means that "foo = foo" can do useful work by copying a variable - into the current scope that was defined in a containing scope. - - Scopes can also be assigned to variables. Such scopes can be created by - functions like exec_script, when invoking a template (the template code - refers to the variables set by the invoking code by the implicitly-created - "invoker" scope), or explicitly like: - - empty_scope = {} - myvalues = { - foo = 21 - bar = "something" - } - - Inside such a scope definition can be any GN code including conditionals and - function calls. After the close of the scope, it will contain all variables - explicitly set by the code contained inside it. After this, the values can be - read, modified, or added to: - - myvalues.foo += 2 - empty_scope.new_thing = [ 1, 2, 3 ] -)*"; - -enum Precedence { - PRECEDENCE_ASSIGNMENT = 1, // Lowest precedence. - PRECEDENCE_OR = 2, - PRECEDENCE_AND = 3, - PRECEDENCE_EQUALITY = 4, - PRECEDENCE_RELATION = 5, - PRECEDENCE_SUM = 6, - PRECEDENCE_PREFIX = 7, - PRECEDENCE_CALL = 8, - PRECEDENCE_DOT = 9, // Highest precedence. -}; - -// The top-level for blocks/ifs is recursive descent, the expression parser is -// a Pratt parser. The basic idea there is to have the precedences (and -// associativities) encoded relative to each other and only parse up until you -// hit something of that precedence. There's a dispatch table in expressions_ -// at the top of parser.cc that describes how each token dispatches if it's -// seen as either a prefix or infix operator, and if it's infix, what its -// precedence is. -// -// Refs: -// - http://javascript.crockford.com/tdop/tdop.html -// - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ - -// Indexed by Token::Type. -ParserHelper Parser::expressions_[] = { - {nullptr, nullptr, -1}, // INVALID - {&Parser::Literal, nullptr, -1}, // INTEGER - {&Parser::Literal, nullptr, -1}, // STRING - {&Parser::Literal, nullptr, -1}, // TRUE_TOKEN - {&Parser::Literal, nullptr, -1}, // FALSE_TOKEN - {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL - {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS - {nullptr, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS - {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS - {nullptr, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS - {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL - {nullptr, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL - {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL - {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL - {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN - {nullptr, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN - {nullptr, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND - {nullptr, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR - {&Parser::Not, nullptr, -1}, // BANG - {nullptr, &Parser::DotOperator, PRECEDENCE_DOT}, // DOT - {&Parser::Group, nullptr, -1}, // LEFT_PAREN - {nullptr, nullptr, -1}, // RIGHT_PAREN - {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET - {nullptr, nullptr, -1}, // RIGHT_BRACKET - {&Parser::Block, nullptr, -1}, // LEFT_BRACE - {nullptr, nullptr, -1}, // RIGHT_BRACE - {nullptr, nullptr, -1}, // IF - {nullptr, nullptr, -1}, // ELSE - {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER - {nullptr, nullptr, -1}, // COMMA - {nullptr, nullptr, -1}, // UNCLASSIFIED_COMMENT - {nullptr, nullptr, -1}, // LINE_COMMENT - {nullptr, nullptr, -1}, // SUFFIX_COMMENT - {&Parser::BlockComment, nullptr, -1}, // BLOCK_COMMENT -}; - -Parser::Parser(const std::vector<Token>& tokens, Err* err) - : invalid_token_(Location(), Token::INVALID, base::StringPiece()), - err_(err), - cur_(0) { - for (const auto& token : tokens) { - switch (token.type()) { - case Token::LINE_COMMENT: - line_comment_tokens_.push_back(token); - break; - case Token::SUFFIX_COMMENT: - suffix_comment_tokens_.push_back(token); - break; - default: - // Note that BLOCK_COMMENTs (top-level standalone comments) are passed - // through the real parser. - tokens_.push_back(token); - break; - } - } -} - -Parser::~Parser() = default; - -// static -std::unique_ptr<ParseNode> Parser::Parse(const std::vector<Token>& tokens, - Err* err) { - Parser p(tokens, err); - return p.ParseFile(); -} - -// static -std::unique_ptr<ParseNode> Parser::ParseExpression( - const std::vector<Token>& tokens, - Err* err) { - Parser p(tokens, err); - std::unique_ptr<ParseNode> expr = p.ParseExpression(); - if (!p.at_end() && !err->has_error()) { - *err = Err(p.cur_token(), "Trailing garbage"); - return nullptr; - } - return expr; -} - -// static -std::unique_ptr<ParseNode> Parser::ParseValue(const std::vector<Token>& tokens, - Err* err) { - for (const Token& token : tokens) { - switch (token.type()) { - case Token::INTEGER: - case Token::STRING: - case Token::TRUE_TOKEN: - case Token::FALSE_TOKEN: - case Token::LEFT_BRACKET: - case Token::RIGHT_BRACKET: - case Token::COMMA: - continue; - default: - *err = Err(token, "Invalid token in literal value"); - return nullptr; - } - } - - return ParseExpression(tokens, err); -} - -bool Parser::IsAssignment(const ParseNode* node) const { - return node && node->AsBinaryOp() && - (node->AsBinaryOp()->op().type() == Token::EQUAL || - node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS || - node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS); -} - -bool Parser::IsStatementBreak(Token::Type token_type) const { - switch (token_type) { - case Token::IDENTIFIER: - case Token::LEFT_BRACE: - case Token::RIGHT_BRACE: - case Token::IF: - case Token::ELSE: - return true; - default: - return false; - } -} - -bool Parser::LookAhead(Token::Type type) { - if (at_end()) - return false; - return cur_token().type() == type; -} - -bool Parser::Match(Token::Type type) { - if (!LookAhead(type)) - return false; - Consume(); - return true; -} - -const Token& Parser::Consume(Token::Type type, const char* error_message) { - Token::Type types[1] = { type }; - return Consume(types, 1, error_message); -} - -const Token& Parser::Consume(Token::Type* types, - size_t num_types, - const char* error_message) { - if (has_error()) { - // Don't overwrite current error, but make progress through tokens so that - // a loop that's expecting a particular token will still terminate. - if (!at_end()) - cur_++; - return invalid_token_; - } - if (at_end()) { - const char kEOFMsg[] = "I hit EOF instead."; - if (tokens_.empty()) - *err_ = Err(Location(), error_message, kEOFMsg); - else - *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg); - return invalid_token_; - } - - for (size_t i = 0; i < num_types; ++i) { - if (cur_token().type() == types[i]) - return Consume(); - } - *err_ = Err(cur_token(), error_message); - return invalid_token_; -} - -const Token& Parser::Consume() { - return tokens_[cur_++]; -} - -std::unique_ptr<ParseNode> Parser::ParseExpression() { - return ParseExpression(0); -} - -std::unique_ptr<ParseNode> Parser::ParseExpression(int precedence) { - if (at_end()) - return std::unique_ptr<ParseNode>(); - - const Token& token = Consume(); - PrefixFunc prefix = expressions_[token.type()].prefix; - - if (prefix == nullptr) { - *err_ = Err(token, - std::string("Unexpected token '") + token.value().as_string() + - std::string("'")); - return std::unique_ptr<ParseNode>(); - } - - std::unique_ptr<ParseNode> left = (this->*prefix)(token); - if (has_error()) - return left; - - while (!at_end() && !IsStatementBreak(cur_token().type()) && - precedence <= expressions_[cur_token().type()].precedence) { - const Token& next_token = Consume(); - InfixFunc infix = expressions_[next_token.type()].infix; - if (infix == nullptr) { - *err_ = Err(next_token, std::string("Unexpected token '") + - next_token.value().as_string() + - std::string("'")); - return std::unique_ptr<ParseNode>(); - } - left = (this->*infix)(std::move(left), next_token); - if (has_error()) - return std::unique_ptr<ParseNode>(); - } - - return left; -} - -std::unique_ptr<ParseNode> Parser::Block(const Token& token) { - // This entrypoint into ParseBlock means it's part of an expression and we - // always want the result. - return ParseBlock(token, BlockNode::RETURNS_SCOPE); -} - -std::unique_ptr<ParseNode> Parser::Literal(const Token& token) { - return std::make_unique<LiteralNode>(token); -} - -std::unique_ptr<ParseNode> Parser::Name(const Token& token) { - return IdentifierOrCall(std::unique_ptr<ParseNode>(), token); -} - -std::unique_ptr<ParseNode> Parser::BlockComment(const Token& token) { - std::unique_ptr<BlockCommentNode> comment = - std::make_unique<BlockCommentNode>(); - comment->set_comment(token); - return std::move(comment); -} - -std::unique_ptr<ParseNode> Parser::Group(const Token& token) { - std::unique_ptr<ParseNode> expr = ParseExpression(); - if (has_error()) - return std::unique_ptr<ParseNode>(); - Consume(Token::RIGHT_PAREN, "Expected ')'"); - return expr; -} - -std::unique_ptr<ParseNode> Parser::Not(const Token& token) { - std::unique_ptr<ParseNode> expr = ParseExpression(PRECEDENCE_PREFIX + 1); - if (has_error()) - return std::unique_ptr<ParseNode>(); - if (!expr) { - if (!has_error()) - *err_ = Err(token, "Expected right-hand side for '!'."); - return std::unique_ptr<ParseNode>(); - } - std::unique_ptr<UnaryOpNode> unary_op = std::make_unique<UnaryOpNode>(); - unary_op->set_op(token); - unary_op->set_operand(std::move(expr)); - return std::move(unary_op); -} - -std::unique_ptr<ParseNode> Parser::List(const Token& node) { - std::unique_ptr<ParseNode> list(ParseList(node, Token::RIGHT_BRACKET, true)); - if (!has_error() && !at_end()) - Consume(Token::RIGHT_BRACKET, "Expected ']'"); - return list; -} - -std::unique_ptr<ParseNode> Parser::BinaryOperator( - std::unique_ptr<ParseNode> left, - const Token& token) { - std::unique_ptr<ParseNode> right = - ParseExpression(expressions_[token.type()].precedence + 1); - if (!right) { - if (!has_error()) { - *err_ = Err(token, "Expected right-hand side for '" + - token.value().as_string() + "'"); - } - return std::unique_ptr<ParseNode>(); - } - std::unique_ptr<BinaryOpNode> binary_op = std::make_unique<BinaryOpNode>(); - binary_op->set_op(token); - binary_op->set_left(std::move(left)); - binary_op->set_right(std::move(right)); - return std::move(binary_op); -} - -std::unique_ptr<ParseNode> Parser::IdentifierOrCall( - std::unique_ptr<ParseNode> left, - const Token& token) { - std::unique_ptr<ListNode> list = std::make_unique<ListNode>(); - list->set_begin_token(token); - list->set_end(std::make_unique<EndNode>(token)); - std::unique_ptr<BlockNode> block; - bool has_arg = false; - if (LookAhead(Token::LEFT_PAREN)) { - const Token& start_token = Consume(); - // Parsing a function call. - has_arg = true; - if (Match(Token::RIGHT_PAREN)) { - // Nothing, just an empty call. - } else { - list = ParseList(start_token, Token::RIGHT_PAREN, false); - if (has_error()) - return std::unique_ptr<ParseNode>(); - Consume(Token::RIGHT_PAREN, "Expected ')' after call"); - } - // Optionally with a scope. - if (LookAhead(Token::LEFT_BRACE)) { - block = ParseBlock(Consume(), BlockNode::DISCARDS_RESULT); - if (has_error()) - return std::unique_ptr<ParseNode>(); - } - } - - if (!left && !has_arg) { - // Not a function call, just a standalone identifier. - return std::make_unique<IdentifierNode>(token); - } - std::unique_ptr<FunctionCallNode> func_call = - std::make_unique<FunctionCallNode>(); - func_call->set_function(token); - func_call->set_args(std::move(list)); - if (block) - func_call->set_block(std::move(block)); - return std::move(func_call); -} - -std::unique_ptr<ParseNode> Parser::Assignment(std::unique_ptr<ParseNode> left, - const Token& token) { - if (left->AsIdentifier() == nullptr && left->AsAccessor() == nullptr) { - *err_ = Err(left.get(), - "The left-hand side of an assignment must be an identifier, " - "scope access, or array access."); - return std::unique_ptr<ParseNode>(); - } - std::unique_ptr<ParseNode> value = ParseExpression(PRECEDENCE_ASSIGNMENT); - if (!value) { - if (!has_error()) - *err_ = Err(token, "Expected right-hand side for assignment."); - return std::unique_ptr<ParseNode>(); - } - std::unique_ptr<BinaryOpNode> assign = std::make_unique<BinaryOpNode>(); - assign->set_op(token); - assign->set_left(std::move(left)); - assign->set_right(std::move(value)); - return std::move(assign); -} - -std::unique_ptr<ParseNode> Parser::Subscript(std::unique_ptr<ParseNode> left, - const Token& token) { - // TODO: Maybe support more complex expressions like a[0][0]. This would - // require work on the evaluator too. - if (left->AsIdentifier() == nullptr) { - *err_ = Err(left.get(), "May only subscript identifiers.", - "The thing on the left hand side of the [] must be an identifier\n" - "and not an expression. If you need this, you'll have to assign the\n" - "value to a temporary before subscripting. Sorry."); - return std::unique_ptr<ParseNode>(); - } - std::unique_ptr<ParseNode> value = ParseExpression(); - Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript."); - std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>(); - accessor->set_base(left->AsIdentifier()->value()); - accessor->set_index(std::move(value)); - return std::move(accessor); -} - -std::unique_ptr<ParseNode> Parser::DotOperator(std::unique_ptr<ParseNode> left, - const Token& token) { - if (left->AsIdentifier() == nullptr) { - *err_ = Err(left.get(), "May only use \".\" for identifiers.", - "The thing on the left hand side of the dot must be an identifier\n" - "and not an expression. If you need this, you'll have to assign the\n" - "value to a temporary first. Sorry."); - return std::unique_ptr<ParseNode>(); - } - - std::unique_ptr<ParseNode> right = ParseExpression(PRECEDENCE_DOT); - if (!right || !right->AsIdentifier()) { - *err_ = Err(token, "Expected identifier for right-hand-side of \".\"", - "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()"); - return std::unique_ptr<ParseNode>(); - } - - std::unique_ptr<AccessorNode> accessor = std::make_unique<AccessorNode>(); - accessor->set_base(left->AsIdentifier()->value()); - accessor->set_member(std::unique_ptr<IdentifierNode>( - static_cast<IdentifierNode*>(right.release()))); - return std::move(accessor); -} - -// Does not Consume the start or end token. -std::unique_ptr<ListNode> Parser::ParseList(const Token& start_token, - Token::Type stop_before, - bool allow_trailing_comma) { - std::unique_ptr<ListNode> list = std::make_unique<ListNode>(); - list->set_begin_token(start_token); - bool just_got_comma = false; - bool first_time = true; - while (!LookAhead(stop_before)) { - if (!first_time) { - if (!just_got_comma) { - // Require commas separate things in lists. - *err_ = Err(cur_token(), "Expected comma between items."); - return std::unique_ptr<ListNode>(); - } - } - first_time = false; - - // Why _OR? We're parsing things that are higher precedence than the , - // that separates the items of the list. , should appear lower than - // boolean expressions (the lowest of which is OR), but above assignments. - list->append_item(ParseExpression(PRECEDENCE_OR)); - if (has_error()) - return std::unique_ptr<ListNode>(); - if (at_end()) { - *err_ = - Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list."); - return std::unique_ptr<ListNode>(); - } - if (list->contents().back()->AsBlockComment()) { - // If there was a comment inside the list, we don't need a comma to the - // next item, so pretend we got one, if we're expecting one. - just_got_comma = allow_trailing_comma; - } else { - just_got_comma = Match(Token::COMMA); - } - } - if (just_got_comma && !allow_trailing_comma) { - *err_ = Err(cur_token(), "Trailing comma"); - return std::unique_ptr<ListNode>(); - } - list->set_end(std::make_unique<EndNode>(cur_token())); - return list; -} - -std::unique_ptr<ParseNode> Parser::ParseFile() { - std::unique_ptr<BlockNode> file = - std::make_unique<BlockNode>(BlockNode::DISCARDS_RESULT); - for (;;) { - if (at_end()) - break; - std::unique_ptr<ParseNode> statement = ParseStatement(); - if (!statement) - break; - file->append_statement(std::move(statement)); - } - if (!at_end() && !has_error()) - *err_ = Err(cur_token(), "Unexpected here, should be newline."); - if (has_error()) - return std::unique_ptr<ParseNode>(); - - // TODO(scottmg): If this is measurably expensive, it could be done only - // when necessary (when reformatting, or during tests). Comments are - // separate from the parse tree at this point, so downstream code can remain - // ignorant of them. - AssignComments(file.get()); - - return std::move(file); -} - -std::unique_ptr<ParseNode> Parser::ParseStatement() { - if (LookAhead(Token::IF)) { - return ParseCondition(); - } else if (LookAhead(Token::BLOCK_COMMENT)) { - return BlockComment(Consume()); - } else { - // TODO(scottmg): Is this too strict? Just drop all the testing if we want - // to allow "pointless" expressions and return ParseExpression() directly. - std::unique_ptr<ParseNode> stmt = ParseExpression(); - if (stmt) { - if (stmt->AsFunctionCall() || IsAssignment(stmt.get())) - return stmt; - } - if (!has_error()) { - const Token& token = cur_or_last_token(); - *err_ = Err(token, "Expecting assignment or function call."); - } - return std::unique_ptr<ParseNode>(); - } -} - -std::unique_ptr<BlockNode> Parser::ParseBlock( - const Token& begin_brace, - BlockNode::ResultMode result_mode) { - if (has_error()) - return std::unique_ptr<BlockNode>(); - std::unique_ptr<BlockNode> block = std::make_unique<BlockNode>(result_mode); - block->set_begin_token(begin_brace); - - for (;;) { - if (LookAhead(Token::RIGHT_BRACE)) { - block->set_end(std::make_unique<EndNode>(Consume())); - break; - } - - std::unique_ptr<ParseNode> statement = ParseStatement(); - if (!statement) - return std::unique_ptr<BlockNode>(); - block->append_statement(std::move(statement)); - } - return block; -} - -std::unique_ptr<ParseNode> Parser::ParseCondition() { - std::unique_ptr<ConditionNode> condition = std::make_unique<ConditionNode>(); - condition->set_if_token(Consume(Token::IF, "Expected 'if'")); - Consume(Token::LEFT_PAREN, "Expected '(' after 'if'."); - condition->set_condition(ParseExpression()); - if (IsAssignment(condition->condition())) - *err_ = Err(condition->condition(), "Assignment not allowed in 'if'."); - Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'."); - condition->set_if_true(ParseBlock( - Consume(Token::LEFT_BRACE, "Expected '{' to start 'if' block."), - BlockNode::DISCARDS_RESULT)); - if (Match(Token::ELSE)) { - if (LookAhead(Token::LEFT_BRACE)) { - condition->set_if_false(ParseBlock(Consume(), - BlockNode::DISCARDS_RESULT)); - } else if (LookAhead(Token::IF)) { - condition->set_if_false(ParseStatement()); - } else { - *err_ = Err(cur_or_last_token(), "Expected '{' or 'if' after 'else'."); - return std::unique_ptr<ParseNode>(); - } - } - if (has_error()) - return std::unique_ptr<ParseNode>(); - return std::move(condition); -} - -void Parser::TraverseOrder(const ParseNode* root, - std::vector<const ParseNode*>* pre, - std::vector<const ParseNode*>* post) { - if (root) { - pre->push_back(root); - - if (const AccessorNode* accessor = root->AsAccessor()) { - TraverseOrder(accessor->index(), pre, post); - TraverseOrder(accessor->member(), pre, post); - } else if (const BinaryOpNode* binop = root->AsBinaryOp()) { - TraverseOrder(binop->left(), pre, post); - TraverseOrder(binop->right(), pre, post); - } else if (const BlockNode* block = root->AsBlock()) { - for (const auto& statement : block->statements()) - TraverseOrder(statement.get(), pre, post); - TraverseOrder(block->End(), pre, post); - } else if (const ConditionNode* condition = root->AsConditionNode()) { - TraverseOrder(condition->condition(), pre, post); - TraverseOrder(condition->if_true(), pre, post); - TraverseOrder(condition->if_false(), pre, post); - } else if (const FunctionCallNode* func_call = root->AsFunctionCall()) { - TraverseOrder(func_call->args(), pre, post); - TraverseOrder(func_call->block(), pre, post); - } else if (root->AsIdentifier()) { - // Nothing. - } else if (const ListNode* list = root->AsList()) { - for (const auto& node : list->contents()) - TraverseOrder(node.get(), pre, post); - TraverseOrder(list->End(), pre, post); - } else if (root->AsLiteral()) { - // Nothing. - } else if (const UnaryOpNode* unaryop = root->AsUnaryOp()) { - TraverseOrder(unaryop->operand(), pre, post); - } else if (root->AsBlockComment()) { - // Nothing. - } else if (root->AsEnd()) { - // Nothing. - } else { - CHECK(false) << "Unhandled case in TraverseOrder."; - } - - post->push_back(root); - } -} - -void Parser::AssignComments(ParseNode* file) { - // Start by generating a pre- and post- order traversal of the tree so we - // can determine what's before and after comments. - std::vector<const ParseNode*> pre; - std::vector<const ParseNode*> post; - TraverseOrder(file, &pre, &post); - - // Assign line comments to syntax immediately following. - int cur_comment = 0; - for (auto* node : pre) { - if (node->GetRange().is_null()) { - CHECK_EQ(node, file) << "Only expected on top file node"; - continue; - } - const Location start = node->GetRange().begin(); - while (cur_comment < static_cast<int>(line_comment_tokens_.size())) { - if (start.byte() >= line_comment_tokens_[cur_comment].location().byte()) { - const_cast<ParseNode*>(node)->comments_mutable()->append_before( - line_comment_tokens_[cur_comment]); - ++cur_comment; - } else { - break; - } - } - } - - // Remaining line comments go at end of file. - for (; cur_comment < static_cast<int>(line_comment_tokens_.size()); - ++cur_comment) - file->comments_mutable()->append_after(line_comment_tokens_[cur_comment]); - - // Assign suffix to syntax immediately before. - cur_comment = static_cast<int>(suffix_comment_tokens_.size() - 1); - for (std::vector<const ParseNode*>::const_reverse_iterator i = post.rbegin(); - i != post.rend(); - ++i) { - // Don't assign suffix comments to the function, list, or block, but instead - // to the last thing inside. - if ((*i)->AsFunctionCall() || (*i)->AsList() || (*i)->AsBlock()) - continue; - - Location start = (*i)->GetRange().begin(); - Location end = (*i)->GetRange().end(); - - // Don't assign suffix comments to something that starts on an earlier - // line, so that in: - // - // sources = [ "a", - // "b" ] # comment - // - // it's attached to "b", not sources = [ ... ]. - if (start.line_number() != end.line_number()) - continue; - - while (cur_comment >= 0) { - if (end.byte() <= suffix_comment_tokens_[cur_comment].location().byte()) { - const_cast<ParseNode*>(*i)->comments_mutable()->append_suffix( - suffix_comment_tokens_[cur_comment]); - --cur_comment; - } else { - break; - } - } - - // Suffix comments were assigned in reverse, so if there were multiple on - // the same node, they need to be reversed. - if ((*i)->comments() && !(*i)->comments()->suffix().empty()) - const_cast<ParseNode*>(*i)->comments_mutable()->ReverseSuffix(); - } -} |