summaryrefslogtreecommitdiff
path: root/chromium/components/subresource_filter/tools/rule_parser/rule_parser.h
blob: 75c6b673e4f239a8a143bfe351a9a7ef55b232c7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_
#define COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_

#include <stddef.h>
#include <ostream>
#include <string>

#include "base/macros.h"
#include "base/strings/string_piece.h"
#include "components/subresource_filter/tools/rule_parser/rule.h"

namespace subresource_filter {

// A parser of EasyList rules. It is intended to be (re-)used for parsing
// multiple rules: the outcome of the most recent Parse(...) call is exposed
// through the accessors below.
// TODO(pkalinnikov): Support 'sitekey', 'collapse', and 'donottrack' options.
class RuleParser {
 public:
  // Detailed information about a parse error (if any).
  struct ParseError {
    // Indicates the type of an error that occurred during a Parse(...) call.
    enum ErrorCode {
      NONE,  // Parsing was successful.

      EMPTY_RULE,             // The parsed line does not contain any rule.
      BAD_WHITELIST_SYNTAX,   // Wrong syntax is used for a whitelist rule.
      UNKNOWN_OPTION,         // An unknown option is used in a URL rule.
      NOT_A_TRISTATE_OPTION,  // Negation is used for a non-tristate option.
      DEPRECATED_OPTION,      // A deprecated option is used.
      WHITELIST_ONLY_OPTION,  // The option applies to whitelist rules only.
      NO_VALUE_PROVIDED,      // A valued option is used without a value.

      WRONG_CSS_RULE_DELIM,  // A wrong delimiter is used in a CSS rule.
      EMPTY_CSS_SELECTOR,    // No CSS selector is specified in a CSS rule.

      UNSUPPORTED_FEATURE,  // A not yet supported EasyList feature is used.
    };

    // TODO(pkalinnikov): Introduce warnings for, e.g., using an inverted
    // "document" activation type, using unsupported option, etc. This would let
    // a client have a best-effort version of the rule. Leave it up to clients
    // to decide what warnings/errors are critical for them.

    // Constructs a ParseError in a default (no error) state.
    ParseError();
    ~ParseError();

    // The kind of the error; NONE means that there is no error.
    ErrorCode error_code = NONE;

    // A copy of the parsed line. If no error occurred, it is empty.
    std::string line;

    // Position of the character in the |line| that introduced the error. If
    // |error_code| != NONE, then 0 <= |error_index| <= line.size(), otherwise
    // |error_index| == std::string::npos.
    size_t error_index = std::string::npos;
  };

  RuleParser();
  ~RuleParser();

  // RuleParser is neither copyable nor assignable.
  RuleParser(const RuleParser&) = delete;
  RuleParser& operator=(const RuleParser&) = delete;

  // Returns a human-readable detailed explanation of a parsing error.
  static const char* GetParseErrorCodeDescription(ParseError::ErrorCode code);

  // Parses a rule from the |line|. Returns the type of the rule parsed, or
  // RULE_TYPE_UNSPECIFIED on error. Notes:
  //  - When parsing a URL rule, URL syntax is not verified.
  //  - When parsing a CSS rule, the CSS selector syntax is not verified.
  RuleType Parse(base::StringPiece line);

  // Returns error diagnostics on the latest parsed line.
  const ParseError& parse_error() const { return parse_error_; }

  // Gets the last parsed rule type. It is guaranteed to return the same value
  // as the last Parse(...) invocation, or RULE_TYPE_UNSPECIFIED if no calls
  // were done.
  RuleType rule_type() const { return rule_type_; }

  // Gets the last parsed URL filtering rule. The result is undefined if
  // rule_type() != RULE_TYPE_URL.
  const UrlRule& url_rule() const { return url_rule_; }

  // Gets the last parsed CSS element hiding rule. The result is undefined if
  // rule_type() != RULE_TYPE_CSS.
  const CssRule& css_rule() const { return css_rule_; }

 private:
  // Parses the |part| and saves parsed URL filtering rule to the |url_rule_|
  // member. |origin| is used for a proper error reporting. Returns
  // RULE_TYPE_URL if the |part| is a well-formed URL rule. Otherwise returns
  // RULE_TYPE_UNSPECIFIED and sets |parse_error_|.
  RuleType ParseUrlRule(base::StringPiece origin, base::StringPiece part);

  // Parses the |options| segment of a URL filtering rule and saves the parsed
  // options to the |url_rule_| member. Returns true if the options were parsed
  // correctly. Otherwise sets an error in |parse_error_| and returns false.
  bool ParseUrlRuleOptions(base::StringPiece origin, base::StringPiece options);

  // Parses the |part| and saves parsed CSS rule to the |css_rule_| member.
  // |css_section_start| denotes a position of '#' in the |part|, used to
  // separate a CSS selector. |origin| is used for error reporting. Returns the
  // type of the parsed rule and sets |parse_error_| on error.
  // NOTE(review): the result is presumably RULE_TYPE_CSS for a well-formed CSS
  // rule and RULE_TYPE_UNSPECIFIED otherwise, mirroring ParseUrlRule() --
  // confirm against the definition.
  RuleType ParseCssRule(base::StringPiece origin,
                        base::StringPiece part,
                        size_t css_section_start);

  // Sets |parse_error_| to contain specific error, starting at |error_begin|.
  // NOTE(review): |error_begin| presumably points into |origin| so that
  // ParseError::error_index can be derived from it -- confirm against the
  // definition.
  void SetParseError(ParseError::ErrorCode code,
                     base::StringPiece origin,
                     const char* error_begin);

  // Diagnostics for the latest parsed line; see parse_error().
  ParseError parse_error_;

  // Type of the last parsed rule; see rule_type().
  // NOTE(review): there is no in-class initializer here, so the documented
  // RULE_TYPE_UNSPECIFIED value before the first Parse(...) call has to be
  // established by the constructor -- confirm against the definition.
  RuleType rule_type_;

  // The last parsed rules; see url_rule() and css_rule().
  UrlRule url_rule_;
  CssRule css_rule_;
};

// Pretty-prints the parsing |error| to |out|, e.g. like this:
//   (error:22) Unknown URL rule option:
//   @@example.org$script,unknown_option
//                        ^
// In the example above the caret marks the offending character of the parsed
// line (presumably the one at ParseError::error_index -- confirm against the
// definition). The returned stream reference allows chaining further
// insertions.
std::ostream& operator<<(std::ostream& out,
                         const RuleParser::ParseError& error);

}  // namespace subresource_filter

#endif  // COMPONENTS_SUBRESOURCE_FILTER_TOOLS_RULE_PARSER_RULE_PARSER_H_