summaryrefslogtreecommitdiff
path: root/chromium/components/url_matcher/url_util.h
blob: 89a33dfcb62717040074736350db29fdf50c359a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_URL_MATCHER_URL_UTIL_H_
#define COMPONENTS_URL_MATCHER_URL_UTIL_H_

#include "base/memory/scoped_refptr.h"
#include "base/values.h"
#include "components/url_matcher/url_matcher.h"
#include "components/url_matcher/url_matcher_export.h"

class GURL;

namespace url_matcher {
namespace util {

// Converts `value`, which is expected to be a list of strings, into the string
// vector `out`. Returns true if successful.
// NOTE(review): unlike the other functions declared in this header, this one
// is not marked URL_MATCHER_EXPORT — confirm that this is intentional.
bool GetAsStringVector(const base::Value* value, std::vector<std::string>* out);

// Normalizes a URL for matching purposes. `url` is taken by const reference
// and the normalized URL is returned by value.
// NOTE(review): the exact normalization steps live in the implementation and
// are not visible from this header.
URL_MATCHER_EXPORT GURL Normalize(const GURL& url);

// Extracts the underlying URL that is wrapped by services such as Google AMP
// or Google Translate.
// Returns the embedded URL, or an empty GURL if `url` doesn't match a known
// format.
URL_MATCHER_EXPORT GURL GetEmbeddedURL(const GURL& url);

// Utility struct that holds a URL filter broken down into its components.
// Instances are move-only: the copy constructor and the copy assignment
// operator are deleted.
struct URL_MATCHER_EXPORT FilterComponents {
  FilterComponents();
  FilterComponents(const FilterComponents&) = delete;
  FilterComponents(FilterComponents&&);
  FilterComponents& operator=(const FilterComponents&) = delete;
  FilterComponents& operator=(FilterComponents&&) = default;

  ~FilterComponents();

  // Returns true if `this` represents the "*" filter.
  bool IsWildcard() const;
  // Individual components of the filter.
  // NOTE(review): presumably these hold the values produced by
  // FilterToComponents() for the filter string — confirm against the
  // implementation.
  std::string scheme;
  std::string host;
  // Defaults to 0.
  uint16_t port = 0;
  std::string path;
  std::string query;
  // Number of conditions that a URL needs to match in order to be considered
  // a match for this filter. Defaults to 0.
  int number_of_url_matching_conditions = 0;
  // Defaults to true.
  // NOTE(review): presumably indicates whether subdomains of `host` are also
  // matched — confirm against the implementation.
  bool match_subdomains = true;
  // Defaults to true.
  // NOTE(review): presumably true for an allow-list filter and false for a
  // block-list filter — confirm against the implementation.
  bool allow = true;
};

// Creates a condition set that can be used with the `url_matcher`.
//
// `id` needs to be a unique number; it is what the `url_matcher` returns when
// a URL matches this condition set.
// `allow` indicates whether this is an allow-list (true) or a block-list
// (false) filter.
// `scheme`, `host`, `match_subdomains`, `port`, `path` and `query` describe
// the URL components to match.
// NOTE(review): presumably these are the values produced by
// FilterToComponents() — confirm against the callers.
URL_MATCHER_EXPORT scoped_refptr<url_matcher::URLMatcherConditionSet>
CreateConditionSet(url_matcher::URLMatcher* url_matcher,
                   base::MatcherStringPattern::ID id,
                   const std::string& scheme,
                   const std::string& host,
                   bool match_subdomains,
                   uint16_t port,
                   const std::string& path,
                   const std::string& query,
                   bool allow);

// Splits a URL filter into its components. A GURL isn't used because filters
// can be invalid URLs, e.g. "google.com".
//
// `filter` should have the format described at
// http://www.chromium.org/administrators/url-blocklist-filter-format and
// accepts wildcards. The optional username and password are ignored.
//
// Output parameters (all arguments are mandatory):
//   `scheme`           - the scheme component of the filter.
//   `host`             - the host, preprocessed so that it can be passed to
//                        URLMatcher for the appropriate condition.
//   `match_subdomains` - whether the filter should include subdomains of the
//                        hostname (if it is one).
//   `port`             - the port, or 0 if none is explicitly defined.
//   `path`             - the path; does not include query parameters.
//   `query`            - the query parameters ('?' not included).
//
// Returns false if the filter couldn't be parsed. In that case the values of
// the output parameters are undefined.
URL_MATCHER_EXPORT bool FilterToComponents(const std::string& filter,
                                           std::string* scheme,
                                           std::string* host,
                                           bool* match_subdomains,
                                           uint16_t* port,
                                           std::string* path,
                                           std::string* query);

// Adds the filters in `patterns` to `matcher` as a ConditionSet::Vector.
// This overload takes the patterns as a base::Value::List.
// `matcher` is the URLMatcher where filters are added.
// `allow` specifies whether the filter accepts or blocks the matched URLs.
// `id` is the id given to the filter being added.
// NOTE(review): `id` is passed by pointer, so it is presumably updated as
// filters are added — confirm against the implementation.
// `patterns` is a list of URL filters following the format described at
// http://www.chromium.org/administrators/url-blocklist-filter-format and
// accepts wildcards.
// `filters` is an optional map of id to FilterComponents where the generated
// FilterComponents will be added. It defaults to nullptr.
URL_MATCHER_EXPORT void AddFilters(
    url_matcher::URLMatcher* matcher,
    bool allow,
    base::MatcherStringPattern::ID* id,
    const base::Value::List& patterns,
    std::map<base::MatcherStringPattern::ID,
             url_matcher::util::FilterComponents>* filters = nullptr);

// Adds the filters in `patterns` to `matcher` as a ConditionSet::Vector.
// This overload takes the patterns as a std::vector of strings.
// `matcher` is the URLMatcher where filters are added.
// `allow` specifies whether the filter accepts or blocks the matched URLs.
// `id` is the id given to the filter being added.
// NOTE(review): `id` is passed by pointer, so it is presumably updated as
// filters are added — confirm against the implementation.
// `patterns` is a list of URL filters following the format described at
// http://www.chromium.org/administrators/url-blocklist-filter-format and
// accepts wildcards.
// `filters` is an optional map of id to FilterComponents where the generated
// FilterComponents will be added. It defaults to nullptr.
URL_MATCHER_EXPORT void AddFilters(
    url_matcher::URLMatcher* matcher,
    bool allow,
    base::MatcherStringPattern::ID* id,
    const std::vector<std::string>& patterns,
    std::map<base::MatcherStringPattern::ID,
             url_matcher::util::FilterComponents>* filters = nullptr);

// Overload taking the patterns as a base::Value::List.
// NOTE(review): presumably adds the filters in `patterns` to `matcher` as
// allow-list filters, i.e. a convenience form of AddFilters() with `allow`
// set to true — confirm against the implementation.
URL_MATCHER_EXPORT void AddAllowFilters(url_matcher::URLMatcher* matcher,
                                        const base::Value::List& patterns);

// Overload taking the patterns as a std::vector of strings.
// NOTE(review): presumably adds the filters in `patterns` to `matcher` as
// allow-list filters, i.e. a convenience form of AddFilters() with `allow`
// set to true — confirm against the implementation.
URL_MATCHER_EXPORT void AddAllowFilters(
    url_matcher::URLMatcher* matcher,
    const std::vector<std::string>& patterns);

}  // namespace util
}  // namespace url_matcher

#endif  // COMPONENTS_URL_MATCHER_URL_UTIL_H_