// Source path: root/chromium/components/subresource_filter/core/common/fuzzy_pattern_matching.h
// Source blob: 68472e76ae6d85957281704c589f53f8fe565121
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// The separator placeholder '^' symbol is used in subpatterns to match any
// separator character, which is any ASCII symbol except letters, digits, and
// the following: '_', '-', '.', '%'. Note that the separator placeholder
// character '^' is itself a separator, as well as '\0'.
// TODO(pkalinnikov): In addition, a separator placeholder at the end of the
// pattern can be matched by the end of |text|.
//
// We define a fuzzy occurrence as an occurrence of a |subpattern| in |text|
// such that all its non-placeholder characters are equal to the corresponding
// characters of the |text|, whereas each '^' placeholder can correspond to any
// type of separator in |text|.

#ifndef COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_
#define COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_

#include <stddef.h>

#include "base/strings/string_piece.h"

namespace subresource_filter {

// The placeholder character which, when it appears in a subpattern, matches
// any separator character (see IsSeparator()).
constexpr char kSeparatorPlaceholder = '^';

// Returns whether |c| is a 7-bit ASCII character, i.e. its code, viewed as an
// unsigned byte, lies in the range [0x00, 0x7F].
inline bool IsAscii(char c) {
  // Go through unsigned char so the answer does not depend on whether plain
  // char is signed on the target platform.
  const unsigned char byte = static_cast<unsigned char>(c);
  return byte < 0x80;
}

// Returns whether |c| is an ASCII digit ('0'-'9') or an ASCII letter
// ('A'-'Z' or 'a'-'z'). Any other character, including every non-ASCII byte,
// yields false.
inline bool IsAlphaNumericAscii(char c) {
  const bool is_digit = c >= '0' && c <= '9';
  const bool is_upper = c >= 'A' && c <= 'Z';
  const bool is_lower = c >= 'a' && c <= 'z';
  return is_digit || is_upper || is_lower;
}

// Returns whether |c| is a separator. A separator is any ASCII character that
// is neither a letter nor a digit nor one of '_', '-', '.', '%'. The
// placeholder kSeparatorPlaceholder is itself a separator, and so is '\0';
// non-ASCII characters are never separators.
inline bool IsSeparator(char c) {
  if (c == kSeparatorPlaceholder)
    return true;
  // These punctuation characters are explicitly excluded from the separators.
  if (c == '_' || c == '-' || c == '.' || c == '%')
    return false;
  return IsAscii(c) && !IsAlphaNumericAscii(c);
}

// Returns whether |text| starts with a fuzzy occurrence of |subpattern|. In a
// fuzzy occurrence each '^' placeholder of |subpattern| may correspond to any
// separator character of |text|, while all other characters must be equal.
bool StartsWithFuzzy(base::StringPiece text, base::StringPiece subpattern);

// Returns whether |text| ends with a fuzzy occurrence of |subpattern|. In a
// fuzzy occurrence each '^' placeholder of |subpattern| may correspond to any
// separator character of |text|, while all other characters must be equal.
bool EndsWithFuzzy(base::StringPiece text, base::StringPiece subpattern);

// Returns the position of the leftmost fuzzy occurrence of |subpattern| in
// |text| that starts no earlier than the position |from| (0 by default, i.e.
// the whole |text| is searched).
// NOTE(review): the value returned when no such occurrence exists is not
// visible in this header -- presumably base::StringPiece::npos; confirm
// against the implementation.
size_t FindFuzzy(base::StringPiece text,
                 base::StringPiece subpattern,
                 size_t from = 0);

}  // namespace subresource_filter

#endif  // COMPONENTS_SUBRESOURCE_FILTER_CORE_COMMON_FUZZY_PATTERN_MATCHING_H_