diff options
Diffstat (limited to 'chromium/components/subresource_filter/core/common/fuzzy_pattern_matching.h')
-rw-r--r-- | chromium/components/subresource_filter/core/common/fuzzy_pattern_matching.h | 101 |
1 files changed, 5 insertions, 96 deletions
diff --git a/chromium/components/subresource_filter/core/common/fuzzy_pattern_matching.h b/chromium/components/subresource_filter/core/common/fuzzy_pattern_matching.h index 15e9fe79262..68472e76ae6 100644 --- a/chromium/components/subresource_filter/core/common/fuzzy_pattern_matching.h +++ b/chromium/components/subresource_filter/core/common/fuzzy_pattern_matching.h @@ -19,12 +19,7 @@ #include <stddef.h> -#include <type_traits> -#include <vector> - -#include "base/logging.h" #include "base/strings/string_piece.h" -#include "components/subresource_filter/core/common/knuth_morris_pratt.h" namespace subresource_filter { @@ -56,103 +51,17 @@ inline bool IsSeparator(char c) { } } -class IsEqualOrBothSeparators { - public: - bool operator()(char first, char second) const { - return first == second || (IsSeparator(first) && IsSeparator(second)); - } -}; - // Returns whether |text| starts with a fuzzy occurrence of |subpattern|. bool StartsWithFuzzy(base::StringPiece text, base::StringPiece subpattern); // Returns whether |text| ends with a fuzzy occurrence of |subpattern|. bool EndsWithFuzzy(base::StringPiece text, base::StringPiece subpattern); -// Appends elements of the Knuth-Morris-Pratt failure function corresponding to -// |subpattern| at the end of the |failure| vector. All separator characters, -// including the separator placeholder, are considered equivalent. This is -// necessary in order to support fuzzy separator matching while also keeping the -// equality predicate transitive and symmetric which is needed for KMP. However, -// the separators will need to be checked in a second pass for KMP match -// candidates, which is implemented by AllOccurrencesFuzzy. -template <typename FailureFunctionElement> -void BuildFailureFunctionFuzzy(base::StringPiece subpattern, - std::vector<FailureFunctionElement>* failure) { - static_assert(std::is_unsigned<FailureFunctionElement>::value, - "The type is not unsigned."); - BuildFailureFunction<FailureFunctionElement, IsEqualOrBothSeparators>( - subpattern, failure); -} - -// Iterator range that yields all fuzzy occurrences of |subpattern| in |text|. -// The values in the range are the positions just beyond each occurrence. -// Out-of-range iterators are dereferenced to base::StringPiece::npos. -// -// The |failure| array is the failure function created by -// BuildFailureFunctionFuzzy for the same |subpattern|, and containing at least -// subpattern.size() values. -template <typename IntType> -class AllOccurrencesFuzzy - : private AllOccurrences<IntType, IsEqualOrBothSeparators> { - public: - using Base = AllOccurrences<IntType, IsEqualOrBothSeparators>; - using BaseIterator = typename Base::Iterator; - - class Iterator : public BaseIterator { - public: - Iterator& operator++() { - BaseIterator::operator++(); - AdvanceWhileFalsePositive(); - return *this; - } - - Iterator operator++(int) { - Iterator copy(*this); - operator++(); - return copy; - } - - private: - friend class AllOccurrencesFuzzy; - - explicit Iterator(const BaseIterator& base_iterator) - : BaseIterator(base_iterator) { - AdvanceWhileFalsePositive(); - } - - const AllOccurrencesFuzzy& owner() const { - return *static_cast<const AllOccurrencesFuzzy*>(BaseIterator::owner_); - } - - // Moves the iterator forward until it either reaches the end, or meets an - // occurrence exactly matching all non-placeholder separators. - void AdvanceWhileFalsePositive() { - while (BaseIterator::match_end_ != base::StringPiece::npos && !IsMatch()) - BaseIterator::operator++(); - } - - // Returns whether the currenct match meets all separators. - bool IsMatch() const { - DCHECK_LE(BaseIterator::match_end_, owner().text_.size()); - DCHECK_GE(BaseIterator::match_end_, owner().pattern_.size()); - - // TODO(pkalinnikov): Store a vector of separator positions externally and - // check if they are equal to the corresponding characters of the |text|. - return StartsWithFuzzy(owner().text_.substr(BaseIterator::match_end_ - - owner().pattern_.size()), - owner().pattern_); - } - }; - - AllOccurrencesFuzzy(base::StringPiece text, - base::StringPiece subpattern, - const IntType* failure) - : Base(text, subpattern, failure) {} - - Iterator begin() const { return Iterator(Base::begin()); } - Iterator end() const { return Iterator(Base::end()); } -}; +// Returns the position of the leftmost fuzzy occurrence of a |subpattern| in +// the |text| starting no earlier than |from| the specified position. +size_t FindFuzzy(base::StringPiece text, + base::StringPiece subpattern, + size_t from = 0); } // namespace subresource_filter |