// Copyright 2020 The Chromium Authors. All rights reserved. // Copyright 2014 Blake Embrey (hello@blakeembrey.com) // Use of this source code is governed by an MIT-style license that can be // found in the LICENSE file or at https://opensource.org/licenses/MIT. #ifndef THIRD_PARTY_LIBURLPATTERN_PATTERN_H_ #define THIRD_PARTY_LIBURLPATTERN_PATTERN_H_ #include #include #include "base/component_export.h" #include "third_party/liburlpattern/options.h" namespace liburlpattern { // Numeric values are set such that more restrictive values come last. This // is important for comparison routines in calling code, like URLPattern. enum class PartType { // A part that matches any character to the end of the input string. kFullWildcard = 0, // A part that matches any character to the next segment separator. kSegmentWildcard = 1, // A part with a custom regular expression. kRegex = 2, // A fixed, non-variable part of the pattern. Consists of kChar and // kEscapedChar Tokens. kFixed = 3, }; // Numeric values are set such that more restrictive values come last. This // is important for comparison routines in calling code, like URLPattern. enum class Modifier { // The `*` modifier. kZeroOrMore = 0, // The `?` modifier. kOptional = 1, // The `+` modifier. kOneOrMore = 2, // No modifier. kNone = 3, }; // A structure representing one part of a parsed Pattern. A full Pattern // consists of an ordered sequence of Part objects. struct COMPONENT_EXPORT(LIBURLPATTERN) Part { // The type of the Part. PartType type = PartType::kFixed; // The name of the Part. Only kRegex, kSegmentWildcard, and kFullWildcard // parts may have a |name|. kFixed parts must have an empty |name|. std::string name; // A fixed string prefix that is expected before any regex or wildcard match. // kFixed parts must have an empty |prefix|. std::string prefix; // The meaning of the |value| depends on the |type| of the Part. For kFixed // parts the |value| contains the fixed string to match. For kRegex parts // the |value| contains a regular expression to match. The |value| is empty // for kSegmentWildcard and kFullWildcard parts since the |type| encodes what // to match. std::string value; // A fixed string prefix that is expected after any regex or wildcard match. // kFixed parts must have an empty |suffix|. std::string suffix; // A |modifier| indicating whether the Part is optional and/or repeated. Any // Part type may have a |modifier|. Modifier modifier = Modifier::kNone; Part(PartType type, std::string value, Modifier modifier); Part(PartType type, std::string name, std::string prefix, std::string value, std::string suffix, Modifier modifier); Part() = default; }; COMPONENT_EXPORT(LIBURLPATTERN) inline bool operator==(const Part& lh, const Part& rh) { return lh.name == rh.name && lh.prefix == rh.prefix && lh.value == rh.value && lh.suffix == rh.suffix && lh.modifier == rh.modifier; } inline bool operator!=(const Part& lh, const Part& rh) { return !(lh == rh); } COMPONENT_EXPORT(LIBURLPATTERN) std::ostream& operator<<(std::ostream& o, Part part); // This class represents a successfully parsed pattern string. It will contain // an intermediate representation that can be used to generate either a regular // expression string or to directly match against input strings. Not all // patterns are supported for direct matching. class COMPONENT_EXPORT(LIBURLPATTERN) Pattern { public: Pattern(std::vector part_list, Options options, std::string segment_wildcard_regex); // Generate a canonical string for the parsed pattern. This may result // in a value different from the pattern string originally passed to // Parse(). For example, no-op syntax like `{bar}` will be simplified to // `bar`. In addition, the generated string will include any changes mad // by EncodingCallback hooks. Finally, regular expressions equivalent to // `*` and named group default matching will be simplified; e.g. `(.*)` // will become just `*`. std::string GeneratePatternString() const; // Generate an ECMA-262 regular expression string that is equivalent to this // pattern. A vector of strings can be optionally passed to |name_list_out| // to be populated with the list of group names. These correspond // sequentially to the regular expression capture groups. Note, the // regular expression string does not currently used named capture groups // directly in order to match the upstream path-to-regexp behavior. std::string GenerateRegexString( std::vector* name_list_out = nullptr) const; const std::vector& PartList() const { return part_list_; } private: // Compute the expected size of the string that will be returned by // GenerateRegexString(). size_t RegexStringLength() const; // Utility method to help with generating the regex string and length. void AppendDelimiterList(std::string& append_target) const; size_t DelimiterListLength() const; void AppendEndsWith(std::string& append_target) const; size_t EndsWithLength() const; std::vector part_list_; Options options_; std::string segment_wildcard_regex_; }; } // namespace liburlpattern #endif // THIRD_PARTY_LIBURLPATTERN_PATTERN_H_