diff options
author | Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> | 2016-07-14 17:41:05 +0200 |
---|---|---|
committer | Allan Sandfeld Jensen <allan.jensen@qt.io> | 2016-08-04 12:37:36 +0000 |
commit | 399c965b6064c440ddcf4015f5f8e9d131c7a0a6 (patch) | |
tree | 6b06b60ff365abef0e13b3503d593a0df48d20e8 /chromium/components/link_header_util/link_header_util.cc | |
parent | 7366110654eec46f21b6824f302356426f48cd74 (diff) | |
download | qtwebengine-chromium-399c965b6064c440ddcf4015f5f8e9d131c7a0a6.tar.gz |
BASELINE: Update Chromium to 52.0.2743.76 and Ninja to 1.7.1
Change-Id: I382f51b959689505a60f8b707255ecb344f7d8b4
Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/components/link_header_util/link_header_util.cc')
-rw-r--r-- | chromium/components/link_header_util/link_header_util.cc | 195 |
1 files changed, 195 insertions, 0 deletions
diff --git a/chromium/components/link_header_util/link_header_util.cc b/chromium/components/link_header_util/link_header_util.cc new file mode 100644 index 00000000000..1438a22c404 --- /dev/null +++ b/chromium/components/link_header_util/link_header_util.cc @@ -0,0 +1,195 @@ +// Copyright 2016 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/link_header_util/link_header_util.h" + +#include "base/strings/string_util.h" +#include "net/http/http_util.h" + +namespace link_header_util { + +namespace { + +// A variation of base::StringTokenizer and net::HttpUtil::ValuesIterator. +// Takes the parsing of StringTokenizer and adds support for quoted strings that +// are quoted by matching <> (and does not support escaping in those strings). +// Also has the behavior of ValuesIterator where it strips whitespace from all +// values and only outputs non-empty values. +// Only supports ',' as separator and supports "" and <> as quote chars. +class ValueTokenizer { + public: + ValueTokenizer(std::string::const_iterator begin, + std::string::const_iterator end) + : token_begin_(begin), token_end_(begin), end_(end) {} + + std::string::const_iterator token_begin() const { return token_begin_; } + std::string::const_iterator token_end() const { return token_end_; } + + bool GetNext() { + while (GetNextInternal()) { + net::HttpUtil::TrimLWS(&token_begin_, &token_end_); + + // Only return non-empty values. + if (token_begin_ != token_end_) + return true; + } + return false; + } + + private: + // Updates token_begin_ and token_end_ to point to the (possibly empty) next + // token. Returns false if end-of-string was reached first. + bool GetNextInternal() { + // First time this is called token_end_ points to the first character in the + // input. 
Every other time token_end_ points to the delimiter at the end of + // the last returned token (which could be the end of the string). + + // End of string, return false. + if (token_end_ == end_) + return false; + + // Skip past the delimiter. + if (*token_end_ == ',') + ++token_end_; + + // Make token_begin_ point to the beginning of the next token, and search + // for the end of the token in token_end_. + token_begin_ = token_end_; + + // Set to true if we're currently inside a quoted string. + bool in_quote = false; + // Set to true if we're currently inside a quoted string, and have just + // encountered an escape character. In this case a closing quote will be + // ignored. + bool in_escape = false; + // If currently in a quoted string, this is the character that (when not + // escaped) indicates the end of the string. + char quote_close_char = '\0'; + // If currently in a quoted string, this is set to true if it is possible to + // escape the closing quote using '\'. + bool quote_allows_escape = false; + + while (token_end_ != end_) { + char c = *token_end_; + if (in_quote) { + if (in_escape) { + in_escape = false; + } else if (quote_allows_escape && c == '\\') { + in_escape = true; + } else if (c == quote_close_char) { + in_quote = false; + } + } else { + if (c == ',') + break; + if (c == '"' || c == '<') { + in_quote = true; + quote_close_char = (c == '<' ? '>' : c); + quote_allows_escape = (c != '<'); + } + } + ++token_end_; + } + return true; + } + + std::string::const_iterator token_begin_; + std::string::const_iterator token_end_; + std::string::const_iterator end_; +}; + +// Parses the URL part of a Link header. When successful |url_begin| points +// to the beginning of the url, |url_end| points to the end of the url and +// |params_begin| points to the first character after the '>' character at the +// end of the url. 
+bool ExtractURL(std::string::const_iterator begin, + std::string::const_iterator end, + std::string::const_iterator* url_begin, + std::string::const_iterator* url_end, + std::string::const_iterator* params_begin) { + // Extract the URL part (everything between '<' and first '>' character). + if (*begin != '<') + return false; + + ++begin; + *url_begin = begin; + *url_end = std::find(begin, end, '>'); + + // Fail if we did not find a '>'. + if (*url_end == end) + return false; + + *params_begin = *url_end; + // Skip the '>' at the end of the URL. + ++*params_begin; + + // Trim whitespace from the URL. + net::HttpUtil::TrimLWS(url_begin, url_end); + return true; +} + +} // namespace + +std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) { + std::vector<StringIteratorPair> values; + ValueTokenizer tokenizer(header.begin(), header.end()); + while (tokenizer.GetNext()) { + values.push_back( + StringIteratorPair(tokenizer.token_begin(), tokenizer.token_end())); + } + return values; +} + +// Parses one link in a link header into its url and parameters. +// A link is of the form "<some-url>; param1=value1; param2=value2". +// Returns false if parsing the link failed, returns true on success. This +// method is more lenient than the RFC. It doesn't fail on things like invalid +// characters in the URL, and also doesn't verify that certain parameters should +// or shouldn't be quoted strings. +// If a parameter occurs more than once in the link, only the first value is +// returned in params as this is the required behavior for all attributes chrome +// currently cares about in link headers. +bool ParseLinkHeaderValue( + std::string::const_iterator begin, + std::string::const_iterator end, + std::string* url, + std::unordered_map<std::string, base::Optional<std::string>>* params) { + // Can't parse an empty string. + if (begin == end) + return false; + + // Extract the URL part (everything between '<' and first '>' character). 
+ std::string::const_iterator url_begin; + std::string::const_iterator url_end; + if (!ExtractURL(begin, end, &url_begin, &url_end, &begin)) + return false; + *url = std::string(url_begin, url_end); + + // Trim any remaining whitespace, and make sure there is a ';' separating + // parameters from the URL. + net::HttpUtil::TrimLWS(&begin, &end); + if (begin != end && *begin != ';') + return false; + + // Parse all the parameters. + net::HttpUtil::NameValuePairsIterator params_iterator( + begin, end, ';', + net::HttpUtil::NameValuePairsIterator::Values::NOT_REQUIRED, + net::HttpUtil::NameValuePairsIterator::Quotes::STRICT_QUOTES); + while (params_iterator.GetNext()) { + if (!net::HttpUtil::IsParmName(params_iterator.name_begin(), + params_iterator.name_end())) + return false; + std::string name = base::ToLowerASCII(base::StringPiece( + params_iterator.name_begin(), params_iterator.name_end())); + if (!params_iterator.value_is_quoted() && + params_iterator.value_begin() == params_iterator.value_end()) + params->insert(std::make_pair(name, base::nullopt)); + else + params->insert(std::make_pair(name, params_iterator.value())); + } + return params_iterator.valid(); +} + +} // namespace link_header_util |