summaryrefslogtreecommitdiff
path: root/chromium/components/link_header_util/link_header_util.cc
diff options
context:
space:
mode:
authorAllan Sandfeld Jensen <allan.jensen@theqtcompany.com>2016-07-14 17:41:05 +0200
committerAllan Sandfeld Jensen <allan.jensen@qt.io>2016-08-04 12:37:36 +0000
commit399c965b6064c440ddcf4015f5f8e9d131c7a0a6 (patch)
tree6b06b60ff365abef0e13b3503d593a0df48d20e8 /chromium/components/link_header_util/link_header_util.cc
parent7366110654eec46f21b6824f302356426f48cd74 (diff)
downloadqtwebengine-chromium-399c965b6064c440ddcf4015f5f8e9d131c7a0a6.tar.gz
BASELINE: Update Chromium to 52.0.2743.76 and Ninja to 1.7.1
Change-Id: I382f51b959689505a60f8b707255ecb344f7d8b4 Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/components/link_header_util/link_header_util.cc')
-rw-r--r--chromium/components/link_header_util/link_header_util.cc195
1 files changed, 195 insertions, 0 deletions
diff --git a/chromium/components/link_header_util/link_header_util.cc b/chromium/components/link_header_util/link_header_util.cc
new file mode 100644
index 00000000000..1438a22c404
--- /dev/null
+++ b/chromium/components/link_header_util/link_header_util.cc
@@ -0,0 +1,195 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "components/link_header_util/link_header_util.h"
+
+#include "base/strings/string_util.h"
+#include "net/http/http_util.h"
+
+namespace link_header_util {
+
+namespace {
+
+// Splits a comma-separated header value into its individual entries, in the
+// spirit of base::StringTokenizer and net::HttpUtil::ValuesIterator.
+// A ',' only separates entries when it appears outside of a quoted section.
+// Two kinds of quoted sections are recognized:
+//   "..."  in which a backslash escapes the character that follows it, and
+//   <...>  in which no escaping is possible.
+// Each entry has its surrounding linear whitespace trimmed, and entries that
+// are empty after trimming are never reported.
+class ValueTokenizer {
+ public:
+  ValueTokenizer(std::string::const_iterator begin,
+                 std::string::const_iterator end)
+      : token_begin_(begin), token_end_(begin), end_(end) {}
+
+  // Bounds of the entry found by the most recent successful GetNext() call.
+  std::string::const_iterator token_begin() const { return token_begin_; }
+  std::string::const_iterator token_end() const { return token_end_; }
+
+  // Advances to the next non-empty entry. Returns false once the input has
+  // been exhausted.
+  bool GetNext() {
+    for (;;) {
+      if (!GetNextInternal())
+        return false;
+
+      net::HttpUtil::TrimLWS(&token_begin_, &token_end_);
+
+      // Keep looking when nothing is left of the entry after trimming.
+      if (token_begin_ != token_end_)
+        return true;
+    }
+  }
+
+ private:
+  // Moves token_begin_ / token_end_ to the next raw (untrimmed, possibly
+  // empty) entry. Returns false if the end of the input had already been
+  // reached.
+  bool GetNextInternal() {
+    // On the first call token_end_ is the start of the input; on later calls
+    // it is wherever the previous entry ended.
+    if (token_end_ == end_)
+      return false;
+
+    // Step over the separator that terminated the previous entry, if any.
+    if (*token_end_ == ',')
+      ++token_end_;
+
+    // The new entry starts here; scan forward to find where it stops.
+    token_begin_ = token_end_;
+
+    // Character that ends the quoted section currently being scanned, or
+    // '\0' while outside of any quoted section.
+    char closing_char = '\0';
+    // True when the previous character was a backslash inside a "..."
+    // section, in which case the current character is taken literally.
+    bool escaped = false;
+
+    for (; token_end_ != end_; ++token_end_) {
+      const char c = *token_end_;
+
+      if (closing_char == '\0') {
+        // Outside of quotes: a comma ends the entry, '"' and '<' open a
+        // quoted section, everything else is ordinary content.
+        if (c == ',')
+          break;
+        if (c == '"')
+          closing_char = '"';
+        else if (c == '<')
+          closing_char = '>';
+        continue;
+      }
+
+      // Inside quotes. Only "..." sections honor backslash escapes.
+      if (escaped)
+        escaped = false;
+      else if (closing_char == '"' && c == '\\')
+        escaped = true;
+      else if (c == closing_char)
+        closing_char = '\0';
+    }
+    return true;
+  }
+
+  std::string::const_iterator token_begin_;
+  std::string::const_iterator token_end_;
+  std::string::const_iterator end_;
+};
+
+// Parses the URL part of a Link header value in [begin, end).
+// The value must start with '<'; the URL is everything between that '<' and
+// the first '>' that follows it.
+// On success returns true, with |url_begin| / |url_end| delimiting the URL
+// (surrounding linear whitespace trimmed) and |params_begin| pointing at the
+// first character after the closing '>'.
+// Returns false if the range is empty, does not start with '<', or contains
+// no closing '>'. |params_begin| is only written on success.
+bool ExtractURL(std::string::const_iterator begin,
+                std::string::const_iterator end,
+                std::string::const_iterator* url_begin,
+                std::string::const_iterator* url_end,
+                std::string::const_iterator* params_begin) {
+  // An empty range has no first character to inspect; dereferencing |begin|
+  // in that case would be undefined behavior, so reject it up front.
+  if (begin == end || *begin != '<')
+    return false;
+
+  // Extract the URL part (everything between '<' and first '>' character).
+  ++begin;
+  *url_begin = begin;
+  *url_end = std::find(begin, end, '>');
+
+  // Fail if we did not find a '>'.
+  if (*url_end == end)
+    return false;
+
+  // Parameters start right after the '>' that closes the URL.
+  *params_begin = *url_end;
+  ++*params_begin;
+
+  // Trim whitespace from the URL.
+  net::HttpUtil::TrimLWS(url_begin, url_end);
+  return true;
+}
+
+} // namespace
+
+// Splits |header| into its individual non-empty, trimmed values using
+// ValueTokenizer. Each returned pair holds iterators into |header| itself, so
+// the result is only usable while |header| is alive and unmodified.
+std::vector<StringIteratorPair> SplitLinkHeader(const std::string& header) {
+  std::vector<StringIteratorPair> links;
+  for (ValueTokenizer splitter(header.begin(), header.end());
+       splitter.GetNext();) {
+    const StringIteratorPair link(splitter.token_begin(),
+                                  splitter.token_end());
+    links.push_back(link);
+  }
+  return links;
+}
+
+// Splits a single link of a Link header into its URL and its parameters.
+// The expected shape is "<some-url>; param1=value1; param2=value2".
+// Returns true on success and false when the link cannot be parsed. Parsing is
+// deliberately more forgiving than the RFC: the characters of the URL are not
+// validated, and no check is made that a given parameter is (or is not) a
+// quoted string.
+// Parameter names are stored lower-cased in |params|. A parameter with an
+// empty, unquoted value is stored as base::nullopt. When a parameter appears
+// several times only its first value is kept, which is the behavior required
+// for every attribute chrome currently cares about in link headers.
+bool ParseLinkHeaderValue(
+    std::string::const_iterator begin,
+    std::string::const_iterator end,
+    std::string* url,
+    std::unordered_map<std::string, base::Optional<std::string>>* params) {
+  // There is nothing to parse in an empty range.
+  if (begin == end)
+    return false;
+
+  // Locate the URL, i.e. everything between '<' and the first '>'.
+  std::string::const_iterator url_begin;
+  std::string::const_iterator url_end;
+  std::string::const_iterator params_begin;
+  if (!ExtractURL(begin, end, &url_begin, &url_end, &params_begin))
+    return false;
+  url->assign(url_begin, url_end);
+
+  // Whatever follows the URL must, once trimmed, either be empty or start
+  // with the ';' that separates the URL from its parameters.
+  net::HttpUtil::TrimLWS(&params_begin, &end);
+  if (params_begin != end && *params_begin != ';')
+    return false;
+
+  // Walk over the ';'-separated parameters.
+  net::HttpUtil::NameValuePairsIterator params_iterator(
+      params_begin, end, ';',
+      net::HttpUtil::NameValuePairsIterator::Values::NOT_REQUIRED,
+      net::HttpUtil::NameValuePairsIterator::Quotes::STRICT_QUOTES);
+  while (params_iterator.GetNext()) {
+    const std::string::const_iterator name_begin =
+        params_iterator.name_begin();
+    const std::string::const_iterator name_end = params_iterator.name_end();
+    if (!net::HttpUtil::IsParmName(name_begin, name_end))
+      return false;
+
+    const std::string name =
+        base::ToLowerASCII(base::StringPiece(name_begin, name_end));
+
+    // A value is present if it was quoted (even when empty) or is non-empty.
+    const bool has_value =
+        params_iterator.value_is_quoted() ||
+        params_iterator.value_begin() != params_iterator.value_end();
+
+    // insert() leaves an existing entry untouched, so the first occurrence of
+    // a parameter wins.
+    if (has_value)
+      params->insert(std::make_pair(name, params_iterator.value()));
+    else
+      params->insert(std::make_pair(name, base::nullopt));
+  }
+  return params_iterator.valid();
+}
+
+} // namespace link_header_util