chromium/content/browser/loader/merkle_integrity_source_stream.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238

// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/loader/merkle_integrity_source_stream.h"

#include <string.h>

#include "base/base64url.h"
#include "base/big_endian.h"
#include "base/numerics/safe_conversions.h"
#include "net/base/io_buffer.h"

namespace content {

namespace {

// Limit the record size to 16KiB to prevent browser OOM. This matches the
// maximum record size in TLS and the default maximum frame size in HTTP/2.
// A stream that declares a larger record size is rejected when its leading
// record-size field is parsed.
constexpr uint64_t kMaxRecordSize = 16 * 1024;

// Prefix that the header value handed to the constructor must start with. The
// base64url-encoded proof for the first record follows immediately after it.
constexpr char kMiSha256Header[] = "mi-sha256-draft2=";
// Length of |kMiSha256Header|, not counting the terminating NUL.
constexpr size_t kMiSha256HeaderLength = sizeof(kMiSha256Header) - 1;

// Copies as many bytes from |input| as will fit in |output| and advances both
// spans past the copied bytes. Returns the number of bytes copied, which is
// zero when either span is empty (in which case neither span is changed).
size_t CopyClamped(base::span<const char>* input, base::span<char>* output) {
  const size_t size = std::min(output->size(), input->size());
  // An empty span may carry a null data() pointer, and handing a null pointer
  // to memcpy is undefined behavior even for a zero-length copy. Skipping the
  // call is equivalent: subspan(0) would leave both spans as they are.
  if (size == 0) {
    return 0;
  }
  memcpy(output->data(), input->data(), size);
  *output = output->subspan(size);
  *input = input->subspan(size);
  return size;
}

}  // namespace

// Parses |mi_header_value|, which must consist of the |kMiSha256Header| prefix
// followed by an unpadded base64url encoding of exactly SHA256_DIGEST_LENGTH
// bytes. On success the decoded bytes become |next_proof_|, the proof checked
// against the first record. On any parse error the stream is marked as failed.
MerkleIntegritySourceStream::MerkleIntegritySourceStream(
    base::StringPiece mi_header_value,
    std::unique_ptr<SourceStream> upstream)
    // TODO(ksakamoto): Use appropriate SourceType.
    : net::FilterSourceStream(SourceStream::TYPE_NONE, std::move(upstream)) {
  // TODO(ksakamoto): Support quoted parameter value.
  if (!mi_header_value.starts_with(kMiSha256Header)) {
    failed_ = true;
    return;
  }

  // Everything after the prefix is the encoded proof.
  std::string decoded_proof;
  const bool decoded = base::Base64UrlDecode(
      mi_header_value.substr(kMiSha256HeaderLength),
      base::Base64UrlDecodePolicy::DISALLOW_PADDING, &decoded_proof);
  if (!decoded || decoded_proof.size() != SHA256_DIGEST_LENGTH) {
    failed_ = true;
    return;
  }

  memcpy(next_proof_, decoded_proof.data(), SHA256_DIGEST_LENGTH);
}

// The destructor is compiler-generated; this file adds no explicit cleanup.
MerkleIntegritySourceStream::~MerkleIntegritySourceStream() = default;

// Decodes bytes from |input_buffer| into |output_buffer|.
//
// Returns the number of bytes written to |output_buffer|, or
// net::ERR_CONTENT_DECODING_FAILED if the stream is invalid. Unless the stream
// had already failed before this call, |*consumed_bytes| is set to the number
// of bytes taken from |input_buffer|. A failure is sticky: once one has been
// reported, every later call fails immediately without touching its arguments.
int MerkleIntegritySourceStream::FilterData(net::IOBuffer* output_buffer,
                                            int output_buffer_size,
                                            net::IOBuffer* input_buffer,
                                            int input_buffer_size,
                                            int* consumed_bytes,
                                            bool upstream_eof_reached) {
  if (failed_) {
    return net::ERR_CONTENT_DECODING_FAILED;
  }

  // View the raw buffers as spans; FilterDataImpl() shrinks each span from the
  // front as it consumes input and produces output.
  const size_t input_length = base::checked_cast<size_t>(input_buffer_size);
  base::span<const char> unread =
      base::make_span(input_buffer->data(), input_length);
  const size_t output_capacity = base::checked_cast<size_t>(output_buffer_size);
  base::span<char> unwritten =
      base::make_span(output_buffer->data(), output_capacity);

  const bool succeeded =
      FilterDataImpl(&unwritten, &unread, upstream_eof_reached);

  // Report consumption even when decoding failed.
  const int bytes_left_unread = base::checked_cast<int>(unread.size());
  *consumed_bytes = input_buffer_size - bytes_left_unread;

  if (succeeded) {
    return output_buffer_size - base::checked_cast<int>(unwritten.size());
  }
  failed_ = true;
  return net::ERR_CONTENT_DECODING_FAILED;
}

// Returns the fixed name identifying this filter type.
std::string MerkleIntegritySourceStream::GetTypeAsString() const {
  return std::string("MI-SHA256");
}

// Core of FilterData(): consumes bytes from |input| and writes verified record
// payloads to |output|, shrinking both spans from the front by the amount
// used. Returns false on a decoding failure. Returns true otherwise, including
// when it stops early because more input is needed or |output| is full.
bool MerkleIntegritySourceStream::FilterDataImpl(base::span<char>* output,
                                                 base::span<const char>* input,
                                                 bool upstream_eof_reached) {
  // Backing store for bytes that ConsumeBytes() had to reassemble from
  // |partial_input_|. Spans handed back by ConsumeBytes() may point into it,
  // so it must outlive every use of |bytes| and |record| below.
  std::string storage;

  // Process the record size in front, if we haven't yet. It is an 8-byte
  // big-endian integer; |record_size_| == 0 means it has not been read.
  if (record_size_ == 0) {
    base::span<const char> bytes;
    if (!ConsumeBytes(input, 8, &bytes, &storage)) {
      // Fewer than 8 bytes are available so far. That is only an error when
      // no more input is coming.
      return !upstream_eof_reached;
    }
    uint64_t record_size;
    base::ReadBigEndian(bytes.data(), &record_size);
    // Zero is rejected outright; it is also the "not yet read" sentinel.
    if (record_size == 0) {
      return false;
    }
    if (record_size > kMaxRecordSize) {
      DVLOG(1)
          << "Rejecting MI content encoding because record size is too big: "
          << record_size;
      return false;
    }
    record_size_ = base::checked_cast<size_t>(record_size);
  }

  // Clear any previous output before continuing. If it does not all fit,
  // |output| is full and no new record can be processed in this call.
  if (!CopyPartialOutput(output)) {
    DCHECK(output->empty());
    return true;
  }

  // Process records until we're done or there's no more room in |output|.
  while (!output->empty() && !final_record_done_) {
    // A non-final record is |record_size_| bytes of payload followed by the
    // SHA-256 proof for the next record.
    base::span<const char> record;
    if (!ConsumeBytes(input, record_size_ + SHA256_DIGEST_LENGTH, &record,
                      &storage)) {
      // ConsumeBytes() moved all of |input| into |partial_input_|.
      DCHECK(input->empty());
      if (!upstream_eof_reached) {
        return true;  // Wait for more data later.
      }

      // The final record is shorter and does not contain a hash. Treat all
      // remaining input as the final record.
      //
      // TODO(davidben): This matches the previous implementation in that it
      // allows empty final records, but this does not match the specification
      // and means some inputs have two valid encodings. However, the
      // specification's version cannot represent the empty string. Update this
      // when https://github.com/martinthomson/http-mice/issues/3 is resolved.
      if (partial_input_.size() > record_size_) {
        return false;
      }
      // |record| refers to |partial_input_| directly here, so |partial_input_|
      // must stay unmodified until ProcessRecord() below has returned.
      record = partial_input_;
      final_record_done_ = true;
    }
    if (!ProcessRecord(record, final_record_done_, output)) {
      return false;
    }
  }

  if (final_record_done_) {
    DCHECK(upstream_eof_reached);
    DCHECK(input->empty());
  }
  return true;
}

// Flushes as much of the not-yet-delivered tail of |partial_output_| into
// |output| as fits, advancing |output|. Returns true once nothing is left
// pending, and false if some buffered bytes still remain for a later call.
bool MerkleIntegritySourceStream::CopyPartialOutput(base::span<char>* output) {
  // Nothing is pending.
  if (partial_output_offset_ == partial_output_.size()) {
    return true;
  }

  // Bytes before |partial_output_offset_| were delivered by earlier calls.
  base::span<const char> pending =
      base::make_span(partial_output_).subspan(partial_output_offset_);
  const size_t copied = CopyClamped(&pending, output);
  partial_output_offset_ += copied;

  const bool fully_flushed = partial_output_offset_ >= partial_output_.size();
  if (fully_flushed) {
    // Reset so the buffer can be reused for the next record.
    partial_output_.clear();
    partial_output_offset_ = 0;
  }
  return fully_flushed;
}

// Tries to obtain exactly |len| contiguous bytes from the front of the stream,
// combining anything buffered in |partial_input_| with |input|.
//
// On success, returns true and points |*result| at the bytes, which live
// either in |input| itself or in |*storage|; |input| is advanced past what was
// used. On failure, returns false after moving all of |input| into
// |partial_input_|; the next call must then pass the same |len|.
bool MerkleIntegritySourceStream::ConsumeBytes(base::span<const char>* input,
                                               size_t len,
                                               base::span<const char>* result,
                                               std::string* storage) {
  // This comes from the requirement that, when ConsumeBytes returns false, the
  // next call must use the same |len|.
  DCHECK_LT(partial_input_.size(), len);

  // Fast path: nothing is buffered and |input| alone has enough bytes, so no
  // copy is needed.
  const bool nothing_buffered = partial_input_.empty();
  if (nothing_buffered && len <= input->size()) {
    *result = input->subspan(0, len);
    *input = input->subspan(len);
    return true;
  }

  // Slow path: top up |partial_input_| from |input|, taking no more than is
  // needed to reach |len| bytes.
  const size_t still_needed = len - partial_input_.size();
  const size_t taken = std::min(still_needed, input->size());
  partial_input_.append(input->data(), taken);
  *input = input->subspan(taken);

  if (partial_input_.size() >= len) {
    // Hand the assembled bytes to the caller's storage and leave
    // |partial_input_| empty for the next request.
    *storage = std::move(partial_input_);
    partial_input_.clear();
    *result = *storage;
    return true;
  }
  return false;
}

// Verifies |record| against |next_proof_| and, if it matches, emits the
// record's payload.
//
// The proof is SHA-256 over the whole record followed by one type byte (0 for
// the final record, 1 otherwise). A non-final record ends with the proof for
// the next record, which is stored in |next_proof_| and stripped from the
// payload. As much payload as fits is written to |output|; the remainder is
// kept in |partial_output_|. Returns false if the proof does not match.
bool MerkleIntegritySourceStream::ProcessRecord(base::span<const char> record,
                                                bool is_final,
                                                base::span<char>* output) {
  DCHECK(partial_output_.empty());

  // Hash the full record plus the type byte.
  const uint8_t type_byte = is_final ? 0 : 1;
  uint8_t computed[SHA256_DIGEST_LENGTH];
  SHA256_CTX hasher;
  SHA256_Init(&hasher);
  SHA256_Update(&hasher, reinterpret_cast<const uint8_t*>(record.data()),
                record.size());
  SHA256_Update(&hasher, &type_byte, 1);
  SHA256_Final(computed, &hasher);
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  // The fuzzer will have a hard time fixing up chains of hashes, so, if
  // building in fuzzer mode, everything hashes to the same garbage value.
  memset(computed, 0x42, SHA256_DIGEST_LENGTH);
#endif
  const bool proof_matches =
      memcmp(computed, next_proof_, SHA256_DIGEST_LENGTH) == 0;
  if (!proof_matches) {
    return false;
  }

  // For the final record the whole record is payload.
  base::span<const char> payload = record;
  if (!is_final) {
    // Split into the payload and the trailing proof for the next record.
    base::span<const char> trailing_hash = record.subspan(record_size_);
    payload = record.subspan(0, record_size_);

    // Remember the proof the next record must satisfy.
    CHECK_EQ(static_cast<size_t>(SHA256_DIGEST_LENGTH), trailing_hash.size());
    memcpy(next_proof_, trailing_hash.data(), SHA256_DIGEST_LENGTH);
  }

  // Deliver what fits now.
  CopyClamped(&payload, output);

  // Whatever did not fit is buffered for CopyPartialOutput() to deliver later.
  DCHECK(payload.empty() || output->empty());
  partial_output_.append(payload.data(), payload.size());
  return true;
}

}  // namespace content