// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "pdf/document_loader_impl.h" #include #include #include #include #include "base/bind.h" #include "base/callback.h" #include "base/check_op.h" #include "base/feature_list.h" #include "base/notreached.h" #include "base/numerics/safe_math.h" #include "base/strings/string_util.h" #include "pdf/pdf_features.h" #include "pdf/url_loader_wrapper.h" #include "ppapi/c/pp_errors.h" #include "ui/gfx/range/range.h" namespace chrome_pdf { namespace { // The distance from last received chunk, when we wait requesting data, using // current connection (like playing a cassette tape) and do not send new range // request (like rewind a cassette tape, and continue playing after). // Experimentally chosen value. constexpr int kChunkCloseDistance = 10; // Return true if the HTTP response of |loader| is a successful one and loading // should continue. 4xx error indicate subsequent requests will fail too. // e.g. resource has been removed from the server while loading it. 301 // indicates a redirect was returned which won't be successful because we // disable following redirects for PDF loading (we assume they are already // resolved by the browser. bool ResponseStatusSuccess(const URLLoaderWrapper* loader) { int32_t http_code = loader->GetStatusCode(); return (http_code < 400 && http_code != 301) || http_code >= 500; } bool IsValidContentType(const std::string& type) { return ( base::EndsWith(type, "/pdf", base::CompareCase::INSENSITIVE_ASCII) || base::EndsWith(type, ".pdf", base::CompareCase::INSENSITIVE_ASCII) || base::EndsWith(type, "/x-pdf", base::CompareCase::INSENSITIVE_ASCII) || base::EndsWith(type, "/*", base::CompareCase::INSENSITIVE_ASCII) || base::EndsWith(type, "/octet-stream", base::CompareCase::INSENSITIVE_ASCII) || base::EndsWith(type, "/acrobat", base::CompareCase::INSENSITIVE_ASCII) || base::EndsWith(type, "/unknown", base::CompareCase::INSENSITIVE_ASCII)); } } // namespace DocumentLoaderImpl::Chunk::Chunk() = default; DocumentLoaderImpl::Chunk::~Chunk() = default; void DocumentLoaderImpl::Chunk::Clear() { chunk_index = 0; data_size = 0; chunk_data.reset(); } DocumentLoaderImpl::DocumentLoaderImpl(Client* client) : client_(client), partial_loading_enabled_( base::FeatureList::IsEnabled(features::kPdfPartialLoading)) {} DocumentLoaderImpl::~DocumentLoaderImpl() = default; bool DocumentLoaderImpl::Init(std::unique_ptr loader, const std::string& url) { DCHECK(url_.empty()); DCHECK(!loader_); // Check that the initial response status is a valid one. if (!ResponseStatusSuccess(loader.get())) return false; std::string type = loader->GetContentType(); // This happens for PDFs not loaded from http(s) sources. if (type == "text/plain") { if (!base::StartsWith(url, "http://", base::CompareCase::INSENSITIVE_ASCII) && !base::StartsWith(url, "https://", base::CompareCase::INSENSITIVE_ASCII)) { type = "application/pdf"; } } if (!type.empty() && !IsValidContentType(type)) return false; if (base::StartsWith(loader->GetContentDisposition(), "attachment", base::CompareCase::INSENSITIVE_ASCII)) return false; url_ = url; loader_ = std::move(loader); if (!loader_->IsContentEncoded()) chunk_stream_.set_eof_pos(std::max(0, loader_->GetContentLength())); SetPartialLoadingEnabled( partial_loading_enabled_ && !base::StartsWith(url, "file://", base::CompareCase::INSENSITIVE_ASCII) && loader_->IsAcceptRangesBytes() && !loader_->IsContentEncoded() && GetDocumentSize()); ReadMore(); return true; } bool DocumentLoaderImpl::IsDocumentComplete() const { return chunk_stream_.IsComplete(); } uint32_t DocumentLoaderImpl::GetDocumentSize() const { return chunk_stream_.eof_pos(); } uint32_t DocumentLoaderImpl::BytesReceived() const { return bytes_received_; } void DocumentLoaderImpl::ClearPendingRequests() { pending_requests_.Clear(); } bool DocumentLoaderImpl::GetBlock(uint32_t position, uint32_t size, void* buf) const { base::CheckedNumeric addition_result = position; addition_result += size; if (!addition_result.IsValid()) return false; return chunk_stream_.ReadData( gfx::Range(position, addition_result.ValueOrDie()), buf); } bool DocumentLoaderImpl::IsDataAvailable(uint32_t position, uint32_t size) const { base::CheckedNumeric addition_result = position; addition_result += size; if (!addition_result.IsValid()) return false; return chunk_stream_.IsRangeAvailable( gfx::Range(position, addition_result.ValueOrDie())); } void DocumentLoaderImpl::RequestData(uint32_t position, uint32_t size) { if (size == 0 || IsDataAvailable(position, size)) return; const uint32_t document_size = GetDocumentSize(); if (document_size != 0) { // Check for integer overflow. base::CheckedNumeric addition_result = position; addition_result += size; if (!addition_result.IsValid()) return; if (addition_result.ValueOrDie() > document_size) return; } // We have some artifact request from // PDFiumEngine::OnDocumentComplete() -> FPDFAvail_IsPageAvail after // document is complete. // We need this fix in PDFIum. Adding this as a work around. // Bug: http://code.google.com/p/chromium/issues/detail?id=79996 // Test url: // http://www.icann.org/en/correspondence/holtzman-to-jeffrey-02mar11-en.pdf if (!loader_) return; RangeSet requested_chunks(chunk_stream_.GetChunksRange(position, size)); requested_chunks.Subtract(chunk_stream_.filled_chunks()); if (requested_chunks.IsEmpty()) { NOTREACHED(); return; } pending_requests_.Union(requested_chunks); } void DocumentLoaderImpl::SetPartialLoadingEnabled(bool enabled) { partial_loading_enabled_ = enabled; if (!enabled) { is_partial_loader_active_ = false; } } bool DocumentLoaderImpl::ShouldCancelLoading() const { if (!loader_) return true; if (!partial_loading_enabled_) return false; if (pending_requests_.IsEmpty()) { // Cancel loading if this is unepected data from server. return !chunk_stream_.IsValidChunkIndex(chunk_.chunk_index) || chunk_stream_.IsChunkAvailable(chunk_.chunk_index); } const gfx::Range current_range(chunk_.chunk_index, chunk_.chunk_index + kChunkCloseDistance); return !pending_requests_.Intersects(current_range); } void DocumentLoaderImpl::ContinueDownload() { if (!ShouldCancelLoading()) return ReadMore(); DCHECK(partial_loading_enabled_); DCHECK(!IsDocumentComplete()); DCHECK_GT(GetDocumentSize(), 0U); const uint32_t range_start = pending_requests_.IsEmpty() ? 0 : pending_requests_.First().start(); RangeSet candidates_for_request( gfx::Range(range_start, chunk_stream_.total_chunks_count())); candidates_for_request.Subtract(chunk_stream_.filled_chunks()); DCHECK(!candidates_for_request.IsEmpty()); gfx::Range next_request = candidates_for_request.First(); if (candidates_for_request.Size() == 1 && next_request.length() < kChunkCloseDistance) { // We have only request at the end, try to enlarge it to improve back order // reading. const int additional_chunks_count = kChunkCloseDistance - next_request.length(); int new_start = std::max( 0, static_cast(next_request.start()) - additional_chunks_count); candidates_for_request = RangeSet(gfx::Range(new_start, next_request.end())); candidates_for_request.Subtract(chunk_stream_.filled_chunks()); next_request = candidates_for_request.Last(); } loader_.reset(); chunk_.Clear(); is_partial_loader_active_ = true; const uint32_t start = next_request.start() * DataStream::kChunkSize; const uint32_t length = std::min(GetDocumentSize() - start, next_request.length() * DataStream::kChunkSize); loader_ = client_->CreateURLLoader(); loader_->OpenRange(url_, url_, start, length, base::BindOnce(&DocumentLoaderImpl::DidOpenPartial, weak_factory_.GetWeakPtr())); } void DocumentLoaderImpl::DidOpenPartial(int32_t result) { if (result != PP_OK) { return ReadComplete(); } if (!ResponseStatusSuccess(loader_.get())) return ReadComplete(); // Leave position untouched for multiparted responce for now, when we read the // data we'll get it. if (loader_->IsMultipart()) { // Needs more data to calc chunk index. return ReadMore(); } // Need to make sure that the server returned a byte-range, since it's // possible for a server to just ignore our byte-range request and just // return the entire document even if it supports byte-range requests. // i.e. sniff response to // http://www.act.org/compass/sample/pdf/geometry.pdf int start_pos = 0; if (loader_->GetByteRangeStart(&start_pos)) { if (start_pos % DataStream::kChunkSize != 0) return ReadComplete(); DCHECK(!chunk_.chunk_data); chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos); } else { SetPartialLoadingEnabled(false); } return ContinueDownload(); } void DocumentLoaderImpl::ReadMore() { loader_->ReadResponseBody( buffer_, sizeof(buffer_), base::BindOnce(&DocumentLoaderImpl::DidRead, weak_factory_.GetWeakPtr())); } void DocumentLoaderImpl::DidRead(int32_t result) { if (result < 0) { // An error occurred. // The renderer will detect that we're missing data and will display a // message. return ReadComplete(); } if (result == 0) { loader_.reset(); if (!is_partial_loader_active_) return ReadComplete(); return ContinueDownload(); } if (loader_->IsMultipart()) { int start_pos = 0; if (!loader_->GetByteRangeStart(&start_pos)) return ReadComplete(); DCHECK(!chunk_.chunk_data); chunk_.chunk_index = chunk_stream_.GetChunkIndex(start_pos); } if (!SaveBuffer(buffer_, result)) return ReadMore(); if (IsDocumentComplete()) return ReadComplete(); return ContinueDownload(); } bool DocumentLoaderImpl::SaveBuffer(char* input, uint32_t input_size) { const uint32_t document_size = GetDocumentSize(); bytes_received_ += input_size; bool chunk_saved = false; bool loading_pending_request = pending_requests_.Contains(chunk_.chunk_index); while (input_size > 0) { if (chunk_.data_size == 0) chunk_.chunk_data = std::make_unique(); const uint32_t new_chunk_data_len = std::min(DataStream::kChunkSize - chunk_.data_size, input_size); memcpy(chunk_.chunk_data->data() + chunk_.data_size, input, new_chunk_data_len); chunk_.data_size += new_chunk_data_len; if (chunk_.data_size == DataStream::kChunkSize || (document_size > 0 && document_size <= EndOfCurrentChunk())) { pending_requests_.Subtract( gfx::Range(chunk_.chunk_index, chunk_.chunk_index + 1)); SaveChunkData(); chunk_saved = true; } input += new_chunk_data_len; input_size -= new_chunk_data_len; } client_->OnNewDataReceived(); if (IsDocumentComplete()) return true; if (!chunk_saved) return false; if (loading_pending_request && !pending_requests_.Contains(chunk_.chunk_index)) { client_->OnPendingRequestComplete(); } return true; } void DocumentLoaderImpl::SaveChunkData() { chunk_stream_.SetChunkData(chunk_.chunk_index, std::move(chunk_.chunk_data)); chunk_.data_size = 0; ++chunk_.chunk_index; } uint32_t DocumentLoaderImpl::EndOfCurrentChunk() const { return chunk_.chunk_index * DataStream::kChunkSize + chunk_.data_size; } void DocumentLoaderImpl::ReadComplete() { if (GetDocumentSize() != 0) { // If there is remaining data in |chunk_|, then save whatever can be saved. // e.g. In the underrun case. if (chunk_.data_size != 0) SaveChunkData(); } else { uint32_t eof = EndOfCurrentChunk(); if (!chunk_stream_.filled_chunks().IsEmpty()) { eof = std::max( chunk_stream_.filled_chunks().Last().end() * DataStream::kChunkSize, eof); } chunk_stream_.set_eof_pos(eof); if (eof == EndOfCurrentChunk()) SaveChunkData(); } loader_.reset(); if (IsDocumentComplete()) { client_->OnDocumentComplete(); } else { client_->OnDocumentCanceled(); } } } // namespace chrome_pdf