summaryrefslogtreecommitdiff
path: root/chromium/pdf
diff options
context:
space:
mode:
author: Allan Sandfeld Jensen <allan.jensen@theqtcompany.com> 2016-08-01 12:59:39 +0200
committer: Allan Sandfeld Jensen <allan.jensen@qt.io> 2016-08-04 12:40:43 +0000
commit: 28b1110370900897ab652cb420c371fab8857ad4 (patch)
tree: 41b32127d23b0df4f2add2a27e12dc87bddb260e /chromium/pdf
parent: 399c965b6064c440ddcf4015f5f8e9d131c7a0a6 (diff)
download: qtwebengine-chromium-28b1110370900897ab652cb420c371fab8857ad4.tar.gz
BASELINE: Update Chromium to 53.0.2785.41
Also adds a few extra files for extensions. Change-Id: Iccdd55d98660903331cf8b7b29188da781830af4 Reviewed-by: Michael Brüning <michael.bruning@qt.io>
Diffstat (limited to 'chromium/pdf')
-rw-r--r-- chromium/pdf/document_loader.cc 153
-rw-r--r-- chromium/pdf/out_of_process_instance.cc 161
-rw-r--r-- chromium/pdf/out_of_process_instance.h 15
-rw-r--r-- chromium/pdf/paint_aggregator.h 2
-rw-r--r-- chromium/pdf/pdf.cc 90
-rw-r--r-- chromium/pdf/pdf_engine.h 18
-rw-r--r-- chromium/pdf/pdfium/fuzzers/BUILD.gn 53
-rw-r--r-- chromium/pdf/pdfium/fuzzers/dicts/pdf_css.dict 73
-rw-r--r-- chromium/pdf/pdfium/pdfium_engine.cc 120
-rw-r--r-- chromium/pdf/pdfium/pdfium_engine.h 26
-rw-r--r-- chromium/pdf/pdfium/pdfium_page.cc 224
-rw-r--r-- chromium/pdf/pdfium/pdfium_page.h 17
12 files changed, 566 insertions, 386 deletions
diff --git a/chromium/pdf/document_loader.cc b/chromium/pdf/document_loader.cc
index 3117d08fffc..ae608eff93e 100644
--- a/chromium/pdf/document_loader.cc
+++ b/chromium/pdf/document_loader.cc
@@ -410,93 +410,94 @@ void DocumentLoader::ReadMore() {
}
void DocumentLoader::DidRead(int32_t result) {
- if (result > 0) {
- char* start = buffer_;
- size_t length = result;
- if (is_multipart_ && result > 2) {
- for (int i = 2; i < result; ++i) {
- if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') ||
- (i >= 4 &&
- buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' &&
- buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) {
- uint32_t start_pos, end_pos;
- if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) {
- current_pos_ = start_pos;
- start += i;
- length -= i;
- if (end_pos && end_pos > start_pos)
- current_chunk_size_ = end_pos - start_pos + 1;
- }
- break;
+ if (result <= 0) {
+ // If |result| == PP_OK, the document was loaded, otherwise an error was
+ // encountered. Either way we want to stop processing the response. In the
+ // case where an error occurred, the renderer will detect that we're missing
+ // data and will display a message.
+ ReadComplete();
+ return;
+ }
+
+ char* start = buffer_;
+ size_t length = result;
+ if (is_multipart_ && result > 2) {
+ for (int i = 2; i < result; ++i) {
+ if ((buffer_[i - 1] == '\n' && buffer_[i - 2] == '\n') ||
+ (i >= 4 && buffer_[i - 1] == '\n' && buffer_[i - 2] == '\r' &&
+ buffer_[i - 3] == '\n' && buffer_[i - 4] == '\r')) {
+ uint32_t start_pos, end_pos;
+ if (GetByteRange(std::string(buffer_, i), &start_pos, &end_pos)) {
+ current_pos_ = start_pos;
+ start += i;
+ length -= i;
+ if (end_pos && end_pos > start_pos)
+ current_chunk_size_ = end_pos - start_pos + 1;
}
+ break;
}
-
- // Reset this flag so we don't look inside the buffer in future calls of
- // DidRead for this response. Note that this code DOES NOT handle multi-
- // part responses with more than one part (we don't issue them at the
- // moment, so they shouldn't arrive).
- is_multipart_ = false;
}
- if (current_chunk_size_ &&
- current_chunk_read_ + length > current_chunk_size_)
- length = current_chunk_size_ - current_chunk_read_;
-
- if (length) {
- if (document_size_ > 0) {
- chunk_stream_.WriteData(current_pos_, start, length);
- } else {
- // If we did not get content-length in the response, we can't
- // preallocate buffer for the entire document. Resizing array causing
- // memory fragmentation issues on the large files and OOM exceptions.
- // To fix this, we collect all chunks of the file to the list and
- // concatenate them together after request is complete.
- std::vector<unsigned char> buf(length);
- memcpy(buf.data(), start, length);
- chunk_buffer_.push_back(std::move(buf));
- }
- current_pos_ += length;
- current_chunk_read_ += length;
- client_->OnNewDataAvailable();
- }
+ // Reset this flag so we don't look inside the buffer in future calls of
+ // DidRead for this response. Note that this code DOES NOT handle multi-
+ // part responses with more than one part (we don't issue them at the
+ // moment, so they shouldn't arrive).
+ is_multipart_ = false;
+ }
- // Only call the renderer if we allow partial loading.
- if (!partial_document_) {
- ReadMore();
- return;
+ if (current_chunk_size_ && current_chunk_read_ + length > current_chunk_size_)
+ length = current_chunk_size_ - current_chunk_read_;
+
+ if (length) {
+ if (document_size_ > 0) {
+ chunk_stream_.WriteData(current_pos_, start, length);
+ } else {
+ // If we did not get content-length in the response, we can't
+ // preallocate buffer for the entire document. Resizing array causing
+ // memory fragmentation issues on the large files and OOM exceptions.
+ // To fix this, we collect all chunks of the file to the list and
+ // concatenate them together after request is complete.
+ std::vector<unsigned char> buf(length);
+ memcpy(buf.data(), start, length);
+ chunk_buffer_.push_back(std::move(buf));
}
+ current_pos_ += length;
+ current_chunk_read_ += length;
+ client_->OnNewDataAvailable();
+ }
- UpdateRendering();
- RemoveCompletedRanges();
+ // Only call the renderer if we allow partial loading.
+ if (!partial_document_) {
+ ReadMore();
+ return;
+ }
- if (!pending_requests_.empty()) {
- // If there are pending requests and the current content we're downloading
- // doesn't satisfy any of these requests, cancel the current request to
- // fullfill those more important requests.
- bool satisfying_pending_request =
- SatisfyingRequest(current_request_offset_, current_request_size_);
- for (const auto& pending_request : pending_requests_) {
- if (SatisfyingRequest(pending_request.first, pending_request.second)) {
- satisfying_pending_request = true;
- break;
- }
- }
- // Cancel the request as it's not satisfying any request from the
- // renderer, unless the current request is finished in which case we let
- // it finish cleanly.
- if (!satisfying_pending_request &&
- current_pos_ < current_request_offset_ +
- current_request_extended_size_) {
- loader_.Close();
+ UpdateRendering();
+ RemoveCompletedRanges();
+
+ if (!pending_requests_.empty()) {
+ // If there are pending requests and the current content we're downloading
+ // doesn't satisfy any of these requests, cancel the current request to
+ // fullfill those more important requests.
+ bool satisfying_pending_request =
+ SatisfyingRequest(current_request_offset_, current_request_size_);
+ for (const auto& pending_request : pending_requests_) {
+ if (SatisfyingRequest(pending_request.first, pending_request.second)) {
+ satisfying_pending_request = true;
+ break;
}
}
-
- ReadMore();
- } else if (result == PP_OK || result == PP_ERROR_ABORTED) {
- ReadComplete();
- } else {
- NOTREACHED();
+ // Cancel the request as it's not satisfying any request from the
+ // renderer, unless the current request is finished in which case we let
+ // it finish cleanly.
+ if (!satisfying_pending_request &&
+ current_pos_ <
+ current_request_offset_ + current_request_extended_size_) {
+ loader_.Close();
+ }
}
+
+ ReadMore();
}
bool DocumentLoader::SatisfyingRequest(size_t offset, size_t size) const {
diff --git a/chromium/pdf/out_of_process_instance.cc b/chromium/pdf/out_of_process_instance.cc
index 869fdf24958..ae680bde0ef 100644
--- a/chromium/pdf/out_of_process_instance.cc
+++ b/chromium/pdf/out_of_process_instance.cc
@@ -13,8 +13,6 @@
#include <math.h>
#include <list>
-#include "base/json/json_reader.h"
-#include "base/json/json_writer.h"
#include "base/logging.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_split.h"
@@ -51,11 +49,6 @@ const char kChromePrint[] = "chrome://print/";
const char kChromeExtension[] =
"chrome-extension://mhjfbmdgcfjbbpaeojofohoefgiehjai";
-// Dictionary Value key names for the document accessibility info
-const char kAccessibleNumberOfPages[] = "numberOfPages";
-const char kAccessibleLoaded[] = "loaded";
-const char kAccessibleCopyable[] = "copyable";
-
// Constants used in handling postMessage() messages.
const char kType[] = "type";
// Viewport message arguments. (Page -> Plugin).
@@ -106,12 +99,6 @@ const char kJSPreviewPageIndex[] = "index";
const char kJSSetScrollPositionType[] = "setScrollPosition";
const char kJSPositionX[] = "x";
const char kJSPositionY[] = "y";
-// Request accessibility JSON data (Page -> Plugin)
-const char kJSGetAccessibilityJSONType[] = "getAccessibilityJSON";
-const char kJSAccessibilityPageNumber[] = "page";
-// Reply with accessibility JSON data (Plugin -> Page)
-const char kJSGetAccessibilityJSONReplyType[] = "getAccessibilityJSONReply";
-const char kJSAccessibilityJSON[] = "json";
// Cancel the stream URL request (Plugin -> Page)
const char kJSCancelStreamUrlType[] = "cancelStreamUrl";
// Navigate to the given URL (Plugin -> Page)
@@ -153,6 +140,10 @@ const char kJSFieldFocus[] = "focused";
const int kFindResultCooldownMs = 100;
+// A delay to wait between each accessibility page to keep the system
+// responsive.
+const int kAccessibilityPageDelayMs = 100;
+
const double kMinZoom = 0.01;
namespace {
@@ -206,10 +197,20 @@ PP_Bool GetPrintPresetOptionsFromDocument(
return PP_TRUE;
}
+void EnableAccessibility(PP_Instance instance) {
+ void* object = pp::Instance::GetPerInstanceObject(instance, kPPPPdfInterface);
+ if (object) {
+ OutOfProcessInstance* obj_instance =
+ static_cast<OutOfProcessInstance*>(object);
+ return obj_instance->EnableAccessibility();
+ }
+}
+
const PPP_Pdf ppp_private = {
&GetLinkAtPosition,
&Transform,
- &GetPrintPresetOptionsFromDocument
+ &GetPrintPresetOptionsFromDocument,
+ &EnableAccessibility,
};
int ExtractPrintPreviewPageIndex(const std::string& src_url) {
@@ -288,7 +289,8 @@ OutOfProcessInstance::OutOfProcessInstance(PP_Instance instance)
did_call_start_loading_(false),
stop_scrolling_(false),
background_color_(0),
- top_toolbar_height_(0) {
+ top_toolbar_height_(0),
+ accessibility_state_(ACCESSIBILITY_STATE_OFF) {
loader_factory_.Initialize(this);
timer_factory_.Initialize(this);
form_factory_.Initialize(this);
@@ -446,27 +448,6 @@ void OutOfProcessInstance::HandleMessage(const pp::Var& message) {
dict.Get(pp::Var(kJSPreviewPageIndex)).is_int()) {
ProcessPreviewPageInfo(dict.Get(pp::Var(kJSPreviewPageUrl)).AsString(),
dict.Get(pp::Var(kJSPreviewPageIndex)).AsInt());
- } else if (type == kJSGetAccessibilityJSONType) {
- pp::VarDictionary reply;
- reply.Set(pp::Var(kType), pp::Var(kJSGetAccessibilityJSONReplyType));
- if (dict.Get(pp::Var(kJSAccessibilityPageNumber)).is_int()) {
- int page = dict.Get(pp::Var(kJSAccessibilityPageNumber)).AsInt();
- reply.Set(pp::Var(kJSAccessibilityJSON),
- pp::Var(engine_->GetPageAsJSON(page)));
- } else {
- base::DictionaryValue node;
- node.SetInteger(kAccessibleNumberOfPages, engine_->GetNumberOfPages());
- node.SetBoolean(kAccessibleLoaded,
- document_load_state_ != LOAD_STATE_LOADING);
- bool has_permissions =
- engine_->HasPermission(PDFEngine::PERMISSION_COPY) ||
- engine_->HasPermission(PDFEngine::PERMISSION_COPY_ACCESSIBLE);
- node.SetBoolean(kAccessibleCopyable, has_permissions);
- std::string json;
- base::JSONWriter::Write(node, &json);
- reply.Set(pp::Var(kJSAccessibilityJSON), pp::Var(json));
- }
- PostMessage(reply);
} else if (type == kJSStopScrollingType) {
stop_scrolling_ = true;
} else if (type == kJSGetSelectedTextType) {
@@ -614,6 +595,109 @@ void OutOfProcessInstance::GetPrintPresetOptionsFromDocument(
options->uniform_page_size = uniform_page_size;
}
+void OutOfProcessInstance::EnableAccessibility() {
+ if (accessibility_state_ == ACCESSIBILITY_STATE_LOADED)
+ return;
+
+ if (accessibility_state_ == ACCESSIBILITY_STATE_OFF)
+ accessibility_state_ = ACCESSIBILITY_STATE_PENDING;
+
+ if (document_load_state_ == LOAD_STATE_COMPLETE)
+ LoadAccessibility();
+}
+
+void OutOfProcessInstance::LoadAccessibility() {
+ accessibility_state_ = ACCESSIBILITY_STATE_LOADED;
+ PP_PrivateAccessibilityDocInfo doc_info;
+ doc_info.page_count = engine_->GetNumberOfPages();
+ doc_info.text_accessible = PP_FromBool(
+ engine_->HasPermission(PDFEngine::PERMISSION_COPY_ACCESSIBLE));
+ doc_info.text_copyable = PP_FromBool(
+ engine_->HasPermission(PDFEngine::PERMISSION_COPY));
+
+ pp::PDF::SetAccessibilityDocInfo(GetPluginInstance(), &doc_info);
+
+ // If the document contents isn't accessible, don't send anything more.
+ if (!(engine_->HasPermission(PDFEngine::PERMISSION_COPY) ||
+ engine_->HasPermission(PDFEngine::PERMISSION_COPY_ACCESSIBLE))) {
+ return;
+ }
+
+ PP_PrivateAccessibilityViewportInfo viewport_info;
+ viewport_info.scroll.x = 0;
+ viewport_info.scroll.y = -top_toolbar_height_ * device_scale_;
+ viewport_info.offset = available_area_.point();
+ viewport_info.zoom = zoom_ * device_scale_;
+ pp::PDF::SetAccessibilityViewportInfo(GetPluginInstance(), &viewport_info);
+
+ // Schedule loading the first page.
+ pp::CompletionCallback callback = timer_factory_.NewCallback(
+ &OutOfProcessInstance::SendNextAccessibilityPage);
+ pp::Module::Get()->core()->CallOnMainThread(kAccessibilityPageDelayMs,
+ callback, 0);
+}
+
+void OutOfProcessInstance::SendNextAccessibilityPage(int32_t page_index) {
+ int page_count = engine_->GetNumberOfPages();
+ if (page_index < 0 || page_index >= page_count)
+ return;
+
+ int char_count = engine_->GetCharCount(page_index);
+ PP_PrivateAccessibilityPageInfo page_info;
+ page_info.page_index = page_index;
+ page_info.bounds = engine_->GetPageBoundsRect(page_index);
+ page_info.char_count = char_count;
+
+ std::vector<PP_PrivateAccessibilityCharInfo> chars(page_info.char_count);
+ for (uint32_t i = 0; i < page_info.char_count; ++i) {
+ chars[i].unicode_character = engine_->GetCharUnicode(page_index, i);
+ }
+
+ std::vector<PP_PrivateAccessibilityTextRunInfo> text_runs;
+ int char_index = 0;
+ while (char_index < char_count) {
+ PP_PrivateAccessibilityTextRunInfo text_run_info;
+ pp::FloatRect bounds;
+ engine_->GetTextRunInfo(page_index, char_index, &text_run_info.len,
+ &text_run_info.font_size, &bounds);
+ DCHECK_LE(char_index + text_run_info.len,
+ static_cast<uint32_t>(char_count));
+ text_run_info.direction = PP_PRIVATEDIRECTION_LTR;
+ text_run_info.bounds = bounds;
+ text_runs.push_back(text_run_info);
+
+ // We need to provide enough information to draw a bounding box
+ // around any arbitrary text range, but the bounding boxes of characters
+ // we get from PDFium don't necessarily "line up". Walk through the
+ // characters in each text run and let the width of each character be
+ // the difference between the x coordinate of one character and the
+ // x coordinate of the next. The rest of the bounds of each character
+ // can be computed from the bounds of the text run.
+ pp::FloatRect char_bounds = engine_->GetCharBounds(page_index, char_index);
+ for (uint32_t i = 0; i < text_run_info.len - 1; i++) {
+ DCHECK_LT(char_index + i + 1,
+ static_cast<uint32_t>(char_count));
+ pp::FloatRect next_char_bounds = engine_->GetCharBounds(
+ page_index, char_index + i + 1);
+ chars[char_index + i].char_width = next_char_bounds.x() - char_bounds.x();
+ char_bounds = next_char_bounds;
+ }
+ chars[char_index + text_run_info.len - 1].char_width = char_bounds.width();
+
+ char_index += text_run_info.len;
+ }
+
+ page_info.text_run_count = text_runs.size();
+ pp::PDF::SetAccessibilityPageInfo(GetPluginInstance(), &page_info,
+ text_runs.data(), chars.data());
+
+ // Schedule loading the next page.
+ pp::CompletionCallback callback = timer_factory_.NewCallback(
+ &OutOfProcessInstance::SendNextAccessibilityPage);
+ pp::Module::Get()->core()->CallOnMainThread(kAccessibilityPageDelayMs,
+ callback, page_index + 1);
+}
+
pp::Var OutOfProcessInstance::GetLinkAtPosition(
const pp::Point& point) {
pp::Point offset_point(point);
@@ -1164,6 +1248,9 @@ void OutOfProcessInstance::DocumentLoadComplete(int page_count) {
pp::PDF::SetContentRestriction(this, content_restrictions);
uma_.HistogramCustomCounts("PDF.PageCount", page_count, 1, 1000000, 50);
+
+ if (accessibility_state_ == ACCESSIBILITY_STATE_PENDING)
+ LoadAccessibility();
}
void OutOfProcessInstance::RotateClockwise() {
@@ -1324,7 +1411,7 @@ void OutOfProcessInstance::OnGeometryChanged(double old_zoom,
engine_->PageOffsetUpdated(available_area_.point());
engine_->PluginSizeUpdated(available_area_.size());
- if (!document_size_.GetArea())
+ if (document_size_.IsEmpty())
return;
paint_manager_.InvalidateRect(pp::Rect(pp::Point(), plugin_size_));
}
diff --git a/chromium/pdf/out_of_process_instance.h b/chromium/pdf/out_of_process_instance.h
index 0842d33bde1..fb08986078a 100644
--- a/chromium/pdf/out_of_process_instance.h
+++ b/chromium/pdf/out_of_process_instance.h
@@ -77,6 +77,13 @@ class OutOfProcessInstance : public pp::Instance,
// pp::Private implementation.
pp::Var GetLinkAtPosition(const pp::Point& point);
void GetPrintPresetOptionsFromDocument(PP_PdfPrintPresetOptions_Dev* options);
+ void EnableAccessibility();
+
+ // Start loading accessibility information.
+ void LoadAccessibility();
+
+ // Send accessibility information about the given page index.
+ void SendNextAccessibilityPage(int32_t page_index);
void FlushCallback(int32_t result);
void DidOpen(int32_t result);
@@ -342,6 +349,14 @@ class OutOfProcessInstance : public pp::Instance,
// toolbar.
int top_toolbar_height_;
+ // The current state of accessibility: either off, enabled but waiting
+ // for the document to load, or fully loaded.
+ enum AccessibilityState {
+ ACCESSIBILITY_STATE_OFF,
+ ACCESSIBILITY_STATE_PENDING, // Enabled but waiting for doc to load.
+ ACCESSIBILITY_STATE_LOADED
+ } accessibility_state_;
+
DISALLOW_COPY_AND_ASSIGN(OutOfProcessInstance);
};
diff --git a/chromium/pdf/paint_aggregator.h b/chromium/pdf/paint_aggregator.h
index 96f61e08787..74737ab8fb6 100644
--- a/chromium/pdf/paint_aggregator.h
+++ b/chromium/pdf/paint_aggregator.h
@@ -90,7 +90,7 @@ class PaintAggregator {
// InvalidateRect. We need to know this distinction for some operations.
//
// - The paint bounds union is computed on the fly so we don't have to keep
- // a rectangle up-to-date as we do different operations.
+ // a rectangle up to date as we do different operations.
class InternalPaintUpdate {
public:
InternalPaintUpdate();
diff --git a/chromium/pdf/pdf.cc b/chromium/pdf/pdf.cc
index fa43eaeb3c7..bad2ac4e2a8 100644
--- a/chromium/pdf/pdf.cc
+++ b/chromium/pdf/pdf.cc
@@ -31,7 +31,7 @@ PDFModule::PDFModule() {
PDFModule::~PDFModule() {
if (g_sdk_initialized_via_pepper) {
- chrome_pdf::ShutdownSDK();
+ ShutdownSDK();
g_sdk_initialized_via_pepper = false;
}
}
@@ -51,8 +51,8 @@ pp::Instance* PDFModule::CreateInstance(PP_Instance instance) {
v8::V8::SetNativesDataBlob(&natives);
v8::V8::SetSnapshotDataBlob(&snapshot);
}
- if (!chrome_pdf::InitializeSDK())
- return NULL;
+ if (!InitializeSDK())
+ return nullptr;
g_sdk_initialized_via_pepper = true;
}
@@ -63,78 +63,73 @@ pp::Instance* PDFModule::CreateInstance(PP_Instance instance) {
// Implementation of Global PPP functions ---------------------------------
int32_t PPP_InitializeModule(PP_Module module_id,
PPB_GetInterface get_browser_interface) {
- PDFModule* module = new PDFModule();
- if (!module->InternalInit(module_id, get_browser_interface)) {
- delete module;
+ std::unique_ptr<PDFModule> module(new PDFModule);
+ if (!module->InternalInit(module_id, get_browser_interface))
return PP_ERROR_FAILED;
- }
- pp::InternalSetModuleSingleton(module);
+ pp::InternalSetModuleSingleton(module.release());
return PP_OK;
}
void PPP_ShutdownModule() {
delete pp::Module::Get();
- pp::InternalSetModuleSingleton(NULL);
+ pp::InternalSetModuleSingleton(nullptr);
}
const void* PPP_GetInterface(const char* interface_name) {
- if (!pp::Module::Get())
- return NULL;
- return pp::Module::Get()->GetPluginInterface(interface_name);
+ auto* module = pp::Module::Get();
+ return module ? module->GetPluginInterface(interface_name) : nullptr;
}
#if defined(OS_WIN)
bool RenderPDFPageToDC(const void* pdf_buffer,
- int buffer_size,
- int page_number,
- HDC dc,
- int dpi,
- int bounds_origin_x,
- int bounds_origin_y,
- int bounds_width,
- int bounds_height,
- bool fit_to_bounds,
- bool stretch_to_bounds,
- bool keep_aspect_ratio,
- bool center_in_bounds,
- bool autorotate) {
+ int buffer_size,
+ int page_number,
+ HDC dc,
+ int dpi,
+ int bounds_origin_x,
+ int bounds_origin_y,
+ int bounds_width,
+ int bounds_height,
+ bool fit_to_bounds,
+ bool stretch_to_bounds,
+ bool keep_aspect_ratio,
+ bool center_in_bounds,
+ bool autorotate) {
if (!g_sdk_initialized_via_pepper) {
- if (!chrome_pdf::InitializeSDK()) {
+ if (!InitializeSDK()) {
return false;
}
}
- chrome_pdf::PDFEngineExports* engine_exports =
- chrome_pdf::PDFEngineExports::Get();
- chrome_pdf::PDFEngineExports::RenderingSettings settings(
- dpi, dpi, pp::Rect(bounds_origin_x, bounds_origin_y, bounds_width,
- bounds_height),
+ PDFEngineExports* engine_exports = PDFEngineExports::Get();
+ PDFEngineExports::RenderingSettings settings(
+ dpi, dpi,
+ pp::Rect(bounds_origin_x, bounds_origin_y, bounds_width, bounds_height),
fit_to_bounds, stretch_to_bounds, keep_aspect_ratio, center_in_bounds,
autorotate);
bool ret = engine_exports->RenderPDFPageToDC(pdf_buffer, buffer_size,
page_number, settings, dc);
- if (!g_sdk_initialized_via_pepper) {
- chrome_pdf::ShutdownSDK();
- }
+ if (!g_sdk_initialized_via_pepper)
+ ShutdownSDK();
+
return ret;
}
-#endif // OS_WIN
+#endif // defined(OS_WIN)
bool GetPDFDocInfo(const void* pdf_buffer,
int buffer_size, int* page_count,
double* max_page_width) {
if (!g_sdk_initialized_via_pepper) {
- if (!chrome_pdf::InitializeSDK())
+ if (!InitializeSDK())
return false;
}
- chrome_pdf::PDFEngineExports* engine_exports =
- chrome_pdf::PDFEngineExports::Get();
+ PDFEngineExports* engine_exports = PDFEngineExports::Get();
bool ret = engine_exports->GetPDFDocInfo(
pdf_buffer, buffer_size, page_count, max_page_width);
- if (!g_sdk_initialized_via_pepper) {
- chrome_pdf::ShutdownSDK();
- }
+ if (!g_sdk_initialized_via_pepper)
+ ShutdownSDK();
+
return ret;
}
@@ -163,19 +158,18 @@ bool RenderPDFPageToBitmap(const void* pdf_buffer,
int dpi,
bool autorotate) {
if (!g_sdk_initialized_via_pepper) {
- if (!chrome_pdf::InitializeSDK())
+ if (!InitializeSDK())
return false;
}
- chrome_pdf::PDFEngineExports* engine_exports =
- chrome_pdf::PDFEngineExports::Get();
- chrome_pdf::PDFEngineExports::RenderingSettings settings(
+ PDFEngineExports* engine_exports = PDFEngineExports::Get();
+ PDFEngineExports::RenderingSettings settings(
dpi, dpi, pp::Rect(bitmap_width, bitmap_height), true, false, true, true,
autorotate);
bool ret = engine_exports->RenderPDFPageToBitmap(
pdf_buffer, pdf_buffer_size, page_number, settings, bitmap_buffer);
- if (!g_sdk_initialized_via_pepper) {
- chrome_pdf::ShutdownSDK();
- }
+ if (!g_sdk_initialized_via_pepper)
+ ShutdownSDK();
+
return ret;
}
diff --git a/chromium/pdf/pdf_engine.h b/chromium/pdf/pdf_engine.h
index c5d33c87066..5bba8b9f860 100644
--- a/chromium/pdf/pdf_engine.h
+++ b/chromium/pdf/pdf_engine.h
@@ -233,6 +233,8 @@ class PDFEngine {
virtual int GetMostVisiblePage() = 0;
// Gets the rectangle of the page including shadow.
virtual pp::Rect GetPageRect(int index) = 0;
+ // Gets the rectangle of the page not including the shadow.
+ virtual pp::Rect GetPageBoundsRect(int index) = 0;
// Gets the rectangle of the page excluding any additional areas.
virtual pp::Rect GetPageContentsRect(int index) = 0;
// Returns a page's rect in screen coordinates, as well as its surrounding
@@ -245,8 +247,20 @@ class PDFEngine {
virtual void SetGrayscale(bool grayscale) = 0;
// Callback for timer that's set with ScheduleCallback().
virtual void OnCallback(int id) = 0;
- // Gets the JSON representation of the PDF file
- virtual std::string GetPageAsJSON(int index) = 0;
+ // Get the number of characters on a given page.
+ virtual int GetCharCount(int page_index) = 0;
+ // Get the bounds in page pixels of a character on a given page.
+ virtual pp::FloatRect GetCharBounds(int page_index, int char_index) = 0;
+ // Get a given unicode character on a given page.
+ virtual uint32_t GetCharUnicode(int page_index, int char_index) = 0;
+ // Given a start char index, find the longest continuous run of text that's
+ // in a single direction and with the same style and font size. Return the
+ // length of that sequence and its font size and bounding box.
+ virtual void GetTextRunInfo(int page_index,
+ int start_char_index,
+ uint32_t* out_len,
+ double* out_font_size,
+ pp::FloatRect* out_bounds) = 0;
// Gets the PDF document's print scaling preference. True if the document can
// be scaled to fit.
virtual bool GetPrintScaling() = 0;
diff --git a/chromium/pdf/pdfium/fuzzers/BUILD.gn b/chromium/pdf/pdfium/fuzzers/BUILD.gn
index fce0b8c4eac..2df2e9afded 100644
--- a/chromium/pdf/pdfium/fuzzers/BUILD.gn
+++ b/chromium/pdf/pdfium/fuzzers/BUILD.gn
@@ -22,7 +22,7 @@ fuzzer_test("pdfium_fuzzer") {
"//v8:v8_libplatform",
]
additional_configs = [
- "//third_party/pdfium:pdfium_config",
+ "//third_party/pdfium:pdfium_core_config",
"//v8:external_startup_data",
]
dict = "dicts/pdf.dict"
@@ -36,6 +36,49 @@ fuzzer_test("pdf_jpx_fuzzer") {
}
if (pdf_enable_xfa) {
+ fuzzer_test("pdf_codec_bmp_fuzzer") {
+ sources = []
+ deps = [
+ "//third_party/pdfium/testing/libfuzzer:pdf_codec_bmp_fuzzer",
+ ]
+ }
+
+ fuzzer_test("pdf_codec_gif_fuzzer") {
+ sources = []
+ deps = [
+ "//third_party/pdfium/testing/libfuzzer:pdf_codec_gif_fuzzer",
+ ]
+ }
+
+ fuzzer_test("pdf_codec_jpeg_fuzzer") {
+ sources = []
+ deps = [
+ "//third_party/pdfium/testing/libfuzzer:pdf_codec_jpeg_fuzzer",
+ ]
+ }
+
+ fuzzer_test("pdf_codec_png_fuzzer") {
+ sources = []
+ deps = [
+ "//third_party/pdfium/testing/libfuzzer:pdf_codec_png_fuzzer",
+ ]
+ }
+
+ fuzzer_test("pdf_codec_tiff_fuzzer") {
+ sources = []
+ deps = [
+ "//third_party/pdfium/testing/libfuzzer:pdf_codec_tiff_fuzzer",
+ ]
+ }
+
+ fuzzer_test("pdf_css_fuzzer") {
+ sources = []
+ deps = [
+ "//third_party/pdfium/testing/libfuzzer:pdf_css_fuzzer",
+ ]
+ dict = "dicts/pdf_css.dict"
+ }
+
fuzzer_test("pdf_fm2js_fuzzer") {
sources = []
deps = [
@@ -51,4 +94,12 @@ if (pdf_enable_xfa) {
]
dict = "dicts/pdf_xml.dict"
}
+
+ fuzzer_test("pdf_cfx_saxreader_fuzzer") {
+ sources = []
+ deps = [
+ "//third_party/pdfium/testing/libfuzzer:pdf_cfx_saxreader_fuzzer",
+ ]
+ dict = "dicts/pdf_xml.dict"
+ }
}
diff --git a/chromium/pdf/pdfium/fuzzers/dicts/pdf_css.dict b/chromium/pdf/pdfium/fuzzers/dicts/pdf_css.dict
new file mode 100644
index 00000000000..b59b3444748
--- /dev/null
+++ b/chromium/pdf/pdfium/fuzzers/dicts/pdf_css.dict
@@ -0,0 +1,73 @@
+kw0=";"
+kw1="{"
+kw2="}"
+kw3=":"
+kw4="/"
+kw5=","
+kw6="+"
+kw7=">"
+kw8="-"
+kw9="]"
+kw10="["
+kw11="."
+kw12="="
+kw13="*"
+kw14=")"
+kw15="<!--"
+kw16="-->"
+kw17="~="
+kw18="|="
+kw19="#"
+kw20="@import"
+kw21="@page"
+kw22="@media"
+kw23="@"
+kw24="@charset"
+kw25="!important"
+kw26="em"
+kw27="ex"
+kw28="px"
+kw29="cm"
+kw30="mm"
+kw31="in"
+kw32="pt"
+kw33="pc"
+kw34="deg"
+kw35="rad"
+kw36="grad"
+kw37="ms"
+kw38="s"
+kw39="hz"
+kw40="khz"
+kw41="ident"
+kw42="url("
+kw43="/*"
+kw44="*/"
+kw45="color"
+kw46="font"
+kw47="font-family"
+kw48="font-size"
+kw49="font-stretch"
+kw50="font-style"
+kw51="font-weight"
+kw52="margin"
+kw53="margin-bottom"
+kw54="margin-top"
+kw55="margin-left"
+kw56="margin-right"
+kw57="letter-spacing"
+kw58="line-height"
+kw59="orphans"
+kw60="page-break-after"
+kw61="page-break-before"
+kw62="page-break-inside"
+kw63="tab-interval"
+kw64="tab-stop"
+kw65="text-decoration"
+kw66="text-indent"
+kw67="vertical-align"
+kw68="widows"
+kw69="kerning-mode"
+kw70="xfa-font-horizontal-scale"
+kw71="xfa-font-vertical-scale"
+kw72="xfa-tab-stops"
diff --git a/chromium/pdf/pdfium/pdfium_engine.cc b/chromium/pdf/pdfium/pdfium_engine.cc
index eedf9f706b1..05f73098ec7 100644
--- a/chromium/pdf/pdfium/pdfium_engine.cc
+++ b/chromium/pdf/pdfium/pdfium_engine.cc
@@ -13,7 +13,6 @@
#include "base/i18n/icu_encoding_detection.h"
#include "base/i18n/icu_string_conversions.h"
-#include "base/json/json_writer.h"
#include "base/lazy_instance.h"
#include "base/logging.h"
#include "base/macros.h"
@@ -23,7 +22,6 @@
#include "base/strings/string_piece.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
-#include "base/values.h"
#include "gin/array_buffer.h"
#include "gin/public/gin_embedders.h"
#include "gin/public/isolate_holder.h"
@@ -67,46 +65,48 @@ namespace chrome_pdf {
namespace {
-#define kPageShadowTop 3
-#define kPageShadowBottom 7
-#define kPageShadowLeft 5
-#define kPageShadowRight 5
+const int32_t kPageShadowTop = 3;
+const int32_t kPageShadowBottom = 7;
+const int32_t kPageShadowLeft = 5;
+const int32_t kPageShadowRight = 5;
-#define kPageSeparatorThickness 4
-#define kHighlightColorR 153
-#define kHighlightColorG 193
-#define kHighlightColorB 218
+const int32_t kPageSeparatorThickness = 4;
+const int32_t kHighlightColorR = 153;
+const int32_t kHighlightColorG = 193;
+const int32_t kHighlightColorB = 218;
const uint32_t kPendingPageColor = 0xFFEEEEEE;
-#define kFormHighlightColor 0xFFE4DD
-#define kFormHighlightAlpha 100
+const uint32_t kFormHighlightColor = 0xFFE4DD;
+const int32_t kFormHighlightAlpha = 100;
-#define kMaxPasswordTries 3
+const int32_t kMaxPasswordTries = 3;
// See Table 3.20 in
// http://www.adobe.com/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
-#define kPDFPermissionPrintLowQualityMask 1 << 2
-#define kPDFPermissionPrintHighQualityMask 1 << 11
-#define kPDFPermissionCopyMask 1 << 4
-#define kPDFPermissionCopyAccessibleMask 1 << 9
+const uint32_t kPDFPermissionPrintLowQualityMask = 1 << 2;
+const uint32_t kPDFPermissionPrintHighQualityMask = 1 << 11;
+const uint32_t kPDFPermissionCopyMask = 1 << 4;
+const uint32_t kPDFPermissionCopyAccessibleMask = 1 << 9;
-#define kLoadingTextVerticalOffset 50
+const int32_t kLoadingTextVerticalOffset = 50;
// The maximum amount of time we'll spend doing a paint before we give back
// control of the thread.
-#define kMaxProgressivePaintTimeMs 50
+const int32_t kMaxProgressivePaintTimeMs = 300;
// The maximum amount of time we'll spend doing the first paint. This is less
-// than the above to keep things smooth if the user is scrolling quickly. We
-// try painting a little because with accelerated compositing, we get flushes
-// only every 16 ms. If we were to wait until the next flush to paint the rest
-// of the pdf, we would never get to draw the pdf and would only draw the
-// scrollbars. This value is picked to give enough time for gpu related code to
-// do its thing and still fit within the timelimit for 60Hz. For the
-// non-composited case, this doesn't make things worse since we're still
-// painting the scrollbars > 60 Hz.
-#define kMaxInitialProgressivePaintTimeMs 10
+// than the above to keep things smooth if the user is scrolling quickly. This
+// is set to 250 ms to give enough time for most PDFs to render, while avoiding
+// adding too much latency to the display of the final image when the user
+// stops scrolling.
+// Setting a higher value has minimal benefit (scrolling at less than 4 fps will
+// never be a great experience) and there is some cost, since when the user
+// stops scrolling the in-progress painting has to complete or timeout before
+// the final painting can start.
+// The scrollbar will always be responsive since it is managed by a separate
+// process.
+const int32_t kMaxInitialProgressivePaintTimeMs = 250;
std::vector<uint32_t> GetPageNumbersFromPrintPageNumberRange(
const PP_PrintPageNumberRange_Dev* page_ranges,
@@ -2271,6 +2271,10 @@ pp::Rect PDFiumEngine::GetPageRect(int index) {
return rc;
}
+pp::Rect PDFiumEngine::GetPageBoundsRect(int index) {
+ return pages_[index]->rect();
+}
+
pp::Rect PDFiumEngine::GetPageContentsRect(int index) {
return GetScreenRect(pages_[index]->rect());
}
@@ -2288,20 +2292,29 @@ void PDFiumEngine::OnCallback(int id) {
client_->ScheduleCallback(id, timers_[id].first);
}
-std::string PDFiumEngine::GetPageAsJSON(int index) {
- if (!(HasPermission(PERMISSION_COPY) ||
- HasPermission(PERMISSION_COPY_ACCESSIBLE))) {
- return "{}";
- }
+int PDFiumEngine::GetCharCount(int page_index) {
+ DCHECK(page_index >= 0 && page_index < static_cast<int>(pages_.size()));
+ return pages_[page_index]->GetCharCount();
+}
- if (index < 0 || static_cast<size_t>(index) > pages_.size() - 1)
- return "{}";
+pp::FloatRect PDFiumEngine::GetCharBounds(int page_index, int char_index) {
+ DCHECK(page_index >= 0 && page_index < static_cast<int>(pages_.size()));
+ return pages_[page_index]->GetCharBounds(char_index);
+}
+
+uint32_t PDFiumEngine::GetCharUnicode(int page_index, int char_index) {
+ DCHECK(page_index >= 0 && page_index < static_cast<int>(pages_.size()));
+ return pages_[page_index]->GetCharUnicode(char_index);
+}
- std::unique_ptr<base::Value> node(
- pages_[index]->GetAccessibleContentAsValue(current_rotation_));
- std::string page_json;
- base::JSONWriter::Write(*node, &page_json);
- return page_json;
+void PDFiumEngine::GetTextRunInfo(int page_index,
+ int start_char_index,
+ uint32_t* out_len,
+ double* out_font_size,
+ pp::FloatRect* out_bounds) {
+ DCHECK(page_index >= 0 && page_index < static_cast<int>(pages_.size()));
+ return pages_[page_index]->GetTextRunInfo(start_char_index, out_len,
+ out_font_size, out_bounds);
}
bool PDFiumEngine::GetPrintScaling() {
@@ -2404,8 +2417,8 @@ void PDFiumEngine::LoadDocument() {
ScopedUnsupportedFeature scoped_unsupported_feature(this);
bool needs_password = false;
- if (TryLoadingDoc(false, std::string(), &needs_password)) {
- ContinueLoadingDocument(false, std::string());
+ if (TryLoadingDoc(std::string(), &needs_password)) {
+ ContinueLoadingDocument(std::string());
return;
}
if (needs_password)
@@ -2414,8 +2427,7 @@ void PDFiumEngine::LoadDocument() {
client_->DocumentLoadFailed();
}
-bool PDFiumEngine::TryLoadingDoc(bool with_password,
- const std::string& password,
+bool PDFiumEngine::TryLoadingDoc(const std::string& password,
bool* needs_password) {
*needs_password = false;
if (doc_) {
@@ -2427,7 +2439,7 @@ bool PDFiumEngine::TryLoadingDoc(bool with_password,
}
const char* password_cstr = nullptr;
- if (with_password) {
+ if (!password.empty()) {
password_cstr = password.c_str();
password_tries_remaining_--;
}
@@ -2460,24 +2472,18 @@ void PDFiumEngine::OnGetPasswordComplete(int32_t result,
const pp::Var& password) {
getting_password_ = false;
- bool password_given = false;
std::string password_text;
- if (result == PP_OK && password.is_string()) {
+ if (result == PP_OK && password.is_string())
password_text = password.AsString();
- if (!password_text.empty())
- password_given = true;
- }
- ContinueLoadingDocument(password_given, password_text);
+ ContinueLoadingDocument(password_text);
}
-void PDFiumEngine::ContinueLoadingDocument(
- bool has_password,
- const std::string& password) {
+void PDFiumEngine::ContinueLoadingDocument(const std::string& password) {
ScopedUnsupportedFeature scoped_unsupported_feature(this);
bool needs_password = false;
- bool loaded = TryLoadingDoc(has_password, password, &needs_password);
- bool password_incorrect = !loaded && has_password && needs_password;
+ bool loaded = TryLoadingDoc(password, &needs_password);
+ bool password_incorrect = !loaded && needs_password && !password.empty();
if (password_incorrect && password_tries_remaining_ > 0) {
GetPasswordAndLoad();
return;
@@ -2660,7 +2666,7 @@ bool PDFiumEngine::CheckPageAvailable(int index, std::vector<int>* pending) {
if (index < num_pages)
pages_[index]->set_available(true);
- if (!default_page_size_.GetArea())
+ if (default_page_size_.IsEmpty())
default_page_size_ = GetPageSize(index);
return true;
}
diff --git a/chromium/pdf/pdfium/pdfium_engine.h b/chromium/pdf/pdfium/pdfium_engine.h
index dcfbe3d132b..e6392b4a482 100644
--- a/chromium/pdf/pdfium/pdfium_engine.h
+++ b/chromium/pdf/pdfium/pdfium_engine.h
@@ -84,12 +84,20 @@ class PDFiumEngine : public PDFEngine,
int GetNamedDestinationPage(const std::string& destination) override;
int GetMostVisiblePage() override;
pp::Rect GetPageRect(int index) override;
+ pp::Rect GetPageBoundsRect(int index) override;
pp::Rect GetPageContentsRect(int index) override;
pp::Rect GetPageScreenRect(int page_index) const override;
int GetVerticalScrollbarYPosition() override { return position_.y(); }
void SetGrayscale(bool grayscale) override;
void OnCallback(int id) override;
- std::string GetPageAsJSON(int index) override;
+ int GetCharCount(int page_index) override;
+ pp::FloatRect GetCharBounds(int page_index, int char_index) override;
+ uint32_t GetCharUnicode(int page_index, int char_index) override;
+ void GetTextRunInfo(int page_index,
+ int start_char_index,
+ uint32_t* out_len,
+ double* out_font_size,
+ pp::FloatRect* out_bounds) override;
bool GetPrintScaling() override;
int GetCopiesToPrint() override;
int GetDuplexType() override;
@@ -204,13 +212,10 @@ class PDFiumEngine : public PDFEngine,
void LoadDocument();
// Try loading the document. Returns true if the document is successfully
- // loaded or is already loaded otherwise it will return false. If
- // |with_password| is set to true, the document will be loaded with
- // |password|. If the document could not be loaded and needs a password,
- // |needs_password| will be set to true.
- bool TryLoadingDoc(bool with_password,
- const std::string& password,
- bool* needs_password);
+ // loaded or is already loaded; otherwise it will return false. If there is a
+ // password, then |password| is non-empty. If the document could not be loaded
+ // and needs a password, |needs_password| will be set to true.
+ bool TryLoadingDoc(const std::string& password, bool* needs_password);
// Asks the user for the document password and then continue loading the
// document.
@@ -221,9 +226,8 @@ class PDFiumEngine : public PDFEngine,
const pp::Var& password);
// Continues loading the document when the password has been retrieved, or if
- // there is no password.
- void ContinueLoadingDocument(bool has_password,
- const std::string& password);
+ // there is no password. If there is no password, then |password| is empty.
+ void ContinueLoadingDocument(const std::string& password);
// Finishes loading the document. Recalculate the document size if there were
// pages that were not previously available.
diff --git a/chromium/pdf/pdfium/pdfium_page.cc b/chromium/pdf/pdfium/pdfium_page.cc
index af64660a2a8..e95296986d8 100644
--- a/chromium/pdf/pdfium/pdfium_page.cc
+++ b/chromium/pdf/pdfium/pdfium_page.cc
@@ -9,35 +9,27 @@
#include <algorithm>
#include <memory>
+#include <utility>
#include "base/logging.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_util.h"
#include "base/strings/utf_string_conversions.h"
-#include "base/values.h"
#include "pdf/pdfium/pdfium_api_string_buffer_adapter.h"
#include "pdf/pdfium/pdfium_engine.h"
+#include "printing/units.h"
// Used when doing hit detection.
#define kTolerance 20.0
+using printing::ConvertUnitDouble;
+using printing::kPointsPerInch;
+using printing::kPixelsPerInch;
+
namespace {
-// Dictionary Value key names for returning the accessible page content as JSON.
-const char kPageWidth[] = "width";
-const char kPageHeight[] = "height";
-const char kPageTextBox[] = "textBox";
-const char kTextBoxLeft[] = "left";
-const char kTextBoxTop[] = "top";
-const char kTextBoxWidth[] = "width";
-const char kTextBoxHeight[] = "height";
-const char kTextBoxFontSize[] = "fontSize";
-const char kTextBoxNodes[] = "textNodes";
-const char kTextNodeType[] = "type";
-const char kTextNodeText[] = "text";
-const char kTextNodeTypeText[] = "text";
-
-pp::Rect PageRectToGViewRect(FPDF_PAGE page, const pp::Rect& input) {
+pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page,
+ const pp::FloatRect& input) {
int output_width = FPDF_GetPageWidth(page);
int output_height = FPDF_GetPageHeight(page);
@@ -45,65 +37,42 @@ pp::Rect PageRectToGViewRect(FPDF_PAGE page, const pp::Rect& input) {
int min_y;
int max_x;
int max_y;
- FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0,
- input.x(), input.y(), &min_x, &min_y);
- FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0,
- input.right(), input.bottom(), &max_x, &max_y);
+ FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.x(),
+ input.y(), &min_x, &min_y);
+ FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.right(),
+ input.bottom(), &max_x, &max_y);
if (max_x < min_x)
std::swap(min_x, max_x);
if (max_y < min_y)
std::swap(min_y, max_y);
- pp::Rect output_rect(min_x, min_y, max_x - min_x, max_y - min_y);
- output_rect.Intersect(pp::Rect(0, 0, output_width, output_height));
+ pp::FloatRect output_rect(
+ ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch),
+ ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch),
+ ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch),
+ ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch));
return output_rect;
}
-pp::Rect GetCharRectInGViewCoords(FPDF_PAGE page, FPDF_TEXTPAGE text_page,
- int index) {
+pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page,
+ FPDF_TEXTPAGE text_page,
+ int index) {
double left, right, bottom, top;
FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top);
if (right < left)
std::swap(left, right);
if (bottom < top)
std::swap(top, bottom);
- pp::Rect page_coords(left, top, right - left, bottom - top);
- return PageRectToGViewRect(page, page_coords);
-}
-
-// This is the character PDFium inserts where a word is broken across lines.
-const unsigned int kSoftHyphen = 0x02;
-
-// The following characters should all be recognized as Unicode newlines:
-// LF: Line Feed, U+000A
-// VT: Vertical Tab, U+000B
-// FF: Form Feed, U+000C
-// CR: Carriage Return, U+000D
-// CR+LF: CR (U+000D) followed by LF (U+000A)
-// NEL: Next Line, U+0085
-// LS: Line Separator, U+2028
-// PS: Paragraph Separator, U+2029.
-// Source: http://en.wikipedia.org/wiki/Newline#Unicode .
-const unsigned int kUnicodeNewlines[] = {
- 0xA, 0xB, 0xC, 0xD, 0X85, 0x2028, 0x2029
-};
-
-bool IsSoftHyphen(unsigned int character) {
- return kSoftHyphen == character;
+ pp::FloatRect page_coords(left, top, right - left, bottom - top);
+ return FloatPageRectToPixelRect(page, page_coords);
}
-bool OverlapsOnYAxis(const pp::Rect &a, const pp::Rect& b) {
+bool OverlapsOnYAxis(const pp::FloatRect &a, const pp::FloatRect& b) {
return !(a.IsEmpty() || b.IsEmpty() ||
a.bottom() < b.y() || b.bottom() < a.y());
}
-bool IsEol(unsigned int character) {
- const unsigned int* first = kUnicodeNewlines;
- const unsigned int* last = kUnicodeNewlines + arraysize(kUnicodeNewlines);
- return std::find(first, last, character) != last;
-}
-
} // namespace
namespace chrome_pdf {
@@ -191,112 +160,73 @@ FPDF_TEXTPAGE PDFiumPage::GetTextPage() {
return text_page_;
}
-base::Value* PDFiumPage::GetAccessibleContentAsValue(int rotation) {
- base::DictionaryValue* node = new base::DictionaryValue();
-
- if (!available_)
- return node;
-
+void PDFiumPage::GetTextRunInfo(int start_char_index,
+ uint32_t* out_len,
+ double* out_font_size,
+ pp::FloatRect* out_bounds) {
FPDF_PAGE page = GetPage();
FPDF_TEXTPAGE text_page = GetTextPage();
-
- double width = FPDF_GetPageWidth(page);
- double height = FPDF_GetPageHeight(page);
-
- node->SetDouble(kPageWidth, width);
- node->SetDouble(kPageHeight, height);
- std::unique_ptr<base::ListValue> text(new base::ListValue());
-
int chars_count = FPDFText_CountChars(text_page);
- pp::Rect line_rect;
- pp::Rect word_rect;
- bool seen_literal_text_in_word = false;
-
- // Iterate over all of the chars on the page. Explicitly run the loop
- // with |i == chars_count|, which is one past the last character, and
- // pretend it's a newline character in order to ensure we always flush
- // the last line.
- base::string16 line;
- for (int i = 0; i <= chars_count; i++) {
- unsigned int character;
- pp::Rect char_rect;
-
- if (i < chars_count) {
- character = FPDFText_GetUnicode(text_page, i);
- char_rect = GetCharRectInGViewCoords(page, text_page, i);
- } else {
- // Make the last character a newline so the last line isn't lost.
- character = '\n';
- }
-
- // There are spurious STX chars appearing in place
- // of ligatures. Apply a heuristic to check that some vertical displacement
- // is involved before assuming they are line-breaks.
- bool is_intraword_linebreak = false;
- if (i < chars_count - 1 && IsSoftHyphen(character)) {
- // check if the next char and this char are in different lines.
- pp::Rect next_char_rect = GetCharRectInGViewCoords(
- page, text_page, i + 1);
-
+ int char_index = start_char_index;
+ while (
+ char_index < chars_count &&
+ base::IsUnicodeWhitespace(FPDFText_GetUnicode(text_page, char_index))) {
+ char_index++;
+ }
+ int text_run_font_size = FPDFText_GetFontSize(text_page, char_index);
+ pp::FloatRect text_run_bounds =
+ GetFloatCharRectInPixels(page, text_page, char_index);
+ char_index++;
+ while (char_index < chars_count) {
+ unsigned int character = FPDFText_GetUnicode(text_page, char_index);
+
+ if (!base::IsUnicodeWhitespace(character)) {
// TODO(dmazzoni): this assumes horizontal text.
// https://crbug.com/580311
- is_intraword_linebreak = !OverlapsOnYAxis(char_rect, next_char_rect);
- }
- if (is_intraword_linebreak ||
- base::IsUnicodeWhitespace(character) ||
- IsEol(character)) {
- if (!word_rect.IsEmpty() && seen_literal_text_in_word) {
- word_rect = pp::Rect();
- seen_literal_text_in_word = false;
- }
- }
+ pp::FloatRect char_rect = GetFloatCharRectInPixels(
+ page, text_page, char_index);
+ if (!char_rect.IsEmpty() && !OverlapsOnYAxis(text_run_bounds, char_rect))
+ break;
- if (is_intraword_linebreak || IsEol(character)) {
- if (!line_rect.IsEmpty()) {
- if (is_intraword_linebreak) {
- // Add a 0-width hyphen.
- line.push_back('-');
- }
+ int font_size = FPDFText_GetFontSize(text_page, char_index);
+ if (font_size != text_run_font_size)
+ break;
- base::DictionaryValue* text_node = new base::DictionaryValue();
- text_node->SetString(kTextNodeType, kTextNodeTypeText);
- text_node->SetString(kTextNodeText, line);
-
- base::ListValue* text_nodes = new base::ListValue();
- text_nodes->Append(text_node);
-
- base::DictionaryValue* line_node = new base::DictionaryValue();
- line_node->SetDouble(kTextBoxLeft, line_rect.x());
- line_node->SetDouble(kTextBoxTop, line_rect.y());
- line_node->SetDouble(kTextBoxWidth, line_rect.width());
- line_node->SetDouble(kTextBoxHeight, line_rect.height());
- line_node->SetDouble(kTextBoxFontSize,
- FPDFText_GetFontSize(text_page, i));
- line_node->Set(kTextBoxNodes, text_nodes);
- text->Append(line_node);
-
- line.clear();
- line_rect = pp::Rect();
- word_rect = pp::Rect();
- seen_literal_text_in_word = false;
- }
- continue;
+ // Heuristic: split a text run after a space longer than 3 average
+ // characters.
+ double avg_char_width =
+ text_run_bounds.width() / (char_index - start_char_index);
+ if (char_rect.x() - text_run_bounds.right() > avg_char_width * 3)
+ break;
+
+ text_run_bounds = text_run_bounds.Union(char_rect);
}
- seen_literal_text_in_word = seen_literal_text_in_word ||
- !base::IsUnicodeWhitespace(character);
- line.push_back(character);
- if (!char_rect.IsEmpty()) {
- line_rect = line_rect.Union(char_rect);
+ char_index++;
+ }
- if (!base::IsUnicodeWhitespace(character))
- word_rect = word_rect.Union(char_rect);
- }
+ // Some PDFs have missing or obviously bogus font sizes; substitute the
+ // height of the bounding box in those cases.
+ if (text_run_font_size <= 1 ||
+ text_run_font_size < text_run_bounds.height() / 2 ||
+ text_run_font_size > text_run_bounds.height() * 2) {
+ text_run_font_size = text_run_bounds.height();
}
- node->Set(kPageTextBox, text.release()); // Takes ownership of |text|
+ *out_len = char_index - start_char_index;
+ *out_font_size = text_run_font_size;
+ *out_bounds = text_run_bounds;
+}
+
+uint32_t PDFiumPage::GetCharUnicode(int char_index) {
+ FPDF_TEXTPAGE text_page = GetTextPage();
+ return FPDFText_GetUnicode(text_page, char_index);
+}
- return node;
+pp::FloatRect PDFiumPage::GetCharBounds(int char_index) {
+ FPDF_PAGE page = GetPage();
+ FPDF_TEXTPAGE text_page = GetTextPage();
+ return GetFloatCharRectInPixels(page, text_page, char_index);
}
PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point,
diff --git a/chromium/pdf/pdfium/pdfium_page.h b/chromium/pdf/pdfium/pdfium_page.h
index 802ecb64e55..fa94ed96a08 100644
--- a/chromium/pdf/pdfium/pdfium_page.h
+++ b/chromium/pdf/pdfium/pdfium_page.h
@@ -14,10 +14,6 @@
#include "third_party/pdfium/public/fpdf_formfill.h"
#include "third_party/pdfium/public/fpdf_text.h"
-namespace base {
-class Value;
-}
-
namespace chrome_pdf {
class PDFiumEngine;
@@ -43,8 +39,17 @@ class PDFiumPage {
// Returns FPDF_TEXTPAGE for the page, loading and parsing it if necessary.
FPDF_TEXTPAGE GetTextPage();
- // Returns a DictionaryValue version of the page.
- base::Value* GetAccessibleContentAsValue(int rotation);
+ // Given a start char index, find the longest continuous run of text that's
+ // in a single direction and with the same style and font size. Return the
+ // length of that sequence and its font size and bounding box.
+ void GetTextRunInfo(int start_char_index,
+ uint32_t* out_len,
+ double* out_font_size,
+ pp::FloatRect* out_bounds);
+ // Get a Unicode character from the page.
+ uint32_t GetCharUnicode(int char_index);
+ // Get the bounds of a character in page pixels.
+ pp::FloatRect GetCharBounds(int char_index);
enum Area {
NONSELECTABLE_AREA,