/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "third_party/blink/renderer/core/html/parser/html_document_parser.h"

#include <memory>
#include <utility>

#include "base/auto_reset.h"
#include "base/numerics/safe_conversions.h"
#include "third_party/blink/public/common/features.h"
#include "third_party/blink/public/common/loader/loading_behavior_flag.h"
#include "third_party/blink/public/mojom/appcache/appcache.mojom-blink.h"
#include "third_party/blink/public/platform/platform.h"
#include "third_party/blink/public/platform/task_type.h"
#include "third_party/blink/renderer/core/css/media_values_cached.h"
#include "third_party/blink/renderer/core/css/style_engine.h"
#include "third_party/blink/renderer/core/dom/document_fragment.h"
#include "third_party/blink/renderer/core/dom/element.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/core/html/html_document.h"
#include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
#include "third_party/blink/renderer/core/html/parser/background_html_parser.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_metrics.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_scheduler.h"
#include "third_party/blink/renderer/core/html/parser/html_resource_preloader.h"
#include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
#include "third_party/blink/renderer/core/html/parser/pump_session.h"
#include "third_party/blink/renderer/core/html_names.h"
#include "third_party/blink/renderer/core/inspector/inspector_trace_events.h"
#include "third_party/blink/renderer/core/loader/document_loader.h"
#include "third_party/blink/renderer/core/loader/prefetched_signed_exchange_manager.h"
#include "third_party/blink/renderer/core/loader/preload_helper.h"
#include "third_party/blink/renderer/core/probe/core_probes.h"
#include "third_party/blink/renderer/core/script/html_parser_script_runner.h"
#include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
#include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
#include "third_party/blink/renderer/platform/heap/handle.h"
#include "third_party/blink/renderer/platform/heap/heap.h"
#include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
#include "third_party/blink/renderer/platform/loader/fetch/resource_fetcher.h"
#include "third_party/blink/renderer/platform/runtime_enabled_features.h"
#include "third_party/blink/renderer/platform/scheduler/public/cooperative_scheduling_manager.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h"
#include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
#include "third_party/blink/renderer/platform/wtf/shared_buffer.h"

namespace blink {

// Running total of speculative tokens that were thrown away; it is increased
// whenever queued speculations are discarded and is only exposed through the
// two *ForTesting accessors below.
static size_t g_discarded_token_count_for_testing = 0;

// Resets the discarded-token counter to zero.
void ResetDiscardedTokenCountForTesting() {
  g_discarded_token_count_for_testing = 0;
}

// Returns the number of tokens discarded since the last reset.
size_t GetDiscardedTokenCountForTesting() {
  return g_discarded_token_count_for_testing;
}

// This sets the (default) maximum number of tokens which the foreground HTML
// parser should try to process in one go. Lower values generally mean faster
// first paints, larger values delay first paint, but make sure it's closer to
// the final page. This is the default value to use, if no Finch-provided
// value exists.
constexpr int kDefaultMaxTokenizationBudget = 250;

class EndIfDelayedForbiddenScope;
class ShouldCompleteScope;
class AttemptToEndForbiddenScope;

// This class encapsulates the internal state needed for synchronous foreground
// HTML parsing (e.g. if HTMLDocumentParser::PumpTokenizer yields, this class
// tracks what should be done after the pump completes.)
class HTMLDocumentParserState : public GarbageCollected { friend EndIfDelayedForbiddenScope; friend ShouldCompleteScope; friend AttemptToEndForbiddenScope; public: // Keeps track of whether the parser needs to complete tokenization work, // optionally followed by EndIfDelayed. enum class DeferredParserState { // Indicates that a tokenizer pump has either completed or hasn't been // scheduled. kNotScheduled = 0, // Enforce ordering in this enum. // Indicates that a tokenizer pump is scheduled and hasn't completed yet. kScheduled = 1, // Indicates that a tokenizer pump, followed by EndIfDelayed, is scheduled. kScheduledWithEndIfDelayed = 2 }; enum class MetaCSPTokenState { // If we've seen a meta CSP token in an upcoming HTML chunk, then we need to // defer any preloads until we've added the CSP token to the document and // applied the Content Security Policy. kSeen = 0, // Indicates that there is no meta CSP token in the upcoming chunk. kNotSeen = 1, // Indicates that we've added the CSP token to the document and we can now // fetch preloads. kProcessed = 2, // Indicates that it's too late to apply a Content-Security policy (because // we've exited the header section.) 
kUnenforceable = 3, }; explicit HTMLDocumentParserState(ParserSynchronizationPolicy mode) : state_(DeferredParserState::kNotScheduled), meta_csp_state_(MetaCSPTokenState::kNotSeen), mode_(mode), end_if_delayed_forbidden_(0), should_complete_(0), should_attempt_to_end_on_eof_(0), needs_link_header_dispatch_(true), have_seen_first_byte_(false) {} void Trace(Visitor* v) const {} void SetState(DeferredParserState state) { DCHECK(!(state == DeferredParserState::kScheduled && ShouldComplete())); state_ = state; } DeferredParserState GetState() const { return state_; } bool IsScheduled() const { return state_ >= DeferredParserState::kScheduled; } const char* GetStateAsString() const { switch (state_) { case DeferredParserState::kNotScheduled: return "not_scheduled"; case DeferredParserState::kScheduled: return "scheduled"; case DeferredParserState::kScheduledWithEndIfDelayed: return "scheduled_with_end_if_delayed"; } } bool NeedsLinkHeaderPreloadsDispatch() const { return needs_link_header_dispatch_; } void DispatchedLinkHeaderPreloads() { needs_link_header_dispatch_ = false; } bool HaveSeenFirstByte() const { return have_seen_first_byte_; } void SetHaveSeenFirstByte() { have_seen_first_byte_ = true; } // Keeps track of whether Document::Finish has been called whilst parsing // asynchronously. ShouldAttemptToEndOnEOF() means that the parser should // close when there's no more input. bool ShouldAttemptToEndOnEOF() const { return should_attempt_to_end_on_eof_ > 0; } void SetAttemptToEndOnEOF() { // This method should only be called from ::Finish. should_attempt_to_end_on_eof_++; // Should only ever call ::Finish once. 
DCHECK(should_attempt_to_end_on_eof_ < 2); } bool ShouldEndIfDelayed() const { return end_if_delayed_forbidden_ == 0; } bool ShouldComplete() const { return should_complete_ || GetMode() != kAllowDeferredParsing; } bool IsSynchronous() const { return mode_ == ParserSynchronizationPolicy::kForceSynchronousParsing; } ParserSynchronizationPolicy GetMode() const { return mode_; } void SetSeenCSPMetaTag(const bool seen) { if (meta_csp_state_ == MetaCSPTokenState::kUnenforceable) return; if (seen) meta_csp_state_ = MetaCSPTokenState::kSeen; else meta_csp_state_ = MetaCSPTokenState::kNotSeen; } void SetExitedHeader() { meta_csp_state_ = MetaCSPTokenState::kUnenforceable; } bool HaveExitedHeader() const { return meta_csp_state_ == MetaCSPTokenState::kUnenforceable; } private: void EnterEndIfDelayedForbidden() { end_if_delayed_forbidden_++; } void ExitEndIfDelayedForbidden() { end_if_delayed_forbidden_--; DCHECK_GE(end_if_delayed_forbidden_, 0); } void EnterAttemptToEndForbidden() { DCHECK(should_attempt_to_end_on_eof_ > 0); should_attempt_to_end_on_eof_ = 0; } void EnterShouldComplete() { should_complete_++; } void ExitShouldComplete() { should_complete_--; DCHECK_GE(should_complete_, 0); } DeferredParserState state_; MetaCSPTokenState meta_csp_state_; ParserSynchronizationPolicy mode_; int end_if_delayed_forbidden_; int should_complete_; // Set to non-zero if Document::Finish has been called and we're operating // asynchronously. 
int should_attempt_to_end_on_eof_; bool needs_link_header_dispatch_; bool have_seen_first_byte_; }; class EndIfDelayedForbiddenScope { STACK_ALLOCATED(); public: explicit EndIfDelayedForbiddenScope(HTMLDocumentParserState* state) : state_(state) { state_->EnterEndIfDelayedForbidden(); } ~EndIfDelayedForbiddenScope() { state_->ExitEndIfDelayedForbidden(); } private: HTMLDocumentParserState* state_; }; class AttemptToEndForbiddenScope { STACK_ALLOCATED(); public: explicit AttemptToEndForbiddenScope(HTMLDocumentParserState* state) : state_(state) { state_->EnterAttemptToEndForbidden(); } private: HTMLDocumentParserState* state_; }; class ShouldCompleteScope { STACK_ALLOCATED(); public: explicit ShouldCompleteScope(HTMLDocumentParserState* state) : state_(state) { state_->EnterShouldComplete(); } ~ShouldCompleteScope() { state_->ExitShouldComplete(); } private: HTMLDocumentParserState* state_; }; // This is a direct transcription of step 4 from: // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case static HTMLTokenizer::State TokenizerStateForContextElement( Element* context_element, bool report_errors, const HTMLParserOptions& options) { if (!context_element) return HTMLTokenizer::kDataState; const QualifiedName& context_tag = context_element->TagQName(); if (context_tag.Matches(html_names::kTitleTag) || context_tag.Matches(html_names::kTextareaTag)) return HTMLTokenizer::kRCDATAState; if (context_tag.Matches(html_names::kStyleTag) || context_tag.Matches(html_names::kXmpTag) || context_tag.Matches(html_names::kIFrameTag) || context_tag.Matches(html_names::kNoembedTag) || (context_tag.Matches(html_names::kNoscriptTag) && options.scripting_flag) || context_tag.Matches(html_names::kNoframesTag)) return report_errors ? HTMLTokenizer::kRAWTEXTState : HTMLTokenizer::kPLAINTEXTState; if (context_tag.Matches(html_names::kScriptTag)) return report_errors ? 
HTMLTokenizer::kScriptDataState : HTMLTokenizer::kPLAINTEXTState; if (context_tag.Matches(html_names::kPlaintextTag)) return HTMLTokenizer::kPLAINTEXTState; return HTMLTokenizer::kDataState; } class ScopedYieldTimer { public: // This object is created at the start of a block of parsing, and will // report the time since the last block yielded if known. ScopedYieldTimer(std::unique_ptr* timer, HTMLParserMetrics* metrics_reporter) : timer_(timer), reporting_metrics_(metrics_reporter) { if (!reporting_metrics_ || !(*timer_)) return; metrics_reporter->AddYieldInterval((*timer_)->Elapsed()); timer_->reset(); } // The destructor creates a new timer, which will keep track of time until // the next block starts. ~ScopedYieldTimer() { if (reporting_metrics_) *timer_ = std::make_unique(); } private: std::unique_ptr* timer_; bool reporting_metrics_; }; HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document, ParserSynchronizationPolicy sync_policy, ParserPrefetchPolicy prefetch_policy) : HTMLDocumentParser(document, kAllowScriptingContent, sync_policy, prefetch_policy) { script_runner_ = HTMLParserScriptRunner::Create(ReentryPermit(), &document, this); // Allow declarative shadow DOM for the document parser, if not explicitly // disabled. bool include_shadow_roots = document.GetDeclarativeShadowRootAllowState() != Document::DeclarativeShadowRootAllowState::kDeny; tree_builder_ = MakeGarbageCollected( this, document, kAllowScriptingContent, options_, include_shadow_roots); } HTMLDocumentParser::HTMLDocumentParser( DocumentFragment* fragment, Element* context_element, ParserContentPolicy parser_content_policy, ParserPrefetchPolicy parser_prefetch_policy) : HTMLDocumentParser(fragment->GetDocument(), parser_content_policy, kForceSynchronousParsing, parser_prefetch_policy) { // Allow declarative shadow DOM for the fragment parser only if explicitly // enabled. 
bool include_shadow_roots = fragment->GetDocument().GetDeclarativeShadowRootAllowState() == Document::DeclarativeShadowRootAllowState::kAllow; // No script_runner_ in fragment parser. tree_builder_ = MakeGarbageCollected( this, fragment, context_element, parser_content_policy, options_, include_shadow_roots); // For now document fragment parsing never reports errors. bool report_errors = false; tokenizer_->SetState(TokenizerStateForContextElement( context_element, report_errors, options_)); } namespace { int GetMaxTokenizationBudget() { static int max = base::GetFieldTrialParamByFeatureAsInt( features::kForceSynchronousHTMLParsing, "MaxTokenizationBudget", kDefaultMaxTokenizationBudget); return max; } } // namespace HTMLDocumentParser::HTMLDocumentParser(Document& document, ParserContentPolicy content_policy, ParserSynchronizationPolicy sync_policy, ParserPrefetchPolicy prefetch_policy) : ScriptableDocumentParser(document, content_policy), options_(&document), reentry_permit_(HTMLParserReentryPermit::Create()), token_(sync_policy != kAllowAsynchronousParsing ? std::make_unique() : nullptr), tokenizer_(sync_policy != kAllowAsynchronousParsing ? std::make_unique(options_) : nullptr), loading_task_runner_(sync_policy == kForceSynchronousParsing ? nullptr : document.GetTaskRunner(TaskType::kNetworking)), parser_scheduler_(sync_policy == kAllowAsynchronousParsing ? MakeGarbageCollected( this, loading_task_runner_.get()) : nullptr), task_runner_state_( MakeGarbageCollected(sync_policy)), pending_csp_meta_token_(nullptr), can_parse_asynchronously_(sync_policy == kAllowAsynchronousParsing), end_was_delayed_(false), have_background_parser_(false), pump_session_nesting_level_(0), pump_speculations_session_nesting_level_(0), is_parsing_at_line_number_(false), tried_loading_link_headers_(false), added_pending_parser_blocking_stylesheet_(false), is_waiting_for_stylesheets_(false), scheduler_(sync_policy == kAllowDeferredParsing ? 
Thread::Current()->Scheduler() : nullptr) { DCHECK(CanParseAsynchronously() || (token_ && tokenizer_)); // Asynchronous parsing is not allowed in prefetch mode. DCHECK(!document.IsPrefetchOnly() || !CanParseAsynchronously()); // It is permissible to request the background HTML parser whilst also using // --enable-blink-features=ForceSynchronousHTMLParsing, but it's usually // unintentional. To help flush out these cases, trigger a DCHECK. DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled() || !CanParseAsynchronously()); // Report metrics for async document parsing only. The document // must be main frame to meet UKM requirements, and must have a high // resolution clock for high quality data. if (sync_policy == kAllowAsynchronousParsing && document.GetFrame() && document.GetFrame()->IsMainFrame() && base::TimeTicks::IsHighResolution()) { metrics_reporter_ = std::make_unique( document.UkmSourceID(), document.UkmRecorder()); } max_tokenization_budget_ = GetMaxTokenizationBudget(); // Don't create preloader for parsing clipboard content. if (content_policy == kDisallowScriptingAndPluginContent) return; // Create preloader only when the document is: // - attached to a frame (likely the prefetched resources will be loaded // soon), // - a HTML import document (blocks rendering and also resources will be // loaded soon), or // - is for no-state prefetch (made specifically for running preloader). if (!document.GetFrame() && !document.IsHTMLImport() && !document.IsPrefetchOnly()) return; if (prefetch_policy == kAllowPrefetching) preloader_ = MakeGarbageCollected(document); } HTMLDocumentParser::~HTMLDocumentParser() = default; void HTMLDocumentParser::Dispose() { // In Oilpan, HTMLDocumentParser can die together with Document, and detach() // is not called in this case. 
if (have_background_parser_) StopBackgroundParser(); } void HTMLDocumentParser::Trace(Visitor* visitor) const { visitor->Trace(tree_builder_); visitor->Trace(parser_scheduler_); visitor->Trace(script_runner_); visitor->Trace(preloader_); visitor->Trace(task_runner_state_); ScriptableDocumentParser::Trace(visitor); HTMLParserScriptRunnerHost::Trace(visitor); } bool HTMLDocumentParser::HasPendingWorkScheduledForTesting() const { return task_runner_state_->IsScheduled(); } void HTMLDocumentParser::Detach() { if (have_background_parser_) StopBackgroundParser(); // Deschedule any pending tokenizer pumps. task_runner_state_->SetState( HTMLDocumentParserState::DeferredParserState::kNotScheduled); DocumentParser::Detach(); if (script_runner_) script_runner_->Detach(); tree_builder_->Detach(); // FIXME: It seems wrong that we would have a preload scanner here. Yet during // fast/dom/HTMLScriptElement/script-load-events.html we do. preload_scanner_.reset(); insertion_preload_scanner_.reset(); if (parser_scheduler_) { parser_scheduler_->Detach(); parser_scheduler_.Clear(); } // Oilpan: It is important to clear token_ to deallocate backing memory of // HTMLToken::data_ and let the allocator reuse the memory for // HTMLToken::data_ of a next HTMLDocumentParser. We need to clear // tokenizer_ first because tokenizer_ has a raw pointer to token_. 
tokenizer_.reset(); token_.reset(); } void HTMLDocumentParser::StopParsing() { DocumentParser::StopParsing(); if (parser_scheduler_) { parser_scheduler_->Detach(); parser_scheduler_.Clear(); } task_runner_state_->SetState( HTMLDocumentParserState::DeferredParserState::kNotScheduled); if (have_background_parser_) StopBackgroundParser(); } // This kicks off "Once the user agent stops parsing" as described by: // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end void HTMLDocumentParser::PrepareToStopParsing() { TRACE_EVENT1("blink", "HTMLDocumentParser::PrepareToStopParsing", "parser", (void*)this); // FIXME: It may not be correct to disable this for the background parser. // That means hasInsertionPoint() may not be correct in some cases. DCHECK(!HasInsertionPoint() || have_background_parser_); // NOTE: This pump should only ever emit buffered character tokens. if (tokenizer_ && !GetDocument()->IsPrefetchOnly()) { DCHECK(!have_background_parser_); ShouldCompleteScope should_complete(task_runner_state_); EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_); PumpTokenizerIfPossible(); } if (IsStopped()) return; DocumentParser::PrepareToStopParsing(); // We will not have a scriptRunner when parsing a DocumentFragment. if (script_runner_) GetDocument()->SetReadyState(Document::kInteractive); // Setting the ready state above can fire mutation event and detach us from // underneath. In that case, just bail out. if (IsDetached()) return; if (script_runner_) script_runner_->RecordMetricsAtParseEnd(); AttemptToRunDeferredScriptsAndEnd(); } bool HTMLDocumentParser::IsParsingFragment() const { return tree_builder_->IsParsingFragment(); } void HTMLDocumentParser::DeferredPumpTokenizerIfPossible() { // This method is called asynchronously, continues building the HTML document. // This function should only be called when // --enable-blink-features=ForceSynchronousHTMLParsing is available. 
DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); // If we're scheduled for a tokenizer pump, then document should be attached // and the parser should not be stopped, but sometimes a script completes // loading (so we schedule a pump) but the Document is stopped in the meantime // (e.g. fast/parser/iframe-onload-document-close-with-external-script.html). DCHECK(task_runner_state_->GetState() == HTMLDocumentParserState::DeferredParserState::kNotScheduled || !IsDetached()); TRACE_EVENT2("blink", "HTMLDocumentParser::DeferredPumpTokenizerIfPossible", "parser", (void*)this, "state", task_runner_state_->GetStateAsString()); bool should_call_delay_end = task_runner_state_->GetState() == HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed; if (task_runner_state_->IsScheduled()) { task_runner_state_->SetState( HTMLDocumentParserState::DeferredParserState::kNotScheduled); if (should_call_delay_end) { EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_); PumpTokenizerIfPossible(); EndIfDelayed(); } else { PumpTokenizerIfPossible(); } } } void HTMLDocumentParser::PumpTokenizerIfPossible() { // This method is called synchronously, builds the HTML document up to // the current budget, and optionally completes. TRACE_EVENT1("blink", "HTMLDocumentParser::PumpTokenizerIfPossible", "parser", (void*)this); bool yielded = false; CheckIfBlockingStylesheetAdded(); if (!IsStopped() && (!IsPaused() || task_runner_state_->ShouldEndIfDelayed())) { yielded = PumpTokenizer(); } if (yielded) { DCHECK(!task_runner_state_->ShouldComplete()); SchedulePumpTokenizer(); } else if (task_runner_state_->ShouldAttemptToEndOnEOF()) { // Fall into this branch if ::Finish has been previously called and we've // just finished asynchronously parsing everything. 
AttemptToEnd(); } else if (task_runner_state_->ShouldEndIfDelayed()) { // If we did not exceed the budget or parsed everything there was to // parse, check if we should complete the document. if (task_runner_state_->ShouldComplete() || IsStopped() || IsStopping()) { EndIfDelayed(); } else { ScheduleEndIfDelayed(); } } } bool HTMLDocumentParser::IsScheduledForUnpause() const { return parser_scheduler_ && parser_scheduler_->IsScheduledForUnpause(); } // Used by HTMLParserScheduler void HTMLDocumentParser::ResumeParsingAfterYield() { DCHECK(CanParseAsynchronously()); DCHECK(have_background_parser_); DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); ScopedYieldTimer timer(&yield_timer_, metrics_reporter_.get()); CheckIfBlockingStylesheetAdded(); if (IsStopped() || IsPaused()) return; PumpPendingSpeculations(); } void HTMLDocumentParser::RunScriptsForPausedTreeBuilder() { TRACE_EVENT1("blink", "HTMLDocumentParser::RunScriptsForPausedTreeBuilder", "parser", (void*)this); DCHECK(ScriptingContentIsAllowed(GetParserContentPolicy())); TextPosition script_start_position = TextPosition::BelowRangePosition(); Element* script_element = tree_builder_->TakeScriptToProcess(script_start_position); // We will not have a scriptRunner when parsing a DocumentFragment. if (script_runner_) script_runner_->ProcessScriptElement(script_element, script_start_position); CheckIfBlockingStylesheetAdded(); } HTMLDocumentParser::NextTokenStatus HTMLDocumentParser::CanTakeNextToken() { if (IsStopped()) return NoTokens; // If we're paused waiting for a script, we try to execute scripts before // continuing. 
auto ret = HaveTokens; if (tree_builder_->HasParserBlockingScript()) { RunScriptsForPausedTreeBuilder(); ret = HaveTokensAfterScript; } if (IsStopped() || IsPaused()) return NoTokens; return ret; } void HTMLDocumentParser::EnqueueTokenizedChunk( std::unique_ptr chunk) { DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); TRACE_EVENT0("blink", "HTMLDocumentParser::EnqueueTokenizedChunk"); DCHECK(chunk); DCHECK(GetDocument()); if (!IsParsing()) return; // ApplicationCache needs to be initialized before issuing preloads. We // suspend preload until HTMLHTMLElement is inserted and ApplicationCache is // initialized. Note: link rel preloads don't follow this policy per the spec. // These directives should initiate a fetch as fast as possible. if (!tried_loading_link_headers_ && GetDocument()->Loader()) { // Note that on commit, the loader dispatched preloads for all the non-media // links. GetDocument()->Loader()->DispatchLinkHeaderPreloads( base::OptionalOrNullptr(chunk->viewport), PreloadHelper::kOnlyLoadMedia); tried_loading_link_headers_ = true; if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) { // Link header preloads for prefetched signed exchanges won't be started // until StartPrefetchedLinkHeaderPreloads() is called. See the header // comment of PrefetchedSignedExchangeManager. GetDocument() ->Loader() ->GetPrefetchedSignedExchangeManager() ->StartPrefetchedLinkHeaderPreloads(); } } // Defer preloads if any of the chunks contains a csp tag. 
if (chunk->pending_csp_meta_token_index != TokenizedChunk::kNoPendingToken) { pending_csp_meta_token_ = &chunk->tokens.at(chunk->pending_csp_meta_token_index); } if (preloader_) { bool appcache_fetched = false; if (GetDocument()->Loader()) { appcache_fetched = (GetDocument()->Loader()->GetResponse().AppCacheID() != mojom::blink::kAppCacheNoCacheId); } bool appcache_initialized = GetDocument()->documentElement(); // Delay sending some requests if meta tag based CSP is present or // if AppCache was used to fetch the HTML but was not yet initialized for // this document. if (pending_csp_meta_token_ || ((!base::FeatureList::IsEnabled( blink::features::kVerifyHTMLFetchedFromAppCacheBeforeDelay) || appcache_fetched) && !appcache_initialized)) { PreloadRequestStream link_rel_preloads; for (auto& request : chunk->preloads) { // Link rel preloads don't need to wait for AppCache but they // should probably wait for CSP. if (!pending_csp_meta_token_ && request->IsLinkRelPreload()) link_rel_preloads.push_back(std::move(request)); else queued_preloads_.push_back(std::move(request)); } preloader_->TakeAndPreload(link_rel_preloads); } else { // We can safely assume that there are no queued preloads request after // the document element is available, as we empty the queue immediately // after the document element is created in documentElementAvailable(). 
DCHECK(queued_preloads_.IsEmpty()); preloader_->TakeAndPreload(chunk->preloads); } } speculations_.push_back(std::move(chunk)); if (!IsPaused() && !IsScheduledForUnpause()) parser_scheduler_->ScheduleForUnpause(); } void HTMLDocumentParser::DidReceiveEncodingDataFromBackgroundParser( const DocumentEncodingData& data) { DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); GetDocument()->SetEncodingData(data); } void HTMLDocumentParser::ValidateSpeculations( std::unique_ptr chunk) { DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); DCHECK(chunk); // TODO(kouhei): We should simplify codepath here by disallowing // ValidateSpeculations // while IsPaused, and last_chunk_before_pause_ can simply be // pushed to speculations_. if (IsPaused()) { // We're waiting on a network script or stylesheet, just save the chunk, // we'll get a second ValidateSpeculations call after the script or // stylesheet completes. This call should have been made immediately after // RunScriptsForPausedTreeBuilder in the script case which may have started // a network load and left us waiting. DCHECK(!last_chunk_before_pause_); last_chunk_before_pause_ = std::move(chunk); return; } DCHECK(!last_chunk_before_pause_); std::unique_ptr tokenizer = std::move(tokenizer_); std::unique_ptr token = std::move(token_); if (!tokenizer) { // There must not have been any changes to the HTMLTokenizer state on the // main thread, which means the speculation buffer is correct. return; } // Currently we're only smart enough to reuse the speculation buffer if the // tokenizer both starts and ends in the DataState. That state is simplest // because the HTMLToken is always in the Uninitialized state. We should // consider whether we can reuse the speculation buffer in other states, but // we'd likely need to do something more sophisticated with the HTMLToken. 
if (chunk->tokenizer_state == HTMLTokenizer::kDataState && tokenizer->GetState() == HTMLTokenizer::kDataState && input_.Current().IsEmpty() && chunk->tree_builder_state == HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get())) { DCHECK(token->IsUninitialized()); return; } DiscardSpeculationsAndResumeFrom(std::move(chunk), std::move(token), std::move(tokenizer)); } void HTMLDocumentParser::DiscardSpeculationsAndResumeFrom( std::unique_ptr last_chunk_before_script, std::unique_ptr token, std::unique_ptr tokenizer) { DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); // Clear back ref. background_parser_->ClearParser(); size_t discarded_token_count = 0; for (const auto& speculation : speculations_) { discarded_token_count += speculation->tokens.size(); } g_discarded_token_count_for_testing += discarded_token_count; speculations_.clear(); pending_csp_meta_token_ = nullptr; queued_preloads_.clear(); std::unique_ptr checkpoint = std::make_unique(); checkpoint->parser = this; checkpoint->token = std::move(token); checkpoint->tokenizer = std::move(tokenizer); checkpoint->tree_builder_state = HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get()); checkpoint->input_checkpoint = last_chunk_before_script->input_checkpoint; checkpoint->preload_scanner_checkpoint = last_chunk_before_script->preload_scanner_checkpoint; checkpoint->unparsed_input = input_.Current().ToString().IsolatedCopy(); // FIXME: This should be passed in instead of cleared. 
input_.Current().Clear(); DCHECK(checkpoint->unparsed_input.IsSafeToSendToAnotherThread()); loading_task_runner_->PostTask( FROM_HERE, WTF::Bind(&BackgroundHTMLParser::ResumeFrom, background_parser_, std::move(checkpoint))); } size_t HTMLDocumentParser::ProcessTokenizedChunkFromBackgroundParser( std::unique_ptr pop_chunk, bool* reached_end_of_file) { TRACE_EVENT_WITH_FLOW0( "blink,loading", "HTMLDocumentParser::processTokenizedChunkFromBackgroundParser", pop_chunk.get(), TRACE_EVENT_FLAG_FLOW_IN); base::AutoReset has_line_number(&is_parsing_at_line_number_, true); SECURITY_DCHECK(pump_speculations_session_nesting_level_ == 1); SECURITY_DCHECK(!InPumpSession()); DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); DCHECK(!IsParsingFragment()); DCHECK(!IsPaused()); DCHECK(!IsStopped()); DCHECK(CanParseAsynchronously()); DCHECK(!tokenizer_); DCHECK(!token_); DCHECK(!last_chunk_before_pause_); std::unique_ptr chunk(std::move(pop_chunk)); const CompactHTMLTokenStream& tokens = chunk->tokens; size_t element_token_count = 0; loading_task_runner_->PostTask( FROM_HERE, WTF::Bind(&BackgroundHTMLParser::StartedChunkWithCheckpoint, background_parser_, chunk->input_checkpoint)); for (const auto& token : tokens) { DCHECK(!IsWaitingForScripts()); if (!chunk->starting_script && (token.GetType() == HTMLToken::kStartTag || token.GetType() == HTMLToken::kEndTag)) element_token_count++; text_position_ = token.GetTextPosition(); ConstructTreeFromCompactHTMLToken(token); if (IsStopped()) break; // Preloads were queued if there was a csp token in a tokenized // chunk. if (pending_csp_meta_token_ && &token == pending_csp_meta_token_) { pending_csp_meta_token_ = nullptr; FetchQueuedPreloads(); } if (IsPaused()) { // The script or stylesheet should be the last token of this bunch. 
DCHECK_EQ(&token, &tokens.back()); if (IsWaitingForScripts()) RunScriptsForPausedTreeBuilder(); ValidateSpeculations(std::move(chunk)); break; } if (token.GetType() == HTMLToken::kEndOfFile) { // The EOF is assumed to be the last token of this bunch. DCHECK_EQ(&token, &tokens.back()); // There should never be any chunks after the EOF. DCHECK(speculations_.IsEmpty()); PrepareToStopParsing(); *reached_end_of_file = true; break; } DCHECK(!tokenizer_); DCHECK(!token_); } // Make sure all required pending text nodes are emitted before returning. // This leaves "script", "style" and "svg" nodes text nodes intact. if (!IsStopped()) tree_builder_->Flush(kFlushIfAtTextLimit); is_parsing_at_line_number_ = false; return element_token_count; } void HTMLDocumentParser::PumpPendingSpeculations() { // If this assert fails, you need to call ValidateSpeculations to make sure // tokenizer_ and token_ don't have state that invalidates speculations_. DCHECK(!tokenizer_); DCHECK(!token_); DCHECK(!last_chunk_before_pause_); DCHECK(!IsPaused()); DCHECK(!IsStopped()); DCHECK(!IsScheduledForUnpause()); DCHECK(!InPumpSession()); DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); // FIXME: Here should never be reached when there is a blocking script, // but it happens in unknown scenarios. See https://crbug.com/440901 if (IsWaitingForScripts()) { parser_scheduler_->ScheduleForUnpause(); return; } // Do not allow pumping speculations in nested event loops. 
if (pump_speculations_session_nesting_level_) { parser_scheduler_->ScheduleForUnpause(); return; } probe::ParseHTML probe(GetDocument(), this); SpeculationsPumpSession session(pump_speculations_session_nesting_level_); bool reached_end_of_file = false; while (!speculations_.IsEmpty()) { DCHECK(!IsScheduledForUnpause()); size_t element_token_count = ProcessTokenizedChunkFromBackgroundParser( speculations_.TakeFirst(), &reached_end_of_file); session.AddedElementTokens(element_token_count); // Always check IsParsing first as document_ may be null. Surprisingly, // IsScheduledForUnpause() may be set here as a result of // ProcessTokenizedChunkFromBackgroundParser running arbitrary javascript // which invokes nested event loops. (e.g. inspector breakpoints) CheckIfBlockingStylesheetAdded(); if (!IsParsing() || IsPaused() || IsScheduledForUnpause()) break; if (speculations_.IsEmpty() || parser_scheduler_->YieldIfNeeded( session, speculations_.front()->starting_script)) break; } if (metrics_reporter_) { metrics_reporter_->AddChunk(session.ElapsedTime(), session.ProcessedElementTokens()); if (reached_end_of_file) metrics_reporter_->ReportMetricsAtParseEnd(); } } void HTMLDocumentParser::ForcePlaintextForTextDocument() { if (CanParseAsynchronously()) { // This method is called before any data is appended, so we have to start // the background parser ourselves. if (!have_background_parser_) StartBackgroundParser(); // This task should be synchronous, because otherwise synchronous // tokenizing can happen before plaintext is forced. background_parser_->ForcePlaintextForTextDocument(); } else tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState); } bool HTMLDocumentParser::PumpTokenizer() { DCHECK(!GetDocument()->IsPrefetchOnly()); DCHECK(!IsStopped()); DCHECK(tokenizer_); DCHECK(token_); PumpSession session(pump_session_nesting_level_); // If we're in kForceSynchronousParsing, always run until all available input // is consumed. 
bool should_run_until_completion = task_runner_state_->ShouldComplete() ||
                                     task_runner_state_->IsSynchronous() ||
                                     pump_session_nesting_level_ > 1;
  TRACE_EVENT2("blink", "HTMLDocumentParser::PumpTokenizer", "should_complete",
               should_run_until_completion, "parser", (void*)this);

  // We tell the InspectorInstrumentation about every pump, even if we end up
  // pumping nothing.  It can filter out empty pumps itself.
  // FIXME: input_.Current().length() is only accurate if we end up parsing the
  // whole buffer in this pump.  We should pass how much we parsed as part of
  // DidWriteHTML instead of WillWriteHTML.
  probe::ParseHTML probe(GetDocument(), this);

  bool should_yield = false;
  // Number of tokens this pump may still consume before a yield is considered.
  int budget = max_tokenization_budget_;

  while (!should_yield) {
    const auto next_token_status = CanTakeNextToken();
    if (next_token_status == NoTokens) {
      // No tokens left to process in this pump, so break
      break;
    } else if (next_token_status == HaveTokensAfterScript &&
               task_runner_state_->HaveExitedHeader()) {
      // Just executed a parser-blocking script in the body (which is usually
      // very expensive), so expire the budget, yield, and permit paint if
      // needed.
      budget = 0;
      if (!should_run_until_completion) {
        should_yield = true;
        break;
      }
    }

    {
      RUNTIME_CALL_TIMER_SCOPE(
          V8PerIsolateData::MainThreadIsolate(),
          RuntimeCallStats::CounterId::kHTMLTokenizerNextToken);
      // The tokenizer could not produce a complete token: stop pumping.
      if (!tokenizer_->NextToken(input_.Current(), Token()))
        break;
      budget--;
    }
    ConstructTreeFromHTMLToken();
    if (!should_run_until_completion && !IsPaused()) {
      DCHECK_EQ(task_runner_state_->GetMode(), kAllowDeferredParsing);
      // Yield when the budget is used up or the scheduler has higher-priority
      // work, but never before the header has been exited.
      should_yield = budget <= 0;
      should_yield |= scheduler_->ShouldYieldForHighPriorityWork();
      should_yield &= task_runner_state_->HaveExitedHeader();
    } else {
      should_yield = false;
    }
    DCHECK(IsStopped() || Token().IsUninitialized());
  }

  if (IsStopped())
    return false;

  // There should only be PendingText left since the tree-builder always flushes
  // the task queue before returning. In case that ever changes, crash.
tree_builder_->Flush(kFlushAlways);
  CHECK(!IsStopped());

  if (IsPaused()) {
    DCHECK_EQ(tokenizer_->GetState(), HTMLTokenizer::kDataState);

    if (preloader_) {
      // Lazily create the main-document preload scanner and seed it with the
      // input that has not been tokenized yet.
      if (!preload_scanner_) {
        preload_scanner_ = CreatePreloadScanner(
            TokenPreloadScanner::ScannerType::kMainDocument);
        preload_scanner_->AppendToEnd(input_.Current());
      }
      ScanAndPreload(preload_scanner_.get());
    }
  }

  // should_run_until_completion implies that we should not yield
  CHECK(!should_run_until_completion || !should_yield);
  return should_yield;
}

// Posts a DeferredPumpTokenizerIfPossible task to loading_task_runner_ and
// marks the task-runner state as kScheduled. Does nothing if a pump is already
// scheduled.
void HTMLDocumentParser::SchedulePumpTokenizer() {
  TRACE_EVENT0("blink", "HTMLDocumentParser::SchedulePumpTokenizer");
  DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsStopped());
  DCHECK(!InPumpSession());
  DCHECK(!task_runner_state_->ShouldComplete());
  if (task_runner_state_->IsScheduled()) {
    // If the parser is already scheduled, there's no need to do anything.
    return;
  }
  loading_task_runner_->PostTask(
      FROM_HERE, WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                           WrapPersistent(this)));
  task_runner_state_->SetState(
      HTMLDocumentParserState::DeferredParserState::kScheduled);
}

// Makes sure a DeferredPumpTokenizerIfPossible task is posted (posting one
// only when none is scheduled yet) and sets the task-runner state to
// kScheduledWithEndIfDelayed.
void HTMLDocumentParser::ScheduleEndIfDelayed() {
  TRACE_EVENT0("blink", "HTMLDocumentParser::ScheduleEndIfDelayed");
  DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsStopped());
  DCHECK(!InPumpSession());
  DCHECK(!task_runner_state_->ShouldComplete());

  // Schedule a pump callback if needed.
  if (!task_runner_state_->IsScheduled()) {
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                  WrapPersistent(this)));
  }
  // If a pump is already scheduled, it's OK to just upgrade it to one
  // which calls EndIfDelayed afterwards.
task_runner_state_->SetState( HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed); } void HTMLDocumentParser::ConstructTreeFromHTMLToken() { DCHECK(!GetDocument()->IsPrefetchOnly()); AtomicHTMLToken atomic_token(Token()); // Check whether we've exited the header. if (!task_runner_state_->HaveExitedHeader()) { if (GetDocument()->body()) { task_runner_state_->SetExitedHeader(); } } // We clear the token_ in case ConstructTreeFromAtomicToken // synchronously re-enters the parser. We don't clear the token immedately // for kCharacter tokens because the AtomicHTMLToken avoids copying the // characters by keeping a pointer to the underlying buffer in the // HTMLToken. Fortunately, kCharacter tokens can't cause us to re-enter // the parser. // // FIXME: Stop clearing the token_ once we start running the parser off // the main thread or once we stop allowing synchronous JavaScript // execution from ParseAttribute. if (Token().GetType() != HTMLToken::kCharacter) Token().Clear(); tree_builder_->ConstructTree(&atomic_token); CheckIfBlockingStylesheetAdded(); // FIXME: ConstructTree may synchronously cause Document to be detached. if (!token_) return; if (!Token().IsUninitialized()) { DCHECK_EQ(Token().GetType(), HTMLToken::kCharacter); Token().Clear(); } } void HTMLDocumentParser::ConstructTreeFromCompactHTMLToken( const CompactHTMLToken& compact_token) { DCHECK(!GetDocument()->IsPrefetchOnly()); DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled()); AtomicHTMLToken token(compact_token); tree_builder_->ConstructTree(&token); CheckIfBlockingStylesheetAdded(); } bool HTMLDocumentParser::HasInsertionPoint() { // FIXME: The wasCreatedByScript() branch here might not be fully correct. Our // model of the EOF character differs slightly from the one in the spec // because our treatment is uniform between network-sourced and script-sourced // input streams whereas the spec treats them differently. 
return input_.HasInsertionPoint() ||
         (WasCreatedByScript() && !input_.HaveSeenEndOfFile());
}

// Inserts |source| at the input stream's current insertion point and pumps
// the tokenizer with a ShouldCompleteScope in effect. token_ and tokenizer_
// are created here if they do not exist yet. If the parser is paused after the
// pump, |source| is also fed to a dedicated insertion preload scanner.
// No-op when the parser is stopped.
void HTMLDocumentParser::insert(const String& source) {
  if (IsStopped())
    return;

  TRACE_EVENT2("blink", "HTMLDocumentParser::insert", "source_length",
               source.length(), "parser", (void*)this);

  if (!tokenizer_) {
    DCHECK(!InPumpSession());
    DCHECK(have_background_parser_ || WasCreatedByScript());
    // NOTE(review): the template arguments of the two make_unique calls below
    // appear to have been stripped from this copy of the file -- confirm
    // against upstream before building.
    token_ = std::make_unique();
    tokenizer_ = std::make_unique(options_);
  }

  // The inserted text is excluded from line-number accounting.
  SegmentedString excluded_line_number_source(source);
  excluded_line_number_source.SetExcludeLineNumbers();
  input_.InsertAtCurrentInsertionPoint(excluded_line_number_source);

  // Pump the tokenizer to build the document from the given insert point.
  // Should process everything available and not defer anything.
  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
  // Call EndIfDelayed manually at the end to maintain preload behaviour.
  PumpTokenizerIfPossible();

  if (IsPaused()) {
    // Check the document.write() output with a separate preload scanner as
    // the main scanner can't deal with insertions.
    if (!insertion_preload_scanner_) {
      insertion_preload_scanner_ =
          CreatePreloadScanner(TokenPreloadScanner::ScannerType::kInsertion);
    }
    insertion_preload_scanner_->AppendToEnd(source);
    if (preloader_) {
      ScanAndPreload(insertion_preload_scanner_.get());
    }
  }
  EndIfDelayed();
}

// Creates and initializes the background parser, handing it the parser
// options, a pointer to this parser and the decoder taken from this parser.
// Sets have_background_parser_.
void HTMLDocumentParser::StartBackgroundParser() {
  TRACE_EVENT0("blink,loading", "HTMLDocumentParser::StartBackgroundParser");
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsStopped());
  DCHECK(CanParseAsynchronously());
  DCHECK(!have_background_parser_);
  DCHECK(GetDocument());
  have_background_parser_ = true;

  // Make sure that the viewport is up-to-date, so that the correct viewport
  // dimensions will be fed to the background parser and preload scanner.
if (GetDocument()->Loader())
    GetDocument()->GetStyleEngine().UpdateViewport();

  // NOTE(review): template arguments appear to have been stripped from the
  // unique_ptr / make_unique uses in this function -- confirm against
  // upstream before building.
  std::unique_ptr config = std::make_unique();
  config->options = options_;
  config->parser = this;
  config->decoder = TakeDecoder();

  // The background parser is created on the main thread, but may otherwise
  // only be used from the parser thread.
  background_parser_ =
      BackgroundHTMLParser::Create(std::move(config), loading_task_runner_);
  // TODO(csharrison): This is a hack to initialize MediaValuesCached on the
  // correct thread. We should get rid of it.

  // TODO(domfarolino): Remove this once Priority Hints is no longer in Origin
  // Trial. This currently exists because the TokenPreloadScanner needs to know
  // the status of the Priority Hints Origin Trial, and has no way of figuring
  // this out on its own. See https://crbug.com/821464.
  bool priority_hints_origin_trial_enabled =
      RuntimeEnabledFeatures::PriorityHintsEnabled(
          GetDocument()->GetExecutionContext());

  background_parser_->Init(
      GetDocument()->Url(), std::make_unique(GetDocument()),
      MediaValuesCached::MediaValuesCachedData(*GetDocument()),
      priority_hints_origin_trial_enabled);
}

// Clears have_background_parser_ and stops the background parser with a
// direct (non-posted) call.
void HTMLDocumentParser::StopBackgroundParser() {
  DCHECK(CanParseAsynchronously());
  DCHECK(have_background_parser_);
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

  have_background_parser_ = false;

  // Make this sync, as lsan triggers on some unittests if the task runner is
  // used.
  background_parser_->Stop();
}

// Appends decoded text to the main-thread input stream (asserted to be the
// non-asynchronous path), feeds the preload scanner where applicable, and then
// either schedules a deferred tokenizer pump or pumps immediately. For
// prefetch-only documents the text is only preload-scanned, never parsed.
// No-op when the parser is stopped.
void HTMLDocumentParser::Append(const String& input_source) {
  TRACE_EVENT2("blink", "HTMLDocumentParser::append", "size",
               input_source.length(), "parser", (void*)this);

  if (IsStopped())
    return;

  // We should never reach this point if we're using a parser thread, as
  // appendBytes() will directly ship the data to the thread.
DCHECK(!CanParseAsynchronously()); const SegmentedString source(input_source); if (!preload_scanner_ && GetDocument()->Url().IsValid() && (!task_runner_state_->IsSynchronous() || GetDocument()->IsPrefetchOnly() || IsPaused())) { // If we're operating with synchronous, budgeted foreground HTML parsing // or using the background parser, need to create a preload scanner to // make sure that parser-blocking Javascript requests are dispatched in // plenty of time, which prevents unnecessary delays. // When parsing without a budget (e.g. for HTML fragment parsing), it's // additional overhead to scan the string unless the parser's already // paused whilst executing a script. preload_scanner_ = CreatePreloadScanner(TokenPreloadScanner::ScannerType::kMainDocument); } if (GetDocument()->IsPrefetchOnly()) { // Do not prefetch if there is an appcache. if (GetDocument()->Loader()->GetResponse().AppCacheID() != 0) return; preload_scanner_->AppendToEnd(source); if (preloader_) { // TODO(Richard.Townsend@arm.com): add test coverage of this branch. // The crash in crbug.com/1166786 indicates that text documents are being // speculatively prefetched. ScanAndPreload(preload_scanner_.get()); } // Return after the preload scanner, do not actually parse the document. return; } if (preload_scanner_ && preloader_) { preload_scanner_->AppendToEnd(source); if (task_runner_state_->GetMode() == kAllowDeferredParsing && (IsPaused() || !task_runner_state_->HaveSeenFirstByte())) { // Should scan and preload if the parser's paused waiting for a resource, // or if we're starting a document for the first time (we want to at least // prefetch anything that's in the section). ScanAndPreload(preload_scanner_.get()); } } input_.AppendToEnd(source); task_runner_state_->SetHaveSeenFirstByte(); if (InPumpSession()) { // We've gotten data off the network in a nested write. We don't want to // consume any more of the input stream now. Do not worry. We'll consume // this data in a less-nested write(). 
return;
  }

  // Schedule a tokenizer pump to process this new data.
  if (task_runner_state_->GetMode() ==
          ParserSynchronizationPolicy::kAllowDeferredParsing &&
      !task_runner_state_->ShouldComplete()) {
    SchedulePumpTokenizer();
  } else {
    PumpTokenizerIfPossible();
  }
}

// Completes parsing: stops the background parser if one exists, tells the
// tree builder that parsing is finished, drops the preloader, and calls
// DocumentParser::StopParsing().
void HTMLDocumentParser::end() {
  DCHECK(!IsDetached());
  DCHECK(!IsScheduledForUnpause());

  if (have_background_parser_)
    StopBackgroundParser();

  // Informs the rest of WebCore that parsing is really finished (and
  // deletes this).
  tree_builder_->Finished();

  // All preloads should be done.
  preloader_ = nullptr;

  DocumentParser::StopParsing();
}

// Runs the scripts that were waiting for parsing to finish and then calls
// end(). If the script runner reports that not all of them could be executed
// yet, returns without ending.
void HTMLDocumentParser::AttemptToRunDeferredScriptsAndEnd() {
  DCHECK(IsStopping());
  // FIXME: It may not be correct to disable this for the background parser.
  // That means hasInsertionPoint() may not be correct in some cases.
  DCHECK(!HasInsertionPoint() || have_background_parser_);
  if (script_runner_ && !script_runner_->ExecuteScriptsWaitingForParsing())
    return;
  end();
}

// Returns true while ending must be postponed: inside a pump session, while
// paused, while a script is executing, or while a pump task is scheduled.
bool HTMLDocumentParser::ShouldDelayEnd() const {
  return InPumpSession() || IsPaused() || IsExecutingScript() ||
         task_runner_state_->IsScheduled();
}

// Prepares to stop parsing right away unless ShouldDelayEnd() holds; in that
// case only records end_was_delayed_ so that EndIfDelayed() can finish later.
void HTMLDocumentParser::AttemptToEnd() {
  // finish() indicates we will not receive any more data. If we are waiting on
  // an external script to load, we can't finish parsing quite yet.
  TRACE_EVENT1("blink", "HTMLDocumentParser::AttemptToEnd", "parser",
               (void*)this);
  DCHECK(task_runner_state_->ShouldAttemptToEndOnEOF());
  AttemptToEndForbiddenScope should_not_attempt_to_end(task_runner_state_);
  // We should only be in this state once after calling Finish.
  // If there are pending scripts, future control flow should pass to
  // EndIfDelayed.
if (ShouldDelayEnd()) {
    end_was_delayed_ = true;
    return;
  }
  PrepareToStopParsing();
}

// Finishes a previously delayed end: if AttemptToEnd() recorded a delay and
// ShouldDelayEnd() no longer holds, clears the flag and prepares to stop
// parsing. Does nothing when the parser is detached.
void HTMLDocumentParser::EndIfDelayed() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::EndIfDelayed", "parser",
               (void*)this);
  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
  // If we've already been detached, don't bother ending.
  if (IsDetached())
    return;

  if (!end_was_delayed_ || ShouldDelayEnd())
    return;

  end_was_delayed_ = false;
  PrepareToStopParsing();
}

// Signals that no more data will arrive. With a background parser, closes the
// input stream (without marking EOF) and posts BackgroundHTMLParser::Finish.
// Otherwise marks end-of-file on the input stream and attempts to end, unless
// a pump is still scheduled for a document that is not prefetch-only.
void HTMLDocumentParser::Finish() {
  // FIXME: We should DCHECK(!parser_stopped_) here, since it does not make
  // sense to call any methods on DocumentParser once it's been stopped.
  // However, FrameLoader::Stop calls DocumentParser::Finish unconditionally.

  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
  Flush();
  if (IsDetached())
    return;

  // Empty documents never got an append() call, and thus have never started a
  // background parser. In those cases, we ignore CanParseAsynchronously() and
  // fall through to the synchronous case.
  if (have_background_parser_) {
    if (!input_.HaveSeenEndOfFile())
      input_.CloseWithoutMarkingEndOfFile();
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&BackgroundHTMLParser::Finish, background_parser_));
    return;
  }

  if (!tokenizer_) {
    DCHECK(!token_);
    // We're finishing before receiving any data. Rather than booting up the
    // background parser just to spin it down, we finish parsing synchronously.
    // NOTE(review): the template arguments of the two make_unique calls below
    // appear to have been stripped from this copy of the file -- confirm
    // against upstream before building.
    token_ = std::make_unique();
    tokenizer_ = std::make_unique(options_);
  }

  // We're not going to get any more data off the network, so we tell the input
  // stream we've reached the end of file. finish() can be called more than
  // once, if the first time does not call end().
  if (!input_.HaveSeenEndOfFile())
    input_.MarkEndOfFile();

  // If there's any deferred work remaining, signal that we
  // want to end the document once all work's complete.
task_runner_state_->SetAttemptToEndOnEOF(); if (task_runner_state_->IsScheduled() && !GetDocument()->IsPrefetchOnly()) { return; } AttemptToEnd(); } bool HTMLDocumentParser::IsExecutingScript() const { if (!script_runner_) return false; return script_runner_->IsExecutingScript(); } bool HTMLDocumentParser::IsParsingAtLineNumber() const { if (CanParseAsynchronously()) { return is_parsing_at_line_number_ && ScriptableDocumentParser::IsParsingAtLineNumber(); } return ScriptableDocumentParser::IsParsingAtLineNumber(); } OrdinalNumber HTMLDocumentParser::LineNumber() const { if (have_background_parser_) return text_position_.line_; return input_.Current().CurrentLine(); } TextPosition HTMLDocumentParser::GetTextPosition() const { if (have_background_parser_) return text_position_; const SegmentedString& current_string = input_.Current(); OrdinalNumber line = current_string.CurrentLine(); OrdinalNumber column = current_string.CurrentColumn(); return TextPosition(line, column); } bool HTMLDocumentParser::IsWaitingForScripts() const { // When the TreeBuilder encounters a tag, it returns to the // HTMLDocumentParser where the script is transfered from the treebuilder to // the script runner. The script runner will hold the script until its loaded // and run. During any of this time, we want to count ourselves as "waiting // for a script" and thus run the preload scanner, as well as delay completion // of parsing. bool tree_builder_has_blocking_script = tree_builder_->HasParserBlockingScript(); bool script_runner_has_blocking_script = script_runner_ && script_runner_->HasParserBlockingScript(); // Since the parser is paused while a script runner has a blocking script, it // should never be possible to end up with both objects holding a blocking // script. DCHECK( !(tree_builder_has_blocking_script && script_runner_has_blocking_script)); // If either object has a blocking script, the parser should be paused. 
return tree_builder_has_blocking_script ||
         script_runner_has_blocking_script ||
         reentry_permit_->ParserPauseFlag();
}

// Restarts parsing once the parser is no longer paused. Depending on the
// parsing mode this pumps pending speculations, pumps or schedules the
// tokenizer, or falls back to EndIfDelayed(). Returns early if the parser is
// stopped, or paused again after the blocking-stylesheet check.
void HTMLDocumentParser::ResumeParsingAfterPause() {
  // This function runs after a parser-blocking script has completed. There are
  // four possible cases:
  // 1) Parsing with kForceSynchronousParsing, where there is no background
  //    parser and a tokenizer_'s defined.
  // 2) Parsing with kAllowAsynchronousParsing, without a background parser. In
  //    this case, the document is usually being completed or parsing has
  //    otherwise stopped.
  // 3) Parsing with kAllowAsynchronousParsing with a background parser. In this
  //    case, need to add any pending speculations to the document.
  // 4) Parsing with kAllowDeferredParsing, with a tokenizer_.
  TRACE_EVENT1("blink", "HTMLDocumentParser::ResumeParsingAfterPause", "parser",
               (void*)this);
  DCHECK(!IsExecutingScript());
  DCHECK(!IsPaused());

  // This check may flag the parser as waiting for stylesheets, which is why
  // the paused state is tested again right below.
  CheckIfBlockingStylesheetAdded();
  if (IsStopped() || IsPaused())
    return;

  if (have_background_parser_) {
    // Case 3)
    // If we paused in the middle of processing a token chunk,
    // deal with that before starting to pump.
    if (last_chunk_before_pause_) {
      ValidateSpeculations(std::move(last_chunk_before_pause_));
      DCHECK(!last_chunk_before_pause_);
      PumpPendingSpeculations();
    } else if (!IsScheduledForUnpause()) {
      // Otherwise, start pumping if we're not already scheduled to unpause.
      PumpPendingSpeculations();
    }
    return;
  }

  insertion_preload_scanner_.reset();
  if (tokenizer_) {
    // Case 1) or 4): kForceSynchronousParsing, kAllowDeferredParsing.
    // kForceSynchronousParsing must pump the tokenizer synchronously,
    // otherwise it can be deferred.
if (task_runner_state_->GetMode() == kAllowDeferredParsing && !task_runner_state_->ShouldComplete() && !InPumpSession()) { SchedulePumpTokenizer(); } else { ShouldCompleteScope should_complete(task_runner_state_); PumpTokenizerIfPossible(); } } else { // Case 2): kAllowAsynchronousParsing, no background parser available // (indicating possible Document shutdown). EndIfDelayed(); } } void HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan() { TRACE_EVENT1( "blink", "HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan", "parser", (void*)this); DCHECK(preload_scanner_); DCHECK(preloader_); preload_scanner_->AppendToEnd(input_.Current()); ScanAndPreload(preload_scanner_.get()); } void HTMLDocumentParser::NotifyScriptLoaded() { TRACE_EVENT1("blink", "HTMLDocumentParser::NotifyScriptLoaded", "parser", (void*)this); DCHECK(script_runner_); DCHECK(!IsExecutingScript()); scheduler::CooperativeSchedulingManager::AllowedStackScope allowed_stack_scope(scheduler::CooperativeSchedulingManager::Instance()); if (IsStopped()) { return; } if (IsStopping()) { AttemptToRunDeferredScriptsAndEnd(); return; } script_runner_->ExecuteScriptsWaitingForLoad(); if (!IsPaused()) ResumeParsingAfterPause(); } void HTMLDocumentParser::ExecuteScriptsWaitingForResources() { TRACE_EVENT0("blink", "HTMLDocumentParser::ExecuteScriptsWaitingForResources"); if (IsStopped()) return; DCHECK(GetDocument()->IsScriptExecutionReady()); if (is_waiting_for_stylesheets_) is_waiting_for_stylesheets_ = false; // Document only calls this when the Document owns the DocumentParser so this // will not be called in the DocumentFragment case. DCHECK(script_runner_); script_runner_->ExecuteScriptsWaitingForResources(); if (!IsPaused()) ResumeParsingAfterPause(); } void HTMLDocumentParser::DidAddPendingParserBlockingStylesheet() { // In-body CSS doesn't block painting. The parser needs to pause so that // the DOM doesn't include any elements that may depend on the CSS for style. 
// The stylesheet can be added and removed during the parsing of a single // token so don't actually set the bit to block parsing here, just track // the state of the added sheet in case it does persist beyond a single // token. added_pending_parser_blocking_stylesheet_ = true; } void HTMLDocumentParser::DidLoadAllPendingParserBlockingStylesheets() { // Just toggle the stylesheet flag here (mostly for synchronous sheets). // The document will also call into executeScriptsWaitingForResources // which is when the parser will re-start, otherwise it will attempt to // resume twice which could cause state machine issues. added_pending_parser_blocking_stylesheet_ = false; } void HTMLDocumentParser::CheckIfBlockingStylesheetAdded() { if (added_pending_parser_blocking_stylesheet_) { added_pending_parser_blocking_stylesheet_ = false; is_waiting_for_stylesheets_ = true; } } void HTMLDocumentParser::ParseDocumentFragment( const String& source, DocumentFragment* fragment, Element* context_element, ParserContentPolicy parser_content_policy) { auto* parser = MakeGarbageCollected( fragment, context_element, parser_content_policy); parser->Append(source); parser->Finish(); // Allows ~DocumentParser to assert it was detached before destruction. 
parser->Detach();
}

// Receives |length| raw bytes at |data|. When asynchronous parsing is
// possible, the bytes are copied into a freshly allocated buffer and posted to
// the background parser (which is started on first use); otherwise they are
// passed on to DecodedDataDocumentParser::AppendBytes. No-op for empty input
// or a stopped parser.
void HTMLDocumentParser::AppendBytes(const char* data, size_t length) {
  TRACE_EVENT2("blink", "HTMLDocumentParser::appendBytes", "size",
               (unsigned)length, "parser", (void*)this);

  DCHECK(Thread::MainThread()->IsCurrentThread());

  if (!length || IsStopped())
    return;

  if (CanParseAsynchronously()) {
    if (!have_background_parser_)
      StartBackgroundParser();

    // The posted task takes ownership of its own copy of the bytes.
    // NOTE(review): the template arguments on the next statement appear to
    // have been stripped from this copy of the file -- confirm against
    // upstream before building.
    std::unique_ptr> buffer =
        std::make_unique>(length);
    memcpy(buffer->data(), data, length);

    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&BackgroundHTMLParser::AppendRawBytesFromMainThread,
                  background_parser_, std::move(buffer)));
    return;
  }

  DecodedDataDocumentParser::AppendBytes(data, length);
}

// Flushes pending decoder output: through the background parser when one is
// running, otherwise through DecodedDataDocumentParser::Flush(). Does nothing
// when detached or when no decoder has been set.
void HTMLDocumentParser::Flush() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::Flush", "parser", (void*)this);
  // If we've got no decoder, we never received any data.
  if (IsDetached() || NeedsDecoder())
    return;

  if (CanParseAsynchronously()) {
    // In some cases, flush() is called without any invocation of appendBytes.
    // Fallback to synchronous parsing in that case.
if (!have_background_parser_) {
      // From here on this parser no longer parses asynchronously.
      can_parse_asynchronously_ = false;
      // NOTE(review): the template arguments of the two make_unique calls
      // below appear to have been stripped from this copy of the file --
      // confirm against upstream before building.
      token_ = std::make_unique();
      tokenizer_ = std::make_unique(options_);
      DecodedDataDocumentParser::Flush();
      return;
    }

    loading_task_runner_->PostTask(
        FROM_HERE, WTF::Bind(&BackgroundHTMLParser::Flush, background_parser_));
  } else {
    DecodedDataDocumentParser::Flush();
  }
}

// Installs |decoder| on this parser and, when a background parser exists,
// immediately takes it back and posts it to the background parser.
// NOTE(review): the template argument of the unique_ptr parameter appears to
// have been stripped from this copy of the file -- confirm against upstream.
void HTMLDocumentParser::SetDecoder(
    std::unique_ptr decoder) {
  DCHECK(decoder);
  DecodedDataDocumentParser::SetDecoder(std::move(decoder));

  if (have_background_parser_) {
    loading_task_runner_->PostTask(
        FROM_HERE, WTF::Bind(&BackgroundHTMLParser::SetDecoder,
                             background_parser_, TakeDecoder()));
  }
}

// Called once the document element exists (asserted below). Reports
// kLoadingBehaviorAmpDocumentLoaded to the document loader, if there is one,
// when the document element carries one of the attributes U+26A1, "amp" or
// "i-amphtml-layout"; then fetches queued preloads if a preloader exists.
void HTMLDocumentParser::DocumentElementAvailable() {
  TRACE_EVENT0("blink,loading", "HTMLDocumentParser::DocumentElementAvailable");
  Document* document = GetDocument();
  DCHECK(document);
  DCHECK(document->documentElement());
  Element* documentElement = GetDocument()->documentElement();
  if (documentElement->hasAttribute(u"\u26A1") ||
      documentElement->hasAttribute("amp") ||
      documentElement->hasAttribute("i-amphtml-layout")) {
    // The DocumentLoader fetches a main resource and handles the result.
    // But it may not be available if JavaScript appends HTML to the page later
    // in the page's lifetime. This can happen both from in-page JavaScript and
    // from extensions. See example callstacks linked from crbug.com/931330.
if (document->Loader()) {
      document->Loader()->DidObserveLoadingBehavior(
          kLoadingBehaviorAmpDocumentLoaded);
    }
  }
  if (preloader_)
    FetchQueuedPreloads();
}

// Builds a preload scanner of the requested |scanner_type| from the parser
// options and the document's URL and cached media values.
// NOTE(review): the template arguments of the unique_ptr return type and the
// make_unique calls appear to have been stripped from this copy of the file --
// confirm against upstream before building.
std::unique_ptr HTMLDocumentParser::CreatePreloadScanner(
    TokenPreloadScanner::ScannerType scanner_type) {
  return std::make_unique(
      options_, GetDocument()->Url(),
      std::make_unique(GetDocument()),
      MediaValuesCached::MediaValuesCachedData(*GetDocument()), scanner_type);
}

// Runs |scanner| and queues the preload requests it returns, then calls
// FetchQueuedPreloads(). In kAllowDeferredParsing mode with a document loader
// it also updates the viewport when the scan produced a viewport description
// and dispatches link-header preloads while task_runner_state_ reports they
// still need dispatching. Records whether a CSP meta tag was seen.
void HTMLDocumentParser::ScanAndPreload(HTMLPreloadScanner* scanner) {
  TRACE_EVENT0("blink", "HTMLDocumentParser::ScanAndPreload");
  DCHECK(preloader_);
  bool seen_csp_meta_tag = false;
  // NOTE(review): the template argument of base::Optional appears to have
  // been stripped from this copy of the file -- confirm against upstream.
  base::Optional viewport_description;
  PreloadRequestStream requests =
      scanner->Scan(GetDocument()->ValidBaseElementURL(), &viewport_description,
                    seen_csp_meta_tag);
  // Make sure that the viewport is up-to-date, so that the correct viewport
  // dimensions will be fed to the background parser and preload scanner.
  if (GetDocument()->Loader() &&
      task_runner_state_->GetMode() == kAllowDeferredParsing) {
    if (viewport_description.has_value()) {
      GetDocument()->GetStyleEngine().UpdateViewport();
    }
    if (task_runner_state_->NeedsLinkHeaderPreloadsDispatch()) {
      if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
        TRACE_EVENT0("blink",
                     "HTMLDocumentParser::DispatchSignedExchangeManager");
        // Link header preloads for prefetched signed exchanges won't be started
        // until StartPrefetchedLinkHeaderPreloads() is called. See the header
        // comment of PrefetchedSignedExchangeManager.
GetDocument()
            ->Loader()
            ->GetPrefetchedSignedExchangeManager()
            ->StartPrefetchedLinkHeaderPreloads();
      } else {
        TRACE_EVENT0("blink", "HTMLDocumentParser::DispatchLinkHeaderPreloads");
        GetDocument()->Loader()->DispatchLinkHeaderPreloads(
            base::OptionalOrNullptr(viewport_description),
            PreloadHelper::kOnlyLoadMedia);
      }
      // Mark the dispatch as done on the task-runner state.
      task_runner_state_->DispatchedLinkHeaderPreloads();
    }
  }

  task_runner_state_->SetSeenCSPMetaTag(seen_csp_meta_tag);
  // Move the scanned requests onto the parser's queue before fetching.
  for (auto& request : requests) {
    queued_preloads_.push_back(std::move(request));
  }
  FetchQueuedPreloads();
}

// Hands queued_preloads_ to the preloader, if the queue is non-empty. When
// asynchronous parsing is possible, nothing is fetched while
// pending_csp_meta_token_ is set or while the document has no document
// element yet.
void HTMLDocumentParser::FetchQueuedPreloads() {
  DCHECK(preloader_);
  TRACE_EVENT0("blink", "HTMLDocumentParser::FetchQueuedPreloads");

  if (CanParseAsynchronously()) {
    if (pending_csp_meta_token_ || !GetDocument()->documentElement())
      return;
  }

  if (!queued_preloads_.IsEmpty())
    preloader_->TakeAndPreload(queued_preloads_);
}

}  // namespace blink