/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "third_party/blink/renderer/core/html/parser/html_document_parser.h"

#include <memory>
#include <utility>

#include "base/auto_reset.h"
#include "base/numerics/safe_conversions.h"
#include "third_party/blink/public/common/features.h"
#include "third_party/blink/public/common/loader/loading_behavior_flag.h"
#include "third_party/blink/public/mojom/appcache/appcache.mojom-blink.h"
#include "third_party/blink/public/platform/platform.h"
#include "third_party/blink/public/platform/task_type.h"
#include "third_party/blink/renderer/core/css/media_values_cached.h"
#include "third_party/blink/renderer/core/css/style_engine.h"
#include "third_party/blink/renderer/core/dom/document_fragment.h"
#include "third_party/blink/renderer/core/dom/element.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/core/html/html_document.h"
#include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
#include "third_party/blink/renderer/core/html/parser/background_html_parser.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_metrics.h"
#include "third_party/blink/renderer/core/html/parser/html_parser_scheduler.h"
#include "third_party/blink/renderer/core/html/parser/html_resource_preloader.h"
#include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
#include "third_party/blink/renderer/core/html/parser/pump_session.h"
#include "third_party/blink/renderer/core/html_names.h"
#include "third_party/blink/renderer/core/inspector/inspector_trace_events.h"
#include "third_party/blink/renderer/core/loader/document_loader.h"
#include "third_party/blink/renderer/core/loader/prefetched_signed_exchange_manager.h"
#include "third_party/blink/renderer/core/loader/preload_helper.h"
#include "third_party/blink/renderer/core/probe/core_probes.h"
#include "third_party/blink/renderer/core/script/html_parser_script_runner.h"
#include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
#include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
#include "third_party/blink/renderer/platform/heap/handle.h"
#include "third_party/blink/renderer/platform/heap/heap.h"
#include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
#include "third_party/blink/renderer/platform/loader/fetch/resource_fetcher.h"
#include "third_party/blink/renderer/platform/runtime_enabled_features.h"
#include "third_party/blink/renderer/platform/scheduler/public/cooperative_scheduling_manager.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread_scheduler.h"
#include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
#include "third_party/blink/renderer/platform/wtf/shared_buffer.h"

namespace blink {

// Running total of speculative tokens that have been thrown away. It is
// incremented by HTMLDocumentParser::DiscardSpeculationsAndResumeFrom() and is
// only observable through the two test accessors below.
static size_t g_discarded_token_count_for_testing{0};

// Test hook: zeroes the discarded-token counter.
void ResetDiscardedTokenCountForTesting() {
  g_discarded_token_count_for_testing = 0u;
}

// Test hook: returns how many tokens have been discarded since the counter was
// last reset.
size_t GetDiscardedTokenCountForTesting() {
  const size_t discarded_so_far = g_discarded_token_count_for_testing;
  return discarded_so_far;
}

// This sets the (default) maximum number of tokens which the foreground HTML
// parser should try to process in one go. Lower values generally mean faster
// first paints, larger values delay first paint, but make sure it's closer to
// the final page. This is the default value to use, if no Finch-provided
// value exists (see GetMaxTokenizationBudget(), which passes this constant as
// the fallback for the "MaxTokenizationBudget" field trial parameter).
constexpr int kDefaultMaxTokenizationBudget = 250;

// Forward declarations of the stack-allocated scope helpers that are defined
// after HTMLDocumentParserState. They must be visible here so that the state
// class can declare them as friends.
class EndIfDelayedForbiddenScope;
class ShouldCompleteScope;
class AttemptToEndForbiddenScope;

// Holds the bookkeeping that synchronous foreground HTML parsing needs across
// tokenizer pumps (for instance, when HTMLDocumentParser::PumpTokenizer yields,
// this object remembers what has to happen once the pump completes).
//
// The counters in the private section are only adjusted through the friend
// scope classes (EndIfDelayedForbiddenScope, ShouldCompleteScope and
// AttemptToEndForbiddenScope).
class HTMLDocumentParserState
    : public GarbageCollected<HTMLDocumentParserState> {
  friend EndIfDelayedForbiddenScope;
  friend ShouldCompleteScope;
  friend AttemptToEndForbiddenScope;

 public:
  // Whether tokenization work (optionally followed by EndIfDelayed) is still
  // outstanding. The numeric ordering is relied upon by IsScheduled().
  enum class DeferredParserState {
    // A tokenizer pump has either completed or has not been scheduled.
    kNotScheduled = 0,
    // A tokenizer pump is scheduled and has not completed yet.
    kScheduled = 1,
    // A tokenizer pump, followed by EndIfDelayed, is scheduled.
    kScheduledWithEndIfDelayed = 2
  };

  // Tracks handling of a <meta> Content-Security-Policy token.
  enum class MetaCSPTokenState {
    // A meta CSP token was seen in an upcoming HTML chunk: preloads must be
    // deferred until the token has been added to the document and the policy
    // applied.
    kSeen = 0,
    // There is no meta CSP token in the upcoming chunk.
    kNotSeen = 1,
    // The CSP token has been added to the document; preloads may be fetched.
    kProcessed = 2,
    // The header section has been exited, so it is too late to apply a
    // Content-Security policy.
    kUnenforceable = 3,
  };

  // |mode| is the synchronization policy the owning parser was created with.
  // Every other field starts from the default given at its declaration.
  explicit HTMLDocumentParserState(ParserSynchronizationPolicy mode)
      : mode_(mode) {}

  // Nothing to trace: this class holds no garbage-collected references.
  void Trace(Visitor*) const {}

  // Updates the deferred-pump state. Scheduling a plain pump while the parser
  // is required to complete is a programming error.
  void SetState(DeferredParserState state) {
    DCHECK(!(state == DeferredParserState::kScheduled && ShouldComplete()));
    state_ = state;
  }
  DeferredParserState GetState() const { return state_; }

  // True for both kScheduled and kScheduledWithEndIfDelayed.
  bool IsScheduled() const { return state_ >= DeferredParserState::kScheduled; }

  // Human-readable name of the current deferred state (used for tracing).
  const char* GetStateAsString() const {
    switch (state_) {
      case DeferredParserState::kNotScheduled:
        return "not_scheduled";
      case DeferredParserState::kScheduled:
        return "scheduled";
      case DeferredParserState::kScheduledWithEndIfDelayed:
        return "scheduled_with_end_if_delayed";
    }
  }

  // Link-header preload dispatch is pending until DispatchedLinkHeaderPreloads
  // is called.
  bool NeedsLinkHeaderPreloadsDispatch() const {
    return needs_link_header_dispatch_;
  }
  void DispatchedLinkHeaderPreloads() { needs_link_header_dispatch_ = false; }

  bool HaveSeenFirstByte() const { return have_seen_first_byte_; }
  void SetHaveSeenFirstByte() { have_seen_first_byte_ = true; }

  // Records whether Document::Finish was called while parsing asynchronously.
  // When this returns true the parser should close once there is no more
  // input.
  bool ShouldAttemptToEndOnEOF() const {
    return should_attempt_to_end_on_eof_ > 0;
  }
  // Must only be called from ::Finish, and ::Finish must only run once.
  void SetAttemptToEndOnEOF() {
    ++should_attempt_to_end_on_eof_;
    DCHECK(should_attempt_to_end_on_eof_ < 2);
  }

  // EndIfDelayed is permitted only while no EndIfDelayedForbiddenScope is
  // alive.
  bool ShouldEndIfDelayed() const { return !end_if_delayed_forbidden_; }

  // The parser must run to completion either because a ShouldCompleteScope is
  // active or because deferred parsing is not the selected mode.
  bool ShouldComplete() const {
    if (should_complete_ != 0)
      return true;
    return mode_ != kAllowDeferredParsing;
  }
  bool IsSynchronous() const {
    return GetMode() == ParserSynchronizationPolicy::kForceSynchronousParsing;
  }
  ParserSynchronizationPolicy GetMode() const { return mode_; }

  // Records whether the upcoming chunk contains a meta CSP token. Has no
  // effect once the header has been exited (state kUnenforceable).
  void SetSeenCSPMetaTag(const bool seen) {
    if (HaveExitedHeader())
      return;
    meta_csp_state_ =
        seen ? MetaCSPTokenState::kSeen : MetaCSPTokenState::kNotSeen;
  }

  // After this call a meta CSP token can no longer be enforced.
  void SetExitedHeader() {
    meta_csp_state_ = MetaCSPTokenState::kUnenforceable;
  }
  bool HaveExitedHeader() const {
    return meta_csp_state_ == MetaCSPTokenState::kUnenforceable;
  }

 private:
  // Counter manipulation for EndIfDelayedForbiddenScope.
  void EnterEndIfDelayedForbidden() { ++end_if_delayed_forbidden_; }
  void ExitEndIfDelayedForbidden() {
    --end_if_delayed_forbidden_;
    DCHECK_GE(end_if_delayed_forbidden_, 0);
  }

  // Used by AttemptToEndForbiddenScope: clears the end-on-EOF request, which
  // must currently be set.
  void EnterAttemptToEndForbidden() {
    DCHECK(should_attempt_to_end_on_eof_ > 0);
    should_attempt_to_end_on_eof_ = 0;
  }

  // Counter manipulation for ShouldCompleteScope.
  void EnterShouldComplete() { ++should_complete_; }
  void ExitShouldComplete() {
    --should_complete_;
    DCHECK_GE(should_complete_, 0);
  }

  DeferredParserState state_ = DeferredParserState::kNotScheduled;
  MetaCSPTokenState meta_csp_state_ = MetaCSPTokenState::kNotSeen;
  ParserSynchronizationPolicy mode_;
  // Nesting depth of live EndIfDelayedForbiddenScope objects.
  int end_if_delayed_forbidden_ = 0;
  // Nesting depth of live ShouldCompleteScope objects.
  int should_complete_ = 0;
  // Set to non-zero if Document::Finish has been called and we're operating
  // asynchronously.
  int should_attempt_to_end_on_eof_ = 0;
  bool needs_link_header_dispatch_ = true;
  bool have_seen_first_byte_ = false;
};

class EndIfDelayedForbiddenScope {
  STACK_ALLOCATED();

 public:
  explicit EndIfDelayedForbiddenScope(HTMLDocumentParserState* state)
      : state_(state) {
    state_->EnterEndIfDelayedForbidden();
  }
  ~EndIfDelayedForbiddenScope() { state_->ExitEndIfDelayedForbidden(); }

 private:
  HTMLDocumentParserState* state_;
};

class AttemptToEndForbiddenScope {
  STACK_ALLOCATED();

 public:
  explicit AttemptToEndForbiddenScope(HTMLDocumentParserState* state)
      : state_(state) {
    state_->EnterAttemptToEndForbidden();
  }

 private:
  HTMLDocumentParserState* state_;
};

class ShouldCompleteScope {
  STACK_ALLOCATED();

 public:
  explicit ShouldCompleteScope(HTMLDocumentParserState* state) : state_(state) {
    state_->EnterShouldComplete();
  }
  ~ShouldCompleteScope() { state_->ExitShouldComplete(); }

 private:
  HTMLDocumentParserState* state_;
};

// Picks the tokenizer's initial state for fragment parsing, based on the tag
// of the context element. This is a direct transcription of step 4 from:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
//
// |context_element| may be null, in which case the data state is returned.
// When |report_errors| is false, the raw-text and script-data cases fall back
// to the PLAINTEXT state. |options.scripting_flag| decides whether <noscript>
// is treated as a raw-text element.
static HTMLTokenizer::State TokenizerStateForContextElement(
    Element* context_element,
    bool report_errors,
    const HTMLParserOptions& options) {
  // Without a context element there is no tag to specialise on.
  if (!context_element)
    return HTMLTokenizer::kDataState;

  const QualifiedName& tag = context_element->TagQName();

  // <title> and <textarea> contain RCDATA.
  const bool is_rcdata_element = tag.Matches(html_names::kTitleTag) ||
                                 tag.Matches(html_names::kTextareaTag);
  if (is_rcdata_element)
    return HTMLTokenizer::kRCDATAState;

  // Elements whose content is raw text. <noscript> only counts when scripting
  // is enabled.
  const bool is_raw_text_element =
      tag.Matches(html_names::kStyleTag) ||
      tag.Matches(html_names::kXmpTag) ||
      tag.Matches(html_names::kIFrameTag) ||
      tag.Matches(html_names::kNoembedTag) ||
      (tag.Matches(html_names::kNoscriptTag) && options.scripting_flag) ||
      tag.Matches(html_names::kNoframesTag);
  if (is_raw_text_element) {
    if (report_errors)
      return HTMLTokenizer::kRAWTEXTState;
    return HTMLTokenizer::kPLAINTEXTState;
  }

  if (tag.Matches(html_names::kScriptTag)) {
    if (report_errors)
      return HTMLTokenizer::kScriptDataState;
    return HTMLTokenizer::kPLAINTEXTState;
  }

  // <plaintext> is the only remaining special case; everything else uses the
  // ordinary data state.
  if (tag.Matches(html_names::kPlaintextTag))
    return HTMLTokenizer::kPLAINTEXTState;
  return HTMLTokenizer::kDataState;
}

// Measures the gap between consecutive blocks of parsing. Create one at the
// start of a parsing block, passing the address of a timer slot that outlives
// this object. Nothing is measured when |metrics_reporter| is null.
class ScopedYieldTimer {
 public:
  // If metrics are being reported and the slot already holds a running timer
  // (i.e. a previous block ended), reports the elapsed time as a yield
  // interval and empties the slot.
  ScopedYieldTimer(std::unique_ptr<base::ElapsedTimer>* timer,
                   HTMLParserMetrics* metrics_reporter)
      : timer_(timer), reporting_metrics_(metrics_reporter != nullptr) {
    const bool have_previous_timer = reporting_metrics_ && *timer_;
    if (have_previous_timer) {
      metrics_reporter->AddYieldInterval((*timer_)->Elapsed());
      timer_->reset();
    }
  }

  // On leaving the parsing block, starts a fresh timer in the slot so that the
  // time until the next block begins can be measured.
  ~ScopedYieldTimer() {
    if (!reporting_metrics_)
      return;
    *timer_ = std::make_unique<base::ElapsedTimer>();
  }

 private:
  // Caller-owned slot holding the timer that spans the yield.
  std::unique_ptr<base::ElapsedTimer>* timer_;
  // True when a metrics reporter was supplied at construction.
  bool reporting_metrics_;
};

// Constructs a parser for a whole HTML document. Delegates the common set-up to
// the (Document&, content policy, ...) constructor with scripting content
// allowed, then creates the script runner and the tree builder.
HTMLDocumentParser::HTMLDocumentParser(HTMLDocument& document,
                                       ParserSynchronizationPolicy sync_policy,
                                       ParserPrefetchPolicy prefetch_policy)
    : HTMLDocumentParser(document,
                         kAllowScriptingContent,
                         sync_policy,
                         prefetch_policy) {
  // Only this constructor creates a script runner; the fragment constructor
  // leaves script_runner_ unset.
  script_runner_ =
      HTMLParserScriptRunner::Create(ReentryPermit(), &document, this);

  // Declarative shadow DOM is allowed for the document parser unless it has
  // been explicitly denied.
  const auto shadow_root_policy = document.GetDeclarativeShadowRootAllowState();
  const bool include_shadow_roots =
      shadow_root_policy != Document::DeclarativeShadowRootAllowState::kDeny;

  tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
      this, document, kAllowScriptingContent, options_, include_shadow_roots);
}

// Constructs a parser for a DocumentFragment. Fragment parsing is always
// synchronous and has no script runner. |context_element| (may be null)
// selects the tokenizer's starting state.
HTMLDocumentParser::HTMLDocumentParser(
    DocumentFragment* fragment,
    Element* context_element,
    ParserContentPolicy parser_content_policy,
    ParserPrefetchPolicy parser_prefetch_policy)
    : HTMLDocumentParser(fragment->GetDocument(),
                         parser_content_policy,
                         kForceSynchronousParsing,
                         parser_prefetch_policy) {
  // Declarative shadow DOM is allowed for the fragment parser only when it has
  // been explicitly enabled on the owning document.
  const auto shadow_root_policy =
      fragment->GetDocument().GetDeclarativeShadowRootAllowState();
  const bool include_shadow_roots =
      shadow_root_policy == Document::DeclarativeShadowRootAllowState::kAllow;

  // No script_runner_ in fragment parser.
  tree_builder_ = MakeGarbageCollected<HTMLTreeBuilder>(
      this, fragment, context_element, parser_content_policy, options_,
      include_shadow_roots);

  // For now document fragment parsing never reports errors.
  tokenizer_->SetState(TokenizerStateForContextElement(
      context_element, /*report_errors=*/false, options_));
}

namespace {
// Returns the token budget handed to each parser instance. The value is looked
// up once, from the "MaxTokenizationBudget" parameter of the
// kForceSynchronousHTMLParsing feature, and cached for the life of the process;
// kDefaultMaxTokenizationBudget is passed as the fallback value.
int GetMaxTokenizationBudget() {
  static const int cached_budget = base::GetFieldTrialParamByFeatureAsInt(
      features::kForceSynchronousHTMLParsing, "MaxTokenizationBudget",
      kDefaultMaxTokenizationBudget);
  return cached_budget;
}
}  // namespace

// Common constructor that the public constructors delegate to.
//
// |sync_policy| decides which helpers exist:
//  - the foreground token/tokenizer are created unless asynchronous parsing is
//    allowed;
//  - a loading task runner is obtained unless parsing is forced synchronous;
//  - a parser scheduler is created only for asynchronous parsing;
//  - the thread scheduler is captured only for deferred parsing.
// A resource preloader is created only when |prefetch_policy| allows it and
// the document qualifies (see the comment near the end of the body).
HTMLDocumentParser::HTMLDocumentParser(Document& document,
                                       ParserContentPolicy content_policy,
                                       ParserSynchronizationPolicy sync_policy,
                                       ParserPrefetchPolicy prefetch_policy)
    : ScriptableDocumentParser(document, content_policy),
      options_(&document),
      reentry_permit_(HTMLParserReentryPermit::Create()),
      token_(sync_policy != kAllowAsynchronousParsing
                 ? std::make_unique<HTMLToken>()
                 : nullptr),
      tokenizer_(sync_policy != kAllowAsynchronousParsing
                     ? std::make_unique<HTMLTokenizer>(options_)
                     : nullptr),
      loading_task_runner_(sync_policy == kForceSynchronousParsing
                               ? nullptr
                               : document.GetTaskRunner(TaskType::kNetworking)),
      parser_scheduler_(sync_policy == kAllowAsynchronousParsing
                            ? MakeGarbageCollected<HTMLParserScheduler>(
                                  this,
                                  loading_task_runner_.get())
                            : nullptr),
      task_runner_state_(
          MakeGarbageCollected<HTMLDocumentParserState>(sync_policy)),
      pending_csp_meta_token_(nullptr),
      can_parse_asynchronously_(sync_policy == kAllowAsynchronousParsing),
      end_was_delayed_(false),
      have_background_parser_(false),
      pump_session_nesting_level_(0),
      pump_speculations_session_nesting_level_(0),
      is_parsing_at_line_number_(false),
      tried_loading_link_headers_(false),
      added_pending_parser_blocking_stylesheet_(false),
      is_waiting_for_stylesheets_(false),
      scheduler_(sync_policy == kAllowDeferredParsing
                     ? Thread::Current()->Scheduler()
                     : nullptr) {
  // A parser that cannot parse asynchronously must own a token and tokenizer.
  DCHECK(CanParseAsynchronously() || (token_ && tokenizer_));
  // Asynchronous parsing is not allowed in prefetch mode.
  DCHECK(!document.IsPrefetchOnly() || !CanParseAsynchronously());

  // It is permissible to request the background HTML parser whilst also using
  // --enable-blink-features=ForceSynchronousHTMLParsing, but it's usually
  // unintentional. To help flush out these cases, trigger a DCHECK.
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled() ||
         !CanParseAsynchronously());

  // Metrics are reported for async document parsing only. The document must be
  // the main frame to meet UKM requirements, and a high resolution clock is
  // required for high quality data.
  const bool should_report_metrics =
      sync_policy == kAllowAsynchronousParsing && document.GetFrame() &&
      document.GetFrame()->IsMainFrame() &&
      base::TimeTicks::IsHighResolution();
  if (should_report_metrics) {
    metrics_reporter_ = std::make_unique<HTMLParserMetrics>(
        document.UkmSourceID(), document.UkmRecorder());
  }

  max_tokenization_budget_ = GetMaxTokenizationBudget();

  // A preloader is created only when all of the following hold:
  //  - we are not parsing clipboard content;
  //  - the document is attached to a frame (likely the prefetched resources
  //    will be loaded soon), is a HTML import document (blocks rendering and
  //    also resources will be loaded soon), or is for no-state prefetch (made
  //    specifically for running preloader);
  //  - prefetching is allowed by |prefetch_policy|.
  if (content_policy != kDisallowScriptingAndPluginContent &&
      (document.GetFrame() || document.IsHTMLImport() ||
       document.IsPrefetchOnly()) &&
      prefetch_policy == kAllowPrefetching) {
    preloader_ = MakeGarbageCollected<HTMLResourcePreloader>(document);
  }
}

// Defaulted: the destructor itself performs no explicit teardown. Stopping the
// background parser is handled in Dispose() and Detach().
HTMLDocumentParser::~HTMLDocumentParser() = default;

// Stops the background parser if this parser still has one.
void HTMLDocumentParser::Dispose() {
  // In Oilpan, HTMLDocumentParser can die together with Document, and detach()
  // is not called in this case, so the background parser has to be stopped
  // here.
  if (!have_background_parser_)
    return;
  StopBackgroundParser();
}

// Garbage-collection tracing: reports the traced members held by this class
// to |visitor|, then forwards to both base classes so that their members are
// traced too.
void HTMLDocumentParser::Trace(Visitor* visitor) const {
  visitor->Trace(tree_builder_);
  visitor->Trace(parser_scheduler_);
  visitor->Trace(script_runner_);
  visitor->Trace(preloader_);
  visitor->Trace(task_runner_state_);
  // Base-class members.
  ScriptableDocumentParser::Trace(visitor);
  HTMLParserScriptRunnerHost::Trace(visitor);
}

// Test hook: true while a deferred tokenizer pump (with or without a trailing
// EndIfDelayed) is scheduled.
bool HTMLDocumentParser::HasPendingWorkScheduledForTesting() const {
  const bool pump_is_scheduled = task_runner_state_->IsScheduled();
  return pump_is_scheduled;
}

// Detaches the parser: stops the background parser (if any), cancels any
// scheduled tokenizer pump, detaches the base parser, script runner and tree
// builder, and releases the preload scanners, the parser scheduler, the
// tokenizer and the token. The order of the steps below is significant.
void HTMLDocumentParser::Detach() {
  if (have_background_parser_)
    StopBackgroundParser();
  // Deschedule any pending tokenizer pumps.
  task_runner_state_->SetState(
      HTMLDocumentParserState::DeferredParserState::kNotScheduled);
  DocumentParser::Detach();
  // script_runner_ is not set for fragment parsers, hence the null check.
  if (script_runner_)
    script_runner_->Detach();
  tree_builder_->Detach();
  // FIXME: It seems wrong that we would have a preload scanner here. Yet during
  // fast/dom/HTMLScriptElement/script-load-events.html we do.
  preload_scanner_.reset();
  insertion_preload_scanner_.reset();
  // The scheduler exists only for asynchronous parsing and may already have
  // been cleared by StopParsing().
  if (parser_scheduler_) {
    parser_scheduler_->Detach();
    parser_scheduler_.Clear();
  }
  // Oilpan: It is important to clear token_ to deallocate backing memory of
  // HTMLToken::data_ and let the allocator reuse the memory for
  // HTMLToken::data_ of a next HTMLDocumentParser. We need to clear
  // tokenizer_ first because tokenizer_ has a raw pointer to token_.
  tokenizer_.reset();
  token_.reset();
}

// Stops parsing: notifies the base class, detaches and drops the parser
// scheduler (if any), cancels any scheduled tokenizer pump, and stops the
// background parser (if any).
void HTMLDocumentParser::StopParsing() {
  DocumentParser::StopParsing();
  // The scheduler exists only for asynchronous parsing.
  if (parser_scheduler_) {
    parser_scheduler_->Detach();
    parser_scheduler_.Clear();
  }
  // Deschedule any pending tokenizer pump.
  task_runner_state_->SetState(
      HTMLDocumentParserState::DeferredParserState::kNotScheduled);
  if (have_background_parser_)
    StopBackgroundParser();
}

// This kicks off "Once the user agent stops parsing" as described by:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#the-end
//
// Sequence: flush the foreground tokenizer (when there is one and the document
// is not prefetch-only), let the base class prepare to stop, move the document
// to the interactive ready state (document parsers only), and finally try to
// run deferred scripts and end. The function bails out early if the parser
// turns out to be stopped or detached along the way.
void HTMLDocumentParser::PrepareToStopParsing() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::PrepareToStopParsing", "parser",
               (void*)this);
  // FIXME: It may not be correct to disable this for the background parser.
  // That means hasInsertionPoint() may not be correct in some cases.
  DCHECK(!HasInsertionPoint() || have_background_parser_);

  // NOTE: This pump should only ever emit buffered character tokens.
  if (tokenizer_ && !GetDocument()->IsPrefetchOnly()) {
    DCHECK(!have_background_parser_);
    // For the duration of this pump, force the state to report that it should
    // complete and forbid EndIfDelayed.
    ShouldCompleteScope should_complete(task_runner_state_);
    EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
    PumpTokenizerIfPossible();
  }

  // Nothing more to do if the parser is already stopped at this point.
  if (IsStopped())
    return;

  DocumentParser::PrepareToStopParsing();

  // We will not have a scriptRunner when parsing a DocumentFragment.
  if (script_runner_)
    GetDocument()->SetReadyState(Document::kInteractive);

  // Setting the ready state above can fire mutation event and detach us from
  // underneath. In that case, just bail out.
  if (IsDetached())
    return;

  if (script_runner_)
    script_runner_->RecordMetricsAtParseEnd();

  AttemptToRunDeferredScriptsAndEnd();
}

// Reports whether this parser is parsing a fragment; the answer is delegated
// to the tree builder.
bool HTMLDocumentParser::IsParsingFragment() const {
  const bool parsing_fragment = tree_builder_->IsParsingFragment();
  return parsing_fragment;
}

void HTMLDocumentParser::DeferredPumpTokenizerIfPossible() {
  // This method is called asynchronously, continues building the HTML document.
  // This function should only be called when
  // --enable-blink-features=ForceSynchronousHTMLParsing is available.
  DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  // If we're scheduled for a tokenizer pump, then document should be attached
  // and the parser should not be stopped, but sometimes a script completes
  // loading (so we schedule a pump) but the Document is stopped in the meantime
  // (e.g. fast/parser/iframe-onload-document-close-with-external-script.html).
  DCHECK(task_runner_state_->GetState() ==
             HTMLDocumentParserState::DeferredParserState::kNotScheduled ||
         !IsDetached());
  TRACE_EVENT2("blink", "HTMLDocumentParser::DeferredPumpTokenizerIfPossible",
               "parser", (void*)this, "state",
               task_runner_state_->GetStateAsString());
  bool should_call_delay_end =
      task_runner_state_->GetState() ==
      HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed;
  if (task_runner_state_->IsScheduled()) {
    task_runner_state_->SetState(
        HTMLDocumentParserState::DeferredParserState::kNotScheduled);
    if (should_call_delay_end) {
      EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
      PumpTokenizerIfPossible();
      EndIfDelayed();
    } else {
      PumpTokenizerIfPossible();
    }
  }
}

// Called synchronously: builds the HTML document up to the current budget and
// then decides what happens next, namely reschedule, attempt to end, end if
// delayed, or schedule a delayed end.
void HTMLDocumentParser::PumpTokenizerIfPossible() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::PumpTokenizerIfPossible", "parser",
               (void*)this);

  CheckIfBlockingStylesheetAdded();

  // Pump only if the parser is not stopped, and either not paused or allowed
  // to end-if-delayed.
  const bool may_pump =
      !IsStopped() &&
      (!IsPaused() || task_runner_state_->ShouldEndIfDelayed());
  const bool yielded = may_pump && PumpTokenizer();

  if (yielded) {
    // The budget ran out before the input did: continue in a later task.
    DCHECK(!task_runner_state_->ShouldComplete());
    SchedulePumpTokenizer();
    return;
  }

  if (task_runner_state_->ShouldAttemptToEndOnEOF()) {
    // ::Finish has been previously called and we've just finished
    // asynchronously parsing everything.
    AttemptToEnd();
    return;
  }

  if (!task_runner_state_->ShouldEndIfDelayed())
    return;

  // We did not exceed the budget or parsed everything there was to parse, so
  // check whether the document should be completed now or later.
  const bool end_now =
      task_runner_state_->ShouldComplete() || IsStopped() || IsStopping();
  if (end_now)
    EndIfDelayed();
  else
    ScheduleEndIfDelayed();
}

// True when a parser scheduler exists and reports that an unpause is
// scheduled; false when there is no scheduler.
bool HTMLDocumentParser::IsScheduledForUnpause() const {
  if (!parser_scheduler_)
    return false;
  return parser_scheduler_->IsScheduledForUnpause();
}

// Used by HTMLParserScheduler. Resumes processing of pending speculations
// after a yield, unless the parser has been stopped or is paused. Only valid
// for the asynchronous (background parser) configuration.
void HTMLDocumentParser::ResumeParsingAfterYield() {
  DCHECK(CanParseAsynchronously());
  DCHECK(have_background_parser_);
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

  // Reports the time spent yielded (when metrics are enabled) and restarts
  // the yield timer when this function returns.
  ScopedYieldTimer timer(&yield_timer_, metrics_reporter_.get());

  CheckIfBlockingStylesheetAdded();

  const bool can_resume = !IsStopped() && !IsPaused();
  if (can_resume)
    PumpPendingSpeculations();
}

void HTMLDocumentParser::RunScriptsForPausedTreeBuilder() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::RunScriptsForPausedTreeBuilder",
               "parser", (void*)this);
  DCHECK(ScriptingContentIsAllowed(GetParserContentPolicy()));

  TextPosition script_start_position = TextPosition::BelowRangePosition();
  Element* script_element =
      tree_builder_->TakeScriptToProcess(script_start_position);
  // We will not have a scriptRunner when parsing a DocumentFragment.
  if (script_runner_)
    script_runner_->ProcessScriptElement(script_element, script_start_position);
  CheckIfBlockingStylesheetAdded();
}

// Decides whether the parser may consume another token.
// Returns NoTokens when the parser is stopped or paused (checked both before
// and after running any blocking script), HaveTokensAfterScript when a
// parser-blocking script had to be run first, and HaveTokens otherwise.
HTMLDocumentParser::NextTokenStatus HTMLDocumentParser::CanTakeNextToken() {
  if (IsStopped())
    return NoTokens;

  // If we're paused waiting for a script, we try to execute scripts before
  // continuing.
  const bool had_blocking_script = tree_builder_->HasParserBlockingScript();
  if (had_blocking_script)
    RunScriptsForPausedTreeBuilder();

  // Running the script may have stopped or paused the parser.
  if (IsStopped() || IsPaused())
    return NoTokens;

  return had_blocking_script ? HaveTokensAfterScript : HaveTokens;
}

// Accepts a tokenized chunk (presumably produced by the background parser;
// this path is not used when ForceSynchronousHTMLParsing is enabled) and:
//  1. dispatches link-header preloads the first time a document loader is
//     available;
//  2. remembers a pending <meta> CSP token if the chunk contains one;
//  3. issues the chunk's preload requests now, or queues them when they must
//     wait for CSP or AppCache initialization;
//  4. appends the chunk to speculations_ and schedules the parser to resume
//     unless it is paused or already scheduled.
// Does nothing if the parser is no longer parsing.
void HTMLDocumentParser::EnqueueTokenizedChunk(
    std::unique_ptr<TokenizedChunk> chunk) {
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  TRACE_EVENT0("blink", "HTMLDocumentParser::EnqueueTokenizedChunk");

  DCHECK(chunk);
  DCHECK(GetDocument());

  if (!IsParsing())
    return;

  // ApplicationCache needs to be initialized before issuing preloads. We
  // suspend preload until HTMLHTMLElement is inserted and ApplicationCache is
  // initialized. Note: link rel preloads don't follow this policy per the spec.
  // These directives should initiate a fetch as fast as possible.
  if (!tried_loading_link_headers_ && GetDocument()->Loader()) {
    // Note that on commit, the loader dispatched preloads for all the non-media
    // links.
    GetDocument()->Loader()->DispatchLinkHeaderPreloads(
        base::OptionalOrNullptr(chunk->viewport),
        PreloadHelper::kOnlyLoadMedia);
    // One-shot: this branch is not entered again for later chunks.
    tried_loading_link_headers_ = true;
    if (GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
      // Link header preloads for prefetched signed exchanges won't be started
      // until StartPrefetchedLinkHeaderPreloads() is called. See the header
      // comment of PrefetchedSignedExchangeManager.
      GetDocument()
          ->Loader()
          ->GetPrefetchedSignedExchangeManager()
          ->StartPrefetchedLinkHeaderPreloads();
    }
  }

  // Defer preloads if any of the chunks contains a <meta> csp tag.
  // The stored pointer refers to a token inside |chunk|, which is kept alive
  // by being moved into speculations_ below.
  if (chunk->pending_csp_meta_token_index != TokenizedChunk::kNoPendingToken) {
    pending_csp_meta_token_ =
        &chunk->tokens.at(chunk->pending_csp_meta_token_index);
  }

  if (preloader_) {
    // True when the response carries an AppCache id other than the "no cache"
    // sentinel.
    bool appcache_fetched = false;
    if (GetDocument()->Loader()) {
      appcache_fetched = (GetDocument()->Loader()->GetResponse().AppCacheID() !=
                          mojom::blink::kAppCacheNoCacheId);
    }
    // The presence of the document element is used as the signal that
    // AppCache has been initialized.
    bool appcache_initialized = GetDocument()->documentElement();
    // Delay sending some requests if meta tag based CSP is present or
    // if AppCache was used to fetch the HTML but was not yet initialized for
    // this document.
    if (pending_csp_meta_token_ ||
        ((!base::FeatureList::IsEnabled(
              blink::features::kVerifyHTMLFetchedFromAppCacheBeforeDelay) ||
          appcache_fetched) &&
         !appcache_initialized)) {
      PreloadRequestStream link_rel_preloads;
      for (auto& request : chunk->preloads) {
        // Link rel preloads don't need to wait for AppCache but they
        // should probably wait for CSP.
        if (!pending_csp_meta_token_ && request->IsLinkRelPreload())
          link_rel_preloads.push_back(std::move(request));
        else
          queued_preloads_.push_back(std::move(request));
      }
      // Only the link rel preloads are issued immediately; everything else
      // stays in queued_preloads_.
      preloader_->TakeAndPreload(link_rel_preloads);
    } else {
      // We can safely assume that there are no queued preloads request after
      // the document element is available, as we empty the queue immediately
      // after the document element is created in documentElementAvailable().
      DCHECK(queued_preloads_.IsEmpty());
      preloader_->TakeAndPreload(chunk->preloads);
    }
  }

  speculations_.push_back(std::move(chunk));

  // Make sure the queued chunk gets processed, unless the parser is paused or
  // a resume is already scheduled.
  if (!IsPaused() && !IsScheduledForUnpause())
    parser_scheduler_->ScheduleForUnpause();
}

void HTMLDocumentParser::DidReceiveEncodingDataFromBackgroundParser(
    const DocumentEncodingData& data) {
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  GetDocument()->SetEncodingData(data);
}

// Decides whether the speculative chunks queued after |chunk| are still valid.
// If the parser is paused, |chunk| is stashed in last_chunk_before_pause_ for
// a later call. Otherwise the foreground tokenizer and token are taken out of
// their members; if there was no tokenizer, or if all the reuse conditions
// below hold, the speculations are kept. In every other case the speculations
// are discarded and parsing is resumed from |chunk| via
// DiscardSpeculationsAndResumeFrom().
void HTMLDocumentParser::ValidateSpeculations(
    std::unique_ptr<TokenizedChunk> chunk) {
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(chunk);
  // TODO(kouhei): We should simplify codepath here by disallowing
  // ValidateSpeculations
  // while IsPaused, and last_chunk_before_pause_ can simply be
  // pushed to speculations_.
  if (IsPaused()) {
    // We're waiting on a network script or stylesheet, just save the chunk,
    // we'll get a second ValidateSpeculations call after the script or
    // stylesheet completes. This call should have been made immediately after
    // RunScriptsForPausedTreeBuilder in the script case which may have started
    // a network load and left us waiting.
    DCHECK(!last_chunk_before_pause_);
    last_chunk_before_pause_ = std::move(chunk);
    return;
  }

  DCHECK(!last_chunk_before_pause_);
  // Take ownership away from the members: after these two moves tokenizer_ and
  // token_ are empty regardless of which path is taken below.
  std::unique_ptr<HTMLTokenizer> tokenizer = std::move(tokenizer_);
  std::unique_ptr<HTMLToken> token = std::move(token_);

  if (!tokenizer) {
    // There must not have been any changes to the HTMLTokenizer state on the
    // main thread, which means the speculation buffer is correct.
    return;
  }

  // Currently we're only smart enough to reuse the speculation buffer if the
  // tokenizer both starts and ends in the DataState. That state is simplest
  // because the HTMLToken is always in the Uninitialized state. We should
  // consider whether we can reuse the speculation buffer in other states, but
  // we'd likely need to do something more sophisticated with the HTMLToken.
  // In addition there must be no unparsed input left, and the tree builder
  // state recorded in the chunk must match the current one.
  if (chunk->tokenizer_state == HTMLTokenizer::kDataState &&
      tokenizer->GetState() == HTMLTokenizer::kDataState &&
      input_.Current().IsEmpty() &&
      chunk->tree_builder_state ==
          HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get())) {
    DCHECK(token->IsUninitialized());
    return;
  }

  // The speculations cannot be reused: hand the tokenizer and token over so
  // that parsing can be restarted from |chunk|.
  DiscardSpeculationsAndResumeFrom(std::move(chunk), std::move(token),
                                   std::move(tokenizer));
}

void HTMLDocumentParser::DiscardSpeculationsAndResumeFrom(
    std::unique_ptr<TokenizedChunk> last_chunk_before_script,
    std::unique_ptr<HTMLToken> token,
    std::unique_ptr<HTMLTokenizer> tokenizer) {
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  // Clear back ref.
  background_parser_->ClearParser();

  size_t discarded_token_count = 0;
  for (const auto& speculation : speculations_) {
    discarded_token_count += speculation->tokens.size();
  }
  g_discarded_token_count_for_testing += discarded_token_count;

  speculations_.clear();
  pending_csp_meta_token_ = nullptr;
  queued_preloads_.clear();

  std::unique_ptr<BackgroundHTMLParser::Checkpoint> checkpoint =
      std::make_unique<BackgroundHTMLParser::Checkpoint>();
  checkpoint->parser = this;
  checkpoint->token = std::move(token);
  checkpoint->tokenizer = std::move(tokenizer);
  checkpoint->tree_builder_state =
      HTMLTreeBuilderSimulator::StateFor(tree_builder_.Get());
  checkpoint->input_checkpoint = last_chunk_before_script->input_checkpoint;
  checkpoint->preload_scanner_checkpoint =
      last_chunk_before_script->preload_scanner_checkpoint;
  checkpoint->unparsed_input = input_.Current().ToString().IsolatedCopy();
  // FIXME: This should be passed in instead of cleared.
  input_.Current().Clear();

  DCHECK(checkpoint->unparsed_input.IsSafeToSendToAnotherThread());
  loading_task_runner_->PostTask(
      FROM_HERE, WTF::Bind(&BackgroundHTMLParser::ResumeFrom,
                           background_parser_, std::move(checkpoint)));
}

// Feeds the tokens of one background-tokenized chunk to the tree builder.
//
// |pop_chunk| is the chunk to consume; ownership is taken.
// |reached_end_of_file| is set to true only when a kEndOfFile token is
// processed and is otherwise left untouched, so the caller must initialise it.
//
// Returns the number of start-tag/end-tag tokens visited, counted only when
// the chunk is not flagged |starting_script|.
size_t HTMLDocumentParser::ProcessTokenizedChunkFromBackgroundParser(
    std::unique_ptr<TokenizedChunk> pop_chunk,
    bool* reached_end_of_file) {
  TRACE_EVENT_WITH_FLOW0(
      "blink,loading",
      "HTMLDocumentParser::processTokenizedChunkFromBackgroundParser",
      pop_chunk.get(), TRACE_EVENT_FLAG_FLOW_IN);
  base::AutoReset<bool> has_line_number(&is_parsing_at_line_number_, true);

  SECURITY_DCHECK(pump_speculations_session_nesting_level_ == 1);
  SECURITY_DCHECK(!InPumpSession());
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsParsingFragment());
  DCHECK(!IsPaused());
  DCHECK(!IsStopped());
  DCHECK(CanParseAsynchronously());
  DCHECK(!tokenizer_);
  DCHECK(!token_);
  DCHECK(!last_chunk_before_pause_);

  std::unique_ptr<TokenizedChunk> chunk(std::move(pop_chunk));
  // |tokens| refers into |chunk|; it must not be used once |chunk| has been
  // moved away (see the IsPaused() branch below).
  const CompactHTMLTokenStream& tokens = chunk->tokens;
  size_t element_token_count = 0;

  // Post the chunk's input checkpoint to the background parser via
  // StartedChunkWithCheckpoint.
  loading_task_runner_->PostTask(
      FROM_HERE, WTF::Bind(&BackgroundHTMLParser::StartedChunkWithCheckpoint,
                           background_parser_, chunk->input_checkpoint));

  for (const auto& token : tokens) {
    DCHECK(!IsWaitingForScripts());

    if (!chunk->starting_script && (token.GetType() == HTMLToken::kStartTag ||
                                    token.GetType() == HTMLToken::kEndTag))
      element_token_count++;

    text_position_ = token.GetTextPosition();

    ConstructTreeFromCompactHTMLToken(token);

    if (IsStopped())
      break;

    // Preloads were queued if there was a <meta> csp token in a tokenized
    // chunk.
    if (pending_csp_meta_token_ && &token == pending_csp_meta_token_) {
      pending_csp_meta_token_ = nullptr;
      FetchQueuedPreloads();
    }

    if (IsPaused()) {
      // The script or stylesheet should be the last token of this bunch.
      DCHECK_EQ(&token, &tokens.back());
      if (IsWaitingForScripts())
        RunScriptsForPausedTreeBuilder();
      // |chunk| is handed off here, so leave the loop without touching
      // |chunk| or |tokens| again.
      ValidateSpeculations(std::move(chunk));
      break;
    }

    if (token.GetType() == HTMLToken::kEndOfFile) {
      // The EOF is assumed to be the last token of this bunch.
      DCHECK_EQ(&token, &tokens.back());
      // There should never be any chunks after the EOF.
      DCHECK(speculations_.IsEmpty());
      PrepareToStopParsing();
      *reached_end_of_file = true;
      break;
    }

    DCHECK(!tokenizer_);
    DCHECK(!token_);
  }

  // Make sure all required pending text nodes are emitted before returning.
  // This leaves "script", "style" and "svg" nodes text nodes intact.
  if (!IsStopped())
    tree_builder_->Flush(kFlushIfAtTextLimit);

  // NOTE(review): |has_line_number| above is a base::AutoReset on this same
  // flag, so this explicit reset looks redundant — confirm before removing.
  is_parsing_at_line_number_ = false;

  return element_token_count;
}

// Drains speculations_ one chunk at a time through
// ProcessTokenizedChunkFromBackgroundParser(). The loop ends when the queue
// is empty, when the parser stops parsing, pauses or gets scheduled for
// unpause, or when the scheduler asks to yield. If called while waiting for
// scripts or from inside another pump session, no chunk is processed and the
// parser is scheduled for unpause instead.
void HTMLDocumentParser::PumpPendingSpeculations() {
  // If this assert fails, you need to call ValidateSpeculations to make sure
  // tokenizer_ and token_ don't have state that invalidates speculations_.
  DCHECK(!tokenizer_);
  DCHECK(!token_);
  DCHECK(!last_chunk_before_pause_);
  DCHECK(!IsPaused());
  DCHECK(!IsStopped());
  DCHECK(!IsScheduledForUnpause());
  DCHECK(!InPumpSession());
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

  // FIXME: Here should never be reached when there is a blocking script,
  // but it happens in unknown scenarios. See https://crbug.com/440901
  if (IsWaitingForScripts()) {
    parser_scheduler_->ScheduleForUnpause();
    return;
  }

  // Do not allow pumping speculations in nested event loops.
  if (pump_speculations_session_nesting_level_) {
    parser_scheduler_->ScheduleForUnpause();
    return;
  }

  probe::ParseHTML probe(GetDocument(), this);

  SpeculationsPumpSession session(pump_speculations_session_nesting_level_);
  bool reached_end_of_file = false;
  while (!speculations_.IsEmpty()) {
    DCHECK(!IsScheduledForUnpause());
    size_t element_token_count = ProcessTokenizedChunkFromBackgroundParser(
        speculations_.TakeFirst(), &reached_end_of_file);
    session.AddedElementTokens(element_token_count);

    // Always check IsParsing first as document_ may be null. Surprisingly,
    // IsScheduledForUnpause() may be set here as a result of
    // ProcessTokenizedChunkFromBackgroundParser running arbitrary javascript
    // which invokes nested event loops. (e.g. inspector breakpoints)
    CheckIfBlockingStylesheetAdded();
    if (!IsParsing() || IsPaused() || IsScheduledForUnpause())
      break;

    // The emptiness check comes first so that front() is only evaluated on a
    // non-empty queue.
    if (speculations_.IsEmpty() ||
        parser_scheduler_->YieldIfNeeded(
            session, speculations_.front()->starting_script))
      break;
  }

  // Report this session's elapsed time and element-token count; the
  // end-of-parse metrics are reported once the EOF token has been processed.
  if (metrics_reporter_) {
    metrics_reporter_->AddChunk(session.ElapsedTime(),
                                session.ProcessedElementTokens());
    if (reached_end_of_file)
      metrics_reporter_->ReportMetricsAtParseEnd();
  }
}

// Switches tokenization to PLAINTEXT, either directly on the main-thread
// tokenizer or, when parsing asynchronously, through the background parser.
void HTMLDocumentParser::ForcePlaintextForTextDocument() {
  if (!CanParseAsynchronously()) {
    tokenizer_->SetState(HTMLTokenizer::kPLAINTEXTState);
    return;
  }

  // This method runs before any data is appended, so the background parser
  // may not exist yet and has to be started here.
  if (!have_background_parser_) {
    StartBackgroundParser();
  }

  // Deliberately a direct (synchronous) call: otherwise synchronous
  // tokenizing could happen before plaintext is forced.
  background_parser_->ForcePlaintextForTextDocument();
}

// Runs the main-thread tokenizer over input_.Current(), handing each token to
// the tree builder, until input runs out, the tokenizer cannot produce a
// token, or the loop decides to yield.
//
// Returns false if the parser was stopped during the pump; otherwise returns
// whether the loop chose to yield (true means more work is pending).
bool HTMLDocumentParser::PumpTokenizer() {
  DCHECK(!GetDocument()->IsPrefetchOnly());
  DCHECK(!IsStopped());
  DCHECK(tokenizer_);
  DCHECK(token_);

  PumpSession session(pump_session_nesting_level_);

  // If we're in kForceSynchronousParsing, always run until all available input
  // is consumed.
  bool should_run_until_completion = task_runner_state_->ShouldComplete() ||
                                     task_runner_state_->IsSynchronous() ||
                                     pump_session_nesting_level_ > 1;
  TRACE_EVENT2("blink", "HTMLDocumentParser::PumpTokenizer", "should_complete",
               should_run_until_completion, "parser", (void*)this);

  // We tell the InspectorInstrumentation about every pump, even if we end up
  // pumping nothing.  It can filter out empty pumps itself.
  // FIXME: input_.Current().length() is only accurate if we end up parsing the
  // whole buffer in this pump.  We should pass how much we parsed as part of
  // DidWriteHTML instead of WillWriteHTML.
  probe::ParseHTML probe(GetDocument(), this);

  bool should_yield = false;
  // Tokens this pump may produce before a yield is considered; decremented
  // once per token.
  int budget = max_tokenization_budget_;

  while (!should_yield) {
    const auto next_token_status = CanTakeNextToken();
    if (next_token_status == NoTokens) {
      // No tokens left to process in this pump, so break
      break;
    } else if (next_token_status == HaveTokensAfterScript &&
               task_runner_state_->HaveExitedHeader()) {
      // Just executed a parser-blocking script in the body (which is usually
      // very expensive), so expire the budget, yield, and permit paint if
      // needed.
      budget = 0;
      if (!should_run_until_completion) {
        should_yield = true;
        break;
      }
    }
    {
      RUNTIME_CALL_TIMER_SCOPE(
          V8PerIsolateData::MainThreadIsolate(),
          RuntimeCallStats::CounterId::kHTMLTokenizerNextToken);
      if (!tokenizer_->NextToken(input_.Current(), Token()))
        break;
      budget--;
    }
    ConstructTreeFromHTMLToken();
    if (!should_run_until_completion && !IsPaused()) {
      DCHECK_EQ(task_runner_state_->GetMode(), kAllowDeferredParsing);
      // Yield when the budget is spent or the scheduler reports high-priority
      // work, but only once the header has been exited.
      should_yield = budget <= 0;
      should_yield |= scheduler_->ShouldYieldForHighPriorityWork();
      should_yield &= task_runner_state_->HaveExitedHeader();
    } else {
      should_yield = false;
    }
    DCHECK(IsStopped() || Token().IsUninitialized());
  }

  if (IsStopped())
    return false;

  // There should only be PendingText left since the tree-builder always flushes
  // the task queue before returning. In case that ever changes, crash.
  tree_builder_->Flush(kFlushAlways);
  CHECK(!IsStopped());

  if (IsPaused()) {
    DCHECK_EQ(tokenizer_->GetState(), HTMLTokenizer::kDataState);

    // While paused, scan the input for preloads. The scanner is created on
    // first use and seeded with the current input.
    if (preloader_) {
      if (!preload_scanner_) {
        preload_scanner_ = CreatePreloadScanner(
            TokenPreloadScanner::ScannerType::kMainDocument);
        preload_scanner_->AppendToEnd(input_.Current());
      }
      ScanAndPreload(preload_scanner_.get());
    }
  }

  // should_run_until_completion implies that we should not yield
  CHECK(!should_run_until_completion || !should_yield);
  return should_yield;
}

// Posts a DeferredPumpTokenizerIfPossible task on loading_task_runner_ and
// marks the parser state as kScheduled, unless a pump is already scheduled.
void HTMLDocumentParser::SchedulePumpTokenizer() {
  TRACE_EVENT0("blink", "HTMLDocumentParser::SchedulePumpTokenizer");
  DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsStopped());
  DCHECK(!InPumpSession());
  DCHECK(!task_runner_state_->ShouldComplete());

  // An already-scheduled parser needs nothing further.
  if (!task_runner_state_->IsScheduled()) {
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                  WrapPersistent(this)));
    task_runner_state_->SetState(
        HTMLDocumentParserState::DeferredParserState::kScheduled);
  }
}

// Makes sure a deferred pump is posted and marks the parser state as
// kScheduledWithEndIfDelayed, so that the pump is followed by EndIfDelayed.
void HTMLDocumentParser::ScheduleEndIfDelayed() {
  TRACE_EVENT0("blink", "HTMLDocumentParser::ScheduleEndIfDelayed");
  DCHECK(RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsStopped());
  DCHECK(!InPumpSession());
  DCHECK(!task_runner_state_->ShouldComplete());

  // Only post a new pump callback when none is pending.
  const bool pump_already_pending = task_runner_state_->IsScheduled();
  if (!pump_already_pending) {
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&HTMLDocumentParser::DeferredPumpTokenizerIfPossible,
                  WrapPersistent(this)));
  }

  // When a pump was already pending, it is simply upgraded to one that calls
  // EndIfDelayed afterwards.
  task_runner_state_->SetState(
      HTMLDocumentParserState::DeferredParserState::kScheduledWithEndIfDelayed);
}

// Wraps the current HTMLToken in an AtomicHTMLToken, passes it to the tree
// builder, and leaves Token() cleared afterwards (if token_ still exists).
// Also records, via task_runner_state_, the first time the document is seen
// to have a body.
void HTMLDocumentParser::ConstructTreeFromHTMLToken() {
  DCHECK(!GetDocument()->IsPrefetchOnly());

  AtomicHTMLToken atomic_token(Token());

  // Check whether we've exited the header.
  if (!task_runner_state_->HaveExitedHeader()) {
    if (GetDocument()->body()) {
      task_runner_state_->SetExitedHeader();
    }
  }

  // We clear the token_ in case ConstructTreeFromAtomicToken
  // synchronously re-enters the parser. We don't clear the token immediately
  // for kCharacter tokens because the AtomicHTMLToken avoids copying the
  // characters by keeping a pointer to the underlying buffer in the
  // HTMLToken. Fortunately, kCharacter tokens can't cause us to re-enter
  // the parser.
  //
  // FIXME: Stop clearing the token_ once we start running the parser off
  // the main thread or once we stop allowing synchronous JavaScript
  // execution from ParseAttribute.
  if (Token().GetType() != HTMLToken::kCharacter)
    Token().Clear();

  tree_builder_->ConstructTree(&atomic_token);
  CheckIfBlockingStylesheetAdded();

  // FIXME: ConstructTree may synchronously cause Document to be detached.
  // In that case token_ is gone and there is nothing left to clear.
  if (!token_)
    return;

  // Only a kCharacter token can still be initialized here; it was kept alive
  // above for the AtomicHTMLToken and is cleared now.
  if (!Token().IsUninitialized()) {
    DCHECK_EQ(Token().GetType(), HTMLToken::kCharacter);
    Token().Clear();
  }
}

void HTMLDocumentParser::ConstructTreeFromCompactHTMLToken(
    const CompactHTMLToken& compact_token) {
  DCHECK(!GetDocument()->IsPrefetchOnly());
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  AtomicHTMLToken token(compact_token);
  tree_builder_->ConstructTree(&token);
  CheckIfBlockingStylesheetAdded();
}

// Returns true if the input stream reports an insertion point, or if this
// parser was created by script and has not yet seen end-of-file.
bool HTMLDocumentParser::HasInsertionPoint() {
  if (input_.HasInsertionPoint())
    return true;

  // FIXME: The wasCreatedByScript() branch here might not be fully correct. Our
  // model of the EOF character differs slightly from the one in the spec
  // because our treatment is uniform between network-sourced and script-sourced
  // input streams whereas the spec treats them differently.
  return WasCreatedByScript() && !input_.HaveSeenEndOfFile();
}

void HTMLDocumentParser::insert(const String& source) {
  if (IsStopped())
    return;

  TRACE_EVENT2("blink", "HTMLDocumentParser::insert", "source_length",
               source.length(), "parser", (void*)this);

  if (!tokenizer_) {
    DCHECK(!InPumpSession());
    DCHECK(have_background_parser_ || WasCreatedByScript());
    token_ = std::make_unique<HTMLToken>();
    tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
  }

  SegmentedString excluded_line_number_source(source);
  excluded_line_number_source.SetExcludeLineNumbers();
  input_.InsertAtCurrentInsertionPoint(excluded_line_number_source);

  // Pump the the tokenizer to build the document from the given insert point.
  // Should process everything available and not defer anything.
  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
  // Call EndIfDelayed manually at the end to maintain preload behaviour.
  PumpTokenizerIfPossible();

  if (IsPaused()) {
    // Check the document.write() output with a separate preload scanner as
    // the main scanner can't deal with insertions.
    if (!insertion_preload_scanner_) {
      insertion_preload_scanner_ =
          CreatePreloadScanner(TokenPreloadScanner::ScannerType::kInsertion);
    }
    insertion_preload_scanner_->AppendToEnd(source);
    if (preloader_) {
      ScanAndPreload(insertion_preload_scanner_.get());
    }
  }
  EndIfDelayed();
}

void HTMLDocumentParser::StartBackgroundParser() {
  TRACE_EVENT0("blink,loading", "HTMLDocumentParser::StartBackgroundParser");
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());
  DCHECK(!IsStopped());
  DCHECK(CanParseAsynchronously());
  DCHECK(!have_background_parser_);
  DCHECK(GetDocument());
  have_background_parser_ = true;

  // Make sure that the viewport is up-to-date, so that the correct viewport
  // dimensions will be fed to the background parser and preload scanner.
  if (GetDocument()->Loader())
    GetDocument()->GetStyleEngine().UpdateViewport();

  std::unique_ptr<BackgroundHTMLParser::Configuration> config =
      std::make_unique<BackgroundHTMLParser::Configuration>();
  config->options = options_;
  config->parser = this;
  config->decoder = TakeDecoder();

  // The background parser is created on the main thread, but may otherwise
  // only be used from the parser thread.
  background_parser_ =
      BackgroundHTMLParser::Create(std::move(config), loading_task_runner_);
  // TODO(csharrison): This is a hack to initialize MediaValuesCached on the
  // correct thread. We should get rid of it.

  // TODO(domfarolino): Remove this once Priority Hints is no longer in Origin
  // Trial. This currently exists because the TokenPreloadScanner needs to know
  // the status of the Priority Hints Origin Trial, and has no way of figuring
  // this out on its own. See https://crbug.com/821464.
  bool priority_hints_origin_trial_enabled =
      RuntimeEnabledFeatures::PriorityHintsEnabled(
          GetDocument()->GetExecutionContext());

  background_parser_->Init(
      GetDocument()->Url(),
      std::make_unique<CachedDocumentParameters>(GetDocument()),
      MediaValuesCached::MediaValuesCachedData(*GetDocument()),
      priority_hints_origin_trial_enabled);
}

// Clears have_background_parser_ and stops the background parser with a
// direct (non-posted) call. Expects a background parser to exist.
void HTMLDocumentParser::StopBackgroundParser() {
  DCHECK(CanParseAsynchronously());
  DCHECK(have_background_parser_);
  DCHECK(!RuntimeEnabledFeatures::ForceSynchronousHTMLParsingEnabled());

  have_background_parser_ = false;

  // Make this sync, as lsan triggers on some unittests if the task runner is
  // used.
  background_parser_->Stop();
}

// Appends decoded text |input_source| to the input stream for main-thread
// parsing and either schedules or immediately runs a tokenizer pump.
//
// For prefetch-only documents the text is only run through the preload
// scanner and never parsed. Does nothing if the parser has stopped. Must not
// be called when parsing asynchronously (see the DCHECK below).
void HTMLDocumentParser::Append(const String& input_source) {
  TRACE_EVENT2("blink", "HTMLDocumentParser::append", "size",
               input_source.length(), "parser", (void*)this);

  if (IsStopped())
    return;

  // We should never reach this point if we're using a parser thread, as
  // appendBytes() will directly ship the data to the thread.
  DCHECK(!CanParseAsynchronously());

  const SegmentedString source(input_source);

  if (!preload_scanner_ && GetDocument()->Url().IsValid() &&
      (!task_runner_state_->IsSynchronous() ||
       GetDocument()->IsPrefetchOnly() || IsPaused())) {
    // If we're operating with synchronous, budgeted foreground HTML parsing
    // or using the background parser, need to create a preload scanner to
    // make sure that parser-blocking Javascript requests are dispatched in
    // plenty of time, which prevents unnecessary delays.
    // When parsing without a budget (e.g. for HTML fragment parsing), it's
    // additional overhead to scan the string unless the parser's already
    // paused whilst executing a script.
    preload_scanner_ =
        CreatePreloadScanner(TokenPreloadScanner::ScannerType::kMainDocument);
  }

  if (GetDocument()->IsPrefetchOnly()) {
    // Do not prefetch if there is an appcache.
    if (GetDocument()->Loader()->GetResponse().AppCacheID() != 0)
      return;

    // The scanner is only created above when the document URL is valid, so it
    // can still be null here. Skip scanning instead of dereferencing null.
    if (preload_scanner_) {
      preload_scanner_->AppendToEnd(source);
      if (preloader_) {
        // TODO(Richard.Townsend@arm.com): add test coverage of this branch.
        // The crash in crbug.com/1166786 indicates that text documents are
        // being speculatively prefetched.
        ScanAndPreload(preload_scanner_.get());
      }
    }

    // Return after the preload scanner, do not actually parse the document.
    return;
  }
  if (preload_scanner_ && preloader_) {
    preload_scanner_->AppendToEnd(source);
    if (task_runner_state_->GetMode() == kAllowDeferredParsing &&
        (IsPaused() || !task_runner_state_->HaveSeenFirstByte())) {
      // Should scan and preload if the parser's paused waiting for a resource,
      // or if we're starting a document for the first time (we want to at least
      // prefetch anything that's in the <head> section).
      ScanAndPreload(preload_scanner_.get());
    }
  }

  input_.AppendToEnd(source);
  task_runner_state_->SetHaveSeenFirstByte();

  if (InPumpSession()) {
    // We've gotten data off the network in a nested write. We don't want to
    // consume any more of the input stream now.  Do not worry.  We'll consume
    // this data in a less-nested write().
    return;
  }

  // Schedule a tokenizer pump to process this new data. Deferred mode posts a
  // task unless the parser has been told to complete; every other case pumps
  // right here.
  if (task_runner_state_->GetMode() ==
          ParserSynchronizationPolicy::kAllowDeferredParsing &&
      !task_runner_state_->ShouldComplete()) {
    SchedulePumpTokenizer();
  } else {
    PumpTokenizerIfPossible();
  }
}

// Final step of parsing: stops the background parser if there is one, tells
// the tree builder that parsing is finished, drops the preloader, and stops
// the underlying DocumentParser.
void HTMLDocumentParser::end() {
  DCHECK(!IsDetached());
  DCHECK(!IsScheduledForUnpause());

  if (have_background_parser_)
    StopBackgroundParser();

  // Informs the rest of WebCore that parsing is really finished (and
  // deletes this).
  tree_builder_->Finished();

  // All preloads should be done.
  preloader_ = nullptr;

  DocumentParser::StopParsing();
}

// Asks the script runner (if any) to execute the scripts waiting for parsing
// and, unless it returns false, finishes parsing via end().
void HTMLDocumentParser::AttemptToRunDeferredScriptsAndEnd() {
  DCHECK(IsStopping());
  // FIXME: It may not be correct to disable this for the background parser.
  // That means hasInsertionPoint() may not be correct in some cases.
  DCHECK(!HasInsertionPoint() || have_background_parser_);

  const bool must_wait_for_scripts =
      script_runner_ && !script_runner_->ExecuteScriptsWaitingForParsing();
  if (!must_wait_for_scripts) {
    end();
  }
}

// Returns true while ending the parse must be postponed: inside a pump
// session, paused, executing a script, or with a deferred pump scheduled.
bool HTMLDocumentParser::ShouldDelayEnd() const {
  if (InPumpSession())
    return true;
  if (IsPaused())
    return true;
  if (IsExecutingScript())
    return true;
  return task_runner_state_->IsScheduled();
}

// Tries to stop parsing now that no more data will arrive. If ending has to
// be delayed, records that in end_was_delayed_ so that EndIfDelayed() can
// finish the job later.
void HTMLDocumentParser::AttemptToEnd() {
  // finish() indicates we will not receive any more data. If we are waiting on
  // an external script to load, we can't finish parsing quite yet.
  TRACE_EVENT1("blink", "HTMLDocumentParser::AttemptToEnd", "parser",
               static_cast<void*>(this));
  DCHECK(task_runner_state_->ShouldAttemptToEndOnEOF());
  AttemptToEndForbiddenScope should_not_attempt_to_end(task_runner_state_);

  if (!ShouldDelayEnd()) {
    PrepareToStopParsing();
    return;
  }

  // We should only be in this state once after calling Finish. With scripts
  // still pending, later control flow goes through EndIfDelayed.
  end_was_delayed_ = true;
}

// Completes an end that AttemptToEnd() had to postpone, provided the parser
// is still attached and nothing is delaying the end any more.
void HTMLDocumentParser::EndIfDelayed() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::EndIfDelayed", "parser",
               static_cast<void*>(this));
  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);

  // Nothing to do if we've already been detached, if no end is outstanding,
  // or if the end still has to wait.
  if (IsDetached() || !end_was_delayed_ || ShouldDelayEnd()) {
    return;
  }

  end_was_delayed_ = false;
  PrepareToStopParsing();
}

// Signals that no more data will be appended. Flushes first; then either
// forwards the finish to the background parser, or marks end-of-file on the
// input stream and attempts to end (unless a deferred pump is scheduled, in
// which case ending is left to that pump).
void HTMLDocumentParser::Finish() {
  // FIXME: We should DCHECK(!parser_stopped_) here, since it does not makes
  // sense to call any methods on DocumentParser once it's been stopped.
  // However, FrameLoader::Stop calls DocumentParser::Finish unconditionally.

  ShouldCompleteScope should_complete(task_runner_state_);
  EndIfDelayedForbiddenScope should_not_end_if_delayed(task_runner_state_);
  Flush();
  // The detached check comes after Flush() and guards everything below.
  if (IsDetached())
    return;

  // Empty documents never got an append() call, and thus have never started a
  // background parser. In those cases, we ignore CanParseAsynchronously() and
  // fall through to the synchronous case.
  if (have_background_parser_) {
    if (!input_.HaveSeenEndOfFile())
      input_.CloseWithoutMarkingEndOfFile();
    loading_task_runner_->PostTask(
        FROM_HERE,
        WTF::Bind(&BackgroundHTMLParser::Finish, background_parser_));
    return;
  }

  if (!tokenizer_) {
    DCHECK(!token_);
    // We're finishing before receiving any data. Rather than booting up the
    // background parser just to spin it down, we finish parsing synchronously.
    token_ = std::make_unique<HTMLToken>();
    tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
  }

  // We're not going to get any more data off the network, so we tell the input
  // stream we've reached the end of file. finish() can be called more than
  // once, if the first time does not call end().
  if (!input_.HaveSeenEndOfFile())
    input_.MarkEndOfFile();

  // If there's any deferred work remaining, signal that we
  // want to end the document once all work's complete.
  task_runner_state_->SetAttemptToEndOnEOF();
  // Prefetch-only documents skip this early return and attempt to end now
  // even if a pump is scheduled.
  if (task_runner_state_->IsScheduled() && !GetDocument()->IsPrefetchOnly()) {
    return;
  }

  AttemptToEnd();
}

// Returns whether the script runner reports a script as currently executing;
// false when there is no script runner.
bool HTMLDocumentParser::IsExecutingScript() const {
  return script_runner_ && script_runner_->IsExecutingScript();
}

// Defers to ScriptableDocumentParser::IsParsingAtLineNumber(), except that
// when parsing asynchronously the answer is false unless
// is_parsing_at_line_number_ is set.
bool HTMLDocumentParser::IsParsingAtLineNumber() const {
  if (CanParseAsynchronously() && !is_parsing_at_line_number_)
    return false;
  return ScriptableDocumentParser::IsParsingAtLineNumber();
}

// Returns the current line: taken from the stored text_position_ when a
// background parser exists, otherwise from the current input string.
OrdinalNumber HTMLDocumentParser::LineNumber() const {
  if (!have_background_parser_) {
    return input_.Current().CurrentLine();
  }
  return text_position_.line_;
}

// Returns the current line/column position: the stored text_position_ when a
// background parser exists, otherwise the position of the current input
// string.
TextPosition HTMLDocumentParser::GetTextPosition() const {
  if (!have_background_parser_) {
    const SegmentedString& input = input_.Current();
    const OrdinalNumber current_line = input.CurrentLine();
    const OrdinalNumber current_column = input.CurrentColumn();
    return TextPosition(current_line, current_column);
  }
  return text_position_;
}

// Returns true if the tree builder or the script runner holds a
// parser-blocking script, or if the reentry permit's parser pause flag is
// set.
bool HTMLDocumentParser::IsWaitingForScripts() const {
  // When the TreeBuilder encounters a </script> tag, it returns to the
  // HTMLDocumentParser, where the script is transferred from the tree builder
  // to the script runner. The script runner holds the script until it is
  // loaded and run. For that whole time the parser counts as "waiting for a
  // script": the preload scanner runs and completion of parsing is delayed.
  const bool blocked_in_tree_builder = tree_builder_->HasParserBlockingScript();
  const bool blocked_in_script_runner =
      script_runner_ && script_runner_->HasParserBlockingScript();

  // The parser is paused while the script runner has a blocking script, so
  // both objects should never hold a blocking script at the same time.
  DCHECK(!blocked_in_tree_builder || !blocked_in_script_runner);

  // If either object has a blocking script, the parser should be paused.
  if (blocked_in_tree_builder || blocked_in_script_runner)
    return true;
  return reentry_permit_->ParserPauseFlag();
}

// Restarts parsing once whatever paused the parser has completed. Depending
// on the parser's mode this pumps pending speculations, pumps or schedules
// the main-thread tokenizer, or calls EndIfDelayed(). Returns without doing
// anything if the parser is stopped or still paused.
void HTMLDocumentParser::ResumeParsingAfterPause() {
  // This function runs after a parser-blocking script has completed. There are
  // four possible cases:
  // 1) Parsing with kForceSynchronousParsing, where there is no background
  //    parser and a tokenizer_'s defined.
  // 2) Parsing with kAllowAsynchronousParsing, without a background parser. In
  //    this case, the document is usually being completed or parsing has
  //    otherwise stopped.
  // 3) Parsing with kAllowAsynchronousParsing with a background parser. In this
  //    case, need to add any pending speculations to the document.
  // 4) Parsing with kAllowDeferredParsing, with a tokenizer_.
  TRACE_EVENT1("blink", "HTMLDocumentParser::ResumeParsingAfterPause", "parser",
               (void*)this);
  DCHECK(!IsExecutingScript());
  DCHECK(!IsPaused());

  // Re-check for a stylesheet added in the meantime; if that puts the parser
  // back into the paused state there is nothing to resume yet.
  CheckIfBlockingStylesheetAdded();
  if (IsStopped() || IsPaused())
    return;

  if (have_background_parser_) {  // Case 3)
    // If we paused in the middle of processing a token chunk,
    // deal with that before starting to pump.
    if (last_chunk_before_pause_) {
      ValidateSpeculations(std::move(last_chunk_before_pause_));
      DCHECK(!last_chunk_before_pause_);
      PumpPendingSpeculations();
    } else if (!IsScheduledForUnpause()) {
      // Otherwise, start pumping if we're not already scheduled to unpause
      // already.
      PumpPendingSpeculations();
    }
    return;
  }

  // No background parser from here on (cases 1, 2 and 4).
  insertion_preload_scanner_.reset();
  if (tokenizer_) {
    // Case 1) or 4): kForceSynchronousParsing, kAllowDeferredParsing.
    // kForceSynchronousParsing must pump the tokenizer synchronously,
    // otherwise it can be deferred.
    if (task_runner_state_->GetMode() == kAllowDeferredParsing &&
        !task_runner_state_->ShouldComplete() && !InPumpSession()) {
      SchedulePumpTokenizer();
    } else {
      ShouldCompleteScope should_complete(task_runner_state_);
      PumpTokenizerIfPossible();
    }
  } else {
    // Case 2): kAllowAsynchronousParsing, no background parser available
    // (indicating possible Document shutdown).
    EndIfDelayed();
  }
}

// Feeds the current input string to the existing preload scanner and runs a
// scan-and-preload pass over it. Both the scanner and the preloader must
// already exist.
void HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan() {
  TRACE_EVENT1(
      "blink",
      "HTMLDocumentParser::AppendCurrentInputStreamToPreloadScannerAndScan",
      "parser", static_cast<void*>(this));
  DCHECK(preload_scanner_);
  DCHECK(preloader_);

  preload_scanner_->AppendToEnd(input_.Current());
  ScanAndPreload(preload_scanner_.get());
}

// Called when a script the parser was waiting on has loaded. Runs the scripts
// waiting for load and resumes parsing unless the parser is still paused. A
// stopped parser ignores the notification; a stopping parser goes through
// AttemptToRunDeferredScriptsAndEnd() instead.
void HTMLDocumentParser::NotifyScriptLoaded() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::NotifyScriptLoaded", "parser",
               static_cast<void*>(this));
  DCHECK(script_runner_);
  DCHECK(!IsExecutingScript());

  scheduler::CooperativeSchedulingManager::AllowedStackScope
      allowed_stack_scope(scheduler::CooperativeSchedulingManager::Instance());

  if (IsStopped())
    return;

  if (IsStopping()) {
    AttemptToRunDeferredScriptsAndEnd();
    return;
  }

  script_runner_->ExecuteScriptsWaitingForLoad();
  if (IsPaused())
    return;
  ResumeParsingAfterPause();
}

// Clears the waiting-for-stylesheets flag, lets the script runner execute the
// scripts that were waiting for resources, and resumes parsing unless the
// parser is still paused. Does nothing if the parser has stopped.
void HTMLDocumentParser::ExecuteScriptsWaitingForResources() {
  TRACE_EVENT0("blink",
               "HTMLDocumentParser::ExecuteScriptsWaitingForResources");
  if (IsStopped())
    return;

  DCHECK(GetDocument()->IsScriptExecutionReady());

  is_waiting_for_stylesheets_ = false;

  // Document only calls this when the Document owns the DocumentParser so this
  // will not be called in the DocumentFragment case.
  DCHECK(script_runner_);
  script_runner_->ExecuteScriptsWaitingForResources();

  if (IsPaused())
    return;
  ResumeParsingAfterPause();
}

// Records that a parser-blocking stylesheet was added. This only sets
// added_pending_parser_blocking_stylesheet_; the actual waiting flag is set
// later by CheckIfBlockingStylesheetAdded().
void HTMLDocumentParser::DidAddPendingParserBlockingStylesheet() {
  // In-body CSS doesn't block painting. The parser needs to pause so that
  // the DOM doesn't include any elements that may depend on the CSS for style.
  // The stylesheet can be added and removed during the parsing of a single
  // token so don't actually set the bit to block parsing here, just track
  // the state of the added sheet in case it does persist beyond a single
  // token.
  added_pending_parser_blocking_stylesheet_ = true;
}

// Clears the record that a parser-blocking stylesheet was added. Does not
// resume parsing itself.
void HTMLDocumentParser::DidLoadAllPendingParserBlockingStylesheets() {
  // Just toggle the stylesheet flag here (mostly for synchronous sheets).
  // The document will also call into executeScriptsWaitingForResources
  // which is when the parser will re-start, otherwise it will attempt to
  // resume twice which could cause state machine issues.
  added_pending_parser_blocking_stylesheet_ = false;
}

// If a parser-blocking stylesheet was recorded as added, consumes that record
// and marks the parser as waiting for stylesheets.
void HTMLDocumentParser::CheckIfBlockingStylesheetAdded() {
  if (!added_pending_parser_blocking_stylesheet_)
    return;

  added_pending_parser_blocking_stylesheet_ = false;
  is_waiting_for_stylesheets_ = true;
}

void HTMLDocumentParser::ParseDocumentFragment(
    const String& source,
    DocumentFragment* fragment,
    Element* context_element,
    ParserContentPolicy parser_content_policy) {
  auto* parser = MakeGarbageCollected<HTMLDocumentParser>(
      fragment, context_element, parser_content_policy);
  parser->Append(source);
  parser->Finish();
  // Allows ~DocumentParser to assert it was detached before destruction.
  parser->Detach();
}

// Accepts |length| raw bytes at |data|. When parsing asynchronously the bytes
// are copied and posted to the background parser (which is started on
// demand); otherwise they go to DecodedDataDocumentParser::AppendBytes().
// Empty input and a stopped parser are ignored.
void HTMLDocumentParser::AppendBytes(const char* data, size_t length) {
  TRACE_EVENT2("blink", "HTMLDocumentParser::appendBytes", "size",
               static_cast<unsigned>(length), "parser",
               static_cast<void*>(this));

  DCHECK(Thread::MainThread()->IsCurrentThread());

  if (!length || IsStopped())
    return;

  if (!CanParseAsynchronously()) {
    DecodedDataDocumentParser::AppendBytes(data, length);
    return;
  }

  if (!have_background_parser_) {
    StartBackgroundParser();
  }

  // The posted task gets its own copy of the bytes.
  auto bytes_copy = std::make_unique<Vector<char>>(length);
  memcpy(bytes_copy->data(), data, length);

  loading_task_runner_->PostTask(
      FROM_HERE,
      WTF::Bind(&BackgroundHTMLParser::AppendRawBytesFromMainThread,
                background_parser_, std::move(bytes_copy)));
}

// Flushes any buffered data. Depending on the parser state this either
// flushes synchronously through DecodedDataDocumentParser or posts a flush to
// the background parser.
void HTMLDocumentParser::Flush() {
  TRACE_EVENT1("blink", "HTMLDocumentParser::Flush", "parser",
               static_cast<void*>(this));

  // Without a decoder no data was ever received, so there is nothing to
  // flush. The same holds for a detached parser.
  if (IsDetached() || NeedsDecoder())
    return;

  if (!CanParseAsynchronously()) {
    DecodedDataDocumentParser::Flush();
    return;
  }

  if (have_background_parser_) {
    loading_task_runner_->PostTask(
        FROM_HERE, WTF::Bind(&BackgroundHTMLParser::Flush, background_parser_));
    return;
  }

  // Flush() can arrive without any preceding AppendBytes() call, in which
  // case no background parser exists. Switch to synchronous parsing: create
  // the token and tokenizer, then flush through the base class.
  can_parse_asynchronously_ = false;
  token_ = std::make_unique<HTMLToken>();
  tokenizer_ = std::make_unique<HTMLTokenizer>(options_);
  DecodedDataDocumentParser::Flush();
}

// Installs |decoder| (must be non-null) on this parser. If a background
// parser exists, the decoder is immediately taken back and forwarded to it on
// the loading task runner.
void HTMLDocumentParser::SetDecoder(
    std::unique_ptr<TextResourceDecoder> decoder) {
  DCHECK(decoder);
  DecodedDataDocumentParser::SetDecoder(std::move(decoder));

  // No background parser: the decoder stays with this parser.
  if (!have_background_parser_)
    return;

  loading_task_runner_->PostTask(
      FROM_HERE, WTF::Bind(&BackgroundHTMLParser::SetDecoder,
                           background_parser_, TakeDecoder()));
}

void HTMLDocumentParser::DocumentElementAvailable() {
  TRACE_EVENT0("blink,loading", "HTMLDocumentParser::DocumentElementAvailable");
  Document* document = GetDocument();
  DCHECK(document);
  DCHECK(document->documentElement());
  Element* documentElement = GetDocument()->documentElement();
  if (documentElement->hasAttribute(u"\u26A1") ||
      documentElement->hasAttribute("amp") ||
      documentElement->hasAttribute("i-amphtml-layout")) {
    // The DocumentLoader fetches a main resource and handles the result.
    // But it may not be available if JavaScript appends HTML to the page later
    // in the page's lifetime. This can happen both from in-page JavaScript and
    // from extensions. See example callstacks linked from crbug.com/931330.
    if (document->Loader()) {
      document->Loader()->DidObserveLoadingBehavior(
          kLoadingBehaviorAmpDocumentLoaded);
    }
  }
  if (preloader_)
    FetchQueuedPreloads();
}

// Builds a preload scanner of |scanner_type| from this parser's options and
// the current document's URL, cached document parameters and cached media
// values.
std::unique_ptr<HTMLPreloadScanner> HTMLDocumentParser::CreatePreloadScanner(
    TokenPreloadScanner::ScannerType scanner_type) {
  auto* document = GetDocument();
  auto document_parameters =
      std::make_unique<CachedDocumentParameters>(document);
  return std::make_unique<HTMLPreloadScanner>(
      options_, document->Url(), std::move(document_parameters),
      MediaValuesCached::MediaValuesCachedData(*document), scanner_type);
}

// Runs |scanner| against the current base element URL, dispatches Link header
// preloads when they are still outstanding, records whether a CSP meta tag
// was seen, and then queues and fetches the preload requests found by the
// scan. Requires a preloader.
void HTMLDocumentParser::ScanAndPreload(HTMLPreloadScanner* scanner) {
  TRACE_EVENT0("blink", "HTMLDocumentParser::ScanAndPreload");
  DCHECK(preloader_);

  // Out-values filled in by the scan.
  base::Optional<ViewportDescription> scanned_viewport;
  bool saw_csp_meta_tag = false;
  PreloadRequestStream scanned_requests =
      scanner->Scan(GetDocument()->ValidBaseElementURL(), &scanned_viewport,
                    saw_csp_meta_tag);

  // Viewport refresh and Link header dispatch only happen for documents that
  // have a loader while the parser is in the deferred-parsing mode.
  const bool has_loader_in_deferred_mode =
      GetDocument()->Loader() &&
      task_runner_state_->GetMode() == kAllowDeferredParsing;
  if (has_loader_in_deferred_mode) {
    // Keep the viewport up to date so that the correct viewport dimensions
    // are fed to the background parser and preload scanner.
    if (scanned_viewport)
      GetDocument()->GetStyleEngine().UpdateViewport();

    if (task_runner_state_->NeedsLinkHeaderPreloadsDispatch()) {
      if (auto* exchange_manager =
              GetDocument()->Loader()->GetPrefetchedSignedExchangeManager()) {
        TRACE_EVENT0("blink",
                     "HTMLDocumentParser::DispatchSignedExchangeManager");
        // Link header preloads for prefetched signed exchanges are not
        // started until StartPrefetchedLinkHeaderPreloads() is called. See
        // the header comment of PrefetchedSignedExchangeManager.
        exchange_manager->StartPrefetchedLinkHeaderPreloads();
      } else {
        TRACE_EVENT0("blink", "HTMLDocumentParser::DispatchLinkHeaderPreloads");
        GetDocument()->Loader()->DispatchLinkHeaderPreloads(
            base::OptionalOrNullptr(scanned_viewport),
            PreloadHelper::kOnlyLoadMedia);
      }
      // Mark the dispatch as done, whichever branch performed it.
      task_runner_state_->DispatchedLinkHeaderPreloads();
    }
  }

  task_runner_state_->SetSeenCSPMetaTag(saw_csp_meta_tag);

  // Move every scanned request onto the queue, then try to fetch the queue.
  for (auto& scanned_request : scanned_requests)
    queued_preloads_.push_back(std::move(scanned_request));
  FetchQueuedPreloads();
}

// Hands the queued preload requests to the preloader, unless they must stay
// queued for now or the queue is empty. Requires a preloader.
void HTMLDocumentParser::FetchQueuedPreloads() {
  DCHECK(preloader_);
  TRACE_EVENT0("blink", "HTMLDocumentParser::FetchQueuedPreloads");

  // While parsing asynchronously, preloads stay queued as long as a CSP meta
  // token is pending or the document element does not exist yet.
  const bool must_keep_queued =
      CanParseAsynchronously() &&
      (pending_csp_meta_token_ || !GetDocument()->documentElement());
  if (must_keep_queued || queued_preloads_.IsEmpty())
    return;

  preloader_->TakeAndPreload(queued_preloads_);
}

}  // namespace blink