/* * Copyright (C) 2010 Google, Inc. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HTMLDocumentParser_h #define HTMLDocumentParser_h #include "core/dom/ParserContentPolicy.h" #include "core/dom/ScriptableDocumentParser.h" #include "core/fetch/ResourceClient.h" #include "core/html/parser/BackgroundHTMLInputStream.h" #include "core/html/parser/CompactHTMLToken.h" #include "core/html/parser/HTMLInputStream.h" #include "core/html/parser/HTMLParserOptions.h" #include "core/html/parser/HTMLPreloadScanner.h" #include "core/html/parser/HTMLScriptRunnerHost.h" #include "core/html/parser/HTMLSourceTracker.h" #include "core/html/parser/HTMLToken.h" #include "core/html/parser/HTMLTokenizer.h" #include "core/html/parser/HTMLTreeBuilderSimulator.h" #include "core/html/parser/XSSAuditor.h" #include "core/html/parser/XSSAuditorDelegate.h" #include "core/platform/text/SegmentedString.h" #include "wtf/Deque.h" #include "wtf/OwnPtr.h" #include "wtf/WeakPtr.h" #include "wtf/text/TextPosition.h" namespace WebCore { class BackgroundHTMLParser; class CompactHTMLToken; class Document; class DocumentFragment; class HTMLDocument; class HTMLParserScheduler; class HTMLScriptRunner; class HTMLTreeBuilder; class HTMLResourcePreloader; class ScriptController; class ScriptSourceCode; class PumpSession; class HTMLDocumentParser : public ScriptableDocumentParser, HTMLScriptRunnerHost, ResourceClient { WTF_MAKE_FAST_ALLOCATED; public: static PassRefPtr create(Document* document, bool reportErrors) { return adoptRef(new HTMLDocumentParser(document, reportErrors)); } virtual ~HTMLDocumentParser(); // Exposed for HTMLParserScheduler void resumeParsingAfterYield(); static void parseDocumentFragment(const String&, DocumentFragment*, Element* contextElement, ParserContentPolicy = AllowScriptingContent); HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); } virtual TextPosition textPosition() const; virtual OrdinalNumber lineNumber() const; virtual void suspendScheduledTasks(); virtual void resumeScheduledTasks(); struct ParsedChunk { OwnPtr tokens; PreloadRequestStream preloads; XSSInfoStream xssInfos; HTMLTokenizer::State tokenizerState; HTMLTreeBuilderSimulator::State treeBuilderState; HTMLInputCheckpoint inputCheckpoint; TokenPreloadScannerCheckpoint preloadScannerCheckpoint; }; void didReceiveParsedChunkFromBackgroundParser(PassOwnPtr); protected: virtual void insert(const SegmentedString&) OVERRIDE; virtual void append(PassRefPtr) OVERRIDE; virtual void finish() OVERRIDE; HTMLDocumentParser(Document*, bool reportErrors); HTMLDocumentParser(DocumentFragment*, Element* contextElement, ParserContentPolicy); HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); } void forcePlaintextForTextDocument(); private: static PassRefPtr create(DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy) { return adoptRef(new HTMLDocumentParser(fragment, contextElement, parserContentPolicy)); } // DocumentParser virtual void pinToMainThread() OVERRIDE; virtual void detach() OVERRIDE; virtual bool hasInsertionPoint() OVERRIDE; virtual bool processingData() const OVERRIDE; virtual void prepareToStopParsing() OVERRIDE; virtual void stopParsing() OVERRIDE; virtual bool isWaitingForScripts() const OVERRIDE; virtual bool isExecutingScript() const OVERRIDE; virtual void executeScriptsWaitingForResources() OVERRIDE; // HTMLScriptRunnerHost virtual void watchForLoad(Resource*) OVERRIDE; virtual void stopWatchingForLoad(Resource*) OVERRIDE; virtual HTMLInputStream& inputStream() { return m_input; } virtual bool hasPreloadScanner() const { return m_preloadScanner.get() && !shouldUseThreading(); } virtual void appendCurrentInputStreamToPreloadScannerAndScan() OVERRIDE; // ResourceClient virtual void notifyFinished(Resource*); void startBackgroundParser(); void stopBackgroundParser(); void validateSpeculations(PassOwnPtr lastChunk); void discardSpeculationsAndResumeFrom(PassOwnPtr lastChunk, PassOwnPtr, PassOwnPtr); void processParsedChunkFromBackgroundParser(PassOwnPtr); void pumpPendingSpeculations(); Document* contextForParsingSession(); enum SynchronousMode { AllowYield, ForceSynchronous, }; bool canTakeNextToken(SynchronousMode, PumpSession&); void pumpTokenizer(SynchronousMode); void pumpTokenizerIfPossible(SynchronousMode); void constructTreeFromHTMLToken(HTMLToken&); void constructTreeFromCompactHTMLToken(const CompactHTMLToken&); void runScriptsForPausedTreeBuilder(); void resumeParsingAfterScriptExecution(); void attemptToEnd(); void endIfDelayed(); void attemptToRunDeferredScriptsAndEnd(); void end(); bool shouldUseThreading() const { return m_options.useThreading && !m_isPinnedToMainThread; } bool isParsingFragment() const; bool isScheduledForResume() const; bool inPumpSession() const { return m_pumpSessionNestingLevel > 0; } bool shouldDelayEnd() const { return inPumpSession() || isWaitingForScripts() || isScheduledForResume() || isExecutingScript(); } HTMLToken& token() { return *m_token; } HTMLParserOptions m_options; HTMLInputStream m_input; OwnPtr m_token; OwnPtr m_tokenizer; OwnPtr m_scriptRunner; OwnPtr m_treeBuilder; OwnPtr m_preloadScanner; OwnPtr m_insertionPreloadScanner; OwnPtr m_parserScheduler; HTMLSourceTracker m_sourceTracker; TextPosition m_textPosition; XSSAuditor m_xssAuditor; XSSAuditorDelegate m_xssAuditorDelegate; // FIXME: m_lastChunkBeforeScript, m_tokenizer, m_token, and m_input should be combined into a single state object // so they can be set and cleared together and passed between threads together. OwnPtr m_lastChunkBeforeScript; Deque > m_speculations; WeakPtrFactory m_weakFactory; WeakPtr m_backgroundParser; OwnPtr m_preloader; bool m_isPinnedToMainThread; bool m_endWasDelayed; bool m_haveBackgroundParser; unsigned m_pumpSessionNestingLevel; }; } #endif