diff options
Diffstat (limited to 'Source/WebCore/editing/TextIterator.cpp')
-rw-r--r-- | Source/WebCore/editing/TextIterator.cpp | 1702 |
1 files changed, 896 insertions, 806 deletions
diff --git a/Source/WebCore/editing/TextIterator.cpp b/Source/WebCore/editing/TextIterator.cpp index 8baee8525..3cedb1c59 100644 --- a/Source/WebCore/editing/TextIterator.cpp +++ b/Source/WebCore/editing/TextIterator.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2012 Apple Inc. All rights reserved. + * Copyright (C) 2004-2014 Apple Inc. All rights reserved. * Copyright (C) 2005 Alexey Proskuryakov. * * Redistribution and use in source and binary forms, with or without @@ -11,10 +11,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -28,11 +28,19 @@ #include "TextIterator.h" #include "Document.h" -#include "ExceptionCodePlaceholder.h" -#include "Font.h" +#include "FontCascade.h" #include "Frame.h" +#include "HTMLBodyElement.h" #include "HTMLElement.h" +#include "HTMLFrameOwnerElement.h" +#include "HTMLInputElement.h" +#include "HTMLLegendElement.h" +#include "HTMLMeterElement.h" #include "HTMLNames.h" +#include "HTMLParagraphElement.h" +#include "HTMLProgressElement.h" +#include "HTMLSlotElement.h" +#include "HTMLTextAreaElement.h" #include "HTMLTextFormControlElement.h" #include "InlineTextBox.h" #include "NodeTraversal.h" @@ -43,18 +51,21 @@ #include "RenderTextControl.h" #include "RenderTextFragment.h" #include "ShadowRoot.h" +#include "SimpleLineLayout.h" +#include "SimpleLineLayoutResolver.h" #include "TextBoundaries.h" -#include "TextBreakIterator.h" +#include <wtf/text/TextBreakIterator.h> #include "TextControlInnerElements.h" #include "VisiblePosition.h" #include "VisibleUnits.h" #include "htmlediting.h" #include <wtf/text/CString.h> +#include <wtf/text/StringBuilder.h> #include <wtf/unicode/CharacterNames.h> #if !UCONFIG_NO_COLLATION -#include "TextBreakIteratorInternalICU.h" #include <unicode/usearch.h> +#include <wtf/text/TextBreakIteratorInternalICU.h> #endif using namespace WTF::Unicode; @@ -76,9 +87,9 @@ public: ~SearchBuffer(); // Returns number of characters appended; guaranteed to be in the range [1, length]. - size_t append(const UChar*, size_t length); + size_t append(StringView); bool needsMoreContext() const; - void prependContext(const UChar*, size_t length); + void prependContext(StringView); void reachedBreak(); // Result is the size in characters of what was found. @@ -91,8 +102,10 @@ public: private: bool isBadMatch(const UChar*, size_t length) const; bool isWordStartMatch(size_t start, size_t length) const; + bool isWordEndMatch(size_t start, size_t length) const; - String m_target; + const String m_target; + const StringView::UpconvertedCharacters m_targetCharacters; FindOptions m_options; Vector<UChar> m_buffer; @@ -101,7 +114,7 @@ private: bool m_atBreak; bool m_needsMoreContext; - bool m_targetRequiresKanaWorkaround; + const bool m_targetRequiresKanaWorkaround; Vector<UChar> m_normalizedTarget; mutable Vector<UChar> m_normalizedMatch; @@ -174,186 +187,193 @@ unsigned BitStack::size() const // -------- -#if !ASSERT_DISABLED - -static unsigned depthCrossingShadowBoundaries(Node* node) -{ - unsigned depth = 0; - for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) - ++depth; - return depth; -} - -#endif - // This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees. -static Node* nextInPreOrderCrossingShadowBoundaries(Node* rangeEndContainer, int rangeEndOffset) +static Node* nextInPreOrderCrossingShadowBoundaries(Node& rangeEndContainer, int rangeEndOffset) { - if (!rangeEndContainer) - return 0; - if (rangeEndOffset >= 0 && !rangeEndContainer->offsetInCharacters()) { - if (Node* next = rangeEndContainer->childNode(rangeEndOffset)) + if (rangeEndOffset >= 0 && !rangeEndContainer.offsetInCharacters()) { + if (Node* next = rangeEndContainer.traverseToChildAt(rangeEndOffset)) return next; } - for (Node* node = rangeEndContainer; node; node = node->parentOrShadowHostNode()) { + for (Node* node = &rangeEndContainer; node; node = node->parentOrShadowHostNode()) { if (Node* next = node->nextSibling()) return next; } - return 0; + return nullptr; } -// -------- - -static inline bool fullyClipsContents(Node* node) +static inline bool fullyClipsContents(Node& node) { - RenderObject* renderer = node->renderer(); - if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip()) + auto* renderer = node.renderer(); + if (!renderer) { + if (!is<Element>(node)) + return false; + return !downcast<Element>(node).hasDisplayContents(); + } + if (!is<RenderBox>(*renderer)) + return false; + auto& box = downcast<RenderBox>(*renderer); + if (!box.hasOverflowClip()) return false; - return toRenderBox(renderer)->size().isEmpty(); + + // Quirk to keep copy/paste in the CodeMirror editor version used in Jenkins working. + if (is<HTMLTextAreaElement>(node)) + return box.size().isEmpty(); + + return box.contentSize().isEmpty(); } -static inline bool ignoresContainerClip(Node* node) +static inline bool ignoresContainerClip(Node& node) { - RenderObject* renderer = node->renderer(); + auto* renderer = node.renderer(); if (!renderer || renderer->isTextOrLineBreak()) return false; return renderer->style().hasOutOfFlowPosition(); } -static void pushFullyClippedState(BitStack& stack, Node* node) +static void pushFullyClippedState(BitStack& stack, Node& node) { - ASSERT(stack.size() == depthCrossingShadowBoundaries(node)); - // Push true if this node full clips its contents, or if a parent already has fully // clipped and this is not a node that ignores its container's clip. stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node))); } -static void setUpFullyClippedStack(BitStack& stack, Node* node) +static void setUpFullyClippedStack(BitStack& stack, Node& node) { // Put the nodes in a vector so we can iterate in reverse order. + // FIXME: This (and TextIterator in general) should use ComposedTreeIterator. Vector<Node*, 100> ancestry; - for (Node* parent = node->parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) + for (Node* parent = node.parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) ancestry.append(parent); // Call pushFullyClippedState on each node starting with the earliest ancestor. size_t size = ancestry.size(); for (size_t i = 0; i < size; ++i) - pushFullyClippedState(stack, ancestry[size - i - 1]); + pushFullyClippedState(stack, *ancestry[size - i - 1]); pushFullyClippedState(stack, node); +} - ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node)); +static bool isClippedByFrameAncestor(const Document& document, TextIteratorBehavior behavior) +{ + if (!(behavior & TextIteratorClipsToFrameAncestors)) + return false; + + for (auto* owner = document.ownerElement(); owner; owner = owner->document().ownerElement()) { + BitStack ownerClipStack; + setUpFullyClippedStack(ownerClipStack, *owner); + if (ownerClipStack.top()) + return true; + } + return false; } - + +// FIXME: editingIgnoresContent and isRendererReplacedElement try to do the same job. +// It's not good to have both of them. bool isRendererReplacedElement(RenderObject* renderer) { if (!renderer) return false; -#if ENABLE(PLUGIN_PROXY_FOR_VIDEO) - if (renderer->isImage() || renderer->isMedia()) - return true; - if (renderer->isWidget()) { - if (renderer->node() && renderer->node()->isElementNode()) { - Element* element = toElement(renderer->node()); - if (element->hasTagName(videoTag) || !element->hasTagName(audioTag)) - return false; // See <rdar://problem/6893793>. - } - return true; - } -#else - if (renderer->isImage() || renderer->isWidget() || renderer->isMedia()) - return true; + bool isAttachment = false; +#if ENABLE(ATTACHMENT_ELEMENT) + isAttachment = renderer->isAttachment(); #endif - - if (renderer->node() && renderer->node()->isElementNode()) { - Element* element = toElement(renderer->node()); - if (element->isFormControlElement() || element->hasTagName(legendTag) - || element->hasTagName(meterTag) || element->hasTagName(progressTag)) + if (renderer->isImage() || renderer->isWidget() || renderer->isMedia() || isAttachment) + return true; + + if (is<Element>(renderer->node())) { + Element& element = downcast<Element>(*renderer->node()); + if (is<HTMLFormControlElement>(element) || is<HTMLLegendElement>(element) || is<HTMLProgressElement>(element) || element.hasTagName(meterTag)) return true; - - if (equalIgnoringCase(element->getAttribute(roleAttr), "img")) + if (equalLettersIgnoringASCIICase(element.attributeWithoutSynchronization(roleAttr), "img")) return true; } - + return false; } // -------- -TextIterator::TextIterator(const Range* r, TextIteratorBehavior behavior) - : m_startContainer(0) - , m_startOffset(0) - , m_endContainer(0) - , m_endOffset(0) - , m_positionNode(0) - , m_textCharacters(0) - , m_textLength(0) - , m_remainingTextBox(0) - , m_firstLetterText(0) - , m_sortedTextBoxesPosition(0) - , m_emitsCharactersBetweenAllVisiblePositions(behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) - , m_entersTextControls(behavior & TextIteratorEntersTextControls) - , m_emitsTextWithoutTranscoding(behavior & TextIteratorEmitsTextsWithoutTranscoding) - , m_emitsOriginalText(behavior & TextIteratorEmitsOriginalText) - , m_handledFirstLetter(false) - , m_ignoresStyleVisibility(behavior & TextIteratorIgnoresStyleVisibility) - , m_emitsObjectReplacementCharacters(behavior & TextIteratorEmitsObjectReplacementCharacters) - , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) - , m_shouldStop(false) - , m_emitsImageAltText(behavior & TextIteratorEmitsImageAltText) -{ - if (!r) - return; +inline void TextIteratorCopyableText::reset() +{ + m_singleCharacter = 0; + m_string = String(); + m_offset = 0; + m_length = 0; +} + +inline void TextIteratorCopyableText::set(String&& string) +{ + m_singleCharacter = 0; + m_string = WTFMove(string); + m_offset = 0; + m_length = m_string.length(); +} + +inline void TextIteratorCopyableText::set(String&& string, unsigned offset, unsigned length) +{ + ASSERT(offset < string.length()); + ASSERT(length); + ASSERT(length <= string.length() - offset); + + m_singleCharacter = 0; + m_string = WTFMove(string); + m_offset = offset; + m_length = length; +} + +inline void TextIteratorCopyableText::set(UChar singleCharacter) +{ + m_singleCharacter = singleCharacter; + m_string = String(); + m_offset = 0; + m_length = 0; +} - r->ownerDocument().updateLayoutIgnorePendingStylesheets(); +void TextIteratorCopyableText::appendToStringBuilder(StringBuilder& builder) const +{ + if (m_singleCharacter) + builder.append(m_singleCharacter); + else + builder.append(m_string, m_offset, m_length); +} + +// -------- - // get and validate the range endpoints - Node* startContainer = r->startContainer(); - if (!startContainer) +TextIterator::TextIterator(const Range* range, TextIteratorBehavior behavior) + : m_behavior(behavior) +{ + // FIXME: Only m_positionNode above needs to be initialized if range is null. + if (!range) return; - int startOffset = r->startOffset(); - Node* endContainer = r->endContainer(); - int endOffset = r->endOffset(); + + range->ownerDocument().updateLayoutIgnorePendingStylesheets(); + + m_startContainer = &range->startContainer(); // Callers should be handing us well-formed ranges. If we discover that this isn't // the case, we could consider changing this assertion to an early return. - ASSERT(r->boundaryPointsValid()); + ASSERT(range->boundaryPointsValid()); - // remember range - this does not change - m_startContainer = startContainer; - m_startOffset = startOffset; - m_endContainer = endContainer; - m_endOffset = endOffset; + m_startOffset = range->startOffset(); + m_endContainer = &range->endContainer(); + m_endOffset = range->endOffset(); - // set up the current node for processing - m_node = r->firstNode(); + // Set up the current node for processing. + m_node = range->firstNode(); if (!m_node) return; - setUpFullyClippedStack(m_fullyClippedStack, m_node); - m_offset = m_node == m_startContainer ? m_startOffset : 0; - m_handledNode = false; - m_handledChildren = false; - // calculate first out of bounds node - m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(endContainer, endOffset); + if (isClippedByFrameAncestor(m_node->document(), m_behavior)) + return; - // initialize node processing state - m_needsAnotherNewline = false; - m_textBox = 0; + setUpFullyClippedStack(m_fullyClippedStack, *m_node); - // initialize record of previous node processing - m_hasEmitted = false; - m_lastTextNode = 0; - m_lastTextNodeEndedWithCollapsedSpace = false; - m_lastCharacter = 0; + m_offset = m_node == m_startContainer ? m_startOffset : 0; + + m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(*m_endContainer, m_endOffset); -#ifndef NDEBUG - // need this just because of the assert in advance() m_positionNode = m_node; -#endif - // identify the first run advance(); } @@ -361,33 +381,95 @@ TextIterator::~TextIterator() { } +static HTMLSlotElement* assignedAuthorSlot(Node& node) +{ + auto* slot = node.assignedSlot(); + if (!slot || slot->containingShadowRoot()->mode() == ShadowRootMode::UserAgent) + return nullptr; + return slot; +} + +static ShadowRoot* authorShadowRoot(Node& node) +{ + auto* shadowRoot = node.shadowRoot(); + if (!shadowRoot || shadowRoot->mode() == ShadowRootMode::UserAgent) + return nullptr; + return shadowRoot; +} + +// FIXME: Use ComposedTreeIterator instead. These functions are more expensive because they might do O(n) work. +static inline Node* firstChildInFlatTreeIgnoringUserAgentShadow(Node& node) +{ + if (auto* shadowRoot = authorShadowRoot(node)) + return shadowRoot->firstChild(); + if (is<HTMLSlotElement>(node)) { + if (auto* assignedNodes = downcast<HTMLSlotElement>(node).assignedNodes()) + return assignedNodes->at(0); + } + return node.firstChild(); +} + +static inline Node* nextSiblingInFlatTreeIgnoringUserAgentShadow(Node& node) +{ + if (auto* slot = assignedAuthorSlot(node)) { + auto* assignedNodes = slot->assignedNodes(); + ASSERT(assignedNodes); + auto nodeIndex = assignedNodes->find(&node); + ASSERT(nodeIndex != notFound); + if (assignedNodes->size() > nodeIndex + 1) + return assignedNodes->at(nodeIndex + 1); + return nullptr; + } + return node.nextSibling(); +} + +static inline Node* firstChild(TextIteratorBehavior options, Node& node) +{ + if (UNLIKELY(options & TextIteratorTraversesFlatTree)) + return firstChildInFlatTreeIgnoringUserAgentShadow(node); + return node.firstChild(); +} + +static inline Node* nextSibling(TextIteratorBehavior options, Node& node) +{ + if (UNLIKELY(options & TextIteratorTraversesFlatTree)) + return nextSiblingInFlatTreeIgnoringUserAgentShadow(node); + return node.nextSibling(); +} + +static inline Node* parentNodeOrShadowHost(TextIteratorBehavior options, Node& node) +{ + if (UNLIKELY(options & TextIteratorTraversesFlatTree)) + return node.parentInComposedTree(); + return node.parentOrShadowHostNode(); +} + void TextIterator::advance() { - if (m_shouldStop) - return; + ASSERT(!atEnd()); // reset the run information - m_positionNode = 0; - m_textLength = 0; + m_positionNode = nullptr; + m_copyableText.reset(); + m_text = StringView(); // handle remembered node that needed a newline after the text node's newline - if (m_needsAnotherNewline) { + if (m_nodeForAdditionalNewline) { // Emit the extra newline, and position it *inside* m_node, after m_node's // contents, in case it's a block, in the same way that we position the first - // newline. The range for the emitted newline should start where the line + // newline. The range for the emitted newline should start where the line // break begins. // FIXME: It would be cleaner if we emitted two newlines during the last // iteration, instead of using m_needsAnotherNewline. - Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; - emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); - m_needsAnotherNewline = false; + emitCharacter('\n', *m_nodeForAdditionalNewline->parentNode(), m_nodeForAdditionalNewline, 1, 1); + m_nodeForAdditionalNewline = nullptr; return; } if (!m_textBox && m_remainingTextBox) { m_textBox = m_remainingTextBox; - m_remainingTextBox = 0; - m_firstLetterText = 0; + m_remainingTextBox = nullptr; + m_firstLetterText = nullptr; m_offset = 0; } // handle remembered text box @@ -398,27 +480,24 @@ void TextIterator::advance() } while (m_node && m_node != m_pastEndNode) { - if (!m_shouldStop && m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) - m_shouldStop = true; - // if the range ends at offset 0 of an element, represent the // position, but not the content, of that element e.g. if the // node is a blockflow element, emit a newline that // precedes the element - if (m_node == m_endContainer && m_endOffset == 0) { + if (m_node == m_endContainer && !m_endOffset) { representNodeOffsetZero(); - m_node = 0; + m_node = nullptr; return; } - RenderObject* renderer = m_node->renderer(); + auto* renderer = m_node->renderer(); if (!renderer) { m_handledNode = true; - m_handledChildren = true; + m_handledChildren = !((m_behavior & TextIteratorTraversesFlatTree) && is<Element>(*m_node) && downcast<Element>(*m_node).hasDisplayContents()); } else { // handle current node according to its type if (!m_handledNode) { - if (renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) // FIXME: What about CDATA_SECTION_NODE? + if (renderer->isText() && m_node->isTextNode()) m_handledNode = handleTextNode(); else if (isRendererReplacedElement(renderer)) m_handledNode = handleReplacedElement(); @@ -431,28 +510,29 @@ void TextIterator::advance() // find a new current node to handle in depth-first manner, // calling exitNode() as we come back thru a parent node - Node* next = m_handledChildren ? 0 : m_node->firstChild(); + Node* next = m_handledChildren ? nullptr : firstChild(m_behavior, *m_node); m_offset = 0; if (!next) { - next = m_node->nextSibling(); + next = nextSibling(m_behavior, *m_node); if (!next) { - bool pastEnd = NodeTraversal::next(m_node) == m_pastEndNode; - Node* parentNode = m_node->parentOrShadowHostNode(); + bool pastEnd = NodeTraversal::next(*m_node) == m_pastEndNode; + Node* parentNode = parentNodeOrShadowHost(m_behavior, *m_node); while (!next && parentNode) { - if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode)) + if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(*parentNode)) return; bool haveRenderer = m_node->renderer(); + Node* exitedNode = m_node; m_node = parentNode; m_fullyClippedStack.pop(); - parentNode = m_node->parentOrShadowHostNode(); + parentNode = parentNodeOrShadowHost(m_behavior, *m_node); if (haveRenderer) - exitNode(); + exitNode(exitedNode); if (m_positionNode) { m_handledNode = true; m_handledChildren = true; return; } - next = m_node->nextSibling(); + next = nextSibling(m_behavior, *m_node); } } m_fullyClippedStack.pop(); @@ -461,11 +541,11 @@ void TextIterator::advance() // set the new current node m_node = next; if (m_node) - pushFullyClippedState(m_fullyClippedStack, m_node); + pushFullyClippedState(m_fullyClippedStack, *m_node); m_handledNode = false; m_handledChildren = false; m_handledFirstLetter = false; - m_firstLetterText = 0; + m_firstLetterText = nullptr; // how would this ever be? if (m_positionNode) @@ -473,122 +553,179 @@ void TextIterator::advance() } } -UChar TextIterator::characterAt(unsigned index) const +static bool hasVisibleTextNode(RenderText& renderer) { - ASSERT_WITH_SECURITY_IMPLICATION(index < static_cast<unsigned>(length())); - if (!(index < static_cast<unsigned>(length()))) - return 0; - - if (!m_textCharacters) - return string()[startOffset() + index]; + if (renderer.style().visibility() == VISIBLE) + return true; + if (is<RenderTextFragment>(renderer)) { + if (auto firstLetter = downcast<RenderTextFragment>(renderer).firstLetter()) { + if (firstLetter->style().visibility() == VISIBLE) + return true; + } + } + return false; +} - return m_textCharacters[index]; +static unsigned textNodeOffsetInFlow(const Text& firstTextNodeInRange) +{ + // Calculate the text offset for simple lines. + RenderObject* renderer = firstTextNodeInRange.renderer(); + if (!renderer) + return 0; + unsigned textOffset = 0; + for (renderer = renderer->previousSibling(); renderer; renderer = renderer->previousSibling()) { + if (is<RenderText>(renderer)) + textOffset += downcast<RenderText>(renderer)->textLength(); + } + return textOffset; } -void TextIterator::appendTextToStringBuilder(StringBuilder& builder) const +static bool isNewLineOrTabCharacter(UChar character) { - if (!m_textCharacters) - builder.append(string(), startOffset(), length()); - else - builder.append(characters(), length()); + return character == '\n' || character == '\t'; } bool TextIterator::handleTextNode() { - if (m_fullyClippedStack.top() && !m_ignoresStyleVisibility) + Text& textNode = downcast<Text>(*m_node); + + if (m_fullyClippedStack.top() && !(m_behavior & TextIteratorIgnoresStyleVisibility)) return false; - RenderText* renderer = toRenderText(m_node->renderer()); - - m_lastTextNode = m_node; - String str = renderer->text(); + auto& renderer = *textNode.renderer(); + m_lastTextNode = &textNode; + String rendererText = renderer.text(); // handle pre-formatted text - if (!renderer->style().collapseWhiteSpace()) { + if (!renderer.style().collapseWhiteSpace()) { int runStart = m_offset; if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) { - emitCharacter(' ', m_node, 0, runStart, runStart); + emitCharacter(' ', textNode, nullptr, runStart, runStart); return false; } - if (!m_handledFirstLetter && renderer->isTextFragment() && !m_offset) { - handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); + if (!m_handledFirstLetter && is<RenderTextFragment>(renderer) && !m_offset) { + handleTextNodeFirstLetter(downcast<RenderTextFragment>(renderer)); if (m_firstLetterText) { String firstLetter = m_firstLetterText->text(); - emitText(m_node, m_firstLetterText, m_offset, m_offset + firstLetter.length()); - m_firstLetterText = 0; - m_textBox = 0; + emitText(textNode, *m_firstLetterText, m_offset, m_offset + firstLetter.length()); + m_firstLetterText = nullptr; + m_textBox = nullptr; return false; } } - if (renderer->style().visibility() != VISIBLE && !m_ignoresStyleVisibility) + if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) return false; - int strLength = str.length(); - int end = (m_node == m_endContainer) ? m_endOffset : INT_MAX; - int runEnd = std::min(strLength, end); + int rendererTextLength = rendererText.length(); + int end = (&textNode == m_endContainer) ? m_endOffset : INT_MAX; + int runEnd = std::min(rendererTextLength, end); if (runStart >= runEnd) return true; - emitText(m_node, runStart, runEnd); + emitText(textNode, renderer, runStart, runEnd); return true; } - if (renderer->simpleLineLayout()) { - if (renderer->style().visibility() != VISIBLE && !m_ignoresStyleVisibility) + if (const auto* layout = renderer.simpleLineLayout()) { + if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) return true; - // This code aims to produce same results as handleTextBox() below so test results don't change. It does not make much logical sense. - unsigned runEnd = m_offset; - unsigned runStart = m_offset; - while (runEnd < str.length() && (isCollapsibleWhitespace(str[runEnd]) || str[runEnd] == '\t')) - ++runEnd; - bool addSpaceForPrevious = m_lastTextNodeEndedWithCollapsedSpace && m_lastCharacter && !isCollapsibleWhitespace(m_lastCharacter); - if (runEnd > runStart || addSpaceForPrevious) { - if (runEnd == str.length()) { - m_lastTextNodeEndedWithCollapsedSpace = true; - return true; + ASSERT(renderer.parent()); + ASSERT(is<RenderBlockFlow>(*renderer.parent())); + const auto& blockFlow = downcast<RenderBlockFlow>(*renderer.parent()); + // Use the simple layout runs to iterate over the text content. + bool isNewTextNode = m_previousSimpleTextNodeInFlow && m_previousSimpleTextNodeInFlow != &textNode; + // Simple line layout run positions are all absolute to the parent flow. + // Offsetting is required when multiple renderers are present. + m_accumulatedSimpleTextLengthInFlow += isNewTextNode ? m_previousSimpleTextNodeInFlow->renderer()->text()->length() : 0; + m_previousSimpleTextNodeInFlow = &textNode; + + unsigned endPosition = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : rendererText.length(); + if (!m_flowRunResolverCache || &m_flowRunResolverCache->flow() != &blockFlow) { + m_accumulatedSimpleTextLengthInFlow = m_flowRunResolverCache ? 0 : textNodeOffsetInFlow(textNode); + m_flowRunResolverCache = std::make_unique<SimpleLineLayout::RunResolver>(blockFlow, *layout); + } + // Skip to m_offset position. + auto range = m_flowRunResolverCache->rangeForRenderer(renderer); + auto it = range.begin(); + auto end = range.end(); + while (it != end && (*it).end() <= (static_cast<unsigned>(m_offset) + m_accumulatedSimpleTextLengthInFlow)) + ++it; + if (m_nextRunNeedsWhitespace && rendererText[m_offset - 1] == '\n') { + emitCharacter(' ', textNode, nullptr, m_offset, m_offset + 1); + return it == end; + } + if (it == end) { + // Collapsed trailing whitespace. + m_offset = endPosition; + m_lastTextNodeEndedWithCollapsedSpace = true; + return true; + } + if (m_nextRunNeedsWhitespace) { + emitCharacter(' ', textNode, nullptr, m_offset, m_offset + 1); + return false; + } + const auto run = *it; + ASSERT(run.end() - run.start() <= rendererText.length()); + // contentStart skips leading whitespace. + unsigned contentStart = std::max<unsigned>(m_offset, run.start() - m_accumulatedSimpleTextLengthInFlow); + unsigned contentEnd = std::min(endPosition, run.end() - m_accumulatedSimpleTextLengthInFlow); + ASSERT_WITH_SECURITY_IMPLICATION(contentStart <= contentEnd); + // Check if whitespace adjustment is needed when crossing renderer boundary. + if (isNewTextNode) { + bool lastCharacterIsNotWhitespace = m_lastCharacter && !renderer.style().isCollapsibleWhiteSpace(m_lastCharacter); + bool addTrailingWhitespaceForPrevious = m_lastTextNodeEndedWithCollapsedSpace && lastCharacterIsNotWhitespace; + bool leadingWhitespaceIsNeededForCurrent = contentStart > static_cast<unsigned>(m_offset) && lastCharacterIsNotWhitespace; + if (addTrailingWhitespaceForPrevious || leadingWhitespaceIsNeededForCurrent) { + emitCharacter(' ', textNode, nullptr, m_offset, m_offset + 1); + return false; } - bool addSpaceForCurrent = runStart || (m_lastCharacter && !isCollapsibleWhitespace(m_lastCharacter)); - if (addSpaceForCurrent || addSpaceForPrevious) { - emitCharacter(' ', m_node, 0, runStart, runEnd); - m_offset = runEnd; + } + // \n \t single whitespace characters need replacing so that the new line/tab characters don't show up. + unsigned stopPosition = contentStart; + while (stopPosition < contentEnd && !isNewLineOrTabCharacter(rendererText[stopPosition])) + ++stopPosition; + // Emit the text up to the new line/tab character. + if (stopPosition < contentEnd) { + if (stopPosition == contentStart) { + emitCharacter(' ', textNode, nullptr, contentStart, contentStart + 1); + m_offset = contentStart + 1; return false; } - runStart = runEnd; + emitText(textNode, renderer, contentStart, stopPosition); + m_offset = stopPosition + 1; + m_nextRunNeedsWhitespace = true; + return false; } - while (runEnd < str.length() && !isCollapsibleWhitespace(str[runEnd])) - ++runEnd; - if (runStart < str.length()) - emitText(m_node, renderer, runStart, runEnd); - m_offset = runEnd; - return runEnd == str.length(); + emitText(textNode, renderer, contentStart, contentEnd); + // When line ending with collapsed whitespace is present, we need to carry over one whitespace: foo(end of line)bar -> foo bar (otherwise we would end up with foobar). + m_nextRunNeedsWhitespace = run.isEndOfLine() && contentEnd < endPosition && renderer.style().isCollapsibleWhiteSpace(rendererText[contentEnd]); + m_offset = contentEnd; + return static_cast<unsigned>(m_offset) == endPosition; } - if (renderer->firstTextBox()) - m_textBox = renderer->firstTextBox(); + if (renderer.firstTextBox()) + m_textBox = renderer.firstTextBox(); - bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer->isTextFragment() && !m_offset; + bool shouldHandleFirstLetter = !m_handledFirstLetter && is<RenderTextFragment>(renderer) && !m_offset; if (shouldHandleFirstLetter) - handleTextNodeFirstLetter(static_cast<RenderTextFragment*>(renderer)); + handleTextNodeFirstLetter(downcast<RenderTextFragment>(renderer)); - if (!renderer->firstTextBox() && str.length() > 0 && !shouldHandleFirstLetter) { - if (renderer->style().visibility() != VISIBLE && !m_ignoresStyleVisibility) + if (!renderer.firstTextBox() && rendererText.length() && !shouldHandleFirstLetter) { + if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) return false; m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space return true; } - if (m_firstLetterText) - renderer = m_firstLetterText; - // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) - if (renderer->containsReversedText()) { + auto& boxesRenderer = m_firstLetterText ? *m_firstLetterText : renderer; + if (boxesRenderer.containsReversedText()) { m_sortedTextBoxes.clear(); - for (InlineTextBox* textBox = renderer->firstTextBox(); textBox; textBox = textBox->nextTextBox()) { + for (InlineTextBox* textBox = boxesRenderer.firstTextBox(); textBox; textBox = textBox->nextTextBox()) m_sortedTextBoxes.append(textBox); - } - std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); + std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); m_sortedTextBoxesPosition = 0; - m_textBox = m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]; + m_textBox = m_sortedTextBoxes.isEmpty() ? nullptr : m_sortedTextBoxes[0]; } handleTextBox(); @@ -597,58 +734,62 @@ bool TextIterator::handleTextNode() void TextIterator::handleTextBox() { - RenderText* renderer = m_firstLetterText ? m_firstLetterText : toRenderText(m_node->renderer()); - if (renderer->style().visibility() != VISIBLE && !m_ignoresStyleVisibility) { - m_textBox = 0; + Text& textNode = downcast<Text>(*m_node); + + auto& renderer = m_firstLetterText ? *m_firstLetterText : *textNode.renderer(); + if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) { + m_textBox = nullptr; return; } - String str = renderer->text(); + String rendererText = renderer.text(); unsigned start = m_offset; - unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : UINT_MAX; + unsigned end = (&textNode == m_endContainer) ? static_cast<unsigned>(m_endOffset) : UINT_MAX; while (m_textBox) { unsigned textBoxStart = m_textBox->start(); unsigned runStart = std::max(textBoxStart, start); // Check for collapsed space at the start of this run. - InlineTextBox* firstTextBox = renderer->containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? 0 : m_sortedTextBoxes[0]) : renderer->firstTextBox(); - bool needSpace = m_lastTextNodeEndedWithCollapsedSpace - || (m_textBox == firstTextBox && textBoxStart == runStart && runStart > 0); - if (needSpace && !isCollapsibleWhitespace(m_lastCharacter) && m_lastCharacter) { - if (m_lastTextNode == m_node && runStart > 0 && str[runStart - 1] == ' ') { + InlineTextBox* firstTextBox = renderer.containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? nullptr : m_sortedTextBoxes[0]) : renderer.firstTextBox(); + bool needSpace = m_lastTextNodeEndedWithCollapsedSpace || (m_textBox == firstTextBox && textBoxStart == runStart && runStart); + if (needSpace && !renderer.style().isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) { + if (m_lastTextNode == &textNode && runStart && rendererText[runStart - 1] == ' ') { unsigned spaceRunStart = runStart - 1; - while (spaceRunStart > 0 && str[spaceRunStart - 1] == ' ') + while (spaceRunStart && rendererText[spaceRunStart - 1] == ' ') --spaceRunStart; - emitText(m_node, renderer, spaceRunStart, spaceRunStart + 1); + emitText(textNode, renderer, spaceRunStart, spaceRunStart + 1); } else - emitCharacter(' ', m_node, 0, runStart, runStart); + emitCharacter(' ', textNode, nullptr, runStart, runStart); return; } unsigned textBoxEnd = textBoxStart + m_textBox->len(); unsigned runEnd = std::min(textBoxEnd, end); // Determine what the next text box will be, but don't advance yet - InlineTextBox* nextTextBox = 0; - if (renderer->containsReversedText()) { + InlineTextBox* nextTextBox = nullptr; + if (renderer.containsReversedText()) { if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size()) nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1]; } else nextTextBox = m_textBox->nextTextBox(); - ASSERT(!nextTextBox || &nextTextBox->renderer() == renderer); + ASSERT(!nextTextBox || &nextTextBox->renderer() == &renderer); if (runStart < runEnd) { // Handle either a single newline character (which becomes a space), // or a run of characters that does not include a newline. // This effectively translates newlines to spaces without copying the text. - if (str[runStart] == '\n') { - emitCharacter(' ', m_node, 0, runStart, runStart + 1); + if (rendererText[runStart] == '\n') { + emitCharacter(' ', textNode, nullptr, runStart, runStart + 1); m_offset = runStart + 1; } else { - size_t subrunEnd = str.find('\n', runStart); - if (subrunEnd == notFound || subrunEnd > runEnd) + size_t subrunEnd = rendererText.find('\n', runStart); + if (subrunEnd == notFound || subrunEnd > runEnd) { subrunEnd = runEnd; - + bool lastSpaceCollapsedByNextNonTextBox = !nextTextBox && (m_behavior & TextIteratorBehavesAsIfNodesFollowing) && rendererText.length() > runEnd; + if (lastSpaceCollapsedByNextNonTextBox) + ++subrunEnd; // runEnd stopped before last space. Increment by one to restore the space. + } m_offset = subrunEnd; - emitText(m_node, renderer, runStart, subrunEnd); + emitText(textNode, renderer, runStart, subrunEnd); } // If we are doing a subrun that doesn't go to the end of the text box, @@ -657,23 +798,23 @@ void TextIterator::handleTextBox() return; // Advance and return - unsigned nextRunStart = nextTextBox ? nextTextBox->start() : str.length(); + unsigned nextRunStart = nextTextBox ? nextTextBox->start() : rendererText.length(); if (nextRunStart > runEnd) m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end m_textBox = nextTextBox; - if (renderer->containsReversedText()) + if (renderer.containsReversedText()) ++m_sortedTextBoxesPosition; return; } // Advance and continue m_textBox = nextTextBox; - if (renderer->containsReversedText()) + if (renderer.containsReversedText()) ++m_sortedTextBoxesPosition; } if (!m_textBox && m_remainingTextBox) { m_textBox = m_remainingTextBox; - m_remainingTextBox = 0; - m_firstLetterText = 0; + m_remainingTextBox = nullptr; + m_firstLetterText = nullptr; m_offset = 0; handleTextBox(); } @@ -688,12 +829,12 @@ static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* fir return childrenOfType<RenderText>(*firstLetter).first(); } -void TextIterator::handleTextNodeFirstLetter(RenderTextFragment* renderer) +void TextIterator::handleTextNodeFirstLetter(RenderTextFragment& renderer) { - if (auto firstLetter = renderer->firstLetter()) { - if (firstLetter->style().visibility() != VISIBLE && !m_ignoresStyleVisibility) + if (auto* firstLetter = renderer.firstLetter()) { + if (firstLetter->style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) return; - if (RenderText* firstLetterText = firstRenderTextInFirstLetter(firstLetter)) { + if (auto* firstLetterText = firstRenderTextInFirstLetter(firstLetter)) { m_handledFirstLetter = true; m_remainingTextBox = m_textBox; m_textBox = firstLetterText->firstTextBox(); @@ -709,19 +850,19 @@ bool TextIterator::handleReplacedElement() if (m_fullyClippedStack.top()) return false; - RenderObject* renderer = m_node->renderer(); - if (renderer->style().visibility() != VISIBLE && !m_ignoresStyleVisibility) + auto& renderer = *m_node->renderer(); + if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) return false; if (m_lastTextNodeEndedWithCollapsedSpace) { - emitCharacter(' ', m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); + emitCharacter(' ', *m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); return false; } - if (m_entersTextControls && renderer->isTextControl()) { - if (TextControlInnerTextElement* innerTextElement = toRenderTextControl(renderer)->textFormControlElement().innerTextElement()) { + if ((m_behavior & TextIteratorEntersTextControls) && is<RenderTextControl>(renderer)) { + if (TextControlInnerTextElement* innerTextElement = downcast<RenderTextControl>(renderer).textFormControlElement().innerTextElement()) { m_node = innerTextElement->containingShadowRoot(); - pushFullyClippedState(m_fullyClippedStack, m_node); + pushFullyClippedState(m_fullyClippedStack, *m_node); m_offset = 0; return false; } @@ -729,19 +870,19 @@ bool TextIterator::handleReplacedElement() m_hasEmitted = true; - if (m_emitsObjectReplacementCharacters && renderer && renderer->isReplaced()) { - emitCharacter(objectReplacementCharacter, m_node->parentNode(), m_node, 0, 1); + if ((m_behavior & TextIteratorEmitsObjectReplacementCharacters) && renderer.isReplaced()) { + emitCharacter(objectReplacementCharacter, *m_node->parentNode(), m_node, 0, 1); // Don't process subtrees for embedded objects. If the text there is required, // it must be explicitly asked by specifying a range falling inside its boundaries. m_handledChildren = true; return true; } - if (m_emitsCharactersBetweenAllVisiblePositions) { + if (m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) { // We want replaced elements to behave like punctuation for boundary // finding, and to simply take up space for the selection preservation // code in moveParagraphs, so we use a comma. - emitCharacter(',', m_node->parentNode(), m_node, 0, 1); + emitCharacter(',', *m_node->parentNode(), m_node, 0, 1); return true; } @@ -749,174 +890,169 @@ bool TextIterator::handleReplacedElement() m_positionOffsetBaseNode = m_node; m_positionStartOffset = 0; m_positionEndOffset = 1; - m_textCharacters = 0; - if (m_emitsImageAltText && renderer->isImage() && renderer->isRenderImage()) { - m_text = toRenderImage(renderer)->altText(); - if (!m_text.isEmpty()) { - m_textLength = m_text.length(); - m_lastCharacter = m_text[m_textLength - 1]; + if ((m_behavior & TextIteratorEmitsImageAltText) && is<RenderImage>(renderer)) { + String altText = downcast<RenderImage>(renderer).altText(); + if (unsigned length = altText.length()) { + m_lastCharacter = altText[length - 1]; + m_copyableText.set(WTFMove(altText)); + m_text = m_copyableText.text(); return true; } } - m_textLength = 0; + m_copyableText.reset(); + m_text = StringView(); m_lastCharacter = 0; - return true; } -bool TextIterator::hasVisibleTextNode(RenderText* renderer) -{ - if (renderer->style().visibility() == VISIBLE) - return true; - if (renderer->isTextFragment()) { - RenderTextFragment* fragment = static_cast<RenderTextFragment*>(renderer); - if (fragment->firstLetter() && fragment->firstLetter()->style().visibility() == VISIBLE) - return true; - } - return false; -} - -static bool shouldEmitTabBeforeNode(Node* node) +static bool shouldEmitTabBeforeNode(Node& node) { - RenderObject* r = node->renderer(); + auto* renderer = node.renderer(); // Table cells are delimited by tabs. - if (!r || !isTableCell(node)) + if (!renderer || !isTableCell(&node)) return false; - // Want a tab before every cell other than the first one - RenderTableCell* rc = toRenderTableCell(r); - RenderTable* t = rc->table(); - return t && (t->cellBefore(rc) || t->cellAbove(rc)); + // Want a tab before every cell other than the first one. + RenderTableCell& cell = downcast<RenderTableCell>(*renderer); + RenderTable* table = cell.table(); + return table && (table->cellBefore(&cell) || table->cellAbove(&cell)); } static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText) { - RenderObject* renderer = node->renderer(); - - if (renderer ? !renderer->isBR() : !node->hasTagName(brTag)) + auto* renderer = node->renderer(); + if (!(renderer ? renderer->isBR() : node->hasTagName(brTag))) return false; - return emitsOriginalText || !(node->isInShadowTree() && node->shadowHost()->toInputElement()); + return emitsOriginalText || !(node->isInShadowTree() && is<HTMLInputElement>(*node->shadowHost())); +} + +static bool hasHeaderTag(HTMLElement& element) +{ + return element.hasTagName(h1Tag) + || element.hasTagName(h2Tag) + || element.hasTagName(h3Tag) + || element.hasTagName(h4Tag) + || element.hasTagName(h5Tag) + || element.hasTagName(h6Tag); } -static bool shouldEmitNewlinesBeforeAndAfterNode(Node* node) +static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node) { // Block flow (versus inline flow) is represented by having // a newline both before and after the element. - RenderObject* r = node->renderer(); - if (!r) { - return (node->hasTagName(blockquoteTag) - || node->hasTagName(ddTag) - || node->hasTagName(divTag) - || node->hasTagName(dlTag) - || node->hasTagName(dtTag) - || node->hasTagName(h1Tag) - || node->hasTagName(h2Tag) - || node->hasTagName(h3Tag) - || node->hasTagName(h4Tag) - || node->hasTagName(h5Tag) - || node->hasTagName(h6Tag) - || node->hasTagName(hrTag) - || node->hasTagName(liTag) - || node->hasTagName(listingTag) - || node->hasTagName(olTag) - || node->hasTagName(pTag) - || node->hasTagName(preTag) - || node->hasTagName(trTag) - || node->hasTagName(ulTag)); + auto* renderer = node.renderer(); + if (!renderer) { + if (!is<HTMLElement>(node)) + return false; + auto& element = downcast<HTMLElement>(node); + return hasHeaderTag(element) + || element.hasTagName(blockquoteTag) + || element.hasTagName(ddTag) + || element.hasTagName(divTag) + || element.hasTagName(dlTag) + || element.hasTagName(dtTag) + || element.hasTagName(hrTag) + || element.hasTagName(liTag) + || element.hasTagName(listingTag) + || element.hasTagName(olTag) + || element.hasTagName(pTag) + || element.hasTagName(preTag) + || element.hasTagName(trTag) + || element.hasTagName(ulTag); } // Need to make an exception for table cells, because they are blocks, but we // want them tab-delimited rather than having newlines before and after. - if (isTableCell(node)) + if (isTableCell(&node)) return false; // Need to make an exception for table row elements, because they are neither // "inline" or "RenderBlock", but we want newlines for them. - if (r->isTableRow()) { - RenderTable* t = toRenderTableRow(r)->table(); - if (t && !t->isInline()) + if (is<RenderTableRow>(*renderer)) { + RenderTable* table = downcast<RenderTableRow>(*renderer).table(); + if (table && !table->isInline()) return true; } - return !r->isInline() && r->isRenderBlock() - && !r->isFloatingOrOutOfFlowPositioned() && !r->isBody() && !r->isRubyText(); + return !renderer->isInline() + && is<RenderBlock>(*renderer) + && !renderer->isFloatingOrOutOfFlowPositioned() + && !renderer->isBody() + && !renderer->isRubyText(); } -static bool shouldEmitNewlineAfterNode(Node* node) +static bool shouldEmitNewlineAfterNode(Node& node) { // FIXME: It should be better but slower to create a VisiblePosition here. if (!shouldEmitNewlinesBeforeAndAfterNode(node)) return false; // Check if this is the very last renderer in the document. // If so, then we should not emit a newline. - while ((node = NodeTraversal::nextSkippingChildren(node))) - if (node->renderer()) + Node* subsequentNode = &node; + while ((subsequentNode = NodeTraversal::nextSkippingChildren(*subsequentNode))) { + if (subsequentNode->renderer()) return true; + } return false; } -static bool shouldEmitNewlineBeforeNode(Node* node) +static bool shouldEmitNewlineBeforeNode(Node& node) { return shouldEmitNewlinesBeforeAndAfterNode(node); } -static bool shouldEmitExtraNewlineForNode(Node* node) +static bool shouldEmitExtraNewlineForNode(Node& node) { // When there is a significant collapsed bottom margin, emit an extra - // newline for a more realistic result. We end up getting the right + // newline for a more realistic result. We end up getting the right // result even without margin collapsing. For example: <div><p>text</p></div> // will work right even if both the <div> and the <p> have bottom margins. - RenderObject* r = node->renderer(); - if (!r || !r->isBox()) + + auto* renderer = node.renderer(); + if (!is<RenderBox>(renderer)) return false; - - // NOTE: We only do this for a select set of nodes, and fwiw WinIE appears - // not to do this at all - if (node->hasTagName(h1Tag) - || node->hasTagName(h2Tag) - || node->hasTagName(h3Tag) - || node->hasTagName(h4Tag) - || node->hasTagName(h5Tag) - || node->hasTagName(h6Tag) - || node->hasTagName(pTag)) { - int bottomMargin = toRenderBox(r)->collapsedMarginAfter(); - int fontSize = toRenderBox(r)->style().fontDescription().computedPixelSize(); - if (bottomMargin * 2 >= fontSize) - return true; - } - - return false; + + // NOTE: We only do this for a select set of nodes, and WinIE appears not to do this at all. + if (!is<HTMLElement>(node)) + return false; + + HTMLElement& element = downcast<HTMLElement>(node); + if (!hasHeaderTag(element) && !is<HTMLParagraphElement>(element)) + return false; + + int bottomMargin = downcast<RenderBox>(*renderer).collapsedMarginAfter(); + int fontSize = downcast<RenderBox>(*renderer).style().fontDescription().computedPixelSize(); + return bottomMargin * 2 >= fontSize; } -static int collapsedSpaceLength(RenderText* renderer, int textEnd) +static int collapsedSpaceLength(RenderText& renderer, int textEnd) { - const UChar* characters = renderer->text()->deprecatedCharacters(); - int length = renderer->text()->length(); - for (int i = textEnd; i < length; ++i) { - if (!renderer->style().isCollapsibleWhiteSpace(characters[i])) + StringImpl& text = *renderer.text(); + unsigned length = text.length(); + for (unsigned i = textEnd; i < length; ++i) { + if (!renderer.style().isCollapsibleWhiteSpace(text[i])) return i - textEnd; } - return length - textEnd; } -static int maxOffsetIncludingCollapsedSpaces(Node* node) +static int maxOffsetIncludingCollapsedSpaces(Node& node) { int offset = caretMaxOffset(node); - - if (node->renderer() && node->renderer()->isText()) - offset += collapsedSpaceLength(toRenderText(node->renderer()), offset); - + if (auto* renderer = node.renderer()) { + if (is<RenderText>(*renderer)) + offset += collapsedSpaceLength(downcast<RenderText>(*renderer), offset); + } return offset; } // Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). bool TextIterator::shouldRepresentNodeOffsetZero() { - if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isTable()) + if ((m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) && m_node->renderer() && m_node->renderer()->isTable()) return true; // Leave element positioned flush with start of a paragraph @@ -958,7 +1094,7 @@ bool TextIterator::shouldRepresentNodeOffsetZero() // Additionally, if the range we are iterating over contains huge sections of unrendered content, // we would create VisiblePositions on every call to this function without this check. if (!m_node->renderer() || m_node->renderer()->style().visibility() != VISIBLE - || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !m_node->hasTagName(bodyTag))) + || (is<RenderBlockFlow>(*m_node->renderer()) && !downcast<RenderBlockFlow>(*m_node->renderer()).height() && !is<HTMLBodyElement>(*m_node))) return false; // The startPos.isNotNull() check is needed because the start could be before the body, @@ -970,9 +1106,9 @@ bool TextIterator::shouldRepresentNodeOffsetZero() return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos); } -bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node* node) +bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node& node) { - return node->renderer() && node->renderer()->isTable() && (node->renderer()->isInline() || m_emitsCharactersBetweenAllVisiblePositions); + return node.renderer() && node.renderer()->isTable() && (node.renderer()->isInline() || (m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)); } void TextIterator::representNodeOffsetZero() @@ -980,34 +1116,34 @@ void TextIterator::representNodeOffsetZero() // Emit a character to show the positioning of m_node. // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can - // create VisiblePositions, which is expensive. So, we perform the inexpensive checks + // create VisiblePositions, which is expensive. So, we perform the inexpensive checks // on m_node to see if it necessitates emitting a character first and will early return // before encountering shouldRepresentNodeOffsetZero()s worse case behavior. - if (shouldEmitTabBeforeNode(m_node)) { + if (shouldEmitTabBeforeNode(*m_node)) { if (shouldRepresentNodeOffsetZero()) - emitCharacter('\t', m_node->parentNode(), m_node, 0, 0); - } else if (shouldEmitNewlineBeforeNode(m_node)) { + emitCharacter('\t', *m_node->parentNode(), m_node, 0, 0); + } else if (shouldEmitNewlineBeforeNode(*m_node)) { if (shouldRepresentNodeOffsetZero()) - emitCharacter('\n', m_node->parentNode(), m_node, 0, 0); - } else if (shouldEmitSpaceBeforeAndAfterNode(m_node)) { + emitCharacter('\n', *m_node->parentNode(), m_node, 0, 0); + } else if (shouldEmitSpaceBeforeAndAfterNode(*m_node)) { if (shouldRepresentNodeOffsetZero()) - emitCharacter(' ', m_node->parentNode(), m_node, 0, 0); + emitCharacter(' ', *m_node->parentNode(), m_node, 0, 0); } } bool TextIterator::handleNonTextNode() { - if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText)) - emitCharacter('\n', m_node->parentNode(), m_node, 0, 1); - else if (m_emitsCharactersBetweenAllVisiblePositions && m_node->renderer() && m_node->renderer()->isHR()) - emitCharacter(' ', m_node->parentNode(), m_node, 0, 1); + if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText)) + emitCharacter('\n', *m_node->parentNode(), m_node, 0, 1); + else if ((m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) && m_node->renderer() && m_node->renderer()->isHR()) + emitCharacter(' ', *m_node->parentNode(), m_node, 0, 1); else representNodeOffsetZero(); return true; } -void TextIterator::exitNode() +void TextIterator::exitNode(Node* exitedNode) { // prevent emitting a newline when exiting a collapsed block at beginning of the range // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could @@ -1019,212 +1155,160 @@ void TextIterator::exitNode() // Emit with a position *inside* m_node, after m_node's contents, in // case it is a block, because the run should start where the // emitted character is positioned visually. - Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; + Node* baseNode = exitedNode; // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making - // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use + // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use // TextIterator in _web_attributedStringFromRange. // See <rdar://problem/5428427> for an example of how this mismatch will cause problems. - if (m_lastTextNode && shouldEmitNewlineAfterNode(m_node)) { + if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) { // use extra newline to represent margin bottom, as needed - bool addNewline = shouldEmitExtraNewlineForNode(m_node); + bool addNewline = shouldEmitExtraNewlineForNode(*m_node); // FIXME: We need to emit a '\n' as we leave an empty block(s) that // contain a VisiblePosition when doing selection preservation. if (m_lastCharacter != '\n') { // insert a newline with a position following this block's contents. - emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); + emitCharacter('\n', *baseNode->parentNode(), baseNode, 1, 1); // remember whether to later add a newline for the current node - ASSERT(!m_needsAnotherNewline); - m_needsAnotherNewline = addNewline; + ASSERT(!m_nodeForAdditionalNewline); + if (addNewline) + m_nodeForAdditionalNewline = baseNode; } else if (addNewline) // insert a newline with a position following this block's contents. - emitCharacter('\n', baseNode->parentNode(), baseNode, 1, 1); + emitCharacter('\n', *baseNode->parentNode(), baseNode, 1, 1); } // If nothing was emitted, see if we need to emit a space. - if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(m_node)) - emitCharacter(' ', baseNode->parentNode(), baseNode, 1, 1); + if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(*m_node)) + emitCharacter(' ', *baseNode->parentNode(), baseNode, 1, 1); } -void TextIterator::emitCharacter(UChar c, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) +void TextIterator::emitCharacter(UChar character, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) { m_hasEmitted = true; // remember information with which to construct the TextIterator::range() - // NOTE: textNode is often not a text node, so the range will specify child nodes of positionNode - m_positionNode = textNode; + m_positionNode = &characterNode; m_positionOffsetBaseNode = offsetBaseNode; m_positionStartOffset = textStartOffset; m_positionEndOffset = textEndOffset; - - // remember information with which to construct the TextIterator::characters() and length() - m_singleCharacterBuffer = c; - m_textCharacters = &m_singleCharacterBuffer; - m_textLength = 1; - // remember some iteration state + m_copyableText.set(character); + m_text = m_copyableText.text(); + m_lastCharacter = character; m_lastTextNodeEndedWithCollapsedSpace = false; - m_lastCharacter = c; + m_nextRunNeedsWhitespace = false; } -void TextIterator::emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset) +void TextIterator::emitText(Text& textNode, RenderText& renderer, int textStartOffset, int textEndOffset) { - RenderText* renderer = toRenderText(renderObject); - m_text = m_emitsOriginalText ? renderer->originalText() : (m_emitsTextWithoutTranscoding ? renderer->textWithoutConvertingBackslashToYenSymbol() : renderer->text()); - ASSERT(!m_text.isEmpty()); - ASSERT(0 <= textStartOffset && textStartOffset < static_cast<int>(m_text.length())); - ASSERT(0 <= textEndOffset && textEndOffset <= static_cast<int>(m_text.length())); + ASSERT(textStartOffset >= 0); + ASSERT(textEndOffset >= 0); ASSERT(textStartOffset <= textEndOffset); - m_positionNode = textNode; - m_positionOffsetBaseNode = 0; + // FIXME: This probably yields the wrong offsets when text-transform: lowercase turns a single character into two characters. + String string = (m_behavior & TextIteratorEmitsOriginalText) ? renderer.originalText() + : ((m_behavior & TextIteratorEmitsTextsWithoutTranscoding) ? renderer.textWithoutConvertingBackslashToYenSymbol() : renderer.text()); + + ASSERT(string.length() >= static_cast<unsigned>(textEndOffset)); + + m_positionNode = &textNode; + m_positionOffsetBaseNode = nullptr; m_positionStartOffset = textStartOffset; m_positionEndOffset = textEndOffset; - m_textCharacters = 0; - m_textLength = textEndOffset - textStartOffset; - m_lastCharacter = m_text[textEndOffset - 1]; + + m_lastCharacter = string[textEndOffset - 1]; + m_copyableText.set(WTFMove(string), textStartOffset, textEndOffset - textStartOffset); + m_text = m_copyableText.text(); m_lastTextNodeEndedWithCollapsedSpace = false; + m_nextRunNeedsWhitespace = false; m_hasEmitted = true; } -void TextIterator::emitText(Node* textNode, int textStartOffset, int textEndOffset) +Ref<Range> TextIterator::range() const { - emitText(textNode, m_node->renderer(), textStartOffset, textEndOffset); -} + ASSERT(!atEnd()); -PassRefPtr<Range> TextIterator::range() const -{ // use the current run information, if we have it - if (m_positionNode) { - if (m_positionOffsetBaseNode) { - int index = m_positionOffsetBaseNode->nodeIndex(); - m_positionStartOffset += index; - m_positionEndOffset += index; - m_positionOffsetBaseNode = 0; - } - return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); + if (m_positionOffsetBaseNode) { + unsigned index = m_positionOffsetBaseNode->computeNodeIndex(); + m_positionStartOffset += index; + m_positionEndOffset += index; + m_positionOffsetBaseNode = nullptr; } - - // otherwise, return the end of the overall range we were given - if (m_endContainer) - return Range::create(m_endContainer->document(), m_endContainer, m_endOffset, m_endContainer, m_endOffset); - - return 0; + return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); } Node* TextIterator::node() const { - RefPtr<Range> textRange = range(); - if (!textRange) - return 0; + Ref<Range> textRange = range(); - Node* node = textRange->startContainer(); - if (!node) - return 0; - if (node->offsetInCharacters()) - return node; + Node& node = textRange->startContainer(); + if (node.offsetInCharacters()) + return &node; - return node->childNode(textRange->startOffset()); + return node.traverseToChildAt(textRange->startOffset()); } // -------- -SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range* r, TextIteratorBehavior behavior) - : m_node(0) - , m_offset(0) - , m_handledNode(false) - , m_handledChildren(false) - , m_startNode(0) - , m_startOffset(0) - , m_endNode(0) - , m_endOffset(0) - , m_positionNode(0) - , m_positionStartOffset(0) - , m_positionEndOffset(0) - , m_textCharacters(0) - , m_textLength(0) - , m_lastTextNode(0) - , m_lastCharacter(0) - , m_singleCharacterBuffer(0) - , m_havePassedStartNode(false) - , m_shouldHandleFirstLetter(false) - , m_stopsOnFormControls(behavior & TextIteratorStopsOnFormControls) - , m_shouldStop(false) - , m_emitsOriginalText(false) -{ - ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls); - - if (!r) - return; - - r->ownerDocument().updateLayoutIgnorePendingStylesheets(); +SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range& range) +{ + range.ownerDocument().updateLayoutIgnorePendingStylesheets(); - Node* startNode = r->startContainer(); - if (!startNode) - return; - Node* endNode = r->endContainer(); - int startOffset = r->startOffset(); - int endOffset = r->endOffset(); + Node* startNode = &range.startContainer(); + Node* endNode = &range.endContainer(); + int startOffset = range.startOffset(); + int endOffset = range.endOffset(); if (!startNode->offsetInCharacters()) { - if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) { - startNode = startNode->childNode(startOffset); + if (startOffset >= 0 && startOffset < static_cast<int>(startNode->countChildNodes())) { + startNode = startNode->traverseToChildAt(startOffset); startOffset = 0; } } if (!endNode->offsetInCharacters()) { - if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) { - endNode = endNode->childNode(endOffset - 1); + if (endOffset > 0 && endOffset <= static_cast<int>(endNode->countChildNodes())) { + endNode = endNode->traverseToChildAt(endOffset - 1); endOffset = lastOffsetInNode(endNode); } } m_node = endNode; - setUpFullyClippedStack(m_fullyClippedStack, m_node); + setUpFullyClippedStack(m_fullyClippedStack, *m_node); m_offset = endOffset; m_handledNode = false; m_handledChildren = endOffset == 0; - m_startNode = startNode; + m_startContainer = startNode; m_startOffset = startOffset; - m_endNode = endNode; + m_endContainer = endNode; m_endOffset = endOffset; -#ifndef NDEBUG - // Need this just because of the assert. m_positionNode = endNode; -#endif - m_lastTextNode = 0; + m_lastTextNode = nullptr; m_lastCharacter = '\n'; - m_havePassedStartNode = false; + m_havePassedStartContainer = false; advance(); } void SimplifiedBackwardsTextIterator::advance() { - ASSERT(m_positionNode); + ASSERT(!atEnd()); - if (m_shouldStop) - return; + m_positionNode = nullptr; + m_copyableText.reset(); + m_text = StringView(); - if (m_stopsOnFormControls && HTMLFormControlElement::enclosingFormControlElement(m_node)) { - m_shouldStop = true; - return; - } - - m_positionNode = 0; - m_textLength = 0; - - while (m_node && !m_havePassedStartNode) { + while (m_node && !m_havePassedStartContainer) { // Don't handle node if we start iterating at [node, 0]. - if (!m_handledNode && !(m_node == m_endNode && m_endOffset == 0)) { - RenderObject* renderer = m_node->renderer(); - if (renderer && renderer->isText() && m_node->nodeType() == Node::TEXT_NODE) { - // FIXME: What about CDATA_SECTION_NODE? + if (!m_handledNode && !(m_node == m_endContainer && !m_endOffset)) { + auto* renderer = m_node->renderer(); + if (renderer && renderer->isText() && m_node->isTextNode()) { if (renderer->style().visibility() == VISIBLE && m_offset > 0) m_handledNode = handleTextNode(); } else if (renderer && (renderer->isImage() || renderer->isWidget())) { @@ -1238,14 +1322,11 @@ void SimplifiedBackwardsTextIterator::advance() if (!m_handledChildren && m_node->hasChildNodes()) { m_node = m_node->lastChild(); - pushFullyClippedState(m_fullyClippedStack, m_node); + pushFullyClippedState(m_fullyClippedStack, *m_node); } else { // Exit empty containers as we pass over them or containers // where [container, 0] is where we started iterating. - if (!m_handledNode - && canHaveChildrenForEditing(m_node) - && m_node->parentNode() - && (!m_node->lastChild() || (m_node == m_endNode && !m_endOffset))) { + if (!m_handledNode && canHaveChildrenForEditing(*m_node) && m_node->parentNode() && (!m_node->lastChild() || (m_node == m_endContainer && !m_endOffset))) { exitNode(); if (m_positionNode) { m_handledNode = true; @@ -1269,14 +1350,14 @@ void SimplifiedBackwardsTextIterator::advance() m_fullyClippedStack.pop(); if (advanceRespectingRange(m_node->previousSibling())) - pushFullyClippedState(m_fullyClippedStack, m_node); + pushFullyClippedState(m_fullyClippedStack, *m_node); else - m_node = 0; + m_node = nullptr; } // For the purpose of word boundary detection, // we should iterate all visible text and trailing (collapsed) whitespaces. - m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(m_node) : 0; + m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(*m_node) : 0; m_handledNode = false; m_handledChildren = false; @@ -1287,7 +1368,9 @@ void SimplifiedBackwardsTextIterator::advance() bool SimplifiedBackwardsTextIterator::handleTextNode() { - m_lastTextNode = m_node; + Text& textNode = downcast<Text>(*m_node); + + m_lastTextNode = &textNode; int startOffset; int offsetInNode; @@ -1296,98 +1379,105 @@ bool SimplifiedBackwardsTextIterator::handleTextNode() return true; String text = renderer->text(); - if (!renderer->firstTextBox() && text.length() > 0) + if (!renderer->hasRenderedText() && text.length()) + return true; + + if (startOffset + offsetInNode == m_offset) { + ASSERT(!m_shouldHandleFirstLetter); return true; + } m_positionEndOffset = m_offset; m_offset = startOffset + offsetInNode; m_positionNode = m_node; m_positionStartOffset = m_offset; - ASSERT(0 <= m_positionStartOffset - offsetInNode && m_positionStartOffset - offsetInNode <= static_cast<int>(text.length())); - ASSERT(1 <= m_positionEndOffset - offsetInNode && m_positionEndOffset - offsetInNode <= static_cast<int>(text.length())); - ASSERT(m_positionStartOffset <= m_positionEndOffset); - - m_textLength = m_positionEndOffset - m_positionStartOffset; - m_textCharacters = text.deprecatedCharacters() + (m_positionStartOffset - offsetInNode); - ASSERT(m_textCharacters >= text.deprecatedCharacters()); - ASSERT(m_textCharacters + m_textLength <= text.deprecatedCharacters() + static_cast<int>(text.length())); + ASSERT(m_positionStartOffset < m_positionEndOffset); + ASSERT(m_positionStartOffset - offsetInNode >= 0); + ASSERT(m_positionEndOffset - offsetInNode > 0); + ASSERT(m_positionEndOffset - offsetInNode <= static_cast<int>(text.length())); - m_lastCharacter = text[m_positionEndOffset - 1]; + m_lastCharacter = text[m_positionEndOffset - offsetInNode - 1]; + m_copyableText.set(WTFMove(text), m_positionStartOffset - offsetInNode, m_positionEndOffset - m_positionStartOffset); + m_text = m_copyableText.text(); return !m_shouldHandleFirstLetter; } RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode) { - RenderText* renderer = toRenderText(m_node->renderer()); - startOffset = (m_node == m_startNode) ? m_startOffset : 0; + RenderText& renderer = downcast<RenderText>(*m_node->renderer()); + startOffset = (m_node == m_startContainer) ? m_startOffset : 0; - if (!renderer->isTextFragment()) { + if (!is<RenderTextFragment>(renderer)) { offsetInNode = 0; - return renderer; + return &renderer; } - RenderTextFragment* fragment = toRenderTextFragment(renderer); - int offsetAfterFirstLetter = fragment->start(); + RenderTextFragment& fragment = downcast<RenderTextFragment>(renderer); + int offsetAfterFirstLetter = fragment.start(); if (startOffset >= offsetAfterFirstLetter) { ASSERT(!m_shouldHandleFirstLetter); offsetInNode = offsetAfterFirstLetter; - return renderer; + return &renderer; } - if (!m_shouldHandleFirstLetter && offsetAfterFirstLetter < m_offset) { + if (!m_shouldHandleFirstLetter && startOffset + offsetAfterFirstLetter < m_offset) { m_shouldHandleFirstLetter = true; offsetInNode = offsetAfterFirstLetter; - return renderer; + return &renderer; } m_shouldHandleFirstLetter = false; offsetInNode = 0; - return firstRenderTextInFirstLetter(fragment->firstLetter()); + RenderText* firstLetterRenderer = firstRenderTextInFirstLetter(fragment.firstLetter()); + + m_offset = firstLetterRenderer->caretMaxOffset(); + m_offset += collapsedSpaceLength(*firstLetterRenderer, m_offset); + + return firstLetterRenderer; } bool SimplifiedBackwardsTextIterator::handleReplacedElement() { - unsigned index = m_node->nodeIndex(); + unsigned index = m_node->computeNodeIndex(); // We want replaced elements to behave like punctuation for boundary // finding, and to simply take up space for the selection preservation - // code in moveParagraphs, so we use a comma. Unconditionally emit + // code in moveParagraphs, so we use a comma. Unconditionally emit // here because this iterator is only used for boundary finding. - emitCharacter(',', m_node->parentNode(), index, index + 1); + emitCharacter(',', *m_node->parentNode(), index, index + 1); return true; } bool SimplifiedBackwardsTextIterator::handleNonTextNode() { // We can use a linefeed in place of a tab because this simple iterator is only used to - // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. - if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineAfterNode(m_node) || shouldEmitTabBeforeNode(m_node)) { - unsigned index = m_node->nodeIndex(); + // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. + if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(*m_node)) { + unsigned index = m_node->computeNodeIndex(); // The start of this emitted range is wrong. Ensuring correctness would require // VisiblePositions and so would be slow. previousBoundary expects this. - emitCharacter('\n', m_node->parentNode(), index + 1, index + 1); + emitCharacter('\n', *m_node->parentNode(), index + 1, index + 1); } return true; } void SimplifiedBackwardsTextIterator::exitNode() { - if (shouldEmitNewlineForNode(m_node, m_emitsOriginalText) || shouldEmitNewlineBeforeNode(m_node) || shouldEmitTabBeforeNode(m_node)) { + if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(*m_node)) { // The start of this emitted range is wrong. Ensuring correctness would require // VisiblePositions and so would be slow. previousBoundary expects this. - emitCharacter('\n', m_node, 0, 0); + emitCharacter('\n', *m_node, 0, 0); } } -void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node* node, int startOffset, int endOffset) +void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node& node, int startOffset, int endOffset) { - m_singleCharacterBuffer = c; - m_positionNode = node; + m_positionNode = &node; m_positionStartOffset = startOffset; m_positionEndOffset = endOffset; - m_textCharacters = &m_singleCharacterBuffer; - m_textLength = 1; + m_copyableText.set(c); + m_text = m_copyableText.text(); m_lastCharacter = c; } @@ -1395,48 +1485,47 @@ bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next) { if (!next) return false; - m_havePassedStartNode |= m_node == m_startNode; - if (m_havePassedStartNode) + m_havePassedStartContainer |= m_node == m_startContainer; + if (m_havePassedStartContainer) return false; m_node = next; return true; } -PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const +Ref<Range> SimplifiedBackwardsTextIterator::range() const { - if (m_positionNode) - return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); - - return Range::create(m_startNode->document(), m_startNode, m_startOffset, m_startNode, m_startOffset); + ASSERT(!atEnd()); + + return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); } // -------- -CharacterIterator::CharacterIterator(const Range* r, TextIteratorBehavior behavior) - : m_offset(0) +CharacterIterator::CharacterIterator(const Range& range, TextIteratorBehavior behavior) + : m_underlyingIterator(&range, behavior) + , m_offset(0) , m_runOffset(0) , m_atBreak(true) - , m_textIterator(r, behavior) { - while (!atEnd() && m_textIterator.length() == 0) - m_textIterator.advance(); + while (!atEnd() && !m_underlyingIterator.text().length()) + m_underlyingIterator.advance(); } -PassRefPtr<Range> CharacterIterator::range() const +Ref<Range> CharacterIterator::range() const { - RefPtr<Range> r = m_textIterator.range(); - if (!m_textIterator.atEnd()) { - if (m_textIterator.length() <= 1) { + Ref<Range> range = m_underlyingIterator.range(); + if (!m_underlyingIterator.atEnd()) { + if (m_underlyingIterator.text().length() <= 1) { ASSERT(m_runOffset == 0); } else { - Node* n = r->startContainer(); - ASSERT(n == r->endContainer()); - int offset = r->startOffset() + m_runOffset; - r->setStart(n, offset, ASSERT_NO_EXCEPTION); - r->setEnd(n, offset + 1, ASSERT_NO_EXCEPTION); + Node& node = range->startContainer(); + ASSERT(&node == &range->endContainer()); + int offset = range->startOffset() + m_runOffset; + range->setStart(node, offset); + range->setEnd(node, offset + 1); } } - return r.release(); + return range; } void CharacterIterator::advance(int count) @@ -1448,94 +1537,85 @@ void CharacterIterator::advance(int count) m_atBreak = false; - // easy if there is enough left in the current m_textIterator run - int remaining = m_textIterator.length() - m_runOffset; + // easy if there is enough left in the current m_underlyingIterator run + int remaining = m_underlyingIterator.text().length() - m_runOffset; if (count < remaining) { m_runOffset += count; m_offset += count; return; } - // exhaust the current m_textIterator run + // exhaust the current m_underlyingIterator run count -= remaining; m_offset += remaining; - // move to a subsequent m_textIterator run - for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { - int runLength = m_textIterator.length(); - if (runLength == 0) + // move to a subsequent m_underlyingIterator run + for (m_underlyingIterator.advance(); !atEnd(); m_underlyingIterator.advance()) { + int runLength = m_underlyingIterator.text().length(); + if (!runLength) m_atBreak = true; else { - // see whether this is m_textIterator to use + // see whether this is m_underlyingIterator to use if (count < runLength) { m_runOffset = count; m_offset += count; return; } - // exhaust this m_textIterator run + // exhaust this m_underlyingIterator run count -= runLength; m_offset += runLength; } } - // ran to the end of the m_textIterator... no more runs left + // ran to the end of the m_underlyingIterator... no more runs left m_atBreak = true; m_runOffset = 0; } -String CharacterIterator::string(int numChars) -{ - Vector<UChar> result; - result.reserveInitialCapacity(numChars); - while (numChars > 0 && !atEnd()) { - int runSize = std::min(numChars, length()); - result.append(characters(), runSize); - numChars -= runSize; - advance(runSize); - } - return String::adopt(result); -} - -static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length) +static Ref<Range> characterSubrange(Document& document, CharacterIterator& it, int offset, int length) { it.advance(offset); - RefPtr<Range> start = it.range(); + if (it.atEnd()) + return Range::create(document); + + Ref<Range> start = it.range(); if (length > 1) it.advance(length - 1); - RefPtr<Range> end = it.range(); + if (it.atEnd()) + return Range::create(document); - return Range::create(start->startContainer()->document(), - start->startContainer(), start->startOffset(), - end->endContainer(), end->endOffset()); + Ref<Range> end = it.range(); + + return Range::create(document, &start->startContainer(), start->startOffset(), &end->endContainer(), end->endOffset()); } -BackwardsCharacterIterator::BackwardsCharacterIterator(const Range* range, TextIteratorBehavior behavior) - : m_offset(0) +BackwardsCharacterIterator::BackwardsCharacterIterator(const Range& range) + : m_underlyingIterator(range) + , m_offset(0) , m_runOffset(0) , m_atBreak(true) - , m_textIterator(range, behavior) { - while (!atEnd() && !m_textIterator.length()) - m_textIterator.advance(); + while (!atEnd() && !m_underlyingIterator.text().length()) + m_underlyingIterator.advance(); } -PassRefPtr<Range> BackwardsCharacterIterator::range() const +Ref<Range> BackwardsCharacterIterator::range() const { - RefPtr<Range> r = m_textIterator.range(); - if (!m_textIterator.atEnd()) { - if (m_textIterator.length() <= 1) + Ref<Range> r = m_underlyingIterator.range(); + if (!m_underlyingIterator.atEnd()) { + if (m_underlyingIterator.text().length() <= 1) ASSERT(m_runOffset == 0); else { - Node* n = r->startContainer(); - ASSERT(n == r->endContainer()); + Node& node = r->startContainer(); + ASSERT(&node == &r->endContainer()); int offset = r->endOffset() - m_runOffset; - r->setStart(n, offset - 1, ASSERT_NO_EXCEPTION); - r->setEnd(n, offset, ASSERT_NO_EXCEPTION); + r->setStart(node, offset - 1); + r->setEnd(node, offset); } } - return r.release(); + return r; } void BackwardsCharacterIterator::advance(int count) @@ -1547,7 +1627,7 @@ void BackwardsCharacterIterator::advance(int count) m_atBreak = false; - int remaining = m_textIterator.length() - m_runOffset; + int remaining = m_underlyingIterator.text().length() - m_runOffset; if (count < remaining) { m_runOffset += count; m_offset += count; @@ -1557,8 +1637,8 @@ void BackwardsCharacterIterator::advance(int count) count -= remaining; m_offset += remaining; - for (m_textIterator.advance(); !atEnd(); m_textIterator.advance()) { - int runLength = m_textIterator.length(); + for (m_underlyingIterator.advance(); !atEnd(); m_underlyingIterator.advance()) { + int runLength = m_underlyingIterator.text().length(); if (runLength == 0) m_atBreak = true; else { @@ -1579,18 +1659,13 @@ void BackwardsCharacterIterator::advance(int count) // -------- -WordAwareIterator::WordAwareIterator(const Range* r) - : m_previousText(0) +WordAwareIterator::WordAwareIterator(const Range& range) + : m_underlyingIterator(&range) , m_didLookAhead(true) // so we consider the first chunk from the text iterator - , m_textIterator(r) { advance(); // get in position over the first chunk of text } -WordAwareIterator::~WordAwareIterator() -{ -} - // We're always in one of these modes: // - The current chunk in the text iterator is our current chunk // (typically its a piece of whitespace, or text that ended with whitespace) @@ -1602,74 +1677,59 @@ WordAwareIterator::~WordAwareIterator() void WordAwareIterator::advance() { - m_previousText = 0; - m_buffer.clear(); // toss any old buffer we built up + m_previousText.reset(); + m_buffer.clear(); // If last time we did a look-ahead, start with that looked-ahead chunk now if (!m_didLookAhead) { - ASSERT(!m_textIterator.atEnd()); - m_textIterator.advance(); + ASSERT(!m_underlyingIterator.atEnd()); + m_underlyingIterator.advance(); } m_didLookAhead = false; // Go to next non-empty chunk - while (!m_textIterator.atEnd() && m_textIterator.length() == 0) - m_textIterator.advance(); - m_range = m_textIterator.range(); - - if (m_textIterator.atEnd()) + while (!m_underlyingIterator.atEnd() && !m_underlyingIterator.text().length()) + m_underlyingIterator.advance(); + if (m_underlyingIterator.atEnd()) return; - + while (1) { // If this chunk ends in whitespace we can just use it as our chunk. - if (isSpaceOrNewline(m_textIterator.characters()[m_textIterator.length() - 1])) + if (isSpaceOrNewline(m_underlyingIterator.text()[m_underlyingIterator.text().length() - 1])) return; // If this is the first chunk that failed, save it in previousText before look ahead - if (m_buffer.isEmpty()) { - m_previousText = m_textIterator.characters(); - m_previousLength = m_textIterator.length(); - } + if (m_buffer.isEmpty()) + m_previousText = m_underlyingIterator.copyableText(); - // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff - m_textIterator.advance(); - if (m_textIterator.atEnd() || m_textIterator.length() == 0 || isSpaceOrNewline(m_textIterator.characters()[0])) { + // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff + m_underlyingIterator.advance(); + if (m_underlyingIterator.atEnd() || !m_underlyingIterator.text().length() || isSpaceOrNewline(m_underlyingIterator.text()[0])) { m_didLookAhead = true; return; } if (m_buffer.isEmpty()) { // Start gobbling chunks until we get to a suitable stopping point - m_buffer.append(m_previousText, m_previousLength); - m_previousText = 0; + append(m_buffer, m_previousText.text()); + m_previousText.reset(); } - m_buffer.append(m_textIterator.characters(), m_textIterator.length()); - int exception = 0; - m_range->setEnd(m_textIterator.range()->endContainer(), m_textIterator.range()->endOffset(), exception); + append(m_buffer, m_underlyingIterator.text()); } } -int WordAwareIterator::length() const +StringView WordAwareIterator::text() const { if (!m_buffer.isEmpty()) - return m_buffer.size(); - if (m_previousText) - return m_previousLength; - return m_textIterator.length(); -} - -const UChar* WordAwareIterator::characters() const -{ - if (!m_buffer.isEmpty()) - return m_buffer.data(); - if (m_previousText) - return m_previousText; - return m_textIterator.characters(); + return StringView(m_buffer.data(), m_buffer.size()); + if (m_previousText.text().length()) + return m_previousText.text(); + return m_underlyingIterator.text(); } // -------- -static inline UChar foldQuoteMarkOrSoftHyphen(UChar c) +static inline UChar foldQuoteMark(UChar c) { switch (c) { case hebrewPunctuationGershayim: @@ -1680,37 +1740,29 @@ static inline UChar foldQuoteMarkOrSoftHyphen(UChar c) case leftSingleQuotationMark: case rightSingleQuotationMark: return '\''; - case softHyphen: - // Replace soft hyphen with an ignorable character so that their presence or absence will - // not affect string comparison. - return 0; default: return c; } } -static inline void foldQuoteMarksAndSoftHyphens(String& s) +// FIXME: We'd like to tailor the searcher to fold quote marks for us instead +// of doing it in a separate replacement pass here, but ICU doesn't offer a way +// to add tailoring on top of the locale-specific tailoring as of this writing. +static inline String foldQuoteMarks(String string) { - s.replace(hebrewPunctuationGeresh, '\''); - s.replace(hebrewPunctuationGershayim, '"'); - s.replace(leftDoubleQuotationMark, '"'); - s.replace(leftSingleQuotationMark, '\''); - s.replace(rightDoubleQuotationMark, '"'); - s.replace(rightSingleQuotationMark, '\''); - // Replace soft hyphen with an ignorable character so that their presence or absence will - // not affect string comparison. - s.replace(softHyphen, 0); + string.replace(hebrewPunctuationGeresh, '\''); + string.replace(hebrewPunctuationGershayim, '"'); + string.replace(leftDoubleQuotationMark, '"'); + string.replace(leftSingleQuotationMark, '\''); + string.replace(rightDoubleQuotationMark, '"'); + string.replace(rightSingleQuotationMark, '\''); + + return string; } #if !UCONFIG_NO_COLLATION -static inline void foldQuoteMarksAndSoftHyphens(UChar* data, size_t length) -{ - for (size_t i = 0; i < length; ++i) - data[i] = foldQuoteMarkOrSoftHyphen(data[i]); -} - -static const size_t minimumSearchBufferSize = 8192; +const size_t minimumSearchBufferSize = 8192; #ifndef NDEBUG static bool searcherInUse; @@ -1922,7 +1974,9 @@ static inline bool isCombiningVoicedSoundMark(UChar character) static inline bool containsKanaLetters(const String& pattern) { - const UChar* characters = pattern.deprecatedCharacters(); + if (pattern.is8Bit()) + return false; + const UChar* characters = pattern.characters16(); unsigned length = pattern.length(); for (unsigned i = 0; i < length; ++i) { if (isKanaLetter(characters[i])) @@ -1987,7 +2041,8 @@ static inline bool isSeparator(UChar32 character) } inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) - : m_target(target) + : m_target(foldQuoteMarks(target)) + , m_targetCharacters(StringView(m_target).upconvertedCharacters()) , m_options(options) , m_prefixLength(0) , m_atBreak(true) @@ -1996,18 +2051,13 @@ inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) { ASSERT(!m_target.isEmpty()); - // FIXME: We'd like to tailor the searcher to fold quote marks for us instead - // of doing it in a separate replacement pass here, but ICU doesn't offer a way - // to add tailoring on top of the locale-specific tailoring as of this writing. - foldQuoteMarksAndSoftHyphens(m_target); - size_t targetLength = m_target.length(); m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize)); m_overlap = m_buffer.capacity() / 4; if ((m_options & AtWordStarts) && targetLength) { UChar32 targetFirstCharacter; - U16_GET(m_target.deprecatedCharacters(), 0, 0, targetLength, targetFirstCharacter); + U16_GET(m_target, 0, 0u, targetLength, targetFirstCharacter); // Characters in the separator category never really occur at the beginning of a word, // so if the target begins with such a character, we just ignore the AtWordStart option. if (isSeparator(targetFirstCharacter)) { @@ -2044,12 +2094,12 @@ inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) usearch_setAttribute(searcher, USEARCH_ELEMENT_COMPARISON, comparator, &status); ASSERT(status == U_ZERO_ERROR); - usearch_setPattern(searcher, m_target.deprecatedCharacters(), targetLength, &status); + usearch_setPattern(searcher, m_targetCharacters, targetLength, &status); ASSERT(status == U_ZERO_ERROR); // The kana workaround requires a normalized copy of the target string. if (m_targetRequiresKanaWorkaround) - normalizeCharacters(m_target.deprecatedCharacters(), m_target.length(), m_normalizedTarget); + normalizeCharacters(m_targetCharacters, targetLength, m_normalizedTarget); } inline SearchBuffer::~SearchBuffer() @@ -2064,9 +2114,9 @@ inline SearchBuffer::~SearchBuffer() unlockSearcher(); } -inline size_t SearchBuffer::append(const UChar* characters, size_t length) +inline size_t SearchBuffer::append(StringView text) { - ASSERT(length); + ASSERT(text.length()); if (m_atBreak) { m_buffer.shrink(0); @@ -2079,10 +2129,11 @@ inline size_t SearchBuffer::append(const UChar* characters, size_t length) } size_t oldLength = m_buffer.size(); - size_t usableLength = std::min(m_buffer.capacity() - oldLength, length); + size_t usableLength = std::min<size_t>(m_buffer.capacity() - oldLength, text.length()); ASSERT(usableLength); - m_buffer.append(characters, usableLength); - foldQuoteMarksAndSoftHyphens(m_buffer.data() + oldLength, usableLength); + m_buffer.grow(oldLength + usableLength); + for (unsigned i = 0; i < usableLength; ++i) + m_buffer[oldLength + i] = foldQuoteMark(text[i]); return usableLength; } @@ -2091,24 +2142,24 @@ inline bool SearchBuffer::needsMoreContext() const return m_needsMoreContext; } -inline void SearchBuffer::prependContext(const UChar* characters, size_t length) +inline void SearchBuffer::prependContext(StringView text) { ASSERT(m_needsMoreContext); ASSERT(m_prefixLength == m_buffer.size()); - if (!length) + if (!text.length()) return; m_atBreak = false; - size_t wordBoundaryContextStart = length; + size_t wordBoundaryContextStart = text.length(); if (wordBoundaryContextStart) { - U16_BACK_1(characters, 0, wordBoundaryContextStart); - wordBoundaryContextStart = startOfLastWordBoundaryContext(characters, wordBoundaryContextStart); + U16_BACK_1(text, 0, wordBoundaryContextStart); + wordBoundaryContextStart = startOfLastWordBoundaryContext(text.substring(0, wordBoundaryContextStart)); } - size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, length - wordBoundaryContextStart); - m_buffer.insert(0, characters + length - usableLength, usableLength); + size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, text.length() - wordBoundaryContextStart); + WTF::append(m_buffer, text.substring(text.length() - usableLength, usableLength)); m_prefixLength += usableLength; if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity()) @@ -2184,6 +2235,17 @@ inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) con } } } + +inline bool SearchBuffer::isWordEndMatch(size_t start, size_t length) const +{ + ASSERT(length); + ASSERT(m_options & AtWordEnds); + + int endWord; + // Start searching at the end of matched search, so that multiple word matches succeed. + findEndWordBoundary(StringView(m_buffer.data(), m_buffer.size()), start + length - 1, &endWord); + return static_cast<size_t>(endWord) == (start + length); +} inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const { @@ -2231,12 +2293,12 @@ inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes // a word, so treat the position before any CJK character as a word start. - if (Font::isCJKIdeographOrSymbol(firstCharacter)) + if (FontCascade::isCJKIdeographOrSymbol(firstCharacter)) return true; size_t wordBreakSearchStart = start + length; while (wordBreakSearchStart > start) - wordBreakSearchStart = findNextWordFromIndex(m_buffer.data(), m_buffer.size(), wordBreakSearchStart, false /* backwards */); + wordBreakSearchStart = findNextWordFromIndex(StringView(m_buffer.data(), m_buffer.size()), wordBreakSearchStart, false /* backwards */); return wordBreakSearchStart == start; } @@ -2277,9 +2339,9 @@ nextMatch: if (m_options & AtWordStarts) { // Ensure that there is sufficient context before matchStart the next time around for // determining if it is at a word boundary. - int wordBoundaryContextStart = matchStart; + unsigned wordBoundaryContextStart = matchStart; U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); - wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart); + wordBoundaryContextStart = startOfLastWordBoundaryContext(StringView(m_buffer.data(), wordBoundaryContextStart)); overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart)); } memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); @@ -2292,7 +2354,9 @@ nextMatch: ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size); // If this match is "bad", move on to the next match. - if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) { + if (isBadMatch(m_buffer.data() + matchStart, matchedLength) + || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength)) + || ((m_options & AtWordEnds) && !isWordEndMatch(matchStart, matchedLength))) { matchStart = usearch_next(searcher, &status); ASSERT(status == U_ZERO_ERROR); goto nextMatch; @@ -2319,7 +2383,7 @@ inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) { ASSERT(!m_target.isEmpty()); m_target.replace(noBreakSpace, ' '); - foldQuoteMarksAndSoftHyphens(m_target); + foldQuoteMarks(m_target); } inline SearchBuffer::~SearchBuffer() @@ -2339,7 +2403,7 @@ inline bool SearchBuffer::atBreak() const inline void SearchBuffer::append(UChar c, bool isStart) { - m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMarkOrSoftHyphen(c); + m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMark(c); m_isCharacterStartBuffer[m_cursor] = isStart; if (++m_cursor == m_target.length()) { m_cursor = 0; @@ -2421,109 +2485,108 @@ size_t SearchBuffer::length() const // -------- -int TextIterator::rangeLength(const Range* r, bool forSelectionPreservation) +int TextIterator::rangeLength(const Range* range, bool forSelectionPreservation) { - int length = 0; - for (TextIterator it(r, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance()) - length += it.length(); - + unsigned length = 0; + for (TextIterator it(range, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance()) + length += it.text().length(); return length; } -PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) +Ref<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) +{ + CharacterIterator entireRangeIterator(*entireRange); + return characterSubrange(entireRange->ownerDocument(), entireRangeIterator, characterOffset, characterCount); +} + +static inline bool isInsideReplacedElement(TextIterator& iterator) { - CharacterIterator entireRangeIterator(entireRange); - return characterSubrange(entireRangeIterator, characterOffset, characterCount); + ASSERT(!iterator.atEnd()); + ASSERT(iterator.text().length() == 1); + Node* node = iterator.node(); + return node && isRendererReplacedElement(node->renderer()); } -PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation) +RefPtr<Range> TextIterator::rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation) { - RefPtr<Range> resultRange = scope->document().createRange(); + Ref<Range> resultRange = scope->document().createRange(); int docTextPosition = 0; int rangeEnd = rangeLocation + rangeLength; bool startRangeFound = false; - RefPtr<Range> textRunRange; + Ref<Range> textRunRange = rangeOfContents(*scope); - TextIterator it(rangeOfContents(*scope).get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); + TextIterator it(textRunRange.ptr(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>. - if (rangeLocation == 0 && rangeLength == 0 && it.atEnd()) { - textRunRange = it.range(); - - resultRange->setStart(textRunRange->startContainer(), 0, ASSERT_NO_EXCEPTION); - resultRange->setEnd(textRunRange->startContainer(), 0, ASSERT_NO_EXCEPTION); - - return resultRange.release(); + if (!rangeLocation && !rangeLength && it.atEnd()) { + resultRange->setStart(textRunRange->startContainer(), 0); + resultRange->setEnd(textRunRange->startContainer(), 0); + return WTFMove(resultRange); } for (; !it.atEnd(); it.advance()) { - int len = it.length(); + int length = it.text().length(); textRunRange = it.range(); - - bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + len; - bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + len; - - // Fix textRunRange->endPosition(), but only if foundStart || foundEnd, because it is only - // in those cases that textRunRange is used. + + bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + length; + bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + length; + if (foundEnd) { // FIXME: This is a workaround for the fact that the end of a run is often at the wrong - // position for emitted '\n's. - if (len == 1 && it.characterAt(0) == '\n') { + // position for emitted '\n's or if the renderer of the current node is a replaced element. + if (length == 1 && (it.text()[0] == '\n' || isInsideReplacedElement(it))) { it.advance(); if (!it.atEnd()) { - RefPtr<Range> range = it.range(); - textRunRange->setEnd(range->startContainer(), range->startOffset(), ASSERT_NO_EXCEPTION); + Ref<Range> range = it.range(); + textRunRange->setEnd(range->startContainer(), range->startOffset()); } else { Position runStart = textRunRange->startPosition(); Position runEnd = VisiblePosition(runStart).next().deepEquivalent(); if (runEnd.isNotNull()) - textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode(), ASSERT_NO_EXCEPTION); + textRunRange->setEnd(*runEnd.containerNode(), runEnd.computeOffsetInContainerNode()); } } } if (foundStart) { startRangeFound = true; - int exception = 0; - if (textRunRange->startContainer()->isTextNode()) { + if (textRunRange->startContainer().isTextNode()) { int offset = rangeLocation - docTextPosition; - resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); + resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset()); } else { if (rangeLocation == docTextPosition) - resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset(), exception); + resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset()); else - resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset(), exception); + resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset()); } } if (foundEnd) { - int exception = 0; - if (textRunRange->startContainer()->isTextNode()) { + if (textRunRange->startContainer().isTextNode()) { int offset = rangeEnd - docTextPosition; - resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset(), exception); + resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset()); } else { if (rangeEnd == docTextPosition) - resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset(), exception); + resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset()); else - resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); + resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset()); } - docTextPosition += len; + docTextPosition += length; break; } - docTextPosition += len; + + docTextPosition += length; } if (!startRangeFound) - return 0; + return nullptr; - if (rangeLength != 0 && rangeEnd > docTextPosition) { // rangeEnd is out of bounds - int exception = 0; - resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset(), exception); - } + if (rangeLength && rangeEnd > docTextPosition) // rangeEnd is out of bounds + resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset()); - return resultRange.release(); + return WTFMove(resultRange); } bool TextIterator::getLocationAndLengthFromRange(Node* scope, const Range* range, size_t& location, size_t& length) @@ -2531,26 +2594,23 @@ bool TextIterator::getLocationAndLengthFromRange(Node* scope, const Range* range location = notFound; length = 0; - if (!range->startContainer()) - return false; - // The critical assumption is that this only gets called with ranges that // concentrate on a given area containing the selection root. This is done // because of text fields and textareas. The DOM for those is not // directly in the document DOM, so ensure that the range does not cross a // boundary of one of those. - if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope)) + if (&range->startContainer() != scope && !range->startContainer().isDescendantOf(scope)) return false; - if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope)) + if (&range->endContainer() != scope && !range->endContainer().isDescendantOf(scope)) return false; - RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset()); - ASSERT(testRange->startContainer() == scope); - location = TextIterator::rangeLength(testRange.get()); + Ref<Range> testRange = Range::create(scope->document(), scope, 0, &range->startContainer(), range->startOffset()); + ASSERT(&testRange->startContainer() == scope); + location = TextIterator::rangeLength(testRange.ptr()); - testRange->setEnd(range->endContainer(), range->endOffset(), IGNORE_EXCEPTION); - ASSERT(testRange->startContainer() == scope); - length = TextIterator::rangeLength(testRange.get()) - location; + testRange->setEnd(range->endContainer(), range->endOffset()); + ASSERT(&testRange->startContainer() == scope); + length = TextIterator::rangeLength(testRange.ptr()) - location; return true; } @@ -2570,7 +2630,7 @@ String plainText(const Range* r, TextIteratorBehavior defaultBehavior, bool isDi for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) { it.appendTextToStringBuilder(builder); - bufferLength += it.length(); + bufferLength += it.text().length(); } if (!bufferLength) @@ -2584,71 +2644,101 @@ String plainText(const Range* r, TextIteratorBehavior defaultBehavior, bool isDi return result; } -static PassRefPtr<Range> collapsedToBoundary(const Range* range, bool forward) +String plainTextReplacingNoBreakSpace(const Range* range, TextIteratorBehavior defaultBehavior, bool isDisplayString) { - RefPtr<Range> result = range->cloneRange(ASSERT_NO_EXCEPTION); - result->collapse(!forward, ASSERT_NO_EXCEPTION); - return result.release(); + return plainText(range, defaultBehavior, isDisplayString).replace(noBreakSpace, ' '); } -static size_t findPlainText(CharacterIterator& it, const String& target, FindOptions options, size_t& matchStart) +static Ref<Range> collapsedToBoundary(const Range& range, bool forward) { - matchStart = 0; - size_t matchLength = 0; + Ref<Range> result = range.cloneRange(); + result->collapse(!forward); + return result; +} - SearchBuffer buffer(target, options); +static TextIteratorBehavior findIteratorOptions(FindOptions options) +{ + TextIteratorBehavior iteratorOptions = TextIteratorEntersTextControls | TextIteratorClipsToFrameAncestors; + if (!(options & DoNotTraverseFlatTree)) + iteratorOptions |= TextIteratorTraversesFlatTree; + return iteratorOptions; +} +static void findPlainTextMatches(const Range& range, const String& target, FindOptions options, const std::function<bool(size_t, size_t)>& match) +{ + SearchBuffer buffer(target, options); if (buffer.needsMoreContext()) { - RefPtr<Range> startRange = it.range(); - RefPtr<Range> beforeStartRange = startRange->ownerDocument().createRange(); - beforeStartRange->setEnd(startRange->startContainer(), startRange->startOffset(), IGNORE_EXCEPTION); + Ref<Range> beforeStartRange = range.ownerDocument().createRange(); + beforeStartRange->setEnd(range.startContainer(), range.startOffset()); for (SimplifiedBackwardsTextIterator backwardsIterator(beforeStartRange.get()); !backwardsIterator.atEnd(); backwardsIterator.advance()) { - buffer.prependContext(backwardsIterator.characters(), backwardsIterator.length()); + buffer.prependContext(backwardsIterator.text()); if (!buffer.needsMoreContext()) break; } } - while (!it.atEnd()) { - it.advance(buffer.append(it.characters(), it.length())); -tryAgain: - size_t matchStartOffset; - if (size_t newMatchLength = buffer.search(matchStartOffset)) { - // Note that we found a match, and where we found it. - size_t lastCharacterInBufferOffset = it.characterOffset(); - ASSERT(lastCharacterInBufferOffset >= matchStartOffset); - matchStart = lastCharacterInBufferOffset - matchStartOffset; - matchLength = newMatchLength; - // If searching forward, stop on the first match. - // If searching backward, don't stop, so we end up with the last match. - if (!(options & Backwards)) + CharacterIterator findIterator(range, findIteratorOptions(options)); + while (!findIterator.atEnd()) { + findIterator.advance(buffer.append(findIterator.text())); + while (1) { + size_t matchStartOffset; + size_t newMatchLength = buffer.search(matchStartOffset); + if (!newMatchLength) { + if (findIterator.atBreak() && !buffer.atBreak()) { + buffer.reachedBreak(); + continue; + } break; - goto tryAgain; - } - if (it.atBreak() && !buffer.atBreak()) { - buffer.reachedBreak(); - goto tryAgain; + } + size_t lastCharacterInBufferOffset = findIterator.characterOffset(); + ASSERT(lastCharacterInBufferOffset >= matchStartOffset); + if (match(lastCharacterInBufferOffset - matchStartOffset, newMatchLength)) + return; } } +} - return matchLength; +static Ref<Range> rangeForMatch(const Range& range, FindOptions options, size_t matchStart, size_t matchLength, bool searchForward) +{ + if (!matchLength) + return collapsedToBoundary(range, searchForward); + CharacterIterator rangeComputeIterator(range, findIteratorOptions(options)); + return characterSubrange(range.ownerDocument(), rangeComputeIterator, matchStart, matchLength); } -PassRefPtr<Range> findPlainText(const Range* range, const String& target, FindOptions options) +Ref<Range> findClosestPlainText(const Range& range, const String& target, FindOptions options, unsigned targetOffset) { - // First, find the text. - size_t matchStart; - size_t matchLength; - { - CharacterIterator findIterator(range, TextIteratorEntersTextControls); - matchLength = findPlainText(findIterator, target, options, matchStart); - if (!matchLength) - return collapsedToBoundary(range, !(options & Backwards)); - } + size_t matchStart = 0; + size_t matchLength = 0; + size_t distance = std::numeric_limits<size_t>::max(); + auto match = [targetOffset, &distance, &matchStart, &matchLength] (size_t start, size_t length) { + size_t newDistance = std::min(abs(static_cast<signed>(start - targetOffset)), abs(static_cast<signed>(start + length - targetOffset))); + if (newDistance < distance) { + matchStart = start; + matchLength = length; + distance = newDistance; + } + return false; + }; + + findPlainTextMatches(range, target, options, match); + return rangeForMatch(range, options, matchStart, matchLength, !(options & Backwards)); +} + +Ref<Range> findPlainText(const Range& range, const String& target, FindOptions options) +{ + bool searchForward = !(options & Backwards); + size_t matchStart = 0; + size_t matchLength = 0; + auto match = [searchForward, &matchStart, &matchLength] (size_t start, size_t length) { + matchStart = start; + matchLength = length; + // Look for the last match when searching backwards instead. + return searchForward; + }; - // Then, find the document position of the start and the end of the text. - CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls); - return characterSubrange(computeRangeIterator, matchStart, matchLength); + findPlainTextMatches(range, target, options, match); + return rangeForMatch(range, options, matchStart, matchLength, searchForward); } } |