diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2017-06-27 06:07:23 +0000 |
commit | 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch) | |
tree | 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/platform/text | |
parent | 32761a6cee1d0dee366b885b7b9c777e67885688 (diff) | |
download | WebKitGtk-tarball-master.tar.gz |
webkitgtk-2.16.5HEADwebkitgtk-2.16.5master
Diffstat (limited to 'Source/WebCore/platform/text')
67 files changed, 3128 insertions, 3432 deletions
diff --git a/Source/WebCore/platform/text/BidiContext.cpp b/Source/WebCore/platform/text/BidiContext.cpp index db126c991..c9f0f813b 100644 --- a/Source/WebCore/platform/text/BidiContext.cpp +++ b/Source/WebCore/platform/text/BidiContext.cpp @@ -1,6 +1,6 @@ /* * Copyright (C) 2000 Lars Knoll (knoll@kde.org) - * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved. + * Copyright (C) 2003-2017 Apple Inc. All right reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -21,6 +21,7 @@ #include "config.h" #include "BidiContext.h" + #include <wtf/Vector.h> namespace WebCore { @@ -32,12 +33,21 @@ struct SameSizeAsBidiContext : public RefCounted<SameSizeAsBidiContext> { COMPILE_ASSERT(sizeof(BidiContext) == sizeof(SameSizeAsBidiContext), BidiContext_should_stay_small); -inline PassRefPtr<BidiContext> BidiContext::createUncached(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent) +inline BidiContext::BidiContext(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent) + : m_level(level) + , m_direction(direction) + , m_override(override) + , m_source(source) + , m_parent(parent) +{ +} + +inline Ref<BidiContext> BidiContext::createUncached(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent) { - return adoptRef(new BidiContext(level, direction, override, source, parent)); + return adoptRef(*new BidiContext(level, direction, override, source, parent)); } -PassRefPtr<BidiContext> BidiContext::create(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent) +Ref<BidiContext> BidiContext::create(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent) { ASSERT(direction == (level % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT)); @@ -47,52 +57,48 @@ PassRefPtr<BidiContext> BidiContext::create(unsigned char level, UCharDirection ASSERT(level <= 1); if (!level) { if (!override) { - static BidiContext* ltrContext = createUncached(0, U_LEFT_TO_RIGHT, false, FromStyleOrDOM, 0).leakRef(); + static BidiContext& ltrContext = createUncached(0, U_LEFT_TO_RIGHT, false, FromStyleOrDOM, 0).leakRef(); return ltrContext; } - static BidiContext* ltrOverrideContext = createUncached(0, U_LEFT_TO_RIGHT, true, FromStyleOrDOM, 0).leakRef(); + static BidiContext& ltrOverrideContext = createUncached(0, U_LEFT_TO_RIGHT, true, FromStyleOrDOM, 0).leakRef(); return ltrOverrideContext; } if (!override) { - static BidiContext* rtlContext = createUncached(1, U_RIGHT_TO_LEFT, false, FromStyleOrDOM, 0).leakRef(); + static BidiContext& rtlContext = createUncached(1, U_RIGHT_TO_LEFT, false, FromStyleOrDOM, 0).leakRef(); return rtlContext; } - static BidiContext* rtlOverrideContext = createUncached(1, U_RIGHT_TO_LEFT, true, FromStyleOrDOM, 0).leakRef(); + static BidiContext& rtlOverrideContext = createUncached(1, U_RIGHT_TO_LEFT, true, FromStyleOrDOM, 0).leakRef(); return rtlOverrideContext; } -static inline PassRefPtr<BidiContext> copyContextAndRebaselineLevel(BidiContext* context, BidiContext* parent) +static inline Ref<BidiContext> copyContextAndRebaselineLevel(BidiContext& context, BidiContext* parent) { - ASSERT(context); - unsigned char newLevel = parent ? parent->level() : 0; - if (context->dir() == U_RIGHT_TO_LEFT) + auto newLevel = parent ? parent->level() : 0; + if (context.dir() == U_RIGHT_TO_LEFT) newLevel = nextGreaterOddLevel(newLevel); else if (parent) newLevel = nextGreaterEvenLevel(newLevel); - - return BidiContext::create(newLevel, context->dir(), context->override(), context->source(), parent); + return BidiContext::create(newLevel, context.dir(), context.override(), context.source(), parent); } // The BidiContext stack must be immutable -- they're re-used for re-layout after -// DOM modification/editing -- so we copy all the non-unicode contexts, and +// DOM modification/editing -- so we copy all the non-Unicode contexts, and // recalculate their levels. -PassRefPtr<BidiContext> BidiContext::copyStackRemovingUnicodeEmbeddingContexts() +Ref<BidiContext> BidiContext::copyStackRemovingUnicodeEmbeddingContexts() { Vector<BidiContext*, 64> contexts; - for (BidiContext* iter = this; iter; iter = iter->parent()) { - if (iter->source() != FromUnicode) - contexts.append(iter); + for (auto* ancestor = this; ancestor; ancestor = ancestor->parent()) { + if (ancestor->source() != FromUnicode) + contexts.append(ancestor); } ASSERT(contexts.size()); - - RefPtr<BidiContext> topContext = copyContextAndRebaselineLevel(contexts.last(), 0); - for (int i = contexts.size() - 1; i > 0; --i) - topContext = copyContextAndRebaselineLevel(contexts[i - 1], topContext.get()); - - return topContext.release(); + auto topContext = copyContextAndRebaselineLevel(*contexts.last(), nullptr); + for (unsigned i = contexts.size() - 1; i; --i) + topContext = copyContextAndRebaselineLevel(*contexts[i - 1], topContext.ptr()); + return topContext; } bool operator==(const BidiContext& c1, const BidiContext& c2) diff --git a/Source/WebCore/platform/text/BidiContext.h b/Source/WebCore/platform/text/BidiContext.h index 371983055..d4c4e5708 100644 --- a/Source/WebCore/platform/text/BidiContext.h +++ b/Source/WebCore/platform/text/BidiContext.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2000 Lars Knoll (knoll@kde.org) - * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved. + * Copyright (C) 2003-2017 Apple Inc. All right reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -19,26 +19,20 @@ * */ -#ifndef BidiContext_h -#define BidiContext_h +#pragma once -#include <wtf/Assertions.h> -#include <wtf/PassRefPtr.h> +#include <unicode/uchar.h> #include <wtf/RefCounted.h> #include <wtf/RefPtr.h> -#include <wtf/unicode/Unicode.h> namespace WebCore { -enum BidiEmbeddingSource { - FromStyleOrDOM, - FromUnicode -}; +enum BidiEmbeddingSource { FromStyleOrDOM, FromUnicode }; // Used to keep track of explicit embeddings. class BidiContext : public RefCounted<BidiContext> { public: - static PassRefPtr<BidiContext> create(unsigned char level, UCharDirection, bool override = false, BidiEmbeddingSource = FromStyleOrDOM, BidiContext* parent = 0); + WEBCORE_EXPORT static Ref<BidiContext> create(unsigned char level, UCharDirection, bool override = false, BidiEmbeddingSource = FromStyleOrDOM, BidiContext* parent = nullptr); BidiContext* parent() const { return m_parent.get(); } unsigned char level() const { return m_level; } @@ -46,18 +40,12 @@ public: bool override() const { return m_override; } BidiEmbeddingSource source() const { return static_cast<BidiEmbeddingSource>(m_source); } - PassRefPtr<BidiContext> copyStackRemovingUnicodeEmbeddingContexts(); + WEBCORE_EXPORT Ref<BidiContext> copyStackRemovingUnicodeEmbeddingContexts(); + private: - BidiContext(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent) - : m_level(level) - , m_direction(direction) - , m_override(override) - , m_source(source) - , m_parent(parent) - { - } + BidiContext(unsigned char level, UCharDirection, bool override, BidiEmbeddingSource, BidiContext* parent); - static PassRefPtr<BidiContext> createUncached(unsigned char level, UCharDirection, bool override, BidiEmbeddingSource, BidiContext* parent); + static Ref<BidiContext> createUncached(unsigned char level, UCharDirection, bool override, BidiEmbeddingSource, BidiContext* parent); unsigned m_level : 6; // The maximium bidi level is 62: http://unicode.org/reports/tr9/#Explicit_Levels_and_Directions unsigned m_direction : 5; // Direction @@ -79,5 +67,3 @@ inline unsigned char nextGreaterEvenLevel(unsigned char level) bool operator==(const BidiContext&, const BidiContext&); } // namespace WebCore - -#endif // BidiContext_h diff --git a/Source/WebCore/platform/text/BidiResolver.h b/Source/WebCore/platform/text/BidiResolver.h index 9b13941c7..1b13ef3b7 100644 --- a/Source/WebCore/platform/text/BidiResolver.h +++ b/Source/WebCore/platform/text/BidiResolver.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2000 Lars Knoll (knoll@kde.org) - * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved. + * Copyright (C) 2003-2017 Apple Inc. All rights reserved. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -19,127 +19,103 @@ * */ -#ifndef BidiResolver_h -#define BidiResolver_h +#pragma once #include "BidiContext.h" #include "BidiRunList.h" -#include "TextDirection.h" +#include "WritingMode.h" +#include <wtf/HashMap.h> #include <wtf/Noncopyable.h> -#include <wtf/PassRefPtr.h> #include <wtf/Vector.h> namespace WebCore { class RenderObject; -template <class Iterator> class MidpointState { +template<typename Iterator> class WhitespaceCollapsingState { public: - MidpointState() - { - reset(); - } - void reset() { - m_numMidpoints = 0; - m_currentMidpoint = 0; - m_betweenMidpoints = false; + m_transitions.clear(); + m_currentTransition = 0; } - void startIgnoringSpaces(const Iterator& midpoint) + void startIgnoringSpaces(const Iterator& transition) { - ASSERT(!(m_numMidpoints % 2)); - addMidpoint(midpoint); + ASSERT(!(m_transitions.size() % 2)); + m_transitions.append(transition); } - void stopIgnoringSpaces(const Iterator& midpoint) + void stopIgnoringSpaces(const Iterator& transition) { - ASSERT(m_numMidpoints % 2); - addMidpoint(midpoint); + ASSERT(m_transitions.size() % 2); + m_transitions.append(transition); } // When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or // hard line breaks to ensure that they're not ignored. - void ensureLineBoxInsideIgnoredSpaces(RenderObject* renderer) + void ensureLineBoxInsideIgnoredSpaces(RenderObject& renderer) { - Iterator midpoint(0, renderer, 0); - stopIgnoringSpaces(midpoint); - startIgnoringSpaces(midpoint); + Iterator transition(0, &renderer, 0); + stopIgnoringSpaces(transition); + startIgnoringSpaces(transition); } - Vector<Iterator>& midpoints() { return m_midpoints; } - const unsigned& numMidpoints() const { return m_numMidpoints; } - const unsigned& currentMidpoint() const { return m_currentMidpoint; } - void incrementCurrentMidpoint() { ++m_currentMidpoint; } - void decreaseNumMidpoints() { --m_numMidpoints; } - const bool& betweenMidpoints() const { return m_betweenMidpoints; } - void setBetweenMidpoints(bool betweenMidpoint) { m_betweenMidpoints = betweenMidpoint; } -private: - // The goal is to reuse the line state across multiple - // lines so we just keep an array around for midpoints and never clear it across multiple - // lines. We track the number of items and position using the two other variables. - Vector<Iterator> m_midpoints; - unsigned m_numMidpoints; - unsigned m_currentMidpoint; - bool m_betweenMidpoints; - - void addMidpoint(const Iterator& midpoint) + void decrementTransitionAt(size_t index) { - if (m_midpoints.size() <= m_numMidpoints) - m_midpoints.grow(m_numMidpoints + 10); - - Iterator* midpointsIterator = m_midpoints.data(); - midpointsIterator[m_numMidpoints++] = midpoint; + m_transitions[index].fastDecrement(); } + + const Vector<Iterator>& transitions() { return m_transitions; } + size_t numTransitions() const { return m_transitions.size(); } + size_t currentTransition() const { return m_currentTransition; } + void setCurrentTransition(size_t currentTransition) { m_currentTransition = currentTransition; } + void incrementCurrentTransition() { ++m_currentTransition; } + void decrementNumTransitions() { m_transitions.shrink(m_transitions.size() - 1); } + bool betweenTransitions() const { return m_currentTransition % 2; } +private: + Vector<Iterator> m_transitions; + size_t m_currentTransition { 0 }; }; // The BidiStatus at a given position (typically the end of a line) can // be cached and then used to restart bidi resolution at that position. struct BidiStatus { - BidiStatus() - : eor(U_OTHER_NEUTRAL) - , lastStrong(U_OTHER_NEUTRAL) - , last(U_OTHER_NEUTRAL) - { - } + BidiStatus() = default; // Creates a BidiStatus representing a new paragraph root with a default direction. // Uses TextDirection as it only has two possibilities instead of UCharDirection which has at least 19. - BidiStatus(TextDirection textDirection, bool isOverride) + BidiStatus(TextDirection direction, bool isOverride) + : eor(direction == LTR ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT) + , lastStrong(eor) + , last(eor) + , context(BidiContext::create(direction == LTR ? 0 : 1, eor, isOverride)) { - UCharDirection direction = textDirection == LTR ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT; - eor = lastStrong = last = direction; - context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride); } - BidiStatus(UCharDirection eorDir, UCharDirection lastStrongDir, UCharDirection lastDir, PassRefPtr<BidiContext> bidiContext) - : eor(eorDir) - , lastStrong(lastStrongDir) - , last(lastDir) - , context(bidiContext) + BidiStatus(UCharDirection eor, UCharDirection lastStrong, UCharDirection last, RefPtr<BidiContext>&& context) + : eor(eor) + , lastStrong(lastStrong) + , last(last) + , context(WTFMove(context)) { } - UCharDirection eor; - UCharDirection lastStrong; - UCharDirection last; + UCharDirection eor { U_OTHER_NEUTRAL }; + UCharDirection lastStrong { U_OTHER_NEUTRAL }; + UCharDirection last { U_OTHER_NEUTRAL }; RefPtr<BidiContext> context; }; -class BidiEmbedding { -public: +struct BidiEmbedding { BidiEmbedding(UCharDirection direction, BidiEmbeddingSource source) - : m_direction(direction) - , m_source(source) + : direction(direction) + , source(source) { } - UCharDirection direction() const { return m_direction; } - BidiEmbeddingSource source() const { return m_source; } -private: - UCharDirection m_direction; - BidiEmbeddingSource m_source; + UCharDirection direction; + BidiEmbeddingSource source; }; inline bool operator==(const BidiStatus& status1, const BidiStatus& status2) @@ -153,11 +129,12 @@ inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2) } struct BidiCharacterRun { - BidiCharacterRun(int start, int stop, BidiContext* context, UCharDirection direction) - : m_override(context->override()) - , m_next(0) - , m_start(start) + WTF_MAKE_FAST_ALLOCATED; +public: + BidiCharacterRun(unsigned start, unsigned stop, BidiContext* context, UCharDirection direction) + : m_start(start) , m_stop(stop) + , m_override(context->override()) { if (direction == U_OTHER_NEUTRAL) direction = context->dir(); @@ -176,25 +153,33 @@ struct BidiCharacterRun { } } - int start() const { return m_start; } - int stop() const { return m_stop; } + ~BidiCharacterRun() + { + // Delete the linked list in a loop to prevent destructor recursion. + auto next = WTFMove(m_next); + while (next) + next = WTFMove(next->m_next); + } + + unsigned start() const { return m_start; } + unsigned stop() const { return m_stop; } unsigned char level() const { return m_level; } bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; } bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; } - BidiCharacterRun* next() const { return m_next; } - void setNext(BidiCharacterRun* next) { m_next = next; } + BidiCharacterRun* next() const { return m_next.get(); } + std::unique_ptr<BidiCharacterRun> takeNext() { return WTFMove(m_next); } + void setNext(std::unique_ptr<BidiCharacterRun>&& next) { m_next = WTFMove(next); } + +private: + std::unique_ptr<BidiCharacterRun> m_next; - // Do not add anything apart from bitfields until after m_next. See https://bugs.webkit.org/show_bug.cgi?id=100173 +public: + unsigned m_start; + unsigned m_stop; + unsigned char m_level; bool m_override : 1; bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit. -#if ENABLE(CSS_SHAPES) - bool m_startsSegment : 1; // Same comment as m_hasHyphen. -#endif - unsigned char m_level; - BidiCharacterRun* m_next; - int m_start; - int m_stop; }; enum VisualDirectionOverride { @@ -205,21 +190,9 @@ enum VisualDirectionOverride { // BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm // http://unicode.org/reports/tr9 -template <class Iterator, class Run> class BidiResolver { - WTF_MAKE_NONCOPYABLE(BidiResolver); +template<typename Iterator, typename Run, typename DerivedClass> class BidiResolverBase { + WTF_MAKE_NONCOPYABLE(BidiResolverBase); public: - BidiResolver() - : m_direction(U_OTHER_NEUTRAL) - , m_reachedEndOfLine(false) - , m_emptyRun(true) - , m_nestedIsolateCount(0) - { - } - -#ifndef NDEBUG - ~BidiResolver(); -#endif - const Iterator& position() const { return m_current; } void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; } void setPosition(const Iterator& position, unsigned nestedIsolatedCount) @@ -228,22 +201,22 @@ public: m_nestedIsolateCount = nestedIsolatedCount; } - void increment() { m_current.increment(); } + void increment() { static_cast<DerivedClass&>(*this).incrementInternal(); } BidiContext* context() const { return m_status.context.get(); } - void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; } + void setContext(RefPtr<BidiContext>&& context) { m_status.context = WTFMove(context); } void setLastDir(UCharDirection lastDir) { m_status.last = lastDir; } void setLastStrongDir(UCharDirection lastStrongDir) { m_status.lastStrong = lastStrongDir; } void setEorDir(UCharDirection eorDir) { m_status.eor = eorDir; } UCharDirection dir() const { return m_direction; } - void setDir(UCharDirection d) { m_direction = d; } + void setDir(UCharDirection direction) { m_direction = direction; } const BidiStatus& status() const { return m_status; } - void setStatus(const BidiStatus s) { m_status = s; } + void setStatus(BidiStatus status) { m_status = status; } - MidpointState<Iterator>& midpointState() { return m_midpointState; } + WhitespaceCollapsingState<Iterator>& whitespaceCollapsingState() { return m_whitespaceCollapsingState; } // The current algorithm handles nested isolates one layer of nesting at a time. // But when we layout each isolated span, we will walk into (and ignore) all @@ -263,12 +236,15 @@ public: // It's unclear if this is still needed. void markCurrentRunEmpty() { m_emptyRun = true; } - Vector<Run*>& isolatedRuns() { return m_isolatedRuns; } + void setWhitespaceCollapsingTransitionForIsolatedRun(Run&, size_t); + unsigned whitespaceCollapsingTransitionForIsolatedRun(Run&); protected: + BidiResolverBase() = default; + // FIXME: Instead of InlineBidiResolvers subclassing this method, we should // pass in some sort of Traits object which knows how to create runs for appending. - void appendRun(); + void appendRun() { static_cast<DerivedClass&>(*this).appendRunInternal(); } Iterator m_current; // sor and eor are "start of run" and "end of run" respectively and correpond @@ -277,20 +253,20 @@ protected: Iterator m_eor; // Points to the last character in the current run. Iterator m_last; BidiStatus m_status; - UCharDirection m_direction; + UCharDirection m_direction { U_OTHER_NEUTRAL }; Iterator endOfLine; - bool m_reachedEndOfLine; + bool m_reachedEndOfLine { false }; Iterator m_lastBeforeET; // Before a U_EUROPEAN_NUMBER_TERMINATOR - bool m_emptyRun; + bool m_emptyRun { true }; // FIXME: This should not belong to the resolver, but rather be passed // into createBidiRunsForLine by the caller. BidiRunList<Run> m_runs; - MidpointState<Iterator> m_midpointState; + WhitespaceCollapsingState<Iterator> m_whitespaceCollapsingState; - unsigned m_nestedIsolateCount; - Vector<Run*> m_isolatedRuns; + unsigned m_nestedIsolateCount { 0 }; + HashMap<Run*, unsigned> m_whitespaceCollapsingTransitionForIsolatedRun; private: void raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to); @@ -299,21 +275,38 @@ private: void updateStatusLastFromCurrentDirection(UCharDirection); void reorderRunsFromLevels(); + void incrementInternal() { m_current.increment(); } + void appendRunInternal(); Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence; }; -#ifndef NDEBUG -template <class Iterator, class Run> -BidiResolver<Iterator, Run>::~BidiResolver() +template<typename Iterator, typename Run> +class BidiResolver : public BidiResolverBase<Iterator, Run, BidiResolver<Iterator, Run>> { +}; + +template<typename Iterator, typename Run, typename IsolateRun> +class BidiResolverWithIsolate : public BidiResolverBase<Iterator, Run, BidiResolverWithIsolate<Iterator, Run, IsolateRun>> { +public: + ~BidiResolverWithIsolate(); + + void incrementInternal(); + void appendRunInternal(); + Vector<IsolateRun>& isolatedRuns() { return m_isolatedRuns; } + +private: + Vector<IsolateRun> m_isolatedRuns; +}; + +template<typename Iterator, typename Run, typename IsolateRun> +inline BidiResolverWithIsolate<Iterator, Run, IsolateRun>::~BidiResolverWithIsolate() { // The owner of this resolver should have handled the isolated runs. ASSERT(m_isolatedRuns.isEmpty()); } -#endif -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::appendRun() +template<typename Iterator, typename Run, typename DerivedClass> +void BidiResolverBase<Iterator, Run, DerivedClass>::appendRunInternal() { if (!m_emptyRun && !m_eor.atEnd()) { unsigned startOffset = m_sor.offset(); @@ -325,7 +318,7 @@ void BidiResolver<Iterator, Run>::appendRun() } if (endOffset >= startOffset) - m_runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction)); + m_runs.appendRun(std::make_unique<Run>(startOffset, endOffset + 1, context(), m_direction)); m_eor.increment(); m_sor = m_eor; @@ -335,8 +328,8 @@ void BidiResolver<Iterator, Run>::appendRun() m_status.eor = U_OTHER_NEUTRAL; } -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::embed(UCharDirection dir, BidiEmbeddingSource source) +template<typename Iterator, typename Run, typename DerivedClass> +void BidiResolverBase<Iterator, Run, DerivedClass>::embed(UCharDirection dir, BidiEmbeddingSource source) { // Isolated spans compute base directionality during their own UBA run. // Do not insert fake embed characters once we enter an isolated span. @@ -346,8 +339,8 @@ void BidiResolver<Iterator, Run>::embed(UCharDirection dir, BidiEmbeddingSource m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source)); } -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel() +template<typename Iterator, typename Run, typename DerivedClass> +void BidiResolverBase<Iterator, Run, DerivedClass>::checkDirectionInLowerRaiseEmbeddingLevel() { ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd()); ASSERT(m_status.last != U_DIR_NON_SPACING_MARK @@ -361,8 +354,8 @@ void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel() m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT; } -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(UCharDirection from) +template<typename Iterator, typename Run, typename DerivedClass> +void BidiResolverBase<Iterator, Run, DerivedClass>::lowerExplicitEmbeddingLevel(UCharDirection from) { if (!m_emptyRun && m_eor != m_last) { checkDirectionInLowerRaiseEmbeddingLevel(); @@ -397,8 +390,8 @@ void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(UCharDirection fro m_eor = Iterator(); } -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to) +template<typename Iterator, typename Run, typename DerivedClass> +void BidiResolverBase<Iterator, Run, DerivedClass>::raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to) { if (!m_emptyRun && m_eor != m_last) { checkDirectionInLowerRaiseEmbeddingLevel(); @@ -434,51 +427,50 @@ void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(UCharDirection fro m_eor = Iterator(); } -template <class Iterator, class Run> -bool BidiResolver<Iterator, Run>::commitExplicitEmbedding() +template<typename Iterator, typename Run, typename DerivedClass> +bool BidiResolverBase<Iterator, Run, DerivedClass>::commitExplicitEmbedding() { // When we're "inIsolate()" we're resolving the parent context which // ignores (skips over) the isolated content, including embedding levels. // We should never accrue embedding levels while skipping over isolated content. ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty()); - unsigned char fromLevel = context()->level(); + auto fromLevel = context()->level(); RefPtr<BidiContext> toContext = context(); - for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) { - BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i]; - if (embedding.direction() == U_POP_DIRECTIONAL_FORMAT) { - if (BidiContext* parentContext = toContext->parent()) + for (auto& embedding : m_currentExplicitEmbeddingSequence) { + if (embedding.direction == U_POP_DIRECTIONAL_FORMAT) { + if (auto* parentContext = toContext->parent()) toContext = parentContext; } else { - UCharDirection direction = (embedding.direction() == U_RIGHT_TO_LEFT_EMBEDDING || embedding.direction() == U_RIGHT_TO_LEFT_OVERRIDE) ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT; - bool override = embedding.direction() == U_LEFT_TO_RIGHT_OVERRIDE || embedding.direction() == U_RIGHT_TO_LEFT_OVERRIDE; + UCharDirection direction = (embedding.direction == U_RIGHT_TO_LEFT_EMBEDDING || embedding.direction == U_RIGHT_TO_LEFT_OVERRIDE) ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT; + bool override = embedding.direction == U_LEFT_TO_RIGHT_OVERRIDE || embedding.direction == U_RIGHT_TO_LEFT_OVERRIDE; unsigned char level = toContext->level(); if (direction == U_RIGHT_TO_LEFT) level = nextGreaterOddLevel(level); else level = nextGreaterEvenLevel(level); if (level < 61) - toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get()); + toContext = BidiContext::create(level, direction, override, embedding.source, toContext.get()); } } - unsigned char toLevel = toContext->level(); + auto toLevel = toContext->level(); if (toLevel > fromLevel) raiseExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT, toLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT); else if (toLevel < fromLevel) lowerExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT); - setContext(toContext); + setContext(WTFMove(toContext)); m_currentExplicitEmbeddingSequence.clear(); return fromLevel != toLevel; } -template <class Iterator, class Run> -inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(UCharDirection dirCurrent) +template<typename Iterator, typename Run, typename DerivedClass> +inline void BidiResolverBase<Iterator, Run, DerivedClass>::updateStatusLastFromCurrentDirection(UCharDirection dirCurrent) { switch (dirCurrent) { case U_EUROPEAN_NUMBER_TERMINATOR: @@ -518,8 +510,8 @@ inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(UC } } -template <class Iterator, class Run> -inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels() +template<typename Iterator, typename Run, typename DerivedClass> +inline void BidiResolverBase<Iterator, Run, DerivedClass>::reorderRunsFromLevels() { unsigned char levelLow = 128; unsigned char levelHigh = 0; @@ -555,8 +547,8 @@ inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels() } } -template <class Iterator, class Run> -void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak) +template<typename Iterator, typename Run, typename DerivedClass> +void BidiResolverBase<Iterator, Run, DerivedClass>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak) { ASSERT(m_direction == U_OTHER_NEUTRAL); @@ -582,7 +574,7 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, Vis m_last = m_current; bool pastEnd = false; - BidiResolver<Iterator, Run> stateAtEnd; + BidiResolverBase<Iterator, Run, DerivedClass> stateAtEnd; while (true) { UCharDirection dirCurrent; @@ -617,9 +609,16 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, Vis dirCurrent = m_status.last; } +#if PLATFORM(WIN) + // Our Windows build hasn't updated its headers from ICU 6.1, which doesn't have these symbols. + const UCharDirection U_FIRST_STRONG_ISOLATE = static_cast<UCharDirection>(19); + const UCharDirection U_LEFT_TO_RIGHT_ISOLATE = static_cast<UCharDirection>(20); + const UCharDirection U_RIGHT_TO_LEFT_ISOLATE = static_cast<UCharDirection>(21); + const UCharDirection U_POP_DIRECTIONAL_ISOLATE = static_cast<UCharDirection>(22); +#endif // We ignore all character directionality while in unicode-bidi: isolate spans. // We'll handle ordering the isolated characters in a second pass. - if (inIsolate()) + if (inIsolate() || dirCurrent == U_FIRST_STRONG_ISOLATE || dirCurrent == U_LEFT_TO_RIGHT_ISOLATE || dirCurrent == U_RIGHT_TO_LEFT_ISOLATE || dirCurrent == U_POP_DIRECTIONAL_ISOLATE) dirCurrent = U_OTHER_NEUTRAL; ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd()); @@ -955,6 +954,17 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, Vis endOfLine = Iterator(); } -} // namespace WebCore +template<typename Iterator, typename Run, typename DerivedClass> +void BidiResolverBase<Iterator, Run, DerivedClass>::setWhitespaceCollapsingTransitionForIsolatedRun(Run& run, size_t transition) +{ + ASSERT(!m_whitespaceCollapsingTransitionForIsolatedRun.contains(&run)); + m_whitespaceCollapsingTransitionForIsolatedRun.add(&run, transition); +} -#endif // BidiResolver_h +template<typename Iterator, typename Run, typename DerivedClass> +unsigned BidiResolverBase<Iterator, Run, DerivedClass>::whitespaceCollapsingTransitionForIsolatedRun(Run& run) +{ + return m_whitespaceCollapsingTransitionForIsolatedRun.take(&run); +} + +} // namespace WebCore diff --git a/Source/WebCore/platform/text/BidiRunList.h b/Source/WebCore/platform/text/BidiRunList.h index fdbcc5681..66838a252 100644 --- a/Source/WebCore/platform/text/BidiRunList.h +++ b/Source/WebCore/platform/text/BidiRunList.h @@ -32,9 +32,8 @@ class BidiRunList { WTF_MAKE_NONCOPYABLE(BidiRunList); public: BidiRunList() - : m_firstRun(0) - , m_lastRun(0) - , m_logicallyLastRun(0) + : m_lastRun(nullptr) + , m_logicallyLastRun(nullptr) , m_runCount(0) { } @@ -42,18 +41,18 @@ public: // FIXME: Once BidiResolver no longer owns the BidiRunList, // then ~BidiRunList should call deleteRuns() automatically. - Run* firstRun() const { return m_firstRun; } + Run* firstRun() const { return m_firstRun.get(); } Run* lastRun() const { return m_lastRun; } Run* logicallyLastRun() const { return m_logicallyLastRun; } unsigned runCount() const { return m_runCount; } - void addRun(Run*); - void prependRun(Run*); + void appendRun(std::unique_ptr<Run>&&); + void prependRun(std::unique_ptr<Run>&&); void moveRunToEnd(Run*); void moveRunToBeginning(Run*); - void deleteRuns(); + void clear(); void reverseRuns(unsigned start, unsigned end); void reorderRunsFromLevels(); @@ -62,35 +61,38 @@ public: void replaceRunWithRuns(Run* toReplace, BidiRunList<Run>& newRuns); private: - void clearWithoutDestroyingRuns(); - Run* m_firstRun; + // The runs form a singly-linked-list, where the links (Run::m_next) imply ownership (and are of type std::unique_ptr). + // The raw pointers below point into the singly-linked-list. + std::unique_ptr<Run> m_firstRun; // The head of the list Run* m_lastRun; Run* m_logicallyLastRun; unsigned m_runCount; }; template <class Run> -inline void BidiRunList<Run>::addRun(Run* run) +inline void BidiRunList<Run>::appendRun(std::unique_ptr<Run>&& run) { - if (!m_firstRun) - m_firstRun = run; - else - m_lastRun->m_next = run; - m_lastRun = run; + if (!m_firstRun) { + m_firstRun = WTFMove(run); + m_lastRun = m_firstRun.get(); + } else { + m_lastRun->setNext(WTFMove(run)); + m_lastRun = m_lastRun->next(); + } m_runCount++; } template <class Run> -inline void BidiRunList<Run>::prependRun(Run* run) +inline void BidiRunList<Run>::prependRun(std::unique_ptr<Run>&& run) { - ASSERT(!run->m_next); + ASSERT(!run->next()); if (!m_lastRun) - m_lastRun = run; + m_lastRun = run.get(); else - run->m_next = m_firstRun; - m_firstRun = run; + run->setNext(WTFMove(m_firstRun)); + m_firstRun = WTFMove(run); m_runCount++; } @@ -99,23 +101,25 @@ inline void BidiRunList<Run>::moveRunToEnd(Run* run) { ASSERT(m_firstRun); ASSERT(m_lastRun); - ASSERT(run->m_next); + ASSERT(run->next()); - Run* current = 0; - Run* next = m_firstRun; - while (next != run) { - current = next; - next = current->next(); + Run* previous = nullptr; + Run* current = m_firstRun.get(); + while (current != run) { + previous = current; + current = previous->next(); } - if (!current) - m_firstRun = run->next(); - else - current->m_next = run->m_next; - - run->m_next = 0; - m_lastRun->m_next = run; - m_lastRun = run; + if (!previous) { + ASSERT(m_firstRun.get() == run); + std::unique_ptr<Run> originalFirstRun = WTFMove(m_firstRun); + m_firstRun = originalFirstRun->takeNext(); + m_lastRun->setNext(WTFMove(originalFirstRun)); + } else { + std::unique_ptr<Run> target = previous->takeNext(); + previous->setNext(current->takeNext()); + m_lastRun->setNext(WTFMove(target)); + } } template <class Run> @@ -123,21 +127,22 @@ inline void BidiRunList<Run>::moveRunToBeginning(Run* run) { ASSERT(m_firstRun); ASSERT(m_lastRun); - ASSERT(run != m_firstRun); + ASSERT(run != m_firstRun.get()); - Run* current = m_firstRun; - Run* next = current->next(); - while (next != run) { - current = next; - next = current->next(); + Run* previous = m_firstRun.get(); + Run* current = previous->next(); + while (current != run) { + previous = current; + current = previous->next(); } - current->m_next = run->m_next; + std::unique_ptr<Run> target = previous->takeNext(); + previous->setNext(run->takeNext()); if (run == m_lastRun) - m_lastRun = current; + m_lastRun = previous; - run->m_next = m_firstRun; - m_firstRun = run; + target->setNext(WTFMove(m_firstRun)); + m_firstRun = WTFMove(target); } template <class Run> @@ -147,53 +152,39 @@ void BidiRunList<Run>::replaceRunWithRuns(Run* toReplace, BidiRunList<Run>& newR ASSERT(m_firstRun); ASSERT(toReplace); - if (m_firstRun == toReplace) - m_firstRun = newRuns.firstRun(); - else { - // Find the run just before "toReplace" in the list of runs. - Run* previousRun = m_firstRun; - while (previousRun->next() != toReplace) - previousRun = previousRun->next(); - ASSERT(previousRun); - previousRun->setNext(newRuns.firstRun()); - } - - newRuns.lastRun()->setNext(toReplace->next()); + m_runCount += newRuns.runCount() - 1; // We are adding the new runs and removing toReplace. - // Fix up any of other pointers which may now be stale. + // Fix up any pointers which may end up stale. if (m_lastRun == toReplace) m_lastRun = newRuns.lastRun(); if (m_logicallyLastRun == toReplace) m_logicallyLastRun = newRuns.logicallyLastRun(); - m_runCount += newRuns.runCount() - 1; // We added the new runs and removed toReplace. - delete toReplace; - newRuns.clearWithoutDestroyingRuns(); -} + if (m_firstRun.get() == toReplace) { + newRuns.m_lastRun->setNext(m_firstRun->takeNext()); + m_firstRun = WTFMove(newRuns.m_firstRun); + } else { + // Find the run just before "toReplace" in the list of runs. + Run* previousRun = m_firstRun.get(); + while (previousRun->next() != toReplace) + previousRun = previousRun->next(); + ASSERT(previousRun); -template <class Run> -void BidiRunList<Run>::clearWithoutDestroyingRuns() -{ - m_firstRun = 0; - m_lastRun = 0; - m_logicallyLastRun = 0; - m_runCount = 0; + std::unique_ptr<Run> target = previousRun->takeNext(); + previousRun->setNext(WTFMove(newRuns.m_firstRun)); + newRuns.m_lastRun->setNext(target->takeNext()); + } + + newRuns.clear(); } template <class Run> -void BidiRunList<Run>::deleteRuns() +void BidiRunList<Run>::clear() { - if (!m_firstRun) - return; - - Run* curr = m_firstRun; - while (curr) { - Run* s = curr->next(); - delete curr; - curr = s; - } - - clearWithoutDestroyingRuns(); + m_firstRun = nullptr; + m_lastRun = nullptr; + m_logicallyLastRun = nullptr; + m_runCount = 0; } template <class Run> @@ -206,44 +197,35 @@ void BidiRunList<Run>::reverseRuns(unsigned start, unsigned end) // Get the item before the start of the runs to reverse and put it in // |beforeStart|. |curr| should point to the first run to reverse. - Run* curr = m_firstRun; - Run* beforeStart = 0; + Run* curr = m_firstRun.get(); + Run* beforeStart = nullptr; unsigned i = 0; - while (i < start) { - i++; + for (; i < start; ++i) { beforeStart = curr; curr = curr->next(); } - Run* startRun = curr; - while (i < end) { - i++; + + for (; i < end; ++i) curr = curr->next(); - } - Run* endRun = curr; - Run* afterEnd = curr->next(); - - i = start; - curr = startRun; - Run* newNext = afterEnd; - while (i <= end) { - // Do the reversal. - Run* next = curr->next(); - curr->m_next = newNext; - newNext = curr; - curr = next; - i++; + + if (!curr->next()) + m_lastRun = startRun; + + // Standard "sliding window" of 3 pointers + std::unique_ptr<Run> previous = curr->takeNext(); + std::unique_ptr<Run> current = beforeStart ? beforeStart->takeNext() : WTFMove(m_firstRun); + while (current) { + std::unique_ptr<Run> next = current->takeNext(); + current->setNext(WTFMove(previous)); + previous = WTFMove(current); + current = WTFMove(next); } - // Now hook up beforeStart and afterEnd to the startRun and endRun. if (beforeStart) - beforeStart->m_next = endRun; + beforeStart->setNext(WTFMove(previous)); else - m_firstRun = endRun; - - startRun->m_next = afterEnd; - if (!afterEnd) - m_lastRun = startRun; + m_firstRun = WTFMove(previous); } } // namespace WebCore diff --git a/Source/WebCore/platform/text/CharacterProperties.h b/Source/WebCore/platform/text/CharacterProperties.h new file mode 100644 index 000000000..5ba272b17 --- /dev/null +++ b/Source/WebCore/platform/text/CharacterProperties.h @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2015 Apple, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +namespace WebCore { + +static inline bool isEmojiGroupCandidate(UChar32 character) +{ + return character == 0x2640 + || character == 0x2642 + || character == 0x26F9 + || (character >= 0x2695 && character <= 0x2696) + || character == 0x2708 + || character == 0x2764 + || character == 0x1F308 + || character == 0x1F33E + || character == 0x1F373 + || character == 0x1F393 + || character == 0x1F3A4 + || character == 0x1F3A8 + || (character >= 0x1F3C2 && character <= 0x1F3C4) + || character == 0x1F3C7 + || (character >= 0x1F3CA && character <= 0x1F3CC) + || character == 0x1F3EB + || character == 0x1F3ED + || character == 0x1F3F3 + || character == 0x1F441 + || (character >= 0x1F466 && character <= 0x1F469) + || (character >= 0x1F46E && character <= 0x1F46F) + || character == 0x1F471 + || character == 0x1F473 + || character == 0x1F477 + || (character >= 0x1F481 && character <= 0x1F482) + || (character >= 0x1F486 && character <= 0x1F487) + || character == 0x1F48B + || (character >= 0x1F4BB && character <= 0x1F4BC) + || character == 0x1F527 + || character == 0x1F52C + || (character >= 0x1F574 && character <= 0x1F575) + || character == 0x1F57A + || character == 0x1F5E8 + || (character >= 0x1F645 && character <= 0x1F647) + || character == 0x1F64B + || (character >= 0x1F64D && character <= 0x1F64E) + || character == 0x1F680 + || character == 0x1F692 + || character == 0x1F6A3 + || (character >= 0x1F6B4 && character <= 0x1F6B6) + || character == 0x1F6CC + || (character >= 0x1F919 && character <= 0x1F91E) + || character == 0x1F926 + || character == 0x1F930 + || (character >= 0x1F933 && character <= 0x1F939) + || (character >= 0x1F93C && character <= 0x1F93E); +} + +static inline bool isEmojiFitzpatrickModifier(UChar32 character) +{ + // U+1F3FB - EMOJI MODIFIER FITZPATRICK TYPE-1-2 + // U+1F3FC - EMOJI MODIFIER FITZPATRICK TYPE-3 + // U+1F3FD - EMOJI MODIFIER FITZPATRICK TYPE-4 + // U+1F3FE - EMOJI MODIFIER FITZPATRICK TYPE-5 + // U+1F3FF - EMOJI MODIFIER FITZPATRICK TYPE-6 + + return character >= 0x1F3FB && character <= 0x1F3FF; +} + +inline bool isVariationSelector(UChar32 character) +{ + return character >= 0xFE00 && character <= 0xFE0F; +} + +} diff --git a/Source/WebCore/platform/text/DateTimeFormat.cpp b/Source/WebCore/platform/text/DateTimeFormat.cpp index bcbe1d578..c75d33d6f 100644 --- a/Source/WebCore/platform/text/DateTimeFormat.cpp +++ b/Source/WebCore/platform/text/DateTimeFormat.cpp @@ -257,21 +257,21 @@ void DateTimeFormat::quoteAndAppendLiteral(const String& literal, StringBuilder& } if (literal.find('\'') == notFound) { - buffer.append("'"); + buffer.append('\''); buffer.append(literal); - buffer.append("'"); + buffer.append('\''); return; } for (unsigned i = 0; i < literal.length(); ++i) { if (literal[i] == '\'') - buffer.append("''"); + buffer.appendLiteral("''"); else { String escaped = literal.substring(i); - escaped.replace(ASCIILiteral("'"), ASCIILiteral("''")); - buffer.append("'"); + escaped.replace('\'', "''"); + buffer.append('\''); buffer.append(escaped); - buffer.append("'"); + buffer.append('\''); return; } } diff --git a/Source/WebCore/platform/text/DecodeEscapeSequences.h b/Source/WebCore/platform/text/DecodeEscapeSequences.h index 2fa6e4658..957d3ef1b 100644 --- a/Source/WebCore/platform/text/DecodeEscapeSequences.h +++ b/Source/WebCore/platform/text/DecodeEscapeSequences.h @@ -40,8 +40,8 @@ namespace WebCore { // See <http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations>. struct Unicode16BitEscapeSequence { enum { sequenceSize = 6 }; // e.g. %u26C4 - static size_t findInString(const String& string, size_t startPosition) { return string.find("%u", startPosition); } - static size_t findEndOfRun(const String& string, size_t startPosition, size_t endPosition) + static size_t findInString(StringView string, size_t startPosition) { return string.find(StringView("%u"), startPosition); } + static size_t findEndOfRun(StringView string, size_t startPosition, size_t endPosition) { size_t runEnd = startPosition; while (endPosition - runEnd >= sequenceSize && string[runEnd] == '%' && string[runEnd + 1] == 'u' @@ -51,19 +51,19 @@ struct Unicode16BitEscapeSequence { } return runEnd; } - static String decodeRun(const UChar* run, size_t runLength, const TextEncoding&) + static String decodeRun(StringView run, const TextEncoding&) { // Each %u-escape sequence represents a UTF-16 code unit. // See <http://www.w3.org/International/iri-edit/draft-duerst-iri.html#anchor29>. // For 16-bit escape sequences, we know that findEndOfRun() has given us a contiguous run of sequences // without any intervening characters, so decode the run without additional checks. - size_t numberOfSequences = runLength / sequenceSize; + auto numberOfSequences = run.length() / sequenceSize; StringBuilder builder; builder.reserveCapacity(numberOfSequences); while (numberOfSequences--) { UChar codeUnit = (toASCIIHexValue(run[2]) << 12) | (toASCIIHexValue(run[3]) << 8) | (toASCIIHexValue(run[4]) << 4) | toASCIIHexValue(run[5]); builder.append(codeUnit); - run += sequenceSize; + run = run.substring(sequenceSize); } return builder.toString(); } @@ -71,8 +71,8 @@ struct Unicode16BitEscapeSequence { struct URLEscapeSequence { enum { sequenceSize = 3 }; // e.g. %41 - static size_t findInString(const String& string, size_t startPosition) { return string.find('%', startPosition); } - static size_t findEndOfRun(const String& string, size_t startPosition, size_t endPosition) + static size_t findInString(StringView string, size_t startPosition) { return string.find('%', startPosition); } + static size_t findEndOfRun(StringView string, size_t startPosition, size_t endPosition) { // Make the simplifying assumption that supported encodings may have up to two unescaped characters // in the range 0x40 - 0x7F as the trailing bytes of their sequences which need to be passed into the @@ -96,30 +96,39 @@ struct URLEscapeSequence { } return runEnd; } - static String decodeRun(const UChar* run, size_t runLength, const TextEncoding& encoding) + + static Vector<char, 512> decodeRun(StringView run) { // For URL escape sequences, we know that findEndOfRun() has given us a run where every %-sign introduces // a valid escape sequence, but there may be characters between the sequences. Vector<char, 512> buffer; - buffer.resize(runLength); // Unescaping hex sequences only makes the length smaller. + buffer.resize(run.length()); // Unescaping hex sequences only makes the length smaller. char* p = buffer.data(); - const UChar* runEnd = run + runLength; - while (run < runEnd) { + while (!run.isEmpty()) { if (run[0] == '%') { *p++ = (toASCIIHexValue(run[1]) << 4) | toASCIIHexValue(run[2]); - run += sequenceSize; + run = run.substring(sequenceSize); } else { *p++ = run[0]; - run += 1; + run = run.substring(1); } } ASSERT(buffer.size() >= static_cast<size_t>(p - buffer.data())); // Prove buffer not overrun. - return (encoding.isValid() ? encoding : UTF8Encoding()).decode(buffer.data(), p - buffer.data()); + buffer.shrink(p - buffer.data()); + return buffer; + } + + static String decodeRun(StringView run, const TextEncoding& encoding) + { + auto buffer = decodeRun(run); + if (!encoding.isValid()) + return UTF8Encoding().decode(buffer.data(), buffer.size()); + return encoding.decode(buffer.data(), buffer.size()); } }; template<typename EscapeSequence> -String decodeEscapeSequences(const String& string, const TextEncoding& encoding) +String decodeEscapeSequences(StringView string, const TextEncoding& encoding) { StringBuilder result; size_t length = string.length(); @@ -134,18 +143,53 @@ String decodeEscapeSequences(const String& string, const TextEncoding& encoding) continue; } - String decoded = EscapeSequence::decodeRun(string.deprecatedCharacters() + encodedRunPosition, encodedRunEnd - encodedRunPosition, encoding); + String decoded = EscapeSequence::decodeRun(string.substring(encodedRunPosition, encodedRunEnd - encodedRunPosition), encoding); if (decoded.isEmpty()) continue; - result.append(string, decodedPosition, encodedRunPosition - decodedPosition); + result.append(string.substring(decodedPosition, encodedRunPosition - decodedPosition)); result.append(decoded); decodedPosition = encodedRunEnd; } - result.append(string, decodedPosition, length - decodedPosition); + result.append(string.substring(decodedPosition, length - decodedPosition)); return result.toString(); } +inline Vector<char> decodeURLEscapeSequencesAsData(StringView string, const TextEncoding& encoding) +{ + ASSERT(encoding.isValid()); + + Vector<char> result; + size_t decodedPosition = 0; + size_t searchPosition = 0; + while (true) { + size_t encodedRunPosition = URLEscapeSequence::findInString(string, searchPosition); + size_t encodedRunEnd = 0; + if (encodedRunPosition != notFound) { + encodedRunEnd = URLEscapeSequence::findEndOfRun(string, encodedRunPosition, string.length()); + searchPosition = encodedRunEnd; + if (encodedRunEnd == encodedRunPosition) { + ++searchPosition; + continue; + } + } + // Strings are encoded as requested. + auto stringFragment = string.substring(decodedPosition, encodedRunPosition - decodedPosition); + auto encodedStringFragment = encoding.encode(stringFragment, URLEncodedEntitiesForUnencodables); + result.append(encodedStringFragment.data(), encodedStringFragment.length()); + + if (encodedRunPosition == notFound) + return result; + + // Bytes go through as-is. + auto decodedEscapeSequence = URLEscapeSequence::decodeRun(string.substring(encodedRunPosition, encodedRunEnd - encodedRunPosition)); + ASSERT(!decodedEscapeSequence.isEmpty()); + result.appendVector(decodedEscapeSequence); + + decodedPosition = encodedRunEnd; + } +} + } // namespace WebCore #endif // DecodeEscapeSequences_h diff --git a/Source/WebCore/platform/text/Hyphenation.cpp b/Source/WebCore/platform/text/Hyphenation.cpp index 89f64386a..ec07637e1 100644 --- a/Source/WebCore/platform/text/Hyphenation.cpp +++ b/Source/WebCore/platform/text/Hyphenation.cpp @@ -26,7 +26,10 @@ #include "config.h" #include "Hyphenation.h" +#if !USE(LIBHYPHEN) + #include "NotImplemented.h" +#include <wtf/text/StringView.h> namespace WebCore { @@ -35,10 +38,12 @@ bool canHyphenate(const AtomicString& /* localeIdentifier */) return false; } -size_t lastHyphenLocation(const UChar* /* characters */, size_t /* length */, size_t /* beforeIndex */, const AtomicString& /* localeIdentifier */) +size_t lastHyphenLocation(StringView, size_t /* beforeIndex */, const AtomicString& /* localeIdentifier */) { ASSERT_NOT_REACHED(); return 0; } } // namespace WebCore + +#endif // !USE(LIBHYPHEN) diff --git a/Source/WebCore/platform/text/Hyphenation.h b/Source/WebCore/platform/text/Hyphenation.h index a99bff0cf..27f430591 100644 --- a/Source/WebCore/platform/text/Hyphenation.h +++ b/Source/WebCore/platform/text/Hyphenation.h @@ -26,13 +26,20 @@ #ifndef Hyphenation_h #define Hyphenation_h +#include <unicode/utypes.h> #include <wtf/Forward.h> -#include <wtf/unicode/Unicode.h> namespace WebCore { +inline static bool enoughWidthForHyphenation(float availableWidth, float fontPixelSize) +{ + // If the maximum width available for the prefix before the hyphen is small, then it is very unlikely + // that an hyphenation opportunity exists, so do not bother to look for it. + return availableWidth > fontPixelSize * 5 / 4; + +} bool canHyphenate(const AtomicString& localeIdentifier); -size_t lastHyphenLocation(const UChar*, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier); +size_t lastHyphenLocation(StringView, size_t beforeIndex, const AtomicString& localeIdentifier); } // namespace WebCore diff --git a/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h b/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h deleted file mode 100644 index d2eb26d63..000000000 --- a/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2011 Apple Inc. All Rights Reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef LineBreakIteratorPoolICU_h -#define LineBreakIteratorPoolICU_h - -#include "TextBreakIteratorInternalICU.h" -#include <unicode/ubrk.h> -#include <wtf/Assertions.h> -#include <wtf/HashMap.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/ThreadSpecific.h> -#include <wtf/text/AtomicString.h> -#include <wtf/text/CString.h> - -namespace WebCore { - -class LineBreakIteratorPool { - WTF_MAKE_NONCOPYABLE(LineBreakIteratorPool); -public: - static LineBreakIteratorPool& sharedPool() - { - static WTF::ThreadSpecific<LineBreakIteratorPool>* pool = new WTF::ThreadSpecific<LineBreakIteratorPool>; - return **pool; - } - - static PassOwnPtr<LineBreakIteratorPool> create() { return adoptPtr(new LineBreakIteratorPool); } - - UBreakIterator* take(const AtomicString& locale) - { - UBreakIterator* iterator = 0; - for (size_t i = 0; i < m_pool.size(); ++i) { - if (m_pool[i].first == locale) { - iterator = m_pool[i].second; - m_pool.remove(i); - break; - } - } - - if (!iterator) { - UErrorCode openStatus = U_ZERO_ERROR; - bool localeIsEmpty = locale.isEmpty(); - iterator = ubrk_open(UBRK_LINE, localeIsEmpty ? currentTextBreakLocaleID() : locale.string().utf8().data(), 0, 0, &openStatus); - // locale comes from a web page and it can be invalid, leading ICU - // to fail, in which case we fall back to the default locale. - if (!localeIsEmpty && U_FAILURE(openStatus)) { - openStatus = U_ZERO_ERROR; - iterator = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), 0, 0, &openStatus); - } - - if (U_FAILURE(openStatus)) { - LOG_ERROR("ubrk_open failed with status %d", openStatus); - return 0; - } - } - - ASSERT(!m_vendedIterators.contains(iterator)); - m_vendedIterators.set(iterator, locale); - return iterator; - } - - void put(UBreakIterator* iterator) - { - ASSERT_ARG(iterator, m_vendedIterators.contains(iterator)); - - if (m_pool.size() == capacity) { - ubrk_close(m_pool[0].second); - m_pool.remove(0); - } - - m_pool.append(Entry(m_vendedIterators.take(iterator), iterator)); - } - -private: - LineBreakIteratorPool() { } - - static const size_t capacity = 4; - - typedef std::pair<AtomicString, UBreakIterator*> Entry; - typedef Vector<Entry, capacity> Pool; - Pool m_pool; - HashMap<UBreakIterator*, AtomicString> m_vendedIterators; - - friend WTF::ThreadSpecific<LineBreakIteratorPool>::operator LineBreakIteratorPool*(); -}; - -} - -#endif diff --git a/Source/WebCore/platform/text/LineEnding.cpp b/Source/WebCore/platform/text/LineEnding.cpp index ae24fb097..3e0ea4df6 100644 --- a/Source/WebCore/platform/text/LineEnding.cpp +++ b/Source/WebCore/platform/text/LineEnding.cpp @@ -39,7 +39,7 @@ namespace { class OutputBuffer { public: - virtual char* allocate(size_t size) = 0; + virtual uint8_t* allocate(size_t size) = 0; virtual void copy(const CString&) = 0; virtual ~OutputBuffer() { } }; @@ -52,14 +52,14 @@ public: } virtual ~CStringBuffer() { } - virtual char* allocate(size_t size) + uint8_t* allocate(size_t size) override { char* ptr; m_buffer = CString::newUninitialized(size, ptr); - return ptr; + return reinterpret_cast<uint8_t*>(ptr); } - virtual void copy(const CString& source) + void copy(const CString& source) override { m_buffer = source; } @@ -70,29 +70,31 @@ private: CString m_buffer; }; +#if OS(WINDOWS) class VectorCharAppendBuffer : public OutputBuffer { public: - VectorCharAppendBuffer(Vector<char>& buffer) + VectorCharAppendBuffer(Vector<uint8_t>& buffer) : m_buffer(buffer) { } virtual ~VectorCharAppendBuffer() { } - virtual char* allocate(size_t size) + uint8_t* allocate(size_t size) override { size_t oldSize = m_buffer.size(); m_buffer.grow(oldSize + size); return m_buffer.data() + oldSize; } - virtual void copy(const CString& source) + void copy(const CString& source) override { m_buffer.append(source.data(), source.length()); } private: - Vector<char>& m_buffer; + Vector<uint8_t>& m_buffer; }; +#endif void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffer) { @@ -126,7 +128,7 @@ void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffe } p = from.data(); - char* q = buffer.allocate(newLen); + uint8_t* q = buffer.allocate(newLen); // Make a copy of the string. while (p < from.data() + from.length()) { @@ -153,10 +155,8 @@ void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffe namespace WebCore { -void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR); - // Normalize all line-endings to CR or LF. -void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR) +static void normalizeToCROrLF(const CString& from, Vector<uint8_t>& result, bool toCR) { // Compute the new length. size_t newLen = 0; @@ -181,7 +181,7 @@ void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR) p = from.data(); size_t oldResultSize = result.size(); result.grow(oldResultSize + newLen); - char* q = result.data() + oldResultSize; + uint8_t* q = result.data() + oldResultSize; // If no need to fix the string, just copy the string over. if (!needFix) { @@ -214,23 +214,13 @@ CString normalizeLineEndingsToCRLF(const CString& from) return buffer.buffer(); } -void normalizeLineEndingsToCR(const CString& from, Vector<char>& result) -{ - normalizeToCROrLF(from, result, true); -} - -void normalizeLineEndingsToLF(const CString& from, Vector<char>& result) -{ - normalizeToCROrLF(from, result, false); -} - -void normalizeLineEndingsToNative(const CString& from, Vector<char>& result) +void normalizeLineEndingsToNative(const CString& from, Vector<uint8_t>& result) { #if OS(WINDOWS) VectorCharAppendBuffer buffer(result); internalNormalizeLineEndingsToCRLF(from, buffer); #else - normalizeLineEndingsToLF(from, result); + normalizeToCROrLF(from, result, false); #endif } diff --git a/Source/WebCore/platform/text/LineEnding.h b/Source/WebCore/platform/text/LineEnding.h index 4306ce8a0..a41cb8099 100644 --- a/Source/WebCore/platform/text/LineEnding.h +++ b/Source/WebCore/platform/text/LineEnding.h @@ -40,15 +40,9 @@ namespace WebCore { // Normalize all line-endings in the given string to CRLF. CString normalizeLineEndingsToCRLF(const CString& from); -// Normalize all line-endings in the given string to CR and append the result to the given buffer. -void normalizeLineEndingsToCR(const CString& from, Vector<char>& result); - -// Normalize all line-endings in the given string to LF and append the result to the given buffer. -void normalizeLineEndingsToLF(const CString& from, Vector<char>& result); - // Normalize all line-endings in the given string to the native line-endings and append the result to the given buffer. // (Normalize to CRLF on Windows and normalize to LF on all other platforms.) -void normalizeLineEndingsToNative(const CString& from, Vector<char>& result); +void normalizeLineEndingsToNative(const CString& from, Vector<uint8_t>& result); } // namespace WebCore diff --git a/Source/WebCore/platform/text/LocaleICU.cpp b/Source/WebCore/platform/text/LocaleICU.cpp new file mode 100644 index 000000000..88151ea78 --- /dev/null +++ b/Source/WebCore/platform/text/LocaleICU.cpp @@ -0,0 +1,376 @@ +/* + * Copyright (C) 2011,2012 Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "LocaleICU.h" + +#include "LocalizedStrings.h" +#include <limits> +#include <unicode/udatpg.h> +#include <unicode/uloc.h> +#include <wtf/DateMath.h> +#include <wtf/text/StringBuilder.h> + +using namespace icu; +using namespace std; + +namespace WebCore { + +std::unique_ptr<Locale> Locale::create(const AtomicString& locale) +{ + return std::make_unique<LocaleICU>(locale.string().utf8().data()); +} + +LocaleICU::LocaleICU(const char* locale) + : m_locale(locale) +{ +} + +LocaleICU::~LocaleICU() +{ +#if !UCONFIG_NO_FORMATTING + unum_close(m_numberFormat); +#endif +#if ENABLE(DATE_AND_TIME_INPUT_TYPES) + udat_close(m_shortDateFormat); + udat_close(m_mediumTimeFormat); + udat_close(m_shortTimeFormat); +#endif +} + +#if !UCONFIG_NO_FORMATTING +String LocaleICU::decimalSymbol(UNumberFormatSymbol symbol) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t bufferLength = unum_getSymbol(m_numberFormat, symbol, 0, 0, &status); + ASSERT(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR); + if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) + return String(); + Vector<UChar> buffer(bufferLength); + status = U_ZERO_ERROR; + unum_getSymbol(m_numberFormat, symbol, buffer.data(), bufferLength, &status); + if (U_FAILURE(status)) + return String(); + return String::adopt(WTFMove(buffer)); +} + +String LocaleICU::decimalTextAttribute(UNumberFormatTextAttribute tag) +{ + UErrorCode status = U_ZERO_ERROR; + int32_t bufferLength = unum_getTextAttribute(m_numberFormat, tag, 0, 0, &status); + ASSERT(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR); + if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) + return String(); + Vector<UChar> buffer(bufferLength); + status = U_ZERO_ERROR; + unum_getTextAttribute(m_numberFormat, tag, buffer.data(), bufferLength, &status); + ASSERT(U_SUCCESS(status)); + if (U_FAILURE(status)) + return String(); + return String::adopt(WTFMove(buffer)); +} +#endif + +void LocaleICU::initializeLocaleData() +{ +#if !UCONFIG_NO_FORMATTING + if (m_didCreateDecimalFormat) + return; + m_didCreateDecimalFormat = true; + UErrorCode status = U_ZERO_ERROR; + m_numberFormat = unum_open(UNUM_DECIMAL, 0, 0, m_locale.data(), 0, &status); + if (!U_SUCCESS(status)) + return; + + Vector<String, DecimalSymbolsSize> symbols; + symbols.append(decimalSymbol(UNUM_ZERO_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_ONE_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_TWO_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_THREE_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_FOUR_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_FIVE_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_SIX_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_SEVEN_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_EIGHT_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_NINE_DIGIT_SYMBOL)); + symbols.append(decimalSymbol(UNUM_DECIMAL_SEPARATOR_SYMBOL)); + symbols.append(decimalSymbol(UNUM_GROUPING_SEPARATOR_SYMBOL)); + ASSERT(symbols.size() == DecimalSymbolsSize); + setLocaleData(symbols, decimalTextAttribute(UNUM_POSITIVE_PREFIX), decimalTextAttribute(UNUM_POSITIVE_SUFFIX), decimalTextAttribute(UNUM_NEGATIVE_PREFIX), decimalTextAttribute(UNUM_NEGATIVE_SUFFIX)); +#endif +} + +#if ENABLE(DATE_AND_TIME_INPUT_TYPES) +bool LocaleICU::initializeShortDateFormat() +{ + if (m_didCreateShortDateFormat) + return m_shortDateFormat; + m_shortDateFormat = openDateFormat(UDAT_NONE, UDAT_SHORT); + m_didCreateShortDateFormat = true; + return m_shortDateFormat; +} + +UDateFormat* LocaleICU::openDateFormat(UDateFormatStyle timeStyle, UDateFormatStyle dateStyle) const +{ + const UChar gmtTimezone[3] = {'G', 'M', 'T'}; + UErrorCode status = U_ZERO_ERROR; + return udat_open(timeStyle, dateStyle, m_locale.data(), gmtTimezone, WTF_ARRAY_LENGTH(gmtTimezone), 0, -1, &status); +} + +static String getDateFormatPattern(const UDateFormat* dateFormat) +{ + if (!dateFormat) + return emptyString(); + + UErrorCode status = U_ZERO_ERROR; + int32_t length = udat_toPattern(dateFormat, TRUE, 0, 0, &status); + if (status != U_BUFFER_OVERFLOW_ERROR || !length) + return emptyString(); + Vector<UChar> buffer(length); + status = U_ZERO_ERROR; + udat_toPattern(dateFormat, TRUE, buffer.data(), length, &status); + if (U_FAILURE(status)) + return emptyString(); + return String::adopt(WTFMove(buffer)); +} + +std::unique_ptr<Vector<String>> LocaleICU::createLabelVector(const UDateFormat* dateFormat, UDateFormatSymbolType type, int32_t startIndex, int32_t size) +{ + if (!dateFormat) + return std::make_unique<Vector<String>>(); + if (udat_countSymbols(dateFormat, type) != startIndex + size) + return std::make_unique<Vector<String>>(); + + auto labels = std::make_unique<Vector<String>>(); + labels->reserveCapacity(size); + for (int32_t i = 0; i < size; ++i) { + UErrorCode status = U_ZERO_ERROR; + int32_t length = udat_getSymbols(dateFormat, type, startIndex + i, 0, 0, &status); + if (status != U_BUFFER_OVERFLOW_ERROR) + return std::make_unique<Vector<String>>(); + Vector<UChar> buffer(length); + status = U_ZERO_ERROR; + udat_getSymbols(dateFormat, type, startIndex + i, buffer.data(), length, &status); + if (U_FAILURE(status)) + return std::make_unique<Vector<String>>(); + labels->append(String::adopt(WTFMove(buffer))); + } + return WTFMove(labels); +} + +static std::unique_ptr<Vector<String>> createFallbackMonthLabels() +{ + auto labels = std::make_unique<Vector<String>>(); + labels->reserveCapacity(WTF_ARRAY_LENGTH(WTF::monthFullName)); + for (unsigned i = 0; i < WTF_ARRAY_LENGTH(WTF::monthFullName); ++i) + labels->append(WTF::monthFullName[i]); + return WTFMove(labels); +} + +const Vector<String>& LocaleICU::monthLabels() +{ + if (m_monthLabels) + return *m_monthLabels; + if (initializeShortDateFormat()) { + m_monthLabels = createLabelVector(m_shortDateFormat, UDAT_MONTHS, UCAL_JANUARY, 12); + if (m_monthLabels) + return *m_monthLabels; + } + m_monthLabels = createFallbackMonthLabels(); + return *m_monthLabels; +} + +static std::unique_ptr<Vector<String>> createFallbackAMPMLabels() +{ + auto labels = std::make_unique<Vector<String>>(); + labels->reserveCapacity(2); + labels->append("AM"); + labels->append("PM"); + return WTFMove(labels); +} + +void LocaleICU::initializeDateTimeFormat() +{ + if (m_didCreateTimeFormat) + return; + + // We assume ICU medium time pattern and short time pattern are compatible + // with LDML, because ICU specific pattern character "V" doesn't appear + // in both medium and short time pattern. + m_mediumTimeFormat = openDateFormat(UDAT_MEDIUM, UDAT_NONE); + m_timeFormatWithSeconds = getDateFormatPattern(m_mediumTimeFormat); + + m_shortTimeFormat = openDateFormat(UDAT_SHORT, UDAT_NONE); + m_timeFormatWithoutSeconds = getDateFormatPattern(m_shortTimeFormat); + + UDateFormat* dateTimeFormatWithSeconds = openDateFormat(UDAT_MEDIUM, UDAT_SHORT); + m_dateTimeFormatWithSeconds = getDateFormatPattern(dateTimeFormatWithSeconds); + udat_close(dateTimeFormatWithSeconds); + + UDateFormat* dateTimeFormatWithoutSeconds = openDateFormat(UDAT_SHORT, UDAT_SHORT); + m_dateTimeFormatWithoutSeconds = getDateFormatPattern(dateTimeFormatWithoutSeconds); + udat_close(dateTimeFormatWithoutSeconds); + + auto timeAMPMLabels = createLabelVector(m_mediumTimeFormat, UDAT_AM_PMS, UCAL_AM, 2); + if (!timeAMPMLabels) + timeAMPMLabels = createFallbackAMPMLabels(); + m_timeAMPMLabels = *timeAMPMLabels; + + m_didCreateTimeFormat = true; +} + +String LocaleICU::dateFormat() +{ + if (!m_dateFormat.isNull()) + return m_dateFormat; + if (!initializeShortDateFormat()) + return ASCIILiteral("yyyy-MM-dd"); + m_dateFormat = getDateFormatPattern(m_shortDateFormat); + return m_dateFormat; +} + +static String getFormatForSkeleton(const char* locale, const UChar* skeleton, int32_t skeletonLength) +{ + String format = ASCIILiteral("yyyy-MM"); + UErrorCode status = U_ZERO_ERROR; + UDateTimePatternGenerator* patternGenerator = udatpg_open(locale, &status); + if (!patternGenerator) + return format; + status = U_ZERO_ERROR; + int32_t length = udatpg_getBestPattern(patternGenerator, skeleton, skeletonLength, 0, 0, &status); + if (status == U_BUFFER_OVERFLOW_ERROR && length) { + Vector<UChar> buffer(length); + status = U_ZERO_ERROR; + udatpg_getBestPattern(patternGenerator, skeleton, skeletonLength, buffer.data(), length, &status); + if (U_SUCCESS(status)) + format = String::adopt(WTFMove(buffer)); + } + udatpg_close(patternGenerator); + return format; +} + +String LocaleICU::monthFormat() +{ + if (!m_monthFormat.isNull()) + return m_monthFormat; + // Gets a format for "MMMM" because Windows API always provides formats for + // "MMMM" in some locales. + const UChar skeleton[] = { 'y', 'y', 'y', 'y', 'M', 'M', 'M', 'M' }; + m_monthFormat = getFormatForSkeleton(m_locale.data(), skeleton, WTF_ARRAY_LENGTH(skeleton)); + return m_monthFormat; +} + +String LocaleICU::shortMonthFormat() +{ + if (!m_shortMonthFormat.isNull()) + return m_shortMonthFormat; + const UChar skeleton[] = { 'y', 'y', 'y', 'y', 'M', 'M', 'M' }; + m_shortMonthFormat = getFormatForSkeleton(m_locale.data(), skeleton, WTF_ARRAY_LENGTH(skeleton)); + return m_shortMonthFormat; +} + +String LocaleICU::timeFormat() +{ + initializeDateTimeFormat(); + return m_timeFormatWithSeconds; +} + +String LocaleICU::shortTimeFormat() +{ + initializeDateTimeFormat(); + return m_timeFormatWithoutSeconds; +} + +String LocaleICU::dateTimeFormatWithSeconds() +{ + initializeDateTimeFormat(); + return m_dateTimeFormatWithSeconds; +} + +String LocaleICU::dateTimeFormatWithoutSeconds() +{ + initializeDateTimeFormat(); + return m_dateTimeFormatWithoutSeconds; +} + +const Vector<String>& LocaleICU::shortMonthLabels() +{ + if (!m_shortMonthLabels.isEmpty()) + return m_shortMonthLabels; + if (initializeShortDateFormat()) { + if (auto labels = createLabelVector(m_shortDateFormat, UDAT_SHORT_MONTHS, UCAL_JANUARY, 12)) { + m_shortMonthLabels = *labels; + return m_shortMonthLabels; + } + } + m_shortMonthLabels.reserveCapacity(WTF_ARRAY_LENGTH(WTF::monthName)); + for (unsigned i = 0; i < WTF_ARRAY_LENGTH(WTF::monthName); ++i) + m_shortMonthLabels.append(WTF::monthName[i]); + return m_shortMonthLabels; +} + +const Vector<String>& LocaleICU::standAloneMonthLabels() +{ + if (!m_standAloneMonthLabels.isEmpty()) + return m_standAloneMonthLabels; + if (initializeShortDateFormat()) { + if (auto labels = createLabelVector(m_shortDateFormat, UDAT_STANDALONE_MONTHS, UCAL_JANUARY, 12)) { + m_standAloneMonthLabels = *labels; + return m_standAloneMonthLabels; + } + } + m_standAloneMonthLabels = monthLabels(); + return m_standAloneMonthLabels; +} + +const Vector<String>& LocaleICU::shortStandAloneMonthLabels() +{ + if (!m_shortStandAloneMonthLabels.isEmpty()) + return m_shortStandAloneMonthLabels; + if (initializeShortDateFormat()) { + if (auto labels = createLabelVector(m_shortDateFormat, UDAT_STANDALONE_SHORT_MONTHS, UCAL_JANUARY, 12)) { + m_shortStandAloneMonthLabels = *labels; + return m_shortStandAloneMonthLabels; + } + } + m_shortStandAloneMonthLabels = shortMonthLabels(); + return m_shortStandAloneMonthLabels; +} + +const Vector<String>& LocaleICU::timeAMPMLabels() +{ + initializeDateTimeFormat(); + return m_timeAMPMLabels; +} + +#endif + +} // namespace WebCore + diff --git a/Source/WebCore/platform/text/LocaleICU.h b/Source/WebCore/platform/text/LocaleICU.h new file mode 100644 index 000000000..54b1fb3d0 --- /dev/null +++ b/Source/WebCore/platform/text/LocaleICU.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2012 Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LocaleICU_h +#define LocaleICU_h + +#include "DateComponents.h" +#include "PlatformLocale.h" +#include <unicode/udat.h> +#include <unicode/unum.h> +#include <wtf/Forward.h> +#include <wtf/text/CString.h> +#include <wtf/text/WTFString.h> + +namespace WebCore { + +// We should use this class only for LocalizedNumberICU.cpp, LocalizedDateICU.cpp, +// and LocalizedNumberICUTest.cpp. +class LocaleICU : public Locale { +public: + explicit LocaleICU(const char*); + virtual ~LocaleICU(); + +#if ENABLE(DATE_AND_TIME_INPUT_TYPES) + String dateFormat() override; + String monthFormat() override; + String shortMonthFormat() override; + String timeFormat() override; + String shortTimeFormat() override; + String dateTimeFormatWithSeconds() override; + String dateTimeFormatWithoutSeconds() override; + const Vector<String>& monthLabels() override; + const Vector<String>& shortMonthLabels() override; + const Vector<String>& standAloneMonthLabels() override; + const Vector<String>& shortStandAloneMonthLabels() override; + const Vector<String>& timeAMPMLabels() override; +#endif + +private: +#if !UCONFIG_NO_FORMATTING + String decimalSymbol(UNumberFormatSymbol); + String decimalTextAttribute(UNumberFormatTextAttribute); +#endif + void initializeLocaleData() override; + +#if ENABLE(DATE_AND_TIME_INPUT_TYPES) + bool initializeShortDateFormat(); + UDateFormat* openDateFormat(UDateFormatStyle timeStyle, UDateFormatStyle dateStyle) const; + + std::unique_ptr<Vector<String>> createLabelVector(const UDateFormat*, UDateFormatSymbolType, int32_t startIndex, int32_t size); + void initializeDateTimeFormat(); +#endif + + CString m_locale; + +#if !UCONFIG_NO_FORMATTING + UNumberFormat* m_numberFormat { nullptr }; + bool m_didCreateDecimalFormat { false }; +#endif + +#if ENABLE(DATE_AND_TIME_INPUT_TYPES) + std::unique_ptr<Vector<String>> m_monthLabels; + String m_dateFormat; + String m_monthFormat; + String m_shortMonthFormat; + String m_timeFormatWithSeconds; + String m_timeFormatWithoutSeconds; + String m_dateTimeFormatWithSeconds; + String m_dateTimeFormatWithoutSeconds; + UDateFormat* m_shortDateFormat { nullptr }; + UDateFormat* m_mediumTimeFormat { nullptr }; + UDateFormat* m_shortTimeFormat { nullptr }; + Vector<String> m_shortMonthLabels; + Vector<String> m_standAloneMonthLabels; + Vector<String> m_shortStandAloneMonthLabels; + Vector<String> m_timeAMPMLabels; + bool m_didCreateShortDateFormat { false }; + bool m_didCreateTimeFormat { false }; +#endif +}; + +} // namespace WebCore +#endif diff --git a/Source/WebCore/platform/text/LocaleNone.cpp b/Source/WebCore/platform/text/LocaleNone.cpp index 961833e42..3a212ab46 100644 --- a/Source/WebCore/platform/text/LocaleNone.cpp +++ b/Source/WebCore/platform/text/LocaleNone.cpp @@ -26,7 +26,6 @@ #include "config.h" #include "PlatformLocale.h" #include <wtf/DateMath.h> -#include <wtf/PassOwnPtr.h> namespace WebCore { @@ -35,20 +34,20 @@ public: virtual ~LocaleNone(); private: - virtual void initializeLocaleData() override final; + void initializeLocaleData() final; #if ENABLE(DATE_AND_TIME_INPUT_TYPES) - virtual String dateFormat() override; - virtual String monthFormat() override; - virtual String shortMonthFormat() override; - virtual String timeFormat() override; - virtual String shortTimeFormat() override; - virtual String dateTimeFormatWithSeconds() override; - virtual String dateTimeFormatWithoutSeconds() override; - virtual const Vector<String>& monthLabels() override; - virtual const Vector<String>& shortMonthLabels() override; - virtual const Vector<String>& standAloneMonthLabels() override; - virtual const Vector<String>& shortStandAloneMonthLabels() override; - virtual const Vector<String>& timeAMPMLabels() override; + String dateFormat() override; + String monthFormat() override; + String shortMonthFormat() override; + String timeFormat() override; + String shortTimeFormat() override; + String dateTimeFormatWithSeconds() override; + String dateTimeFormatWithoutSeconds() override; + const Vector<String>& monthLabels() override; + const Vector<String>& shortMonthLabels() override; + const Vector<String>& standAloneMonthLabels() override; + const Vector<String>& shortStandAloneMonthLabels() override; + const Vector<String>& timeAMPMLabels() override; Vector<String> m_timeAMPMLabels; Vector<String> m_shortMonthLabels; @@ -56,9 +55,9 @@ private: #endif }; -PassOwnPtr<Locale> Locale::create(const AtomicString&) +std::unique_ptr<Locale> Locale::create(const AtomicString&) { - return adoptPtr(new LocaleNone()); + return std::make_unique<LocaleNone>(); } LocaleNone::~LocaleNone() diff --git a/Source/WebCore/platform/text/LocaleToScriptMapping.h b/Source/WebCore/platform/text/LocaleToScriptMapping.h index d5c8c7603..73006d235 100644 --- a/Source/WebCore/platform/text/LocaleToScriptMapping.h +++ b/Source/WebCore/platform/text/LocaleToScriptMapping.h @@ -31,13 +31,13 @@ #ifndef LocaleToScriptMapping_h #define LocaleToScriptMapping_h +#include <unicode/uscript.h> #include <wtf/Forward.h> -#include <wtf/unicode/Unicode.h> namespace WebCore { UScriptCode localeToScriptCodeForFontSelection(const String&); -UScriptCode scriptNameToCode(const String&); +WEBCORE_EXPORT UScriptCode scriptNameToCode(const String&); } diff --git a/Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp b/Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp index 85d87b59f..461bf27f8 100644 --- a/Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp +++ b/Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp @@ -32,7 +32,7 @@ #include "LocaleToScriptMapping.h" #include <wtf/HashMap.h> -#include <wtf/HashSet.h> +#include <wtf/NeverDestroyed.h> #include <wtf/text/StringHash.h> namespace WebCore { @@ -156,21 +156,22 @@ static const ScriptNameCode scriptNameCodeList[] = { }; struct ScriptNameCodeMapHashTraits : public HashTraits<String> { - static const int minimumTableSize = WTF::HashTableCapacityForSize<sizeof(scriptNameCodeList) / sizeof(ScriptNameCode)>::value; + static const int minimumTableSize = WTF::HashTableCapacityForSize<WTF_ARRAY_LENGTH(scriptNameCodeList)>::value; }; -typedef HashMap<String, UScriptCode, DefaultHash<String>::Hash, ScriptNameCodeMapHashTraits> ScriptNameCodeMap; +typedef HashMap<String, UScriptCode, ASCIICaseInsensitiveHash, ScriptNameCodeMapHashTraits> ScriptNameCodeMap; UScriptCode scriptNameToCode(const String& scriptName) { - DEFINE_STATIC_LOCAL(ScriptNameCodeMap, scriptNameCodeMap, ()); - if (scriptNameCodeMap.isEmpty()) { - for (size_t i = 0; i < sizeof(scriptNameCodeList) / sizeof(ScriptNameCode); ++i) - scriptNameCodeMap.set(ASCIILiteral(scriptNameCodeList[i].name), scriptNameCodeList[i].code); - } + static NeverDestroyed<ScriptNameCodeMap> scriptNameCodeMap = []() { + ScriptNameCodeMap map; + for (auto& nameAndCode : scriptNameCodeList) + map.add(ASCIILiteral(nameAndCode.name), nameAndCode.code); + return map; + }(); - ScriptNameCodeMap::iterator it = scriptNameCodeMap.find(scriptName.lower()); - if (it != scriptNameCodeMap.end()) + auto it = scriptNameCodeMap.get().find(scriptName); + if (it != scriptNameCodeMap.get().end()) return it->value; return USCRIPT_INVALID_CODE; } @@ -375,38 +376,40 @@ static const LocaleScript localeScriptList[] = { { "yap", USCRIPT_LATIN }, { "yo", USCRIPT_LATIN }, { "za", USCRIPT_LATIN }, - { "zh", USCRIPT_SIMPLIFIED_HAN }, + { "zh", USCRIPT_HAN }, { "zh_hk", USCRIPT_TRADITIONAL_HAN }, { "zh_tw", USCRIPT_TRADITIONAL_HAN }, { "zu", USCRIPT_LATIN } }; struct LocaleScriptMapHashTraits : public HashTraits<String> { - static const int minimumTableSize = WTF::HashTableCapacityForSize<sizeof(localeScriptList) / sizeof(LocaleScript)>::value; + static const int minimumTableSize = WTF::HashTableCapacityForSize<WTF_ARRAY_LENGTH(localeScriptList)>::value; }; -typedef HashMap<String, UScriptCode, DefaultHash<String>::Hash, LocaleScriptMapHashTraits> LocaleScriptMap; +typedef HashMap<String, UScriptCode, ASCIICaseInsensitiveHash, LocaleScriptMapHashTraits> LocaleScriptMap; UScriptCode localeToScriptCodeForFontSelection(const String& locale) { - DEFINE_STATIC_LOCAL(LocaleScriptMap, localeScriptMap, ()); - if (localeScriptMap.isEmpty()) { - for (size_t i = 0; i < sizeof(localeScriptList) / sizeof(LocaleScript); ++i) - localeScriptMap.set(ASCIILiteral(localeScriptList[i].locale), localeScriptList[i].script); - } + static NeverDestroyed<LocaleScriptMap> localeScriptMap = []() { + LocaleScriptMap map; + for (auto& localeAndScript : localeScriptList) + map.add(ASCIILiteral(localeAndScript.locale), localeAndScript.script); + return map; + }(); - String canonicalLocale = locale.lower().replace('-', '_'); + String canonicalLocale = locale; + canonicalLocale.replace('-', '_'); while (!canonicalLocale.isEmpty()) { - LocaleScriptMap::iterator it = localeScriptMap.find(canonicalLocale); - if (it != localeScriptMap.end()) + auto it = localeScriptMap.get().find(canonicalLocale); + if (it != localeScriptMap.get().end()) return it->value; - size_t pos = canonicalLocale.reverseFind('_'); - if (pos == notFound) + auto underscorePosition = canonicalLocale.reverseFind('_'); + if (underscorePosition == notFound) break; - UScriptCode code = scriptNameToCode(canonicalLocale.substring(pos + 1)); + UScriptCode code = scriptNameToCode(canonicalLocale.substring(underscorePosition + 1)); if (code != USCRIPT_INVALID_CODE && code != USCRIPT_UNKNOWN) return code; - canonicalLocale = canonicalLocale.substring(0, pos); + canonicalLocale = canonicalLocale.substring(0, underscorePosition); } return USCRIPT_COMMON; } diff --git a/Source/WebCore/platform/text/LocaleToScriptMappingICU.cpp b/Source/WebCore/platform/text/LocaleToScriptMappingICU.cpp new file mode 100644 index 000000000..6c60d821e --- /dev/null +++ b/Source/WebCore/platform/text/LocaleToScriptMappingICU.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2011 Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "LocaleToScriptMapping.h" + +#include <unicode/uloc.h> +#include <wtf/text/CString.h> +#include <wtf/text/WTFString.h> + +namespace WebCore { + +// Treat certain families of script codes as a single script for assigning a per-script font in Settings. +static UScriptCode scriptCodeForFontSelection(UScriptCode scriptCode) +{ + switch (scriptCode) { + case USCRIPT_HIRAGANA: + case USCRIPT_KATAKANA: + case USCRIPT_JAPANESE: + return USCRIPT_KATAKANA_OR_HIRAGANA; + case USCRIPT_KOREAN: + return USCRIPT_HANGUL; + default: + return scriptCode; + } +} + +UScriptCode localeToScriptCodeForFontSelection(const String& locale) +{ + if (locale.isEmpty()) + return USCRIPT_COMMON; + + char maximizedLocale[ULOC_FULLNAME_CAPACITY]; + UErrorCode status = U_ZERO_ERROR; + uloc_addLikelySubtags(locale.utf8().data(), maximizedLocale, sizeof(maximizedLocale), &status); + if (U_FAILURE(status)) + return USCRIPT_COMMON; + + char script[ULOC_SCRIPT_CAPACITY]; + uloc_getScript(maximizedLocale, script, sizeof(script), &status); + if (U_FAILURE(status)) + return USCRIPT_COMMON; + + UScriptCode scriptCode = USCRIPT_COMMON; + uscript_getCode(script, &scriptCode, 1, &status); + // Ignore error that multiple scripts could be returned, since we only want one script. + if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) + return USCRIPT_COMMON; + + return scriptCodeForFontSelection(scriptCode); +} + +UScriptCode scriptNameToCode(const String& name) +{ + int32_t code = u_getPropertyValueEnum(UCHAR_SCRIPT, name.utf8().data()); + if (code >= 0 && code < USCRIPT_CODE_LIMIT) + return static_cast<UScriptCode>(code); + return USCRIPT_INVALID_CODE; +} + +} // namespace WebCore diff --git a/Source/WebCore/platform/text/NonCJKGlyphOrientation.h b/Source/WebCore/platform/text/NonCJKGlyphOrientation.h deleted file mode 100644 index 515c23b86..000000000 --- a/Source/WebCore/platform/text/NonCJKGlyphOrientation.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (C) 2011 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef NonCJKGlyphOrientation_h -#define NonCJKGlyphOrientation_h - -namespace WebCore { - -enum NonCJKGlyphOrientation { NonCJKGlyphOrientationVerticalRight, NonCJKGlyphOrientationUpright }; - -} - -#endif // NonCJKGlyphOrientation_h diff --git a/Source/WebCore/platform/text/PlatformLocale.cpp b/Source/WebCore/platform/text/PlatformLocale.cpp index 5b7598101..de7e5f705 100644 --- a/Source/WebCore/platform/text/PlatformLocale.cpp +++ b/Source/WebCore/platform/text/PlatformLocale.cpp @@ -51,8 +51,8 @@ public: private: // DateTimeFormat::TokenHandler functions. - virtual void visitField(DateTimeFormat::FieldType, int) override final; - virtual void visitLiteral(const String&) override final; + void visitField(DateTimeFormat::FieldType, int) final; + void visitLiteral(const String&) final; String zeroPadString(const String&, size_t width); void appendNumber(int number, size_t width); @@ -81,7 +81,7 @@ String DateTimeStringBuilder::zeroPadString(const String& string, size_t width) StringBuilder zeroPaddedStringBuilder; zeroPaddedStringBuilder.reserveCapacity(width); for (size_t i = string.length(); i < width; ++i) - zeroPaddedStringBuilder.append("0"); + zeroPaddedStringBuilder.append('0'); zeroPaddedStringBuilder.append(string); return zeroPaddedStringBuilder.toString(); } @@ -311,7 +311,7 @@ String Locale::convertFromLocalizedNumber(const String& localized) StringBuilder builder; builder.reserveCapacity(input.length()); if (isNegative) - builder.append("-"); + builder.append('-'); for (unsigned i = startIndex; i < endIndex;) { unsigned symbolIndex = matchedDecimalSymbolIndex(input, i); if (symbolIndex >= DecimalSymbolsSize) diff --git a/Source/WebCore/platform/text/PlatformLocale.h b/Source/WebCore/platform/text/PlatformLocale.h index 9fc639cea..91198f271 100644 --- a/Source/WebCore/platform/text/PlatformLocale.h +++ b/Source/WebCore/platform/text/PlatformLocale.h @@ -28,21 +28,20 @@ #include "DateComponents.h" #include "Language.h" -#include <wtf/PassOwnPtr.h> #include <wtf/text/WTFString.h> namespace WebCore { #if PLATFORM(IOS) -class Font; +class FontCascade; #endif class Locale { - WTF_MAKE_NONCOPYABLE(Locale); + WTF_MAKE_NONCOPYABLE(Locale); WTF_MAKE_FAST_ALLOCATED; public: - static PassOwnPtr<Locale> create(const AtomicString& localeIdentifier); - static PassOwnPtr<Locale> createDefault(); + static std::unique_ptr<Locale> create(const AtomicString& localeIdentifier); + static std::unique_ptr<Locale> createDefault(); // Converts the specified number string to another number string localized // for this Locale locale. The input string must conform to HTML @@ -124,12 +123,6 @@ public: #endif // !PLATFORM(IOS) #endif -#if PLATFORM(IOS) - // FIXME: This code should be merged with Open Source in a way that is future compatible. - // Maximum width for a formatted date string with a specified font. - virtual float maximumWidthForDateType(DateComponents::Type, const Font&) = 0; -#endif - virtual ~Locale(); protected: @@ -156,7 +149,7 @@ private: bool m_hasLocaleData; }; -inline PassOwnPtr<Locale> Locale::createDefault() +inline std::unique_ptr<Locale> Locale::createDefault() { return Locale::create(defaultLanguage()); } diff --git a/Source/WebCore/platform/text/SegmentedString.cpp b/Source/WebCore/platform/text/SegmentedString.cpp index b0dc3d3dd..ff31b2adb 100644 --- a/Source/WebCore/platform/text/SegmentedString.cpp +++ b/Source/WebCore/platform/text/SegmentedString.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + Copyright (C) 2004-2016 Apple Inc. All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public @@ -20,330 +20,204 @@ #include "config.h" #include "SegmentedString.h" +#include <wtf/text/StringBuilder.h> +#include <wtf/text/TextPosition.h> + namespace WebCore { -SegmentedString::SegmentedString(const SegmentedString& other) - : m_pushedChar1(other.m_pushedChar1) - , m_pushedChar2(other.m_pushedChar2) - , m_currentString(other.m_currentString) - , m_numberOfCharactersConsumedPriorToCurrentString(other.m_numberOfCharactersConsumedPriorToCurrentString) - , m_numberOfCharactersConsumedPriorToCurrentLine(other.m_numberOfCharactersConsumedPriorToCurrentLine) - , m_currentLine(other.m_currentLine) - , m_substrings(other.m_substrings) - , m_closed(other.m_closed) - , m_empty(other.m_empty) - , m_fastPathFlags(other.m_fastPathFlags) - , m_advanceFunc(other.m_advanceFunc) - , m_advanceAndUpdateLineNumberFunc(other.m_advanceAndUpdateLineNumberFunc) +inline void SegmentedString::Substring::appendTo(StringBuilder& builder) const { - if (m_pushedChar2) - m_currentChar = m_pushedChar2; - else if (m_pushedChar1) - m_currentChar = m_pushedChar1; - else - m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0; + builder.append(string, string.length() - length, length); } -const SegmentedString& SegmentedString::operator=(const SegmentedString& other) +SegmentedString& SegmentedString::operator=(SegmentedString&& other) { - m_pushedChar1 = other.m_pushedChar1; - m_pushedChar2 = other.m_pushedChar2; - m_currentString = other.m_currentString; - m_substrings = other.m_substrings; - if (m_pushedChar2) - m_currentChar = m_pushedChar2; - else if (m_pushedChar1) - m_currentChar = m_pushedChar1; - else - m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0; + m_currentSubstring = WTFMove(other.m_currentSubstring); + m_otherSubstrings = WTFMove(other.m_otherSubstrings); - m_closed = other.m_closed; - m_empty = other.m_empty; - m_fastPathFlags = other.m_fastPathFlags; - m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString; + m_isClosed = other.m_isClosed; + + m_currentCharacter = other.m_currentCharacter; + + m_numberOfCharactersConsumedPriorToCurrentSubstring = other.m_numberOfCharactersConsumedPriorToCurrentSubstring; m_numberOfCharactersConsumedPriorToCurrentLine = other.m_numberOfCharactersConsumedPriorToCurrentLine; m_currentLine = other.m_currentLine; - m_advanceFunc = other.m_advanceFunc; - m_advanceAndUpdateLineNumberFunc = other.m_advanceAndUpdateLineNumberFunc; + m_fastPathFlags = other.m_fastPathFlags; + m_advanceWithoutUpdatingLineNumberFunction = other.m_advanceWithoutUpdatingLineNumberFunction; + m_advanceAndUpdateLineNumberFunction = other.m_advanceAndUpdateLineNumberFunction; + + other.clear(); return *this; } unsigned SegmentedString::length() const { - unsigned length = m_currentString.m_length; - if (m_pushedChar1) { - ++length; - if (m_pushedChar2) - ++length; - } - if (isComposite()) { - Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); - Deque<SegmentedSubstring>::const_iterator e = m_substrings.end(); - for (; it != e; ++it) - length += it->m_length; - } + unsigned length = m_currentSubstring.length; + for (auto& substring : m_otherSubstrings) + length += substring.length; return length; } void SegmentedString::setExcludeLineNumbers() { - m_currentString.setExcludeLineNumbers(); - if (isComposite()) { - Deque<SegmentedSubstring>::iterator it = m_substrings.begin(); - Deque<SegmentedSubstring>::iterator e = m_substrings.end(); - for (; it != e; ++it) - it->setExcludeLineNumbers(); - } + if (!m_currentSubstring.doNotExcludeLineNumbers) + return; + m_currentSubstring.doNotExcludeLineNumbers = false; + for (auto& substring : m_otherSubstrings) + substring.doNotExcludeLineNumbers = false; + updateAdvanceFunctionPointers(); } void SegmentedString::clear() { - m_pushedChar1 = 0; - m_pushedChar2 = 0; - m_currentChar = 0; - m_currentString.clear(); - m_numberOfCharactersConsumedPriorToCurrentString = 0; + m_currentSubstring.length = 0; + m_otherSubstrings.clear(); + + m_isClosed = false; + + m_currentCharacter = 0; + + m_numberOfCharactersConsumedPriorToCurrentSubstring = 0; m_numberOfCharactersConsumedPriorToCurrentLine = 0; m_currentLine = 0; - m_substrings.clear(); - m_closed = false; - m_empty = true; - m_fastPathFlags = NoFastPath; - m_advanceFunc = &SegmentedString::advanceEmpty; - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; -} -void SegmentedString::append(const SegmentedSubstring& s) -{ - ASSERT(!m_closed); - if (!s.m_length) - return; - - if (!m_currentString.m_length) { - m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); - m_currentString = s; - updateAdvanceFunctionPointers(); - } else - m_substrings.append(s); - m_empty = false; + updateAdvanceFunctionPointersForEmptyString(); } -void SegmentedString::prepend(const SegmentedSubstring& s) +inline void SegmentedString::appendSubstring(Substring&& substring) { - ASSERT(!escaped()); - ASSERT(!s.numberOfCharactersConsumed()); - if (!s.m_length) + ASSERT(!m_isClosed); + if (!substring.length) return; - - // FIXME: We're assuming that the prepend were originally consumed by - // this SegmentedString. We're also ASSERTing that s is a fresh - // SegmentedSubstring. These assumptions are sufficient for our - // current use, but we might need to handle the more elaborate - // cases in the future. - m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); - m_numberOfCharactersConsumedPriorToCurrentString -= s.m_length; - if (!m_currentString.m_length) { - m_currentString = s; - updateAdvanceFunctionPointers(); - } else { - // Shift our m_currentString into our list. - m_substrings.prepend(m_currentString); - m_currentString = s; + if (m_currentSubstring.length) + m_otherSubstrings.append(WTFMove(substring)); + else { + m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed(); + m_currentSubstring = WTFMove(substring); + m_currentCharacter = m_currentSubstring.currentCharacter(); updateAdvanceFunctionPointers(); } - m_empty = false; } -void SegmentedString::close() +void SegmentedString::pushBack(String&& string) { - // Closing a stream twice is likely a coding mistake. - ASSERT(!m_closed); - m_closed = true; + // We never create a substring for an empty string. + ASSERT(string.length()); + + // The new substring we will create won't have the doNotExcludeLineNumbers set appropriately. + // That was lost when the characters were consumed before pushing them back. But this does + // not matter, because clients never use this for newlines. Catch that with this assertion. + ASSERT(!string.contains('\n')); + + // The characters in the string must be previously consumed characters from this segmented string. + ASSERT(string.length() <= numberOfCharactersConsumed()); + + m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed(); + if (m_currentSubstring.length) + m_otherSubstrings.prepend(WTFMove(m_currentSubstring)); + m_currentSubstring = WTFMove(string); + m_numberOfCharactersConsumedPriorToCurrentSubstring -= m_currentSubstring.length; + m_currentCharacter = m_currentSubstring.currentCharacter(); + updateAdvanceFunctionPointers(); } -void SegmentedString::append(const SegmentedString& s) +void SegmentedString::close() { - ASSERT(!m_closed); - ASSERT(!s.escaped()); - append(s.m_currentString); - if (s.isComposite()) { - Deque<SegmentedSubstring>::const_iterator it = s.m_substrings.begin(); - Deque<SegmentedSubstring>::const_iterator e = s.m_substrings.end(); - for (; it != e; ++it) - append(*it); - } - m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0); + ASSERT(!m_isClosed); + m_isClosed = true; } -void SegmentedString::prepend(const SegmentedString& s) +void SegmentedString::append(const SegmentedString& string) { - ASSERT(!escaped()); - ASSERT(!s.escaped()); - if (s.isComposite()) { - Deque<SegmentedSubstring>::const_reverse_iterator it = s.m_substrings.rbegin(); - Deque<SegmentedSubstring>::const_reverse_iterator e = s.m_substrings.rend(); - for (; it != e; ++it) - prepend(*it); - } - prepend(s.m_currentString); - m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0); + appendSubstring(Substring { string.m_currentSubstring }); + for (auto& substring : string.m_otherSubstrings) + m_otherSubstrings.append(substring); } -void SegmentedString::advanceSubstring() +void SegmentedString::append(SegmentedString&& string) { - if (isComposite()) { - m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed(); - m_currentString = m_substrings.takeFirst(); - // If we've previously consumed some characters of the non-current - // string, we now account for those characters as part of the current - // string, not as part of "prior to current string." - m_numberOfCharactersConsumedPriorToCurrentString -= m_currentString.numberOfCharactersConsumed(); - updateAdvanceFunctionPointers(); - } else { - m_currentString.clear(); - m_empty = true; - m_fastPathFlags = NoFastPath; - m_advanceFunc = &SegmentedString::advanceEmpty; - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; - } + appendSubstring(WTFMove(string.m_currentSubstring)); + for (auto& substring : string.m_otherSubstrings) + m_otherSubstrings.append(WTFMove(substring)); } -String SegmentedString::toString() const +void SegmentedString::append(String&& string) { - StringBuilder result; - if (m_pushedChar1) { - result.append(m_pushedChar1); - if (m_pushedChar2) - result.append(m_pushedChar2); - } - m_currentString.appendTo(result); - if (isComposite()) { - Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin(); - Deque<SegmentedSubstring>::const_iterator e = m_substrings.end(); - for (; it != e; ++it) - it->appendTo(result); - } - return result.toString(); + appendSubstring(WTFMove(string)); } -void SegmentedString::advance(unsigned count, UChar* consumedCharacters) +void SegmentedString::append(const String& string) { - ASSERT_WITH_SECURITY_IMPLICATION(count <= length()); - for (unsigned i = 0; i < count; ++i) { - consumedCharacters[i] = currentChar(); - advance(); - } + appendSubstring(String { string }); } -void SegmentedString::advance8() +String SegmentedString::toString() const { - ASSERT(!m_pushedChar1); - decrementAndCheckLength(); - m_currentChar = m_currentString.incrementAndGetCurrentChar8(); + StringBuilder result; + m_currentSubstring.appendTo(result); + for (auto& substring : m_otherSubstrings) + substring.appendTo(result); + return result.toString(); } -void SegmentedString::advance16() +void SegmentedString::advanceWithoutUpdatingLineNumber16() { - ASSERT(!m_pushedChar1); + m_currentCharacter = *++m_currentSubstring.currentCharacter16; decrementAndCheckLength(); - m_currentChar = m_currentString.incrementAndGetCurrentChar16(); -} - -void SegmentedString::advanceAndUpdateLineNumber8() -{ - ASSERT(!m_pushedChar1); - ASSERT(m_currentString.getCurrentChar() == m_currentChar); - if (m_currentChar == '\n') { - ++m_currentLine; - m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; - } - decrementAndCheckLength(); - m_currentChar = m_currentString.incrementAndGetCurrentChar8(); } void SegmentedString::advanceAndUpdateLineNumber16() { - ASSERT(!m_pushedChar1); - ASSERT(m_currentString.getCurrentChar() == m_currentChar); - if (m_currentChar == '\n') { - ++m_currentLine; - m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; - } + ASSERT(m_currentSubstring.doNotExcludeLineNumbers); + processPossibleNewline(); + m_currentCharacter = *++m_currentSubstring.currentCharacter16; decrementAndCheckLength(); - m_currentChar = m_currentString.incrementAndGetCurrentChar16(); } -void SegmentedString::advanceSlowCase() +inline void SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber() { - if (m_pushedChar1) { - m_pushedChar1 = m_pushedChar2; - m_pushedChar2 = 0; - - if (m_pushedChar1) { - m_currentChar = m_pushedChar1; - return; - } - - updateAdvanceFunctionPointers(); - } else if (m_currentString.m_length) { - if (--m_currentString.m_length == 0) - advanceSubstring(); - } else if (!isComposite()) { - m_currentString.clear(); - m_empty = true; - m_fastPathFlags = NoFastPath; - m_advanceFunc = &SegmentedString::advanceEmpty; - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; + ASSERT(m_currentSubstring.length == 1); + if (m_otherSubstrings.isEmpty()) { + m_currentSubstring.length = 0; + m_currentCharacter = 0; + updateAdvanceFunctionPointersForEmptyString(); + return; } - m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0; + m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed(); + m_currentSubstring = m_otherSubstrings.takeFirst(); + // If we've previously consumed some characters of the non-current string, we now account for those + // characters as part of the current string, not as part of "prior to current string." + m_numberOfCharactersConsumedPriorToCurrentSubstring -= m_currentSubstring.numberOfCharactersConsumed(); + m_currentCharacter = m_currentSubstring.currentCharacter(); + updateAdvanceFunctionPointers(); } -void SegmentedString::advanceAndUpdateLineNumberSlowCase() +void SegmentedString::advancePastSingleCharacterSubstring() { - if (m_pushedChar1) { - m_pushedChar1 = m_pushedChar2; - m_pushedChar2 = 0; - - if (m_pushedChar1) { - m_currentChar = m_pushedChar1; - return; - } - - updateAdvanceFunctionPointers(); - } else if (m_currentString.m_length) { - if (m_currentString.getCurrentChar() == '\n' && m_currentString.doNotExcludeLineNumbers()) { - ++m_currentLine; - // Plus 1 because numberOfCharactersConsumed value hasn't incremented yet; it does with m_length decrement below. - m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; - } - if (--m_currentString.m_length == 0) - advanceSubstring(); - else - m_currentString.incrementAndGetCurrentChar(); // Only need the ++ - } else if (!isComposite()) { - m_currentString.clear(); - m_empty = true; - m_fastPathFlags = NoFastPath; - m_advanceFunc = &SegmentedString::advanceEmpty; - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; - } - - m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0; + ASSERT(m_currentSubstring.length == 1); + ASSERT(m_currentSubstring.doNotExcludeLineNumbers); + processPossibleNewline(); + advancePastSingleCharacterSubstringWithoutUpdatingLineNumber(); } void SegmentedString::advanceEmpty() { - ASSERT(!m_currentString.m_length && !isComposite()); - m_currentChar = 0; + ASSERT(!m_currentSubstring.length); + ASSERT(m_otherSubstrings.isEmpty()); + ASSERT(!m_currentCharacter); } -void SegmentedString::updateSlowCaseFunctionPointers() +void SegmentedString::updateAdvanceFunctionPointersForSingleCharacterSubstring() { + ASSERT(m_currentSubstring.length == 1); m_fastPathFlags = NoFastPath; - m_advanceFunc = &SegmentedString::advanceSlowCase; - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumberSlowCase; + m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber; + if (m_currentSubstring.doNotExcludeLineNumbers) + m_advanceAndUpdateLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstring; + else + m_advanceAndUpdateLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber; } OrdinalNumber SegmentedString::currentLine() const @@ -353,8 +227,7 @@ OrdinalNumber SegmentedString::currentLine() const OrdinalNumber SegmentedString::currentColumn() const { - int zeroBasedColumn = numberOfCharactersConsumed() - m_numberOfCharactersConsumedPriorToCurrentLine; - return OrdinalNumber::fromZeroBasedInt(zeroBasedColumn); + return OrdinalNumber::fromZeroBasedInt(numberOfCharactersConsumed() - m_numberOfCharactersConsumedPriorToCurrentLine); } void SegmentedString::setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength) @@ -363,4 +236,36 @@ void SegmentedString::setCurrentPosition(OrdinalNumber line, OrdinalNumber colum m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + prologLength - columnAftreProlog.zeroBasedInt(); } +SegmentedString::AdvancePastResult SegmentedString::advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase) +{ + constexpr unsigned maxLength = 10; + ASSERT(!strchr(literal, '\n')); + auto length = strlen(literal); + ASSERT(length <= maxLength); + if (length > this->length()) + return NotEnoughCharacters; + UChar consumedCharacters[maxLength]; + for (unsigned i = 0; i < length; ++i) { + auto character = m_currentCharacter; + if (characterMismatch(character, literal[i], lettersIgnoringASCIICase)) { + if (i) + pushBack(String { consumedCharacters, i }); + return DidNotMatch; + } + advancePastNonNewline(); + consumedCharacters[i] = character; + } + return DidMatch; +} + +void SegmentedString::updateAdvanceFunctionPointersForEmptyString() +{ + ASSERT(!m_currentSubstring.length); + ASSERT(m_otherSubstrings.isEmpty()); + ASSERT(!m_currentCharacter); + m_fastPathFlags = NoFastPath; + m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceEmpty; + m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceEmpty; +} + } diff --git a/Source/WebCore/platform/text/SegmentedString.h b/Source/WebCore/platform/text/SegmentedString.h index d5fe367b3..fe7f19140 100644 --- a/Source/WebCore/platform/text/SegmentedString.h +++ b/Source/WebCore/platform/text/SegmentedString.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. + Copyright (C) 2004-2016 Apple Inc. All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public @@ -17,406 +17,307 @@ Boston, MA 02110-1301, USA. */ -#ifndef SegmentedString_h -#define SegmentedString_h +#pragma once #include <wtf/Deque.h> -#include <wtf/text/StringBuilder.h> -#include <wtf/text/TextPosition.h> #include <wtf/text/WTFString.h> namespace WebCore { -class SegmentedString; +// FIXME: This should not start with "k". +// FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now. +constexpr LChar kEndOfFileMarker = 0; -class SegmentedSubstring { +class SegmentedString { public: - SegmentedSubstring() - : m_length(0) - , m_doNotExcludeLineNumbers(true) - , m_is8Bit(false) - { - m_data.string16Ptr = 0; - } + SegmentedString() = default; + SegmentedString(String&&); + SegmentedString(const String&); - SegmentedSubstring(const String& str) - : m_length(str.length()) - , m_doNotExcludeLineNumbers(true) - , m_string(str) - { - if (m_length) { - if (m_string.is8Bit()) { - m_is8Bit = true; - m_data.string8Ptr = m_string.characters8(); - } else { - m_is8Bit = false; - m_data.string16Ptr = m_string.characters16(); - } - } else - m_is8Bit = false; - } + SegmentedString(SegmentedString&&) = delete; + SegmentedString(const SegmentedString&) = delete; - void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;} - - bool is8Bit() { return m_is8Bit; } - - bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; } - bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; } + SegmentedString& operator=(SegmentedString&&); + SegmentedString& operator=(const SegmentedString&) = default; - void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; } + void clear(); + void close(); - int numberOfCharactersConsumed() const { return m_string.length() - m_length; } + void append(SegmentedString&&); + void append(const SegmentedString&); - void appendTo(StringBuilder& builder) const - { - int offset = m_string.length() - m_length; + void append(String&&); + void append(const String&); - if (!offset) { - if (m_length) - builder.append(m_string); - } else - builder.append(m_string.substring(offset, m_length)); - } + void pushBack(String&&); - UChar getCurrentChar8() - { - return *m_data.string8Ptr; - } + void setExcludeLineNumbers(); - UChar getCurrentChar16() - { - return m_data.string16Ptr ? *m_data.string16Ptr : 0; - } + bool isEmpty() const { return !m_currentSubstring.length; } + unsigned length() const; - UChar incrementAndGetCurrentChar8() - { - ASSERT(m_data.string8Ptr); - return *++m_data.string8Ptr; - } + bool isClosed() const { return m_isClosed; } - UChar incrementAndGetCurrentChar16() - { - ASSERT(m_data.string16Ptr); - return *++m_data.string16Ptr; - } + void advance(); + void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline. + void advancePastNewline(); // Faster than calling advance when we know the current character is a newline. - String currentSubString(unsigned length) - { - int offset = m_string.length() - m_length; - return m_string.substring(offset, length); - } + enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters }; + template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); } + template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); } - ALWAYS_INLINE UChar getCurrentChar() - { - ASSERT(m_length); - if (is8Bit()) - return getCurrentChar8(); - return getCurrentChar16(); - } - - ALWAYS_INLINE UChar incrementAndGetCurrentChar() - { - ASSERT(m_length); - if (is8Bit()) - return incrementAndGetCurrentChar8(); - return incrementAndGetCurrentChar16(); - } + unsigned numberOfCharactersConsumed() const; -public: - union { - const LChar* string8Ptr; - const UChar* string16Ptr; - } m_data; - int m_length; - -private: - bool m_doNotExcludeLineNumbers; - bool m_is8Bit; - String m_string; -}; + String toString() const; -class SegmentedString { -public: - SegmentedString() - : m_pushedChar1(0) - , m_pushedChar2(0) - , m_currentChar(0) - , m_numberOfCharactersConsumedPriorToCurrentString(0) - , m_numberOfCharactersConsumedPriorToCurrentLine(0) - , m_currentLine(0) - , m_closed(false) - , m_empty(true) - , m_fastPathFlags(NoFastPath) - , m_advanceFunc(&SegmentedString::advanceEmpty) - , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty) - { - } + UChar currentCharacter() const { return m_currentCharacter; } - SegmentedString(const String& str) - : m_pushedChar1(0) - , m_pushedChar2(0) - , m_currentString(str) - , m_currentChar(0) - , m_numberOfCharactersConsumedPriorToCurrentString(0) - , m_numberOfCharactersConsumedPriorToCurrentLine(0) - , m_currentLine(0) - , m_closed(false) - , m_empty(!str.length()) - , m_fastPathFlags(NoFastPath) - { - if (m_currentString.m_length) - m_currentChar = m_currentString.getCurrentChar(); - updateAdvanceFunctionPointers(); - } + OrdinalNumber currentColumn() const; + OrdinalNumber currentLine() const; - SegmentedString(const SegmentedString&); + // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog + // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. + void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength); - const SegmentedString& operator=(const SegmentedString&); +private: + struct Substring { + Substring() = default; + Substring(String&&); + + UChar currentCharacter() const; + UChar currentCharacterPreIncrement(); + + unsigned numberOfCharactersConsumed() const; + void appendTo(StringBuilder&) const; + + String string; + unsigned length { 0 }; + bool is8Bit; + union { + const LChar* currentCharacter8; + const UChar* currentCharacter16; + }; + bool doNotExcludeLineNumbers { true }; + }; - void clear(); - void close(); + enum FastPathFlags { + NoFastPath = 0, + Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, + Use8BitAdvance = 1 << 1, + }; - void append(const SegmentedString&); - void prepend(const SegmentedString&); + void appendSubstring(Substring&&); - bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); } - void setExcludeLineNumbers(); + void processPossibleNewline(); + void startNewLine(); - void push(UChar c) - { - if (!m_pushedChar1) { - m_pushedChar1 = c; - m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar(); - updateSlowCaseFunctionPointers(); - } else { - ASSERT(!m_pushedChar2); - m_pushedChar2 = c; - } - } + void advanceWithoutUpdatingLineNumber(); + void advanceWithoutUpdatingLineNumber16(); + void advanceAndUpdateLineNumber16(); + void advancePastSingleCharacterSubstringWithoutUpdatingLineNumber(); + void advancePastSingleCharacterSubstring(); + void advanceEmpty(); - bool isEmpty() const { return m_empty; } - unsigned length() const; + void updateAdvanceFunctionPointers(); + void updateAdvanceFunctionPointersForEmptyString(); + void updateAdvanceFunctionPointersForSingleCharacterSubstring(); - bool isClosed() const { return m_closed; } + void decrementAndCheckLength(); - enum LookAheadResult { - DidNotMatch, - DidMatch, - NotEnoughCharacters, - }; + template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase); + template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]); + AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase); - LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); } - LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); } + Substring m_currentSubstring; + Deque<Substring> m_otherSubstrings; - void advance() - { - if (m_fastPathFlags & Use8BitAdvance) { - ASSERT(!m_pushedChar1); - bool haveOneCharacterLeft = (--m_currentString.m_length == 1); - m_currentChar = m_currentString.incrementAndGetCurrentChar8(); + bool m_isClosed { false }; - if (!haveOneCharacterLeft) - return; + UChar m_currentCharacter { 0 }; - updateSlowCaseFunctionPointers(); + unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 }; + unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 }; + int m_currentLine { 0 }; - return; - } + unsigned char m_fastPathFlags { NoFastPath }; + void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty }; + void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty }; +}; - (this->*m_advanceFunc)(); +inline SegmentedString::Substring::Substring(String&& passedString) + : string(WTFMove(passedString)) + , length(string.length()) +{ + if (length) { + is8Bit = string.impl()->is8Bit(); + if (is8Bit) + currentCharacter8 = string.impl()->characters8(); + else + currentCharacter16 = string.impl()->characters16(); } +} - inline void advanceAndUpdateLineNumber() - { - if (m_fastPathFlags & Use8BitAdvance) { - ASSERT(!m_pushedChar1); - - bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers); - bool haveOneCharacterLeft = (--m_currentString.m_length == 1); +inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const +{ + return string.length() - length; +} - m_currentChar = m_currentString.incrementAndGetCurrentChar8(); +ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const +{ + ASSERT(length); + return is8Bit ? *currentCharacter8 : *currentCharacter16; +} - if (!(haveNewLine | haveOneCharacterLeft)) - return; +ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement() +{ + ASSERT(length); + return is8Bit ? *++currentCharacter8 : *++currentCharacter16; +} - if (haveNewLine) { - ++m_currentLine; - m_numberOfCharactersConsumedPriorToCurrentLine = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed(); - } +inline SegmentedString::SegmentedString(String&& string) + : m_currentSubstring(WTFMove(string)) +{ + if (m_currentSubstring.length) { + m_currentCharacter = m_currentSubstring.currentCharacter(); + updateAdvanceFunctionPointers(); + } +} - if (haveOneCharacterLeft) - updateSlowCaseFunctionPointers(); +inline SegmentedString::SegmentedString(const String& string) + : SegmentedString(String { string }) +{ +} - return; - } +ALWAYS_INLINE void SegmentedString::decrementAndCheckLength() +{ + ASSERT(m_currentSubstring.length > 1); + if (UNLIKELY(--m_currentSubstring.length == 1)) + updateAdvanceFunctionPointersForSingleCharacterSubstring(); +} - (this->*m_advanceAndUpdateLineNumberFunc)(); +ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber() +{ + if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { + m_currentCharacter = *++m_currentSubstring.currentCharacter8; + decrementAndCheckLength(); + return; } - void advanceAndASSERT(UChar expectedCharacter) - { - ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter); - advance(); - } + (this->*m_advanceWithoutUpdatingLineNumberFunction)(); +} - void advanceAndASSERTIgnoringCase(UChar expectedCharacter) - { - ASSERT_UNUSED(expectedCharacter, u_foldCase(currentChar(), U_FOLD_CASE_DEFAULT) == u_foldCase(expectedCharacter, U_FOLD_CASE_DEFAULT)); - advance(); - } +inline void SegmentedString::startNewLine() +{ + ++m_currentLine; + m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed(); +} - void advancePastNonNewline() - { - ASSERT(currentChar() != '\n'); - advance(); - } +inline void SegmentedString::processPossibleNewline() +{ + if (m_currentCharacter == '\n') + startNewLine(); +} - void advancePastNewlineAndUpdateLineNumber() - { - ASSERT(currentChar() == '\n'); - if (!m_pushedChar1 && m_currentString.m_length > 1) { - int newLineFlag = m_currentString.doNotExcludeLineNumbers(); - m_currentLine += newLineFlag; - if (newLineFlag) - m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1; - decrementAndCheckLength(); - m_currentChar = m_currentString.incrementAndGetCurrentChar(); +inline void SegmentedString::advance() +{ + if (LIKELY(m_fastPathFlags & Use8BitAdvance)) { + ASSERT(m_currentSubstring.length > 1); + bool lastCharacterWasNewline = m_currentCharacter == '\n'; + m_currentCharacter = *++m_currentSubstring.currentCharacter8; + bool haveOneCharacterLeft = --m_currentSubstring.length == 1; + if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft))) return; - } - advanceAndUpdateLineNumberSlowCase(); + if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers)) + startNewLine(); + if (haveOneCharacterLeft) + updateAdvanceFunctionPointersForSingleCharacterSubstring(); + return; } - // Writes the consumed characters into consumedCharacters, which must - // have space for at least |count| characters. - void advance(unsigned count, UChar* consumedCharacters); + (this->*m_advanceAndUpdateLineNumberFunction)(); +} - bool escaped() const { return m_pushedChar1; } +ALWAYS_INLINE void SegmentedString::advancePastNonNewline() +{ + ASSERT(m_currentCharacter != '\n'); + advanceWithoutUpdatingLineNumber(); +} - int numberOfCharactersConsumed() const - { - int numberOfPushedCharacters = 0; - if (m_pushedChar1) { - ++numberOfPushedCharacters; - if (m_pushedChar2) - ++numberOfPushedCharacters; - } - return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters; +inline void SegmentedString::advancePastNewline() +{ + ASSERT(m_currentCharacter == '\n'); + if (m_currentSubstring.length > 1) { + if (m_currentSubstring.doNotExcludeLineNumbers) + startNewLine(); + m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement(); + decrementAndCheckLength(); + return; } - String toString() const; - - UChar currentChar() const { return m_currentChar; } - - // The method is moderately slow, comparing to currentLine method. - OrdinalNumber currentColumn() const; - OrdinalNumber currentLine() const; - // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog - // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed. - void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength); - -private: - enum FastPathFlags { - NoFastPath = 0, - Use8BitAdvanceAndUpdateLineNumbers = 1 << 0, - Use8BitAdvance = 1 << 1, - }; + (this->*m_advanceAndUpdateLineNumberFunction)(); +} - void append(const SegmentedSubstring&); - void prepend(const SegmentedSubstring&); +inline unsigned SegmentedString::numberOfCharactersConsumed() const +{ + return m_numberOfCharactersConsumedPriorToCurrentSubstring + m_currentSubstring.numberOfCharactersConsumed(); +} - void advance8(); - void advance16(); - void advanceAndUpdateLineNumber8(); - void advanceAndUpdateLineNumber16(); - void advanceSlowCase(); - void advanceAndUpdateLineNumberSlowCase(); - void advanceEmpty(); - void advanceSubstring(); - - void updateSlowCaseFunctionPointers(); - - void decrementAndCheckLength() - { - ASSERT(m_currentString.m_length > 1); - if (--m_currentString.m_length == 1) - updateSlowCaseFunctionPointers(); - } +template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase) +{ + return lettersIgnoringASCIICase ? !isASCIIAlphaCaselessEqual(a, b) : a != b; +} - void updateAdvanceFunctionPointers() - { - if ((m_currentString.m_length > 1) && !m_pushedChar1) { - if (m_currentString.is8Bit()) { - m_advanceFunc = &SegmentedString::advance8; - m_fastPathFlags = Use8BitAdvance; - if (m_currentString.doNotExcludeLineNumbers()) { - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8; - m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; - } else - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8; - return; +template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator]) +{ + constexpr unsigned length = lengthIncludingTerminator - 1; + ASSERT(!literal[length]); + ASSERT(!strchr(literal, '\n')); + if (length + 1 < m_currentSubstring.length) { + if (m_currentSubstring.is8Bit) { + for (unsigned i = 0; i < length; ++i) { + if (characterMismatch(m_currentSubstring.currentCharacter8[i], literal[i], lettersIgnoringASCIICase)) + return DidNotMatch; } - - m_advanceFunc = &SegmentedString::advance16; - m_fastPathFlags = NoFastPath; - if (m_currentString.doNotExcludeLineNumbers()) - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16; - else - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16; - return; - } - - if (!m_currentString.m_length && !isComposite()) { - m_advanceFunc = &SegmentedString::advanceEmpty; - m_fastPathFlags = NoFastPath; - m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty; + m_currentSubstring.currentCharacter8 += length; + m_currentCharacter = *m_currentSubstring.currentCharacter8; + } else { + for (unsigned i = 0; i < length; ++i) { + if (characterMismatch(m_currentSubstring.currentCharacter16[i], literal[i], lettersIgnoringASCIICase)) + return DidNotMatch; + } + m_currentSubstring.currentCharacter16 += length; + m_currentCharacter = *m_currentSubstring.currentCharacter16; } - - updateSlowCaseFunctionPointers(); + m_currentSubstring.length -= length; + return DidMatch; } + return advancePastSlowCase(literal, lettersIgnoringASCIICase); +} - inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive) - { - if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) { - String currentSubstring = m_currentString.currentSubString(string.length()); - if (currentSubstring.startsWith(string, caseSensitive)) - return DidMatch; - return DidNotMatch; +inline void SegmentedString::updateAdvanceFunctionPointers() +{ + if (m_currentSubstring.length > 1) { + if (m_currentSubstring.is8Bit) { + m_fastPathFlags = Use8BitAdvance; + if (m_currentSubstring.doNotExcludeLineNumbers) + m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers; + return; } - return lookAheadSlowCase(string, caseSensitive); - } - - LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive) - { - unsigned count = string.length(); - if (count > length()) - return NotEnoughCharacters; - UChar* consumedCharacters; - String consumedString = String::createUninitialized(count, consumedCharacters); - advance(count, consumedCharacters); - LookAheadResult result = DidNotMatch; - if (consumedString.startsWith(string, caseSensitive)) - result = DidMatch; - prepend(SegmentedString(consumedString)); - return result; + m_fastPathFlags = NoFastPath; + m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; + if (m_currentSubstring.doNotExcludeLineNumbers) + m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceAndUpdateLineNumber16; + else + m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16; + return; } - bool isComposite() const { return !m_substrings.isEmpty(); } - - UChar m_pushedChar1; - UChar m_pushedChar2; - SegmentedSubstring m_currentString; - UChar m_currentChar; - int m_numberOfCharactersConsumedPriorToCurrentString; - int m_numberOfCharactersConsumedPriorToCurrentLine; - int m_currentLine; - Deque<SegmentedSubstring> m_substrings; - bool m_closed; - bool m_empty; - unsigned char m_fastPathFlags; - void (SegmentedString::*m_advanceFunc)(); - void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)(); -}; + if (!m_currentSubstring.length) { + updateAdvanceFunctionPointersForEmptyString(); + return; + } + updateAdvanceFunctionPointersForSingleCharacterSubstring(); } -#endif +} diff --git a/Source/WebCore/platform/text/StringWithDirection.h b/Source/WebCore/platform/text/StringWithDirection.h index 3833b1cdc..6dc87d765 100644 --- a/Source/WebCore/platform/text/StringWithDirection.h +++ b/Source/WebCore/platform/text/StringWithDirection.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2011 Google Inc. All rights reserved. + * Copyright (C) 2017 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -28,10 +29,9 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef StringWithDirection_h -#define StringWithDirection_h +#pragma once -#include "TextDirection.h" +#include "WritingMode.h" #include <wtf/text/WTFString.h> namespace WebCore { @@ -44,36 +44,30 @@ namespace WebCore { // Note that is explicitly *not* the direction of the string as learned // from the characters of the string; it's extra metadata we have external // to the string. -class StringWithDirection { -public: - StringWithDirection() - : m_direction(LTR) - { - } - StringWithDirection(const String& string, TextDirection dir) - : m_string(string) - , m_direction(dir) - { - } - - const String& string() const { return m_string; } - TextDirection direction() const { return m_direction; } - - bool isEmpty() const { return m_string.isEmpty(); } - bool isNull() const { return m_string.isNull(); } +struct StringWithDirection { + StringWithDirection() = default; + StringWithDirection(const String& string, TextDirection direction) : string { string }, direction { direction } { } + StringWithDirection(String&& string, TextDirection direction) : string { WTFMove(string) }, direction { direction } { } + String string; + TextDirection direction { LTR }; +}; - bool operator==(const StringWithDirection& other) const - { - return other.m_string == m_string && other.m_direction == m_direction; - } - bool operator!=(const StringWithDirection& other) const { return !((*this) == other); } +inline bool operator==(const StringWithDirection& a, const StringWithDirection& b) +{ + return a.string == b.string && a.direction == b.direction; +} -private: - String m_string; - TextDirection m_direction; -}; +inline bool operator!=(const StringWithDirection& a, const StringWithDirection& b) +{ + return !(a == b); +} +inline StringWithDirection truncateFromEnd(const StringWithDirection& string, unsigned maxLength) +{ + if (string.direction == LTR) + return StringWithDirection(string.string.left(maxLength), LTR); + return StringWithDirection(string.string.right(maxLength), RTL); } -#endif // StringWithDirection_h +} diff --git a/Source/WebCore/platform/text/SuffixTree.h b/Source/WebCore/platform/text/SuffixTree.h index 1f05c363e..f51f2c72f 100644 --- a/Source/WebCore/platform/text/SuffixTree.h +++ b/Source/WebCore/platform/text/SuffixTree.h @@ -13,7 +13,7 @@ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/Source/WebCore/platform/text/TextDirection.h b/Source/WebCore/platform/text/TextAllInOne.cpp index 9a074c85d..2c1d5b4e6 100644 --- a/Source/WebCore/platform/text/TextDirection.h +++ b/Source/WebCore/platform/text/TextAllInOne.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2012 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -10,28 +10,30 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef TextDirection_h -#define TextDirection_h +// This all-in-one cpp file cuts down on template bloat to allow us to build our Windows release build. -namespace WebCore { - -enum TextDirection { RTL, LTR }; - -inline bool isLeftToRightDirection(TextDirection direction) { return direction == LTR; } - -} - -#endif +#include "TextBoundaries.cpp" +#include "TextCodec.cpp" +#include "TextCodecICU.cpp" +#include "TextCodecLatin1.cpp" +#include "TextCodecReplacement.cpp" +#include "TextCodecUTF16.cpp" +#include "TextCodecUTF8.cpp" +#include "TextCodecUserDefined.cpp" +#include "TextEncoding.cpp" +#include "TextEncodingDetectorICU.cpp" +#include "TextEncodingRegistry.cpp" +#include "TextStream.cpp" diff --git a/Source/WebCore/platform/text/TextBoundaries.cpp b/Source/WebCore/platform/text/TextBoundaries.cpp index f5676e2df..e780ff682 100644 --- a/Source/WebCore/platform/text/TextBoundaries.cpp +++ b/Source/WebCore/platform/text/TextBoundaries.cpp @@ -11,10 +11,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -27,84 +27,86 @@ #include "config.h" #include "TextBoundaries.h" -#include "TextBreakIterator.h" +#include <unicode/ubrk.h> #include <wtf/text/StringImpl.h> -#include <wtf/unicode/Unicode.h> +#include <wtf/text/TextBreakIterator.h> namespace WebCore { -int endOfFirstWordBoundaryContext(const UChar* characters, int length) +unsigned endOfFirstWordBoundaryContext(StringView text) { - for (int i = 0; i < length; ) { - int first = i; + unsigned length = text.length(); + for (unsigned i = 0; i < length; ) { + unsigned first = i; UChar32 ch; - U16_NEXT(characters, i, length, ch); + U16_NEXT(text, i, length, ch); if (!requiresContextForWordBoundary(ch)) return first; } return length; } -int startOfLastWordBoundaryContext(const UChar* characters, int length) +unsigned startOfLastWordBoundaryContext(StringView text) { - for (int i = length; i > 0; ) { - int last = i; + unsigned length = text.length(); + for (unsigned i = length; i > 0; ) { + unsigned last = i; UChar32 ch; - U16_PREV(characters, 0, i, ch); + U16_PREV(text, 0, i, ch); if (!requiresContextForWordBoundary(ch)) return last; } return 0; } -#if !PLATFORM(MAC) +#if !PLATFORM(COCOA) -int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward) +int findNextWordFromIndex(StringView text, int position, bool forward) { - TextBreakIterator* it = wordBreakIterator(StringView(chars, len)); + UBreakIterator* it = wordBreakIterator(text); if (forward) { - position = textBreakFollowing(it, position); - while (position != TextBreakDone) { + position = ubrk_following(it, position); + while (position != UBRK_DONE) { // We stop searching when the character preceeding the break is alphanumeric. - if (position < len && u_isalnum(chars[position - 1])) + if (static_cast<unsigned>(position) < text.length() && u_isalnum(text[position - 1])) return position; - position = textBreakFollowing(it, position); + position = ubrk_following(it, position); } - return len; + return text.length(); } else { - position = textBreakPreceding(it, position); - while (position != TextBreakDone) { + position = ubrk_preceding(it, position); + while (position != UBRK_DONE) { // We stop searching when the character following the break is alphanumeric. - if (position > 0 && u_isalnum(chars[position])) + if (position && u_isalnum(text[position])) return position; - position = textBreakPreceding(it, position); + position = ubrk_preceding(it, position); } return 0; } } -void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end) +void findWordBoundary(StringView text, int position, int* start, int* end) { - TextBreakIterator* it = wordBreakIterator(StringView(chars, len)); - *end = textBreakFollowing(it, position); + UBreakIterator* it = wordBreakIterator(text); + *end = ubrk_following(it, position); if (*end < 0) - *end = textBreakLast(it); - *start = textBreakPrevious(it); + *end = ubrk_last(it); + *start = ubrk_previous(it); } -void findEndWordBoundary(const UChar* chars, int len, int position, int* end) +void findEndWordBoundary(StringView text, int position, int* end) { - TextBreakIterator* it = wordBreakIterator(StringView(chars, len)); - *end = textBreakFollowing(it, position); + UBreakIterator* it = wordBreakIterator(text); + *end = ubrk_following(it, position); if (*end < 0) - *end = textBreakLast(it); + *end = ubrk_last(it); } -#endif // !PLATFORM(MAC) +#endif // !PLATFORM(COCOA) } // namespace WebCore diff --git a/Source/WebCore/platform/text/TextBoundaries.h b/Source/WebCore/platform/text/TextBoundaries.h index 6c83b3bbf..9a873af4d 100644 --- a/Source/WebCore/platform/text/TextBoundaries.h +++ b/Source/WebCore/platform/text/TextBoundaries.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2004, 2006 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -26,7 +26,8 @@ #ifndef TextBoundaries_h #define TextBoundaries_h -#include <wtf/unicode/Unicode.h> +#include <unicode/uchar.h> +#include <wtf/Forward.h> namespace WebCore { @@ -41,12 +42,12 @@ namespace WebCore { return lineBreak == U_LB_COMPLEX_CONTEXT || lineBreak == WK_U_LB_CONDITIONAL_JAPANESE_STARTER || lineBreak == U_LB_IDEOGRAPHIC; } - int endOfFirstWordBoundaryContext(const UChar* characters, int length); - int startOfLastWordBoundaryContext(const UChar* characters, int length); + unsigned endOfFirstWordBoundaryContext(StringView); + unsigned startOfLastWordBoundaryContext(StringView); - void findWordBoundary(const UChar*, int len, int position, int* start, int* end); - void findEndWordBoundary(const UChar*, int len, int position, int* end); - int findNextWordFromIndex(const UChar*, int len, int position, bool forward); + void findWordBoundary(StringView, int position, int* start, int* end); + void findEndWordBoundary(StringView, int position, int* end); + int findNextWordFromIndex(StringView, int position, bool forward); } diff --git a/Source/WebCore/platform/text/TextBreakIterator.cpp b/Source/WebCore/platform/text/TextBreakIterator.cpp deleted file mode 100644 index 9d94066b4..000000000 --- a/Source/WebCore/platform/text/TextBreakIterator.cpp +++ /dev/null @@ -1,400 +0,0 @@ -/* - * (C) 1999 Lars Knoll (knoll@kde.org) - * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. - * Copyright (C) 2007-2009 Torch Mobile, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" -#include "TextBreakIterator.h" - -#include "LineBreakIteratorPoolICU.h" -#include "UTextProviderLatin1.h" -#include "UTextProviderUTF16.h" -#include <mutex> -#include <wtf/Atomics.h> -#include <wtf/text/StringView.h> -#include <wtf/text/WTFString.h> - -namespace WebCore { - -// Iterator initialization - -static TextBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID()) -{ - UErrorCode openStatus = U_ZERO_ERROR; - TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(ubrk_open(type, locale, 0, 0, &openStatus)); - ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus); - return iterator; -} - -#if !PLATFORM(IOS) -static TextBreakIterator* initializeIteratorWithRules(const char* breakRules) -{ - UParseError parseStatus; - UErrorCode openStatus = U_ZERO_ERROR; - String rules(breakRules); - TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(ubrk_openRules(rules.deprecatedCharacters(), rules.length(), 0, 0, &parseStatus, &openStatus)); - ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus); - return iterator; -} -#endif // !PLATFORM(IOS) - - -// Iterator text setting - -static TextBreakIterator* setTextForIterator(TextBreakIterator& iterator, StringView string) -{ - if (string.is8Bit()) { - UTextWithBuffer textLocal; - textLocal.text = UTEXT_INITIALIZER; - textLocal.text.extraSize = sizeof(textLocal.buffer); - textLocal.text.pExtra = textLocal.buffer; - - UErrorCode openStatus = U_ZERO_ERROR; - UText* text = openLatin1UTextProvider(&textLocal, string.characters8(), string.length(), &openStatus); - if (U_FAILURE(openStatus)) { - LOG_ERROR("uTextOpenLatin1 failed with status %d", openStatus); - return nullptr; - } - - UErrorCode setTextStatus = U_ZERO_ERROR; - ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus); - if (U_FAILURE(setTextStatus)) { - LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus); - return nullptr; - } - - utext_close(text); - } else { - UErrorCode setTextStatus = U_ZERO_ERROR; - ubrk_setText(reinterpret_cast<UBreakIterator*>(&iterator), string.characters16(), string.length(), &setTextStatus); - if (U_FAILURE(setTextStatus)) - return nullptr; - } - - return &iterator; -} - -static TextBreakIterator* setContextAwareTextForIterator(TextBreakIterator& iterator, StringView string, const UChar* priorContext, unsigned priorContextLength) -{ - if (string.is8Bit()) { - UTextWithBuffer textLocal; - textLocal.text = UTEXT_INITIALIZER; - textLocal.text.extraSize = sizeof(textLocal.buffer); - textLocal.text.pExtra = textLocal.buffer; - - UErrorCode openStatus = U_ZERO_ERROR; - UText* text = openLatin1ContextAwareUTextProvider(&textLocal, string.characters8(), string.length(), priorContext, priorContextLength, &openStatus); - if (U_FAILURE(openStatus)) { - LOG_ERROR("openLatin1ContextAwareUTextProvider failed with status %d", openStatus); - return nullptr; - } - - UErrorCode setTextStatus = U_ZERO_ERROR; - ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus); - if (U_FAILURE(setTextStatus)) { - LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus); - return nullptr; - } - - utext_close(text); - } else { - UText textLocal = UTEXT_INITIALIZER; - - UErrorCode openStatus = U_ZERO_ERROR; - UText* text = openUTF16ContextAwareUTextProvider(&textLocal, string.characters16(), string.length(), priorContext, priorContextLength, &openStatus); - if (U_FAILURE(openStatus)) { - LOG_ERROR("openUTF16ContextAwareUTextProvider failed with status %d", openStatus); - return 0; - } - - UErrorCode setTextStatus = U_ZERO_ERROR; - ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus); - if (U_FAILURE(setTextStatus)) { - LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus); - return nullptr; - } - - utext_close(text); - } - - return &iterator; -} - - -// Static iterators - -TextBreakIterator* wordBreakIterator(StringView string) -{ - static TextBreakIterator* staticWordBreakIterator = initializeIterator(UBRK_WORD); - if (!staticWordBreakIterator) - return nullptr; - - return setTextForIterator(*staticWordBreakIterator, string); -} - -TextBreakIterator* sentenceBreakIterator(StringView string) -{ - static TextBreakIterator* staticSentenceBreakIterator = initializeIterator(UBRK_SENTENCE); - if (!staticSentenceBreakIterator) - return nullptr; - - return setTextForIterator(*staticSentenceBreakIterator, string); -} - -TextBreakIterator* cursorMovementIterator(StringView string) -{ -#if !PLATFORM(IOS) - // This rule set is based on character-break iterator rules of ICU 4.0 - // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>. - // The major differences from the original ones are listed below: - // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier; - // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342); - // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and; - // * Added rules that prevent a cursor from moving before Japanese half-width katakara voiced marks. - // * Added rules for regional indicator symbols. - static const char* kRules = - "$CR = [\\p{Grapheme_Cluster_Break = CR}];" - "$LF = [\\p{Grapheme_Cluster_Break = LF}];" - "$Control = [\\p{Grapheme_Cluster_Break = Control}];" - "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks - "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];" - "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];" - "$L = [\\p{Grapheme_Cluster_Break = L}];" - "$V = [\\p{Grapheme_Cluster_Break = V}];" - "$T = [\\p{Grapheme_Cluster_Break = T}];" - "$LV = [\\p{Grapheme_Cluster_Break = LV}];" - "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];" - "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha - "$HinV = \\u094D;" // Devanagari Sign Virama - "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha - "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha - "$BenV = \\u09CD;" // Bengali Sign Virama - "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha - "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha - "$PanV = \\u0A4D;" // Gurmukhi Sign Virama - "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha - "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha - "$GujV = \\u0ACD;" // Gujarati Sign Virama - "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha - "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha - "$OriV = \\u0B4D;" // Oriya Sign Virama - "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha - "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha - "$TelV = \\u0C4D;" // Telugu Sign Virama - "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha - "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha - "$KanV = \\u0CCD;" // Kannada Sign Virama - "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter A,...,Ha - "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha - "$MalV = \\u0D4D;" // Malayalam Sign Virama - "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter A,...,Ha - "$RI = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators - "!!chain;" - "!!forward;" - "$CR $LF;" - "$L ($L | $V | $LV | $LVT);" - "($LV | $V) ($V | $T);" - "($LVT | $T) $T;" - "[^$Control $CR $LF] $Extend;" - "[^$Control $CR $LF] $SpacingMark;" - "$RI $RI / $RI;" - "$RI $RI;" - "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward) - "$Ben0 $BenV $Ben1;" // Bengali Virama (forward) - "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward) - "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward) - "$Ori0 $OriV $Ori1;" // Oriya Virama (forward) - "$Tel0 $TelV $Tel1;" // Telugu Virama (forward) - "$Kan0 $KanV $Kan1;" // Kannada Virama (forward) - "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward) - "!!reverse;" - "$LF $CR;" - "($L | $V | $LV | $LVT) $L;" - "($V | $T) ($LV | $V);" - "$T ($LVT | $T);" - "$Extend [^$Control $CR $LF];" - "$SpacingMark [^$Control $CR $LF];" - "$RI $RI / $RI $RI;" - "$RI $RI;" - "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward) - "$Ben1 $BenV $Ben0;" // Bengali Virama (backward) - "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward) - "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward) - "$Ori1 $OriV $Ori0;" // Gujarati Virama (backward) - "$Tel1 $TelV $Tel0;" // Telugu Virama (backward) - "$Kan1 $KanV $Kan0;" // Kannada Virama (backward) - "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward) - "!!safe_reverse;" - "!!safe_forward;"; - static TextBreakIterator* staticCursorMovementIterator = initializeIteratorWithRules(kRules); -#else // PLATFORM(IOS) - // Use the special Thai character break iterator for all locales - static TextBreakIterator* staticCursorMovementIterator = initializeIterator(UBRK_CHARACTER, "th"); -#endif // !PLATFORM(IOS) - - if (!staticCursorMovementIterator) - return nullptr; - - return setTextForIterator(*staticCursorMovementIterator, string); -} - -TextBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength) -{ - TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(LineBreakIteratorPool::sharedPool().take(locale)); - if (!iterator) - return nullptr; - - return setContextAwareTextForIterator(*iterator, string, priorContext, priorContextLength); -} - -void releaseLineBreakIterator(TextBreakIterator* iterator) -{ - ASSERT_ARG(iterator, iterator); - - LineBreakIteratorPool::sharedPool().put(reinterpret_cast<UBreakIterator*>(iterator)); -} - -static TextBreakIterator* nonSharedCharacterBreakIterator; - -static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue) -{ -#if ENABLE(COMPARE_AND_SWAP) - return WTF::weakCompareAndSwap(reinterpret_cast<void**>(&nonSharedCharacterBreakIterator), expected, newValue); -#else - DEFINE_STATIC_LOCAL(std::mutex, nonSharedCharacterBreakIteratorMutex, ()); - std::lock_guard<std::mutex> locker(nonSharedCharacterBreakIteratorMutex); - if (nonSharedCharacterBreakIterator != expected) - return false; - nonSharedCharacterBreakIterator = newValue; - return true; -#endif -} - -NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(StringView string) -{ - m_iterator = nonSharedCharacterBreakIterator; - - bool createdIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0); - if (!createdIterator) - m_iterator = initializeIterator(UBRK_CHARACTER); - if (!m_iterator) - return; - - m_iterator = setTextForIterator(*m_iterator, string); -} - -NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator() -{ - if (!compareAndSwapNonSharedCharacterBreakIterator(0, m_iterator)) - ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator)); -} - - -// Iterator implemenation. - -int textBreakFirst(TextBreakIterator* iterator) -{ - return ubrk_first(reinterpret_cast<UBreakIterator*>(iterator)); -} - -int textBreakLast(TextBreakIterator* iterator) -{ - return ubrk_last(reinterpret_cast<UBreakIterator*>(iterator)); -} - -int textBreakNext(TextBreakIterator* iterator) -{ - return ubrk_next(reinterpret_cast<UBreakIterator*>(iterator)); -} - -int textBreakPrevious(TextBreakIterator* iterator) -{ - return ubrk_previous(reinterpret_cast<UBreakIterator*>(iterator)); -} - -int textBreakPreceding(TextBreakIterator* iterator, int pos) -{ - return ubrk_preceding(reinterpret_cast<UBreakIterator*>(iterator), pos); -} - -int textBreakFollowing(TextBreakIterator* iterator, int pos) -{ - return ubrk_following(reinterpret_cast<UBreakIterator*>(iterator), pos); -} - -int textBreakCurrent(TextBreakIterator* iterator) -{ - return ubrk_current(reinterpret_cast<UBreakIterator*>(iterator)); -} - -bool isTextBreak(TextBreakIterator* iterator, int position) -{ - return ubrk_isBoundary(reinterpret_cast<UBreakIterator*>(iterator), position); -} - -bool isWordTextBreak(TextBreakIterator* iterator) -{ - int ruleStatus = ubrk_getRuleStatus(reinterpret_cast<UBreakIterator*>(iterator)); - return ruleStatus != UBRK_WORD_NONE; -} - -unsigned numGraphemeClusters(const String& s) -{ - unsigned stringLength = s.length(); - - if (!stringLength) - return 0; - - // The only Latin-1 Extended Grapheme Cluster is CR LF - if (s.is8Bit() && !s.contains('\r')) - return stringLength; - - NonSharedCharacterBreakIterator it(s); - if (!it) - return stringLength; - - unsigned num = 0; - while (textBreakNext(it) != TextBreakDone) - ++num; - return num; -} - -unsigned numCharactersInGraphemeClusters(const String& s, unsigned numGraphemeClusters) -{ - unsigned stringLength = s.length(); - - if (!stringLength) - return 0; - - // The only Latin-1 Extended Grapheme Cluster is CR LF - if (s.is8Bit() && !s.contains('\r')) - return std::min(stringLength, numGraphemeClusters); - - NonSharedCharacterBreakIterator it(s); - if (!it) - return std::min(stringLength, numGraphemeClusters); - - for (unsigned i = 0; i < numGraphemeClusters; ++i) { - if (textBreakNext(it) == TextBreakDone) - return stringLength; - } - return textBreakCurrent(it); -} - -} // namespace WebCore diff --git a/Source/WebCore/platform/text/TextBreakIterator.h b/Source/WebCore/platform/text/TextBreakIterator.h deleted file mode 100644 index eaf00ecb1..000000000 --- a/Source/WebCore/platform/text/TextBreakIterator.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (C) 2006 Lars Knoll <lars@trolltech.com> - * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef TextBreakIterator_h -#define TextBreakIterator_h - -#include <wtf/text/AtomicString.h> -#include <wtf/text/StringView.h> -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - -class TextBreakIterator; - -// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator. - -// This is similar to character break iterator in most cases, but is subject to -// platform UI conventions. One notable example where this can be different -// from character break iterator is Thai prepend characters, see bug 24342. -// Use this for insertion point and selection manipulations. -TextBreakIterator* cursorMovementIterator(StringView); - -TextBreakIterator* wordBreakIterator(StringView); -TextBreakIterator* sentenceBreakIterator(StringView); - -TextBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength); -void releaseLineBreakIterator(TextBreakIterator*); - -int textBreakFirst(TextBreakIterator*); -int textBreakLast(TextBreakIterator*); -int textBreakNext(TextBreakIterator*); -int textBreakPrevious(TextBreakIterator*); -int textBreakCurrent(TextBreakIterator*); -int textBreakPreceding(TextBreakIterator*, int); -int textBreakFollowing(TextBreakIterator*, int); -bool isTextBreak(TextBreakIterator*, int); -bool isWordTextBreak(TextBreakIterator*); - -const int TextBreakDone = -1; - -class LazyLineBreakIterator { -public: - LazyLineBreakIterator() - : m_iterator(0) - , m_cachedPriorContext(0) - , m_cachedPriorContextLength(0) - { - resetPriorContext(); - } - - LazyLineBreakIterator(String string, const AtomicString& locale = AtomicString()) - : m_string(string) - , m_locale(locale) - , m_iterator(0) - , m_cachedPriorContext(0) - , m_cachedPriorContextLength(0) - { - resetPriorContext(); - } - - ~LazyLineBreakIterator() - { - if (m_iterator) - releaseLineBreakIterator(m_iterator); - } - - String string() const { return m_string; } - - UChar lastCharacter() const - { - COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); - return m_priorContext[1]; - } - UChar secondToLastCharacter() const - { - COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); - return m_priorContext[0]; - } - void setPriorContext(UChar last, UChar secondToLast) - { - COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); - m_priorContext[0] = secondToLast; - m_priorContext[1] = last; - } - void updatePriorContext(UChar last) - { - COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); - m_priorContext[0] = m_priorContext[1]; - m_priorContext[1] = last; - } - void resetPriorContext() - { - COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); - m_priorContext[0] = 0; - m_priorContext[1] = 0; - } - unsigned priorContextLength() const - { - unsigned priorContextLength = 0; - COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); - if (m_priorContext[1]) { - ++priorContextLength; - if (m_priorContext[0]) - ++priorContextLength; - } - return priorContextLength; - } - // Obtain text break iterator, possibly previously cached, where this iterator is (or has been) - // initialized to use the previously stored string as the primary breaking context and using - // previously stored prior context if non-empty. - TextBreakIterator* get(unsigned priorContextLength) - { - ASSERT(priorContextLength <= priorContextCapacity); - const UChar* priorContext = priorContextLength ? &m_priorContext[priorContextCapacity - priorContextLength] : 0; - if (!m_iterator) { - m_iterator = acquireLineBreakIterator(m_string, m_locale, priorContext, priorContextLength); - m_cachedPriorContext = priorContext; - m_cachedPriorContextLength = priorContextLength; - } else if (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength) { - this->resetStringAndReleaseIterator(m_string, m_locale); - return this->get(priorContextLength); - } - return m_iterator; - } - void resetStringAndReleaseIterator(String string, const AtomicString& locale) - { - if (m_iterator) - releaseLineBreakIterator(m_iterator); - m_string = string; - m_locale = locale; - m_iterator = 0; - m_cachedPriorContext = 0; - m_cachedPriorContextLength = 0; - } - -private: - static const unsigned priorContextCapacity = 2; - String m_string; - AtomicString m_locale; - TextBreakIterator* m_iterator; - UChar m_priorContext[priorContextCapacity]; - const UChar* m_cachedPriorContext; - unsigned m_cachedPriorContextLength; -}; - -// Iterates over "extended grapheme clusters", as defined in UAX #29. -// Note that platform implementations may be less sophisticated - e.g. ICU prior to -// version 4.0 only supports "legacy grapheme clusters". -// Use this for general text processing, e.g. string truncation. - -class NonSharedCharacterBreakIterator { - WTF_MAKE_NONCOPYABLE(NonSharedCharacterBreakIterator); -public: - NonSharedCharacterBreakIterator(StringView); - ~NonSharedCharacterBreakIterator(); - - operator TextBreakIterator*() const { return m_iterator; } - -private: - TextBreakIterator* m_iterator; -}; - -// Counts the number of grapheme clusters. A surrogate pair or a sequence -// of a non-combining character and following combining characters is -// counted as 1 grapheme cluster. -unsigned numGraphemeClusters(const String&); -// Returns the number of characters which will be less than or equal to -// the specified grapheme cluster length. -unsigned numCharactersInGraphemeClusters(const String&, unsigned); - -} - -#endif diff --git a/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h b/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h deleted file mode 100644 index 68b7003c3..000000000 --- a/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2007 Apple Inc. All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - * - */ - -#ifndef TextBreakIteratorInternalICU_h -#define TextBreakIteratorInternalICU_h - -// FIXME: Now that this handles locales for ICU, not just for text breaking, -// this file and the various implementation files should be renamed. - -namespace WebCore { - - const char* currentSearchLocaleID(); - const char* currentTextBreakLocaleID(); - -} - -#endif diff --git a/Source/WebCore/platform/text/TextCheckerClient.h b/Source/WebCore/platform/text/TextCheckerClient.h index 054fb11ec..39e3211e8 100644 --- a/Source/WebCore/platform/text/TextCheckerClient.h +++ b/Source/WebCore/platform/text/TextCheckerClient.h @@ -25,40 +25,34 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef TextCheckerClient_h -#define TextCheckerClient_h +#pragma once #include "TextChecking.h" -#include <wtf/Forward.h> -#include <wtf/PassRefPtr.h> -#include <wtf/Vector.h> -#include <wtf/text/WTFString.h> - namespace WebCore { +class VisibleSelection; + class TextCheckerClient { public: - virtual ~TextCheckerClient() {} + virtual ~TextCheckerClient() { } virtual bool shouldEraseMarkersAfterChangeSelection(TextCheckingType) const = 0; virtual void ignoreWordInSpellDocument(const String&) = 0; virtual void learnWord(const String&) = 0; - virtual void checkSpellingOfString(const UChar*, int length, int* misspellingLocation, int* misspellingLength) = 0; + virtual void checkSpellingOfString(StringView, int* misspellingLocation, int* misspellingLength) = 0; virtual String getAutoCorrectSuggestionForMisspelledWord(const String& misspelledWord) = 0; - virtual void checkGrammarOfString(const UChar*, int length, Vector<GrammarDetail>&, int* badGrammarLocation, int* badGrammarLength) = 0; + virtual void checkGrammarOfString(StringView, Vector<GrammarDetail>&, int* badGrammarLocation, int* badGrammarLength) = 0; #if USE(UNIFIED_TEXT_CHECKING) - virtual Vector<TextCheckingResult> checkTextOfParagraph(StringView, TextCheckingTypeMask checkingTypes) = 0; + virtual Vector<TextCheckingResult> checkTextOfParagraph(StringView, TextCheckingTypeMask checkingTypes, const VisibleSelection& currentSelection) = 0; #endif // For spellcheckers that support multiple languages, it's often important to be able to identify the language in order to // provide more accurate correction suggestions. Caller can pass in more text in "context" to aid such spellcheckers on language // identification. Noramlly it's the text surrounding the "word" for which we are getting correction suggestions. - virtual void getGuessesForWord(const String& word, const String& context, Vector<String>& guesses) = 0; - virtual void requestCheckingOfString(PassRefPtr<TextCheckingRequest>) = 0; + virtual void getGuessesForWord(const String& word, const String& context, const VisibleSelection& currentSelection, Vector<String>& guesses) = 0; + virtual void requestCheckingOfString(TextCheckingRequest&, const VisibleSelection& currentSelection) = 0; }; } - -#endif // TextCheckerClient_h diff --git a/Source/WebCore/platform/text/TextCodec.cpp b/Source/WebCore/platform/text/TextCodec.cpp index b0a5720bc..47e3b5ac0 100644 --- a/Source/WebCore/platform/text/TextCodec.cpp +++ b/Source/WebCore/platform/text/TextCodec.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2004, 2006 Apple Inc. All rights reserved. * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> * * Redistribution and use in source and binary forms, with or without @@ -11,10 +11,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/Source/WebCore/platform/text/TextCodec.h b/Source/WebCore/platform/text/TextCodec.h index e4c9571c6..f30cd1261 100644 --- a/Source/WebCore/platform/text/TextCodec.h +++ b/Source/WebCore/platform/text/TextCodec.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2004, 2006 Apple Inc. All rights reserved. * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com> * * Redistribution and use in source and binary forms, with or without @@ -11,10 +11,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -30,10 +30,7 @@ #include <memory> #include <wtf/Forward.h> #include <wtf/Noncopyable.h> -#include <wtf/PassOwnPtr.h> -#include <wtf/Vector.h> #include <wtf/text/WTFString.h> -#include <wtf/unicode/Unicode.h> namespace WebCore { class TextEncoding; @@ -79,7 +76,7 @@ namespace WebCore { typedef void (*EncodingNameRegistrar)(const char* alias, const char* name); - typedef PassOwnPtr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData); + typedef std::unique_ptr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData); typedef void (*TextCodecRegistrar)(const char* name, NewTextCodecFunction, const void* additionalData); } // namespace WebCore diff --git a/Source/WebCore/platform/text/TextCodecASCIIFastPath.h b/Source/WebCore/platform/text/TextCodecASCIIFastPath.h index 7d57677fc..08f4edfc4 100644 --- a/Source/WebCore/platform/text/TextCodecASCIIFastPath.h +++ b/Source/WebCore/platform/text/TextCodecASCIIFastPath.h @@ -11,10 +11,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/Source/WebCore/platform/text/TextCodecICU.cpp b/Source/WebCore/platform/text/TextCodecICU.cpp index f6d16cbf1..40212fe3d 100644 --- a/Source/WebCore/platform/text/TextCodecICU.cpp +++ b/Source/WebCore/platform/text/TextCodecICU.cpp @@ -11,10 +11,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -66,132 +66,121 @@ static UConverter*& cachedConverterICU() return threadGlobalData().cachedConverterICU().converter; } -PassOwnPtr<TextCodec> TextCodecICU::create(const TextEncoding& encoding, const void* additionalData) +std::unique_ptr<TextCodec> TextCodecICU::create(const TextEncoding& encoding, const void* additionalData) { // Name strings are persistently kept in TextEncodingRegistry maps, so they are never deleted. - return adoptPtr(new TextCodecICU(encoding.name(), static_cast<const char*>(additionalData))); + return std::make_unique<TextCodecICU>(encoding.name(), static_cast<const char*>(additionalData)); } +#define DECLARE_ALIASES(encoding, ...) \ + static const char* const encoding##_aliases[] { __VA_ARGS__ } + +// From https://encoding.spec.whatwg.org. +DECLARE_ALIASES(IBM866, "866", "cp866", "csibm866"); +DECLARE_ALIASES(ISO_8859_2, "csisolatin2", "iso-ir-101", "iso8859-2", "iso88592", "iso_8859-2", "iso_8859-2:1987", "l2", "latin2"); +DECLARE_ALIASES(ISO_8859_3, "csisolatin3", "iso-ir-109", "iso8859-3", "iso88593", "iso_8859-3", "iso_8859-3:1988", "l3", "latin3"); +DECLARE_ALIASES(ISO_8859_4, "csisolatin4", "iso-ir-110", "iso8859-4", "iso88594", "iso_8859-4", "iso_8859-4:1988", "l4", "latin4"); +DECLARE_ALIASES(ISO_8859_5, "csisolatincyrillic", "cyrillic", "iso-ir-144", "iso8859-5", "iso88595", "iso_8859-5", "iso_8859-5:1988"); +DECLARE_ALIASES(ISO_8859_6, "arabic", "asmo-708", "csiso88596e", "csiso88596i", "csisolatinarabic", "ecma-114", "iso-8859-6-e", "iso-8859-6-i", "iso-ir-127", "iso8859-6", "iso88596", "iso_8859-6", "iso_8859-6:1987"); +DECLARE_ALIASES(ISO_8859_7, "csisolatingreek", "ecma-118", "elot_928", "greek", "greek8", "iso-ir-126", "iso8859-7", "iso88597", "iso_8859-7", "iso_8859-7:1987", "sun_eu_greek"); +DECLARE_ALIASES(ISO_8859_8, "csiso88598e", "csisolatinhebrew", "hebrew", "iso-8859-8-e", "iso-ir-138", "iso8859-8", "iso88598", "iso_8859-8", "iso_8859-8:1988", "visual"); +DECLARE_ALIASES(ISO_8859_8_I, "csiso88598i", "logical"); +DECLARE_ALIASES(ISO_8859_10, "csisolatin6", "iso-ir-157", "iso8859-10", "iso885910", "l6", "latin6"); +DECLARE_ALIASES(ISO_8859_13, "iso8859-13", "iso885913"); +DECLARE_ALIASES(ISO_8859_14, "iso8859-14", "iso885914"); +DECLARE_ALIASES(ISO_8859_15, "csisolatin9", "iso8859-15", "iso885915", "iso_8859-15", "l9"); +DECLARE_ALIASES(KOI8_R, "cskoi8r", "koi", "koi8", "koi8_r"); +DECLARE_ALIASES(KOI8_U, "koi8-ru"); +DECLARE_ALIASES(macintosh, "csmacintosh", "mac", "x-mac-roman", "macroman", "x-macroman"); +DECLARE_ALIASES(windows_874, "dos-874", "iso-8859-11", "iso8859-11", "iso885911", "tis-620"); +DECLARE_ALIASES(windows_949, "euc-kr", "cseuckr", "csksc56011987", "iso-ir-149", "korean", "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601", "ms949", "x-KSC5601", "x-windows-949", "x-uhc"); +DECLARE_ALIASES(windows_1250, "cp1250", "x-cp1250", "winlatin2"); +DECLARE_ALIASES(windows_1251, "cp1251", "wincyrillic", "x-cp1251"); +DECLARE_ALIASES(windows_1253, "wingreek", "cp1253", "x-cp1253"); +DECLARE_ALIASES(windows_1254, "winturkish", "cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148", "iso8859-9", "iso88599", "iso_8859-9", "iso_8859-9:1989", "l5", "latin5", "x-cp1254"); +DECLARE_ALIASES(windows_1255, "winhebrew", "cp1255", "x-cp1255"); +DECLARE_ALIASES(windows_1256, "winarabic", "cp1256", "x-cp1256"); +DECLARE_ALIASES(windows_1257, "winbaltic", "cp1257", "x-cp1257"); +DECLARE_ALIASES(windows_1258, "winvietnamese", "cp1258", "x-cp1258"); +DECLARE_ALIASES(x_mac_cyrillic, "maccyrillic", "x-mac-ukrainian", "windows-10007", "mac-cyrillic", "maccy", "x-MacCyrillic", "x-MacUkraine"); +DECLARE_ALIASES(GBK, "cn-gb", "csgb231280", "x-euc-cn", "chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "iso-ir-58", "x-gbk", "euc-cn", "cp936", "ms936", "gb2312-1980", "windows-936", "windows-936-2000"); +DECLARE_ALIASES(gb18030, "ibm-1392", "windows-54936"); +DECLARE_ALIASES(Big5, "cn-big5", "x-x-big5", "csbig5", "windows-950", "windows-950-2000", "ms950", "x-windows-950", "x-big5"); +DECLARE_ALIASES(EUC_JP, "x-euc", "cseucpkdfmtjapanese", "x-euc-jp"); +DECLARE_ALIASES(ISO_2022_JP, "jis7", "csiso2022jp"); +DECLARE_ALIASES(Shift_JIS, "shift-jis", "csshiftjis", "ms932", "ms_kanji", "sjis", "windows-31j", "x-sjis"); +// Encodings below are not in the standard. +DECLARE_ALIASES(UTF_32, "ISO-10646-UCS-4", "ibm-1236", "ibm-1237", "csUCS4", "ucs-4"); +DECLARE_ALIASES(UTF_32LE, "UTF32_LittleEndian", "ibm-1234", "ibm-1235"); +DECLARE_ALIASES(UTF_32BE, "UTF32_BigEndian", "ibm-1232", "ibm-1233", "ibm-9424"); +DECLARE_ALIASES(x_mac_greek, "windows-10006", "macgr", "x-MacGreek"); +DECLARE_ALIASES(x_mac_centraleurroman, "windows-10029", "x-mac-ce", "macce", "maccentraleurope", "x-MacCentralEurope"); +DECLARE_ALIASES(x_mac_turkish, "windows-10081", "mactr", "x-MacTurkish"); +DECLARE_ALIASES(Big5_HKSCS, "big5hk", "HKSCS-BIG5", "ibm-1375", "ibm-1375_P100-2008"); + +#define DECLARE_ENCODING_NAME(encoding, alias_array) \ + { encoding, WTF_ARRAY_LENGTH(alias_array##_aliases), alias_array##_aliases } + +#define DECLARE_ENCODING_NAME_NO_ALIASES(encoding) \ + { encoding, 0, nullptr } + +static const struct EncodingName { + const char* name; + unsigned aliasCount; + const char* const * aliases; +} encodingNames[] = { + DECLARE_ENCODING_NAME("IBM866", IBM866), + DECLARE_ENCODING_NAME("ISO-8859-2", ISO_8859_2), + DECLARE_ENCODING_NAME("ISO-8859-3", ISO_8859_3), + DECLARE_ENCODING_NAME("ISO-8859-4", ISO_8859_4), + DECLARE_ENCODING_NAME("ISO-8859-5", ISO_8859_5), + DECLARE_ENCODING_NAME("ISO-8859-6", ISO_8859_6), + DECLARE_ENCODING_NAME("ISO-8859-7", ISO_8859_7), + DECLARE_ENCODING_NAME("ISO-8859-8", ISO_8859_8), + DECLARE_ENCODING_NAME("ISO-8859-8-I", ISO_8859_8_I), + DECLARE_ENCODING_NAME("ISO-8859-10", ISO_8859_10), + DECLARE_ENCODING_NAME("ISO-8859-13", ISO_8859_13), + DECLARE_ENCODING_NAME("ISO-8859-14", ISO_8859_14), + DECLARE_ENCODING_NAME("ISO-8859-15", ISO_8859_15), + DECLARE_ENCODING_NAME_NO_ALIASES("ISO-8859-16"), + DECLARE_ENCODING_NAME("KOI8-R", KOI8_R), + DECLARE_ENCODING_NAME("KOI8-U", KOI8_U), + DECLARE_ENCODING_NAME("macintosh", macintosh), + DECLARE_ENCODING_NAME("windows-874", windows_874), + DECLARE_ENCODING_NAME("windows-949", windows_949), + DECLARE_ENCODING_NAME("windows-1250", windows_1250), + DECLARE_ENCODING_NAME("windows-1251", windows_1251), + DECLARE_ENCODING_NAME("windows-1253", windows_1253), + DECLARE_ENCODING_NAME("windows-1254", windows_1254), + DECLARE_ENCODING_NAME("windows-1255", windows_1255), + DECLARE_ENCODING_NAME("windows-1256", windows_1256), + DECLARE_ENCODING_NAME("windows-1257", windows_1257), + DECLARE_ENCODING_NAME("windows-1258", windows_1258), + DECLARE_ENCODING_NAME("x-mac-cyrillic", x_mac_cyrillic), + DECLARE_ENCODING_NAME("GBK", GBK), + DECLARE_ENCODING_NAME("gb18030", gb18030), + DECLARE_ENCODING_NAME("Big5", Big5), + DECLARE_ENCODING_NAME("EUC-JP", EUC_JP), + DECLARE_ENCODING_NAME("ISO-2022-JP", ISO_2022_JP), + DECLARE_ENCODING_NAME("Shift_JIS", Shift_JIS), + // Encodings below are not in the standard. + DECLARE_ENCODING_NAME("UTF-32", UTF_32), + DECLARE_ENCODING_NAME("UTF-32LE", UTF_32LE), + DECLARE_ENCODING_NAME("UTF-32BE", UTF_32BE), + DECLARE_ENCODING_NAME("x-mac-greek", x_mac_greek), + DECLARE_ENCODING_NAME("x-mac-centraleurroman", x_mac_centraleurroman), + DECLARE_ENCODING_NAME("x-mac-turkish", x_mac_turkish), + DECLARE_ENCODING_NAME("Big5-HKSCS", Big5_HKSCS), +}; + void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar) { - // We register Hebrew with logical ordering using a separate name. - // Otherwise, this would share the same canonical name as the - // visual ordering case, and then TextEncoding could not tell them - // apart; ICU treats these names as synonyms. - registrar("ISO-8859-8-I", "ISO-8859-8-I"); - - int32_t numConverters = ucnv_countAvailable(); - for (int32_t i = 0; i < numConverters; ++i) { - const char* canonicalConverterName = ucnv_getAvailableName(i); - UErrorCode error = U_ZERO_ERROR; - // Try MIME before trying IANA to pick up commonly used names like - // 'EUC-JP' instead of horrendously long names like - // 'Extended_UNIX_Code_Packed_Format_for_Japanese'. - const char* webStandardName = ucnv_getStandardName(canonicalConverterName, "MIME", &error); - if (!U_SUCCESS(error) || !webStandardName) { - error = U_ZERO_ERROR; - // Try IANA to pick up 'windows-12xx' and other names - // which are not preferred MIME names but are widely used. - webStandardName = ucnv_getStandardName(canonicalConverterName, "IANA", &error); - if (!U_SUCCESS(error) || !webStandardName) - continue; - } - - // Any standard encoding overrides should match checks in registerCodecs() below. - - // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers. - // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding - // for encoding GB_2312-80 and several others. So, we need to override this behavior, too. - if (strcmp(webStandardName, "GB2312") == 0 || strcmp(webStandardName, "GB_2312-80") == 0) - webStandardName = "GBK"; - // Similarly, EUC-KR encodings all map to an extended version. - else if (strcmp(webStandardName, "KSC_5601") == 0 || strcmp(webStandardName, "EUC-KR") == 0 || strcmp(webStandardName, "cp1363") == 0) - webStandardName = "windows-949"; - // And so on. - // FIXME: strcasecmp is locale sensitive, we should not be using it. - else if (strcasecmp(webStandardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6. - webStandardName = "windows-1254"; - else if (strcmp(webStandardName, "TIS-620") == 0) - webStandardName = "windows-874"; - - registrar(webStandardName, webStandardName); - - uint16_t numAliases = ucnv_countAliases(canonicalConverterName, &error); - ASSERT(U_SUCCESS(error)); - if (U_SUCCESS(error)) - for (uint16_t j = 0; j < numAliases; ++j) { - error = U_ZERO_ERROR; - const char* alias = ucnv_getAlias(canonicalConverterName, j, &error); - ASSERT(U_SUCCESS(error)); - if (U_SUCCESS(error) && alias != webStandardName) - registrar(alias, webStandardName); - } + for (auto& encodingName : encodingNames) { + registrar(encodingName.name, encodingName.name); + for (size_t i = 0; i < encodingName.aliasCount; ++i) + registrar(encodingName.aliases[i], encodingName.name); } - // Additional aliases. - // macroman is present in modern versions of ICU, but not in ICU 3.2 (shipped with Mac OS X 10.4). - // FIXME: Do any ports still use such old versions? - registrar("macroman", "macintosh"); - - // Additional aliases that historically were present in the encoding - // table in WebKit on Macintosh that don't seem to be present in ICU. - // Perhaps we can prove these are not used on the web and remove them. - // Or perhaps we can get them added to ICU. - registrar("x-mac-roman", "macintosh"); - registrar("maccyrillic", "x-mac-cyrillic"); - registrar("x-mac-ukrainian", "x-mac-cyrillic"); - registrar("cn-big5", "Big5"); - registrar("x-x-big5", "Big5"); - registrar("cn-gb", "GBK"); - registrar("csgb231280", "GBK"); - registrar("x-euc-cn", "GBK"); - registrar("x-gbk", "GBK"); - registrar("csISO88598I", "ISO-8859-8-I"); - registrar("koi", "KOI8-R"); - registrar("logical", "ISO-8859-8-I"); - registrar("visual", "ISO-8859-8"); - registrar("winarabic", "windows-1256"); - registrar("winbaltic", "windows-1257"); - registrar("wincyrillic", "windows-1251"); - registrar("iso-8859-11", "windows-874"); - registrar("iso8859-11", "windows-874"); - registrar("dos-874", "windows-874"); - registrar("wingreek", "windows-1253"); - registrar("winhebrew", "windows-1255"); - registrar("winlatin2", "windows-1250"); - registrar("winturkish", "windows-1254"); - registrar("winvietnamese", "windows-1258"); - registrar("x-cp1250", "windows-1250"); - registrar("x-cp1251", "windows-1251"); - registrar("x-euc", "EUC-JP"); - registrar("x-windows-949", "windows-949"); - registrar("KSC5601", "windows-949"); - registrar("x-uhc", "windows-949"); - registrar("shift-jis", "Shift_JIS"); - - // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names. - // They are not present in ICU 3.2. - registrar("dos-720", "cp864"); - registrar("jis7", "ISO-2022-JP"); - - // Alternative spelling of ISO encoding names. - registrar("ISO8859-1", "ISO-8859-1"); - registrar("ISO8859-2", "ISO-8859-2"); - registrar("ISO8859-3", "ISO-8859-3"); - registrar("ISO8859-4", "ISO-8859-4"); - registrar("ISO8859-5", "ISO-8859-5"); - registrar("ISO8859-6", "ISO-8859-6"); - registrar("ISO8859-7", "ISO-8859-7"); - registrar("ISO8859-8", "ISO-8859-8"); - registrar("ISO8859-8-I", "ISO-8859-8-I"); - registrar("ISO8859-9", "windows-1254"); - registrar("ISO8859-10", "ISO-8859-10"); - registrar("ISO8859-13", "ISO-8859-13"); - registrar("ISO8859-14", "ISO-8859-14"); - registrar("ISO8859-15", "ISO-8859-15"); - // Not registering ISO8859-16, because Firefox (as of version 3.6.6) doesn't know this particular alias, - // and because older versions of ICU don't support ISO-8859-16 encoding at all. - #if PLATFORM(IOS) // A.B. adding a few more Mac encodings missing 'cause we don't have TextCodecMac right now // luckily, they are supported in ICU, just need to alias them. @@ -218,40 +207,40 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar) void TextCodecICU::registerCodecs(TextCodecRegistrar registrar) { - // See comment above in registerEncodingNames. - UErrorCode error = U_ZERO_ERROR; - const char* canonicalConverterName = ucnv_getCanonicalName("ISO-8859-8-I", "IANA", &error); - ASSERT(U_SUCCESS(error)); - registrar("ISO-8859-8-I", create, canonicalConverterName); - - int32_t numConverters = ucnv_countAvailable(); - for (int32_t i = 0; i < numConverters; ++i) { - canonicalConverterName = ucnv_getAvailableName(i); - error = U_ZERO_ERROR; - const char* webStandardName = ucnv_getStandardName(canonicalConverterName, "MIME", &error); - if (!U_SUCCESS(error) || !webStandardName) { - error = U_ZERO_ERROR; - webStandardName = ucnv_getStandardName(canonicalConverterName, "IANA", &error); - if (!U_SUCCESS(error) || !webStandardName) - continue; + for (auto& encodingName : encodingNames) { + // These encodings currently don't have standard names, so we need to register encoders manually. + // http://demo.icu-project.org/icu-bin/convexp + if (!strcmp(encodingName.name, "windows-874")) { + registrar(encodingName.name, create, "windows-874-2000"); + continue; } - - // Don't register codecs for overridden encodings. - if (strcmp(webStandardName, "GB2312") == 0 || strcmp(webStandardName, "GB_2312-80") == 0 - || strcmp(webStandardName, "KSC_5601") == 0 || strcmp(webStandardName, "EUC-KR") == 0 - || strcmp(webStandardName, "cp1363") == 0 - || strcasecmp(webStandardName, "iso-8859-9") == 0 - || strcmp(webStandardName, "TIS-620") == 0) + if (!strcmp(encodingName.name, "windows-949")) { + registrar(encodingName.name, create, "windows-949-2000"); continue; + } + if (!strcmp(encodingName.name, "x-mac-cyrillic")) { + registrar(encodingName.name, create, "macos-7_3-10.2"); + continue; + } + if (!strcmp(encodingName.name, "x-mac-greek")) { + registrar(encodingName.name, create, "macos-6_2-10.4"); + continue; + } + if (!strcmp(encodingName.name, "x-mac-centraleurroman")) { + registrar(encodingName.name, create, "macos-29-10.2"); + continue; + } + if (!strcmp(encodingName.name, "x-mac-turkish")) { + registrar(encodingName.name, create, "macos-35-10.2"); + continue; + } - registrar(webStandardName, create, fastStrDup(canonicalConverterName)); + UErrorCode error = U_ZERO_ERROR; + const char* canonicalConverterName = ucnv_getCanonicalName(encodingName.name, "IANA", &error); + ASSERT(U_SUCCESS(error)); + registrar(encodingName.name, create, canonicalConverterName); } - // These encodings currently don't have standard names, so we need to register encoders manually. - // FIXME: Is there a good way to determine the most up to date variant programmatically? - registrar("windows-874", create, "windows-874-2000"); - registrar("windows-949", create, "windows-949-2000"); - #if PLATFORM(IOS) // See comment above in registerEncodingNames(). int32_t i = 0; @@ -491,18 +480,26 @@ CString TextCodecICU::encode(const UChar* characters, size_t length, Unencodable // FIXME: We should see if there is "force ASCII range" mode in ICU; // until then, we change the backslash into a yen sign. // Encoding will change the yen sign back into a backslash. - String copy; - const UChar* source; - const UChar* sourceLimit; + Vector<UChar> copy; + const UChar* source = characters; if (shouldShowBackslashAsCurrencySymbolIn(m_encodingName)) { - copy.append(characters, length); - copy.replace('\\', 0xA5); - source = copy.deprecatedCharacters(); - sourceLimit = source + copy.length(); - } else { - source = characters; - sourceLimit = source + length; + for (size_t i = 0; i < length; ++i) { + if (characters[i] == '\\') { + copy.reserveInitialCapacity(length); + for (size_t j = 0; j < i; ++j) + copy.uncheckedAppend(characters[j]); + for (size_t j = i; j < length; ++j) { + UChar character = characters[j]; + if (character == '\\') + character = yenSign; + copy.uncheckedAppend(character); + } + source = copy.data(); + break; + } + } } + const UChar* sourceLimit = source + length; UErrorCode err = U_ZERO_ERROR; diff --git a/Source/WebCore/platform/text/TextCodecICU.h b/Source/WebCore/platform/text/TextCodecICU.h index 3803df1a7..0c1798c9b 100644 --- a/Source/WebCore/platform/text/TextCodecICU.h +++ b/Source/WebCore/platform/text/TextCodecICU.h @@ -11,10 +11,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -36,17 +36,18 @@ namespace WebCore { class TextCodecICU : public TextCodec { public: + static std::unique_ptr<TextCodec> create(const TextEncoding&, const void* additionalData); + + TextCodecICU(const char* encoding, const char* canonicalConverterName); + static void registerEncodingNames(EncodingNameRegistrar); static void registerCodecs(TextCodecRegistrar); virtual ~TextCodecICU(); private: - TextCodecICU(const char* encoding, const char* canonicalConverterName); - static PassOwnPtr<TextCodec> create(const TextEncoding&, const void* additionalData); - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); + String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override; + CString encode(const UChar*, size_t length, UnencodableHandling) override; void createICUConverter() const; void releaseICUConverter() const; diff --git a/Source/WebCore/platform/text/TextCodecLatin1.cpp b/Source/WebCore/platform/text/TextCodecLatin1.cpp index da86f8ff4..f2e8afa72 100644 --- a/Source/WebCore/platform/text/TextCodecLatin1.cpp +++ b/Source/WebCore/platform/text/TextCodecLatin1.cpp @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -27,7 +27,6 @@ #include "TextCodecLatin1.h" #include "TextCodecASCIIFastPath.h" -#include <wtf/PassOwnPtr.h> #include <wtf/text/CString.h> #include <wtf/text/StringBuffer.h> #include <wtf/text/WTFString.h> @@ -73,49 +72,34 @@ static const UChar table[256] = { void TextCodecLatin1::registerEncodingNames(EncodingNameRegistrar registrar) { + // From https://encoding.spec.whatwg.org. registrar("windows-1252", "windows-1252"); - registrar("ISO-8859-1", "ISO-8859-1"); - registrar("US-ASCII", "US-ASCII"); - - registrar("WinLatin1", "windows-1252"); - registrar("ibm-1252", "windows-1252"); - registrar("ibm-1252_P100-2000", "windows-1252"); - - registrar("CP819", "ISO-8859-1"); - registrar("IBM819", "ISO-8859-1"); - registrar("csISOLatin1", "ISO-8859-1"); - registrar("iso-ir-100", "ISO-8859-1"); - registrar("iso_8859-1:1987", "ISO-8859-1"); - registrar("l1", "ISO-8859-1"); - registrar("latin1", "ISO-8859-1"); - - registrar("ANSI_X3.4-1968", "US-ASCII"); - registrar("ANSI_X3.4-1986", "US-ASCII"); - registrar("ASCII", "US-ASCII"); - registrar("IBM367", "US-ASCII"); - registrar("ISO646-US", "US-ASCII"); - registrar("ISO_646.irv:1991", "US-ASCII"); - registrar("cp367", "US-ASCII"); - registrar("csASCII", "US-ASCII"); - registrar("ibm-367_P100-1995", "US-ASCII"); - registrar("iso-ir-6", "US-ASCII"); - registrar("iso-ir-6-us", "US-ASCII"); - registrar("us", "US-ASCII"); - registrar("x-ansi", "US-ASCII"); + registrar("ansi_x3.4-1968", "windows-1252"); + registrar("ascii", "windows-1252"); + registrar("cp1252", "windows-1252"); + registrar("cp819", "windows-1252"); + registrar("csisolatin1", "windows-1252"); + registrar("ibm819", "windows-1252"); + registrar("iso-8859-1", "windows-1252"); + registrar("iso-ir-100", "windows-1252"); + registrar("iso8859-1", "windows-1252"); + registrar("iso88591", "windows-1252"); + registrar("iso_8859-1", "windows-1252"); + registrar("iso_8859-1:1987", "windows-1252"); + registrar("l1", "windows-1252"); + registrar("latin1", "windows-1252"); + registrar("us-ascii", "windows-1252"); + registrar("x-cp1252", "windows-1252"); } -static PassOwnPtr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*) +static std::unique_ptr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*) { - return adoptPtr(new TextCodecLatin1); + return std::make_unique<TextCodecLatin1>(); } void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar) { registrar("windows-1252", newStreamingTextDecoderWindowsLatin1, 0); - - // ASCII and Latin-1 both decode as Windows Latin-1 although they retain unique identities. - registrar("ISO-8859-1", newStreamingTextDecoderWindowsLatin1, 0); - registrar("US-ASCII", newStreamingTextDecoderWindowsLatin1, 0); } String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&) @@ -147,6 +131,10 @@ String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, boo if (source == end) break; + + // *source may not be ASCII anymore if source moves inside the loop of the fast code path + if (!isASCII(*source)) + goto useLookupTable; } *destination = *source; } else { @@ -198,6 +186,10 @@ upConvertTo16Bit: if (source == end) break; + + // *source may not be ASCII anymore if source moves inside the loop of the fast code path + if (!isASCII(*source)) + goto useLookupTable16; } *destination16 = *source; } else { diff --git a/Source/WebCore/platform/text/TextCodecLatin1.h b/Source/WebCore/platform/text/TextCodecLatin1.h index f035d01da..9d08aa3f5 100644 --- a/Source/WebCore/platform/text/TextCodecLatin1.h +++ b/Source/WebCore/platform/text/TextCodecLatin1.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2004, 2006 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -35,8 +35,8 @@ namespace WebCore { static void registerEncodingNames(EncodingNameRegistrar); static void registerCodecs(TextCodecRegistrar); - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); + String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override; + CString encode(const UChar*, size_t length, UnencodableHandling) override; }; } // namespace WebCore diff --git a/Source/WebCore/platform/text/icu/UTextProviderLatin1.h b/Source/WebCore/platform/text/TextCodecReplacement.cpp index 51347c21b..fc39b18b8 100644 --- a/Source/WebCore/platform/text/icu/UTextProviderLatin1.h +++ b/Source/WebCore/platform/text/TextCodecReplacement.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Apple Inc. All rights reserved. + * Copyright (C) 2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,24 +23,49 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef UTextProviderLatin1_h -#define UTextProviderLatin1_h +#include "config.h" +#include "TextCodecReplacement.h" -#include <unicode/utext.h> -#include <wtf/unicode/Unicode.h> +#include <wtf/unicode/CharacterNames.h> namespace WebCore { -const int UTextWithBufferInlineCapacity = 16; +std::unique_ptr<TextCodec> TextCodecReplacement::create(const TextEncoding&, const void*) +{ + return std::make_unique<TextCodecReplacement>(); +} -struct UTextWithBuffer { - UText text; - UChar buffer[UTextWithBufferInlineCapacity]; -}; +TextCodecReplacement::TextCodecReplacement() +{ +} -UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status); -UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status); +void TextCodecReplacement::registerEncodingNames(EncodingNameRegistrar registrar) +{ + // The 'replacement' itself is not a valid label. It is the name of + // a group of legacy encoding labels. Hence, it cannot be used directly. + registrar("replacement", "replacement"); -} // namespace WebCore + // The labels + registrar("csiso2022kr", "replacement"); + registrar("hz-gb-2312", "replacement"); + registrar("iso-2022-cn", "replacement"); + registrar("iso-2022-cn-ext", "replacement"); + registrar("iso-2022-kr", "replacement"); +} + +void TextCodecReplacement::registerCodecs(TextCodecRegistrar registrar) +{ + registrar("replacement", create, 0); +} + +String TextCodecReplacement::decode(const char*, size_t, bool, bool, bool& sawError) +{ + sawError = true; + if (m_sentEOF) + return emptyString(); -#endif // UTextProviderLatin1_h + m_sentEOF = true; + return String(&replacementCharacter, 1); +} + +} // namespace WebCore diff --git a/Source/WebCore/platform/text/icu/UTextProviderUTF16.h b/Source/WebCore/platform/text/TextCodecReplacement.h index 564a37a81..7a67d516d 100644 --- a/Source/WebCore/platform/text/icu/UTextProviderUTF16.h +++ b/Source/WebCore/platform/text/TextCodecReplacement.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014 Apple Inc. All rights reserved. + * Copyright (C) 2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -23,16 +23,29 @@ * THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef UTextProviderUTF16_h -#define UTextProviderUTF16_h +#ifndef TextCodecReplacement_h +#define TextCodecReplacement_h -#include <unicode/utext.h> -#include <wtf/unicode/Unicode.h> +#include "TextCodecUTF8.h" namespace WebCore { -UText* openUTF16ContextAwareUTextProvider(UText*, const UChar*, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode*); +class TextCodecReplacement : public TextCodecUTF8 { +public: + static std::unique_ptr<TextCodec> create(const TextEncoding&, const void*); + + TextCodecReplacement(); + + static void registerEncodingNames(EncodingNameRegistrar); + static void registerCodecs(TextCodecRegistrar); + +private: + String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override; + + bool m_sentEOF { false }; + +}; } // namespace WebCore -#endif // UTextProviderUTF16_h +#endif /* TextCodecReplacement_h */ diff --git a/Source/WebCore/platform/text/TextCodecUTF16.cpp b/Source/WebCore/platform/text/TextCodecUTF16.cpp index 673f73813..0e39de128 100644 --- a/Source/WebCore/platform/text/TextCodecUTF16.cpp +++ b/Source/WebCore/platform/text/TextCodecUTF16.cpp @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -26,7 +26,6 @@ #include "config.h" #include "TextCodecUTF16.h" -#include <wtf/PassOwnPtr.h> #include <wtf/text/CString.h> #include <wtf/text/StringBuffer.h> #include <wtf/text/WTFString.h> @@ -48,14 +47,14 @@ void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar) registrar("unicodeFFFE", "UTF-16BE"); } -static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*) +static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*) { - return adoptPtr(new TextCodecUTF16(true)); + return std::make_unique<TextCodecUTF16>(true); } -static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*) +static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*) { - return adoptPtr(new TextCodecUTF16(false)); + return std::make_unique<TextCodecUTF16>(false); } void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar) @@ -112,7 +111,7 @@ String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool buffer.shrink(q - buffer.characters()); - return String::adopt(buffer); + return String::adopt(WTFMove(buffer)); } CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling) diff --git a/Source/WebCore/platform/text/TextCodecUTF16.h b/Source/WebCore/platform/text/TextCodecUTF16.h index 8ce947611..28760cdee 100644 --- a/Source/WebCore/platform/text/TextCodecUTF16.h +++ b/Source/WebCore/platform/text/TextCodecUTF16.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved. + * Copyright (C) 2004, 2006 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -37,8 +37,8 @@ namespace WebCore { TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { } - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); + String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override; + CString encode(const UChar*, size_t length, UnencodableHandling) override; private: bool m_littleEndian; diff --git a/Source/WebCore/platform/text/TextCodecUTF8.cpp b/Source/WebCore/platform/text/TextCodecUTF8.cpp index 6abf43174..508edee9a 100644 --- a/Source/WebCore/platform/text/TextCodecUTF8.cpp +++ b/Source/WebCore/platform/text/TextCodecUTF8.cpp @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -38,14 +38,17 @@ namespace WebCore { const int nonCharacter = -1; -PassOwnPtr<TextCodec> TextCodecUTF8::create(const TextEncoding&, const void*) +std::unique_ptr<TextCodec> TextCodecUTF8::create(const TextEncoding&, const void*) { - return adoptPtr(new TextCodecUTF8); + return std::make_unique<TextCodecUTF8>(); } void TextCodecUTF8::registerEncodingNames(EncodingNameRegistrar registrar) { + // From https://encoding.spec.whatwg.org. registrar("UTF-8", "UTF-8"); + registrar("utf8", "UTF-8"); + registrar("unicode-1-1-utf-8", "UTF-8"); // Additional aliases that originally were present in the encoding // table in WebKit on Macintosh, and subsequently added by @@ -53,7 +56,6 @@ void TextCodecUTF8::registerEncodingNames(EncodingNameRegistrar registrar) // and remove them. registrar("unicode11utf8", "UTF-8"); registrar("unicode20utf8", "UTF-8"); - registrar("utf8", "UTF-8"); registrar("x-unicode20utf8", "UTF-8"); } @@ -341,7 +343,7 @@ String TextCodecUTF8::decode(const char* bytes, size_t length, bool flush, bool buffer.shrink(destination - buffer.characters()); - return String::adopt(buffer); + return String::adopt(WTFMove(buffer)); upConvertTo16Bit: StringBuffer<UChar> buffer16(m_partialSequenceSize + length); @@ -417,7 +419,7 @@ upConvertTo16Bit: buffer16.shrink(destination16 - buffer16.characters()); - return String::adopt(buffer16); + return String::adopt(WTFMove(buffer16)); } CString TextCodecUTF8::encode(const UChar* characters, size_t length, UnencodableHandling) diff --git a/Source/WebCore/platform/text/TextCodecUTF8.h b/Source/WebCore/platform/text/TextCodecUTF8.h index 270cf298f..590fd5c0d 100644 --- a/Source/WebCore/platform/text/TextCodecUTF8.h +++ b/Source/WebCore/platform/text/TextCodecUTF8.h @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -32,15 +32,18 @@ namespace WebCore { class TextCodecUTF8 : public TextCodec { public: + static std::unique_ptr<TextCodec> create(const TextEncoding&, const void*); + TextCodecUTF8() + : m_partialSequenceSize(0) + { + } + static void registerEncodingNames(EncodingNameRegistrar); static void registerCodecs(TextCodecRegistrar); private: - static PassOwnPtr<TextCodec> create(const TextEncoding&, const void*); - TextCodecUTF8() : m_partialSequenceSize(0) { } - - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); + String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override; + CString encode(const UChar*, size_t length, UnencodableHandling) override; template <typename CharType> bool handlePartialSequence(CharType*& destination, const uint8_t*& source, const uint8_t* end, bool flush, bool stopOnError, bool& sawError); diff --git a/Source/WebCore/platform/text/TextCodecUserDefined.cpp b/Source/WebCore/platform/text/TextCodecUserDefined.cpp index 47d682110..e319214e4 100644 --- a/Source/WebCore/platform/text/TextCodecUserDefined.cpp +++ b/Source/WebCore/platform/text/TextCodecUserDefined.cpp @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -27,7 +27,6 @@ #include "TextCodecUserDefined.h" #include <stdio.h> -#include <wtf/PassOwnPtr.h> #include <wtf/text/CString.h> #include <wtf/text/StringBuffer.h> #include <wtf/text/StringBuilder.h> @@ -40,9 +39,9 @@ void TextCodecUserDefined::registerEncodingNames(EncodingNameRegistrar registrar registrar("x-user-defined", "x-user-defined"); } -static PassOwnPtr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*) +static std::unique_ptr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*) { - return adoptPtr(new TextCodecUserDefined); + return std::make_unique<TextCodecUserDefined>(); } void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar) diff --git a/Source/WebCore/platform/text/TextCodecUserDefined.h b/Source/WebCore/platform/text/TextCodecUserDefined.h index d1b31601a..5821ca378 100644 --- a/Source/WebCore/platform/text/TextCodecUserDefined.h +++ b/Source/WebCore/platform/text/TextCodecUserDefined.h @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -35,8 +35,8 @@ namespace WebCore { static void registerEncodingNames(EncodingNameRegistrar); static void registerCodecs(TextCodecRegistrar); - virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError); - virtual CString encode(const UChar*, size_t length, UnencodableHandling); + String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override; + CString encode(const UChar*, size_t length, UnencodableHandling) override; }; } // namespace WebCore diff --git a/Source/WebCore/platform/text/TextEncoding.cpp b/Source/WebCore/platform/text/TextEncoding.cpp index d27082670..7daba67e2 100644 --- a/Source/WebCore/platform/text/TextEncoding.cpp +++ b/Source/WebCore/platform/text/TextEncoding.cpp @@ -12,10 +12,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -31,10 +31,9 @@ #include "TextCodec.h" #include "TextEncodingRegistry.h" #include <unicode/unorm.h> -#include <wtf/OwnPtr.h> #include <wtf/StdLibExtras.h> #include <wtf/text/CString.h> -#include <wtf/text/WTFString.h> +#include <wtf/text/StringView.h> namespace WebCore { @@ -48,12 +47,18 @@ TextEncoding::TextEncoding(const char* name) : m_name(atomicCanonicalTextEncodingName(name)) , m_backslashAsCurrencySymbol(backslashAsCurrencySymbol()) { + // Aliases are valid, but not "replacement" itself. + if (m_name && isReplacementEncoding(name)) + m_name = nullptr; } TextEncoding::TextEncoding(const String& name) : m_name(atomicCanonicalTextEncodingName(name)) , m_backslashAsCurrencySymbol(backslashAsCurrencySymbol()) { + // Aliases are valid, but not "replacement" itself. + if (m_name && isReplacementEncoding(name)) + m_name = nullptr; } String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const @@ -64,20 +69,22 @@ String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError); } -CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const +CString TextEncoding::encode(StringView text, UnencodableHandling handling) const { if (!m_name) return CString(); - if (!length) + if (text.isEmpty()) return ""; // FIXME: What's the right place to do normalization? // It's a little strange to do it inside the encode function. // Perhaps normalization should be an explicit step done before calling encode. - const UChar* source = characters; - size_t sourceLength = length; + auto upconvertedCharacters = text.upconvertedCharacters(); + + const UChar* source = upconvertedCharacters; + size_t sourceLength = text.length(); Vector<UChar> normalizedCharacters; @@ -85,17 +92,18 @@ CString TextEncoding::encode(const UChar* characters, size_t length, Unencodable if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) { // First try using the length of the original string, since normalization to NFC rarely increases length. normalizedCharacters.grow(sourceLength); - int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err); + int32_t normalizedLength = unorm_normalize(source, sourceLength, UNORM_NFC, 0, normalizedCharacters.data(), sourceLength, &err); if (err == U_BUFFER_OVERFLOW_ERROR) { err = U_ZERO_ERROR; normalizedCharacters.resize(normalizedLength); - normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err); + normalizedLength = unorm_normalize(source, sourceLength, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err); } ASSERT(U_SUCCESS(err)); source = normalizedCharacters.data(); sourceLength = normalizedLength; } + return newTextCodec(*this)->encode(source, sourceLength, handling); } diff --git a/Source/WebCore/platform/text/TextEncoding.h b/Source/WebCore/platform/text/TextEncoding.h index 68e397814..a349c3828 100644 --- a/Source/WebCore/platform/text/TextEncoding.h +++ b/Source/WebCore/platform/text/TextEncoding.h @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -23,77 +23,59 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#ifndef TextEncoding_h -#define TextEncoding_h +#pragma once #include "TextCodec.h" #include <wtf/Forward.h> -#include <wtf/unicode/Unicode.h> namespace WebCore { - class TextEncoding { - public: - TextEncoding() : m_name(0) { } - TextEncoding(const char* name); - TextEncoding(const String& name); +class TextEncoding { +public: + TextEncoding() = default; + TextEncoding(const char* name); + WEBCORE_EXPORT TextEncoding(const String& name); - bool isValid() const { return m_name; } - const char* name() const { return m_name; } - const char* domName() const; // name exposed via DOM - bool usesVisualOrdering() const; - bool isJapanese() const; - - PassRefPtr<StringImpl> displayString(PassRefPtr<StringImpl> str) const - { - if (m_backslashAsCurrencySymbol == '\\' || !str) - return str; - return str->replace('\\', m_backslashAsCurrencySymbol); - } - template <typename CharacterType> - void displayBuffer(CharacterType* characters, unsigned len) const - { - if (m_backslashAsCurrencySymbol == '\\') - return; - for (unsigned i = 0; i < len; ++i) { - if (characters[i] == '\\') - characters[i] = m_backslashAsCurrencySymbol; - } - } + bool isValid() const { return m_name; } + const char* name() const { return m_name; } + WEBCORE_EXPORT const char* domName() const; // name exposed via DOM + bool usesVisualOrdering() const; + bool isJapanese() const; - const TextEncoding& closestByteBasedEquivalent() const; - const TextEncoding& encodingForFormSubmission() const; + const TextEncoding& closestByteBasedEquivalent() const; + const TextEncoding& encodingForFormSubmission() const; - String decode(const char* str, size_t length) const - { - bool ignored; - return decode(str, length, false, ignored); - } - String decode(const char*, size_t length, bool stopOnError, bool& sawError) const; - CString encode(const UChar*, size_t length, UnencodableHandling) const; + WEBCORE_EXPORT String decode(const char*, size_t length, bool stopOnError, bool& sawError) const; + String decode(const char*, size_t length) const; + CString encode(StringView, UnencodableHandling) const; - UChar backslashAsCurrencySymbol() const; + UChar backslashAsCurrencySymbol() const; + bool isByteBasedEncoding() const { return !isNonByteBasedEncoding(); } - private: - bool isNonByteBasedEncoding() const; - bool isUTF7Encoding() const; +private: + bool isNonByteBasedEncoding() const; + bool isUTF7Encoding() const; - const char* m_name; - UChar m_backslashAsCurrencySymbol; - }; + const char* m_name { nullptr }; + UChar m_backslashAsCurrencySymbol; +}; - inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); } - inline bool operator!=(const TextEncoding& a, const TextEncoding& b) { return a.name() != b.name(); } +inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); } +inline bool operator!=(const TextEncoding& a, const TextEncoding& b) { return a.name() != b.name(); } - const TextEncoding& ASCIIEncoding(); - const TextEncoding& Latin1Encoding(); - const TextEncoding& UTF16BigEndianEncoding(); - const TextEncoding& UTF16LittleEndianEncoding(); - const TextEncoding& UTF32BigEndianEncoding(); - const TextEncoding& UTF32LittleEndianEncoding(); - const TextEncoding& UTF8Encoding(); - const TextEncoding& WindowsLatin1Encoding(); +const TextEncoding& ASCIIEncoding(); +const TextEncoding& Latin1Encoding(); +const TextEncoding& UTF16BigEndianEncoding(); +const TextEncoding& UTF16LittleEndianEncoding(); +const TextEncoding& UTF32BigEndianEncoding(); +const TextEncoding& UTF32LittleEndianEncoding(); +WEBCORE_EXPORT const TextEncoding& UTF8Encoding(); +WEBCORE_EXPORT const TextEncoding& WindowsLatin1Encoding(); -} // namespace WebCore +inline String TextEncoding::decode(const char* characters, size_t length) const +{ + bool ignored; + return decode(characters, length, false, ignored); +} -#endif // TextEncoding_h +} // namespace WebCore diff --git a/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp b/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp new file mode 100644 index 000000000..8153b75bd --- /dev/null +++ b/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2008, 2009 Google Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "TextEncodingDetector.h" + +#include "TextEncoding.h" +#include <unicode/ucnv.h> +#include <unicode/ucsdet.h> + +namespace WebCore { + +bool detectTextEncoding(const char* data, size_t len, + const char* hintEncodingName, + TextEncoding* detectedEncoding) +{ + *detectedEncoding = TextEncoding(); + int matchesCount = 0; + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector* detector = ucsdet_open(&status); + if (U_FAILURE(status)) + return false; + ucsdet_enableInputFilter(detector, true); + ucsdet_setText(detector, data, static_cast<int32_t>(len), &status); + if (U_FAILURE(status)) + return false; + + // FIXME: A few things we can do other than improving + // the ICU detector itself. + // 1. Use ucsdet_detectAll and pick the most likely one given + // "the context" (parent-encoding, referrer encoding, etc). + // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g. + // Chinese, Japanese, Russian, Korean and Hebrew) by picking the + // encoding with a highest confidence among the detector-specific + // limited set of candidate encodings. + // Below is a partial implementation of the first part of what's outlined + // above. + const UCharsetMatch** matches = ucsdet_detectAll(detector, &matchesCount, &status); + if (U_FAILURE(status)) { + ucsdet_close(detector); + return false; + } + + const char* encoding = 0; + if (hintEncodingName) { + TextEncoding hintEncoding(hintEncodingName); + // 10 is the minimum confidence value consistent with the codepoint + // allocation in a given encoding. The size of a chunk passed to + // us varies even for the same html file (apparently depending on + // the network load). When we're given a rather short chunk, we + // don't have a sufficiently reliable signal other than the fact that + // the chunk is consistent with a set of encodings. So, instead of + // setting an arbitrary threshold, we have to scan all the encodings + // consistent with the data. + const int32_t kThreshold = 10; + for (int i = 0; i < matchesCount; ++i) { + int32_t confidence = ucsdet_getConfidence(matches[i], &status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + continue; + } + if (confidence < kThreshold) + break; + const char* matchEncoding = ucsdet_getName(matches[i], &status); + if (U_FAILURE(status)) { + status = U_ZERO_ERROR; + continue; + } + if (TextEncoding(matchEncoding) == hintEncoding) { + encoding = hintEncodingName; + break; + } + } + } + // If no match is found so far, just pick the top match. + // This can happen, say, when a parent frame in EUC-JP refers to + // a child frame in Shift_JIS and both frames do NOT specify the encoding + // making us resort to auto-detection (when it IS turned on). + if (!encoding && matchesCount > 0) + encoding = ucsdet_getName(matches[0], &status); + if (U_SUCCESS(status)) { + *detectedEncoding = TextEncoding(encoding); + ucsdet_close(detector); + return true; + } + ucsdet_close(detector); + return false; +} + +} diff --git a/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp b/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp deleted file mode 100644 index 3b62bc5b0..000000000 --- a/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (C) 2009 Google Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Google Inc. nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "TextEncodingDetector.h" - -#include "TextEncoding.h" - -namespace WebCore { - -bool detectTextEncoding(const char*, size_t, const char*, TextEncoding* detectedEncoding) -{ - *detectedEncoding = TextEncoding(); - return false; -} - -} diff --git a/Source/WebCore/platform/text/TextEncodingRegistry.cpp b/Source/WebCore/platform/text/TextEncodingRegistry.cpp index d66f82b8f..eace643a3 100644 --- a/Source/WebCore/platform/text/TextEncodingRegistry.cpp +++ b/Source/WebCore/platform/text/TextEncodingRegistry.cpp @@ -11,10 +11,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -29,6 +29,7 @@ #include "TextCodecICU.h" #include "TextCodecLatin1.h" +#include "TextCodecReplacement.h" #include "TextCodecUserDefined.h" #include "TextCodecUTF16.h" #include "TextCodecUTF8.h" @@ -37,12 +38,16 @@ #include <wtf/ASCIICType.h> #include <wtf/HashMap.h> #include <wtf/HashSet.h> +#include <wtf/Lock.h> #include <wtf/MainThread.h> -#include <wtf/NeverDestroyed.h> #include <wtf/StdLibExtras.h> #include <wtf/StringExtras.h> -#if PLATFORM(MAC) && !PLATFORM(IOS) +#if PLATFORM(COCOA) +#include "WebCoreSystemInterface.h" +#endif + +#if PLATFORM(MAC) #include "TextCodecMac.h" #endif @@ -102,14 +107,7 @@ struct TextCodecFactory { typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap; typedef HashMap<const char*, TextCodecFactory> TextCodecMap; -static std::mutex& encodingRegistryMutex() -{ - // We don't have to construct this mutex in a thread safe way because this function - // is called on the main thread for any page before it is used in worker threads. - static NeverDestroyed<std::mutex> mutex; - - return mutex; -} +static StaticLock encodingRegistryMutex; static TextEncodingNameMap* textEncodingNameMap; static TextCodecMap* textCodecMap; @@ -117,7 +115,7 @@ static bool didExtendTextCodecMaps; static HashSet<const char*>* japaneseEncodings; static HashSet<const char*>* nonBackslashEncodings; -static const char* const textEncodingNameBlacklist[] = { "UTF-7" }; +static const char* const textEncodingNameBlacklist[] = { "UTF-7", "BOCU-1", "SCSU" }; #if ERROR_DISABLED @@ -199,9 +197,8 @@ static void pruneBlacklistedCodecs() } } -static void buildBaseTextCodecMaps() +static void buildBaseTextCodecMaps(const std::lock_guard<StaticLock>&) { - ASSERT(isMainThread()); ASSERT(!textCodecMap); ASSERT(!textEncodingNameMap); @@ -269,6 +266,22 @@ bool isJapaneseEncoding(const char* canonicalEncodingName) return canonicalEncodingName && japaneseEncodings && japaneseEncodings->contains(canonicalEncodingName); } +bool isReplacementEncoding(const char* alias) +{ + if (!alias) + return false; + + if (strlen(alias) != 11) + return false; + + return !strcasecmp(alias, "replacement"); +} + +bool isReplacementEncoding(const String& alias) +{ + return equalLettersIgnoringASCIICase(alias, "replacement"); +} + bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName) { return canonicalEncodingName && nonBackslashEncodings && nonBackslashEncodings->contains(canonicalEncodingName); @@ -276,10 +289,13 @@ bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName) static void extendTextCodecMaps() { + TextCodecReplacement::registerEncodingNames(addToTextEncodingNameMap); + TextCodecReplacement::registerCodecs(addToTextCodecMap); + TextCodecICU::registerEncodingNames(addToTextEncodingNameMap); TextCodecICU::registerCodecs(addToTextCodecMap); -#if PLATFORM(MAC) && !PLATFORM(IOS) +#if PLATFORM(MAC) TextCodecMac::registerEncodingNames(addToTextEncodingNameMap); TextCodecMac::registerCodecs(addToTextCodecMap); #endif @@ -288,9 +304,9 @@ static void extendTextCodecMaps() buildQuirksSets(); } -PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding) +std::unique_ptr<TextCodec> newTextCodec(const TextEncoding& encoding) { - std::lock_guard<std::mutex> lock(encodingRegistryMutex()); + std::lock_guard<StaticLock> lock(encodingRegistryMutex); ASSERT(textCodecMap); TextCodecFactory factory = textCodecMap->get(encoding.name()); @@ -303,10 +319,10 @@ const char* atomicCanonicalTextEncodingName(const char* name) if (!name || !name[0]) return nullptr; - if (!textEncodingNameMap) - buildBaseTextCodecMaps(); + std::lock_guard<StaticLock> lock(encodingRegistryMutex); - std::lock_guard<std::mutex> lock(encodingRegistryMutex()); + if (!textEncodingNameMap) + buildBaseTextCodecMaps(lock); if (const char* atomicName = textEncodingNameMap->get(name)) return atomicName; @@ -336,12 +352,12 @@ const char* atomicCanonicalTextEncodingName(const CharacterType* characters, siz const char* atomicCanonicalTextEncodingName(const String& alias) { if (!alias.length()) - return 0; + return nullptr; if (alias.is8Bit()) - return atomicCanonicalTextEncodingName<LChar>(alias.characters8(), alias.length()); + return atomicCanonicalTextEncodingName(alias.characters8(), alias.length()); - return atomicCanonicalTextEncodingName<UChar>(alias.deprecatedCharacters(), alias.length()); + return atomicCanonicalTextEncodingName(alias.characters16(), alias.length()); } bool noExtendedTextEncodingNameUsed() @@ -350,13 +366,30 @@ bool noExtendedTextEncodingNameUsed() return !didExtendTextCodecMaps; } +String defaultTextEncodingNameForSystemLanguage() +{ +#if PLATFORM(COCOA) + String systemEncodingName = CFStringConvertEncodingToIANACharSetName(wkGetWebDefaultCFStringEncoding()); + + // CFStringConvertEncodingToIANACharSetName() returns cp949 for kTextEncodingDOSKorean AKA "extended EUC-KR" AKA windows-949. + // ICU uses this name for a different encoding, so we need to change the name to a value that actually gives us windows-949. + // In addition, this value must match what is used in Safari, see <rdar://problem/5579292>. + // On some OS versions, the result is CP949 (uppercase). + if (equalLettersIgnoringASCIICase(systemEncodingName, "cp949")) + systemEncodingName = ASCIILiteral("ks_c_5601-1987"); + return systemEncodingName; +#else + return ASCIILiteral("ISO-8859-1"); +#endif +} + #ifndef NDEBUG void dumpTextEncodingNameMap() { unsigned size = textEncodingNameMap->size(); fprintf(stderr, "Dumping %u entries in WebCore::textEncodingNameMap...\n", size); - std::lock_guard<std::mutex> lock(encodingRegistryMutex()); + std::lock_guard<StaticLock> lock(encodingRegistryMutex); TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin(); TextEncodingNameMap::const_iterator end = textEncodingNameMap->end(); diff --git a/Source/WebCore/platform/text/TextEncodingRegistry.h b/Source/WebCore/platform/text/TextEncodingRegistry.h index 1895df737..0c5ba5116 100644 --- a/Source/WebCore/platform/text/TextEncodingRegistry.h +++ b/Source/WebCore/platform/text/TextEncodingRegistry.h @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -27,9 +27,7 @@ #define TextEncodingRegistry_h #include <memory> -#include <wtf/PassOwnPtr.h> #include <wtf/text/WTFString.h> -#include <wtf/unicode/Unicode.h> namespace WebCore { @@ -38,7 +36,7 @@ namespace WebCore { // Use TextResourceDecoder::decode to decode resources, since it handles BOMs. // Use TextEncoding::encode to encode, since it takes care of normalization. - PassOwnPtr<TextCodec> newTextCodec(const TextEncoding&); + std::unique_ptr<TextCodec> newTextCodec(const TextEncoding&); // Only TextEncoding should use the following functions directly. const char* atomicCanonicalTextEncodingName(const char* alias); @@ -48,6 +46,10 @@ namespace WebCore { bool noExtendedTextEncodingNameUsed(); bool isJapaneseEncoding(const char* canonicalEncodingName); bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName); + bool isReplacementEncoding(const char* alias); + bool isReplacementEncoding(const String& alias); + + WEBCORE_EXPORT String defaultTextEncodingNameForSystemLanguage(); #ifndef NDEBUG void dumpTextEncodingNameMap(); diff --git a/Source/WebCore/platform/text/TextFlags.h b/Source/WebCore/platform/text/TextFlags.h new file mode 100644 index 000000000..76445976d --- /dev/null +++ b/Source/WebCore/platform/text/TextFlags.h @@ -0,0 +1,408 @@ +/* + * Copyright (C) 2003, 2006 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TextFlags_h +#define TextFlags_h + +namespace WebCore { + +enum TextRenderingMode { AutoTextRendering, OptimizeSpeed, OptimizeLegibility, GeometricPrecision }; + +enum FontSmoothingMode { AutoSmoothing, NoSmoothing, Antialiased, SubpixelAntialiased }; + +// This setting is used to provide ways of switching between multiple rendering modes that may have different +// metrics. It is used to switch between CG and GDI text on Windows. +enum class FontRenderingMode { Normal, Alternate }; + +enum FontOrientation { Horizontal, Vertical }; + +enum class NonCJKGlyphOrientation { Mixed, Upright }; + +// Here, "Leading" and "Trailing" are relevant after the line has been rearranged for bidi. +// ("Leading" means "left" and "Trailing" means "right.") +enum ExpansionBehaviorFlags { + ForbidTrailingExpansion = 0 << 0, + AllowTrailingExpansion = 1 << 0, + ForceTrailingExpansion = 2 << 0, + TrailingExpansionMask = 3 << 0, + + ForbidLeadingExpansion = 0 << 2, + AllowLeadingExpansion = 1 << 2, + ForceLeadingExpansion = 2 << 2, + LeadingExpansionMask = 3 << 2, + + DefaultExpansion = AllowTrailingExpansion | ForbidLeadingExpansion, +}; +typedef unsigned ExpansionBehavior; + +enum FontSynthesisValues { + FontSynthesisNone = 0x0, + FontSynthesisWeight = 0x1, + FontSynthesisStyle = 0x2, + FontSynthesisSmallCaps = 0x4 +}; +typedef unsigned FontSynthesis; +const unsigned FontSynthesisWidth = 3; + +enum class FontVariantLigatures { + Normal, + Yes, + No +}; + +enum class FontVariantPosition { + Normal, + Subscript, + Superscript +}; + +enum class FontVariantCaps { + Normal, + Small, + AllSmall, + Petite, + AllPetite, + Unicase, + Titling +}; + +enum class FontVariantNumericFigure { + Normal, + LiningNumbers, + OldStyleNumbers +}; + +enum class FontVariantNumericSpacing { + Normal, + ProportionalNumbers, + TabularNumbers +}; + +enum class FontVariantNumericFraction { + Normal, + DiagonalFractions, + StackedFractions +}; + +enum class FontVariantNumericOrdinal { + Normal, + Yes +}; + +enum class FontVariantNumericSlashedZero { + Normal, + Yes +}; + +enum class FontVariantAlternates { + Normal, + HistoricalForms +}; + +enum class FontVariantEastAsianVariant { + Normal, + Jis78, + Jis83, + Jis90, + Jis04, + Simplified, + Traditional +}; + +enum class FontVariantEastAsianWidth { + Normal, + Full, + Proportional +}; + +enum class FontVariantEastAsianRuby { + Normal, + Yes +}; + +struct FontVariantSettings { + FontVariantSettings() + : commonLigatures(FontVariantLigatures::Normal) + , discretionaryLigatures(FontVariantLigatures::Normal) + , historicalLigatures(FontVariantLigatures::Normal) + , contextualAlternates(FontVariantLigatures::Normal) + , position(FontVariantPosition::Normal) + , caps(FontVariantCaps::Normal) + , numericFigure(FontVariantNumericFigure::Normal) + , numericSpacing(FontVariantNumericSpacing::Normal) + , numericFraction(FontVariantNumericFraction::Normal) + , numericOrdinal(FontVariantNumericOrdinal::Normal) + , numericSlashedZero(FontVariantNumericSlashedZero::Normal) + , alternates(FontVariantAlternates::Normal) + , eastAsianVariant(FontVariantEastAsianVariant::Normal) + , eastAsianWidth(FontVariantEastAsianWidth::Normal) + , eastAsianRuby(FontVariantEastAsianRuby::Normal) + { + } + + FontVariantSettings( + FontVariantLigatures commonLigatures, + FontVariantLigatures discretionaryLigatures, + FontVariantLigatures historicalLigatures, + FontVariantLigatures contextualAlternates, + FontVariantPosition position, + FontVariantCaps caps, + FontVariantNumericFigure numericFigure, + FontVariantNumericSpacing numericSpacing, + FontVariantNumericFraction numericFraction, + FontVariantNumericOrdinal numericOrdinal, + FontVariantNumericSlashedZero numericSlashedZero, + FontVariantAlternates alternates, + FontVariantEastAsianVariant eastAsianVariant, + FontVariantEastAsianWidth eastAsianWidth, + FontVariantEastAsianRuby eastAsianRuby) + : commonLigatures(commonLigatures) + , discretionaryLigatures(discretionaryLigatures) + , historicalLigatures(historicalLigatures) + , contextualAlternates(contextualAlternates) + , position(position) + , caps(caps) + , numericFigure(numericFigure) + , numericSpacing(numericSpacing) + , numericFraction(numericFraction) + , numericOrdinal(numericOrdinal) + , numericSlashedZero(numericSlashedZero) + , alternates(alternates) + , eastAsianVariant(eastAsianVariant) + , eastAsianWidth(eastAsianWidth) + , eastAsianRuby(eastAsianRuby) + { + } + + bool isAllNormal() const + { + return commonLigatures == FontVariantLigatures::Normal + && discretionaryLigatures == FontVariantLigatures::Normal + && historicalLigatures == FontVariantLigatures::Normal + && contextualAlternates == FontVariantLigatures::Normal + && position == FontVariantPosition::Normal + && caps == FontVariantCaps::Normal + && numericFigure == FontVariantNumericFigure::Normal + && numericSpacing == FontVariantNumericSpacing::Normal + && numericFraction == FontVariantNumericFraction::Normal + && numericOrdinal == FontVariantNumericOrdinal::Normal + && numericSlashedZero == FontVariantNumericSlashedZero::Normal + && alternates == FontVariantAlternates::Normal + && eastAsianVariant == FontVariantEastAsianVariant::Normal + && eastAsianWidth == FontVariantEastAsianWidth::Normal + && eastAsianRuby == FontVariantEastAsianRuby::Normal; + } + + bool operator==(const FontVariantSettings& other) const + { + return commonLigatures == other.commonLigatures + && discretionaryLigatures == other.discretionaryLigatures + && historicalLigatures == other.historicalLigatures + && contextualAlternates == other.contextualAlternates + && position == other.position + && caps == other.caps + && numericFigure == other.numericFigure + && numericSpacing == other.numericSpacing + && numericFraction == other.numericFraction + && numericOrdinal == other.numericOrdinal + && numericSlashedZero == other.numericSlashedZero + && alternates == other.alternates + && eastAsianVariant == other.eastAsianVariant + && eastAsianWidth == other.eastAsianWidth + && eastAsianRuby == other.eastAsianRuby; + } + + bool operator!=(const FontVariantSettings& other) const { return !(*this == other); } + + unsigned uniqueValue() const + { + return static_cast<unsigned>(commonLigatures) << 26 + | static_cast<unsigned>(discretionaryLigatures) << 24 + | static_cast<unsigned>(historicalLigatures) << 22 + | static_cast<unsigned>(contextualAlternates) << 20 + | static_cast<unsigned>(position) << 18 + | static_cast<unsigned>(caps) << 15 + | static_cast<unsigned>(numericFigure) << 13 + | static_cast<unsigned>(numericSpacing) << 11 + | static_cast<unsigned>(numericFraction) << 9 + | static_cast<unsigned>(numericOrdinal) << 8 + | static_cast<unsigned>(numericSlashedZero) << 7 + | static_cast<unsigned>(alternates) << 6 + | static_cast<unsigned>(eastAsianVariant) << 3 + | static_cast<unsigned>(eastAsianWidth) << 1 + | static_cast<unsigned>(eastAsianRuby) << 0; + } + + FontVariantLigatures commonLigatures; + FontVariantLigatures discretionaryLigatures; + FontVariantLigatures historicalLigatures; + FontVariantLigatures contextualAlternates; + FontVariantPosition position; + FontVariantCaps caps; + FontVariantNumericFigure numericFigure; + FontVariantNumericSpacing numericSpacing; + FontVariantNumericFraction numericFraction; + FontVariantNumericOrdinal numericOrdinal; + FontVariantNumericSlashedZero numericSlashedZero; + FontVariantAlternates alternates; + FontVariantEastAsianVariant eastAsianVariant; + FontVariantEastAsianWidth eastAsianWidth; + FontVariantEastAsianRuby eastAsianRuby; +}; + +struct FontVariantLigaturesValues { + FontVariantLigaturesValues( + FontVariantLigatures commonLigatures, + FontVariantLigatures discretionaryLigatures, + FontVariantLigatures historicalLigatures, + FontVariantLigatures contextualAlternates) + : commonLigatures(commonLigatures) + , discretionaryLigatures(discretionaryLigatures) + , historicalLigatures(historicalLigatures) + , contextualAlternates(contextualAlternates) + { + } + + FontVariantLigatures commonLigatures; + FontVariantLigatures discretionaryLigatures; + FontVariantLigatures historicalLigatures; + FontVariantLigatures contextualAlternates; +}; + +struct FontVariantNumericValues { + FontVariantNumericValues( + FontVariantNumericFigure figure, + FontVariantNumericSpacing spacing, + FontVariantNumericFraction fraction, + FontVariantNumericOrdinal ordinal, + FontVariantNumericSlashedZero slashedZero) + : figure(figure) + , spacing(spacing) + , fraction(fraction) + , ordinal(ordinal) + , slashedZero(slashedZero) + { + } + + FontVariantNumericFigure figure; + FontVariantNumericSpacing spacing; + FontVariantNumericFraction fraction; + FontVariantNumericOrdinal ordinal; + FontVariantNumericSlashedZero slashedZero; +}; + +struct FontVariantEastAsianValues { + FontVariantEastAsianValues( + FontVariantEastAsianVariant variant, + FontVariantEastAsianWidth width, + FontVariantEastAsianRuby ruby) + : variant(variant) + , width(width) + , ruby(ruby) + { + } + + FontVariantEastAsianVariant variant; + FontVariantEastAsianWidth width; + FontVariantEastAsianRuby ruby; +}; + +enum FontWidthVariant { + RegularWidth, + HalfWidth, + ThirdWidth, + QuarterWidth, + LastFontWidthVariant = QuarterWidth +}; + +const unsigned FontWidthVariantWidth = 2; + +COMPILE_ASSERT(!(LastFontWidthVariant >> FontWidthVariantWidth), FontWidthVariantWidth_is_correct); + +enum FontWeight { + FontWeight100, + FontWeight200, + FontWeight300, + FontWeight400, + FontWeight500, + FontWeight600, + FontWeight700, + FontWeight800, + FontWeight900, + FontWeightNormal = FontWeight400, + FontWeightBold = FontWeight700 +}; + +enum FontItalic { + FontItalicOff = 0, + FontItalicOn = 1 +}; + +enum FontSmallCaps { + FontSmallCapsOff = 0, + FontSmallCapsOn = 1 +}; + +enum { + FontStyleNormalBit = 0, + FontStyleItalicBit, + FontWeight100Bit, + FontWeight200Bit, + FontWeight300Bit, + FontWeight400Bit, + FontWeight500Bit, + FontWeight600Bit, + FontWeight700Bit, + FontWeight800Bit, + FontWeight900Bit, + FontTraitsMaskWidth +}; + +enum FontTraitsMask { + FontStyleNormalMask = 1 << FontStyleNormalBit, + FontStyleItalicMask = 1 << FontStyleItalicBit, + FontStyleMask = FontStyleNormalMask | FontStyleItalicMask, + + FontWeight100Mask = 1 << FontWeight100Bit, + FontWeight200Mask = 1 << FontWeight200Bit, + FontWeight300Mask = 1 << FontWeight300Bit, + FontWeight400Mask = 1 << FontWeight400Bit, + FontWeight500Mask = 1 << FontWeight500Bit, + FontWeight600Mask = 1 << FontWeight600Bit, + FontWeight700Mask = 1 << FontWeight700Bit, + FontWeight800Mask = 1 << FontWeight800Bit, + FontWeight900Mask = 1 << FontWeight900Bit, + FontWeightMask = FontWeight100Mask | FontWeight200Mask | FontWeight300Mask | FontWeight400Mask | FontWeight500Mask | FontWeight600Mask | FontWeight700Mask | FontWeight800Mask | FontWeight900Mask +}; + +enum class Kerning { + Auto, + Normal, + NoShift +}; + +} + +#endif diff --git a/Source/WebCore/platform/text/TextStream.cpp b/Source/WebCore/platform/text/TextStream.cpp index d07bffd09..32fe3c757 100644 --- a/Source/WebCore/platform/text/TextStream.cpp +++ b/Source/WebCore/platform/text/TextStream.cpp @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -27,10 +27,9 @@ #include "TextStream.h" #include "FloatPoint.h" -#include "FloatRect.h" #include "IntPoint.h" -#include "IntRect.h" #include "LayoutRect.h" +#include "LayoutUnit.h" #include <wtf/MathExtras.h> #include <wtf/StringExtras.h> #include <wtf/text/WTFString.h> @@ -90,13 +89,19 @@ TextStream& TextStream::operator<<(unsigned long long i) TextStream& TextStream::operator<<(float f) { - m_text.append(String::numberToStringFixedWidth(f, 2)); + if (m_formattingFlags & Formatting::NumberRespectingIntegers) + return *this << FormatNumberRespectingIntegers(f); + + m_text.appendFixedWidthNumber(f, 2); return *this; } TextStream& TextStream::operator<<(double d) { - m_text.append(String::numberToStringFixedWidth(d, 2)); + if (m_formattingFlags & Formatting::NumberRespectingIntegers) + return *this << FormatNumberRespectingIntegers(d); + + m_text.appendFixedWidthNumber(d, 2); return *this; } @@ -121,52 +126,62 @@ TextStream& TextStream::operator<<(const String& string) TextStream& TextStream::operator<<(const FormatNumberRespectingIntegers& numberToFormat) { - if (hasFractions(numberToFormat.value)) - return *this << numberToFormat.value; + if (hasFractions(numberToFormat.value)) { + m_text.appendFixedWidthNumber(numberToFormat.value, 2); + return *this; + } m_text.appendNumber(static_cast<int>(numberToFormat.value)); return *this; } -TextStream& TextStream::operator<<(const IntPoint& p) +TextStream& TextStream::operator<<(LayoutUnit v) { - return *this << "(" << p.x() << "," << p.y() << ")"; + return *this << TextStream::FormatNumberRespectingIntegers(v.toFloat()); } -TextStream& TextStream::operator<<(const IntRect& r) +String TextStream::release() { - return *this << "at (" << r.x() << "," << r.y() << ") size " << r.width() << "x" << r.height(); + String result = m_text.toString(); + m_text.clear(); + return result; } -TextStream& TextStream::operator<<(const FloatPoint& p) +void TextStream::startGroup() { - return *this << "(" << TextStream::FormatNumberRespectingIntegers(p.x()) - << "," << TextStream::FormatNumberRespectingIntegers(p.y()) << ")"; -} + TextStream& ts = *this; -TextStream& TextStream::operator<<(const FloatSize& s) -{ - return *this << "width=" << TextStream::FormatNumberRespectingIntegers(s.width()) - << " height=" << TextStream::FormatNumberRespectingIntegers(s.height()); + if (m_multiLineMode) { + ts << "\n"; + ts.writeIndent(); + ts << "("; + ts.increaseIndent(); + } else + ts << " ("; } -TextStream& TextStream::operator<<(const LayoutPoint& p) +void TextStream::endGroup() { - // FIXME: These should be printed as floats. Keeping them ints for consistency with pervious test expectations. - return *this << "(" << p.x().toInt() << "," << p.y().toInt() << ")"; + TextStream& ts = *this; + ts << ")"; + if (m_multiLineMode) + ts.decreaseIndent(); } -TextStream& TextStream::operator<<(const LayoutRect& r) +void TextStream::nextLine() { - // FIXME: These should be printed as floats. Keeping them ints for consistency with previous test expectations. - return *this << pixelSnappedIntRect(r); + TextStream& ts = *this; + if (m_multiLineMode) { + ts << "\n"; + ts.writeIndent(); + } else + ts << " "; } -String TextStream::release() +void TextStream::writeIndent() { - String result = m_text.toString(); - m_text.clear(); - return result; + if (m_multiLineMode) + WebCore::writeIndent(*this, m_indent); } void writeIndent(TextStream& ts, int indent) diff --git a/Source/WebCore/platform/text/TextStream.h b/Source/WebCore/platform/text/TextStream.h index 053cb60de..be9f5d10b 100644 --- a/Source/WebCore/platform/text/TextStream.h +++ b/Source/WebCore/platform/text/TextStream.h @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR @@ -28,16 +28,10 @@ #include <wtf/Forward.h> #include <wtf/text/StringBuilder.h> -#include <wtf/unicode/Unicode.h> namespace WebCore { -class IntPoint; -class IntRect; -class FloatPoint; -class FloatSize; -class LayoutPoint; -class LayoutRect; +class LayoutUnit; class TextStream { public: @@ -45,49 +39,102 @@ public: FormatNumberRespectingIntegers(double number) : value(number) { } double value; }; - - TextStream& operator<<(bool); - TextStream& operator<<(int); - TextStream& operator<<(unsigned); - TextStream& operator<<(long); - TextStream& operator<<(unsigned long); - TextStream& operator<<(long long); - TextStream& operator<<(unsigned long long); - TextStream& operator<<(float); - TextStream& operator<<(double); - TextStream& operator<<(const char*); - TextStream& operator<<(const void*); - TextStream& operator<<(const String&); - TextStream& operator<<(const FormatNumberRespectingIntegers&); - - TextStream& operator<<(const IntPoint&); - TextStream& operator<<(const IntRect&); - TextStream& operator<<(const FloatPoint&); - TextStream& operator<<(const FloatSize&); - TextStream& operator<<(const LayoutPoint&); - TextStream& operator<<(const LayoutRect&); - - template<typename Item> - TextStream& operator<<(const Vector<Item>& vector) + + enum Formatting { + SVGStyleRect = 1 << 0, // "at (0,0) size 10x10" + NumberRespectingIntegers = 1 << 1, + LayoutUnitsAsIntegers = 1 << 2, + }; + + using FormattingFlags = unsigned; + + enum class LineMode { SingleLine, MultipleLine }; + TextStream(LineMode lineMode = LineMode::MultipleLine, FormattingFlags formattingFlags = 0) + : m_formattingFlags(formattingFlags) + , m_multiLineMode(lineMode == LineMode::MultipleLine) { - *this << "["; + } - unsigned size = vector.size(); - for (unsigned i = 0; i < size; ++i) { - *this << vector[i]; - if (i < size - 1) - *this << ", "; - } + WEBCORE_EXPORT TextStream& operator<<(bool); + WEBCORE_EXPORT TextStream& operator<<(int); + WEBCORE_EXPORT TextStream& operator<<(unsigned); + WEBCORE_EXPORT TextStream& operator<<(long); + WEBCORE_EXPORT TextStream& operator<<(unsigned long); + WEBCORE_EXPORT TextStream& operator<<(long long); + WEBCORE_EXPORT TextStream& operator<<(LayoutUnit); + + WEBCORE_EXPORT TextStream& operator<<(unsigned long long); + WEBCORE_EXPORT TextStream& operator<<(float); + WEBCORE_EXPORT TextStream& operator<<(double); + WEBCORE_EXPORT TextStream& operator<<(const char*); + WEBCORE_EXPORT TextStream& operator<<(const void*); + WEBCORE_EXPORT TextStream& operator<<(const String&); + // Deprecated. Use the NumberRespectingIntegers FormattingFlag instead. + WEBCORE_EXPORT TextStream& operator<<(const FormatNumberRespectingIntegers&); - return *this << "]"; + FormattingFlags formattingFlags() const { return m_formattingFlags; } + void setFormattingFlags(FormattingFlags flags) { m_formattingFlags = flags; } + + bool hasFormattingFlag(Formatting flag) const { return m_formattingFlags & flag; } + + template<typename T> + void dumpProperty(const String& name, const T& value) + { + TextStream& ts = *this; + ts.startGroup(); + ts << name << " " << value; + ts.endGroup(); } - String release(); + WEBCORE_EXPORT String release(); + + WEBCORE_EXPORT void startGroup(); + WEBCORE_EXPORT void endGroup(); + WEBCORE_EXPORT void nextLine(); // Output newline and indent. + + void increaseIndent(int amount = 1) { m_indent += amount; } + void decreaseIndent(int amount = 1) { m_indent -= amount; ASSERT(m_indent >= 0); } + + WEBCORE_EXPORT void writeIndent(); + + class GroupScope { + public: + GroupScope(TextStream& ts) + : m_stream(ts) + { + m_stream.startGroup(); + } + ~GroupScope() + { + m_stream.endGroup(); + } + + private: + TextStream& m_stream; + }; private: StringBuilder m_text; + FormattingFlags m_formattingFlags { 0 }; + int m_indent { 0 }; + bool m_multiLineMode { true }; }; +template<typename Item> +TextStream& operator<<(TextStream& ts, const Vector<Item>& vector) +{ + ts << "["; + + unsigned size = vector.size(); + for (unsigned i = 0; i < size; ++i) { + ts << vector[i]; + if (i < size - 1) + ts << ", "; + } + + return ts << "]"; +} + void writeIndent(TextStream&, int indent); } diff --git a/Source/WebCore/platform/text/UnicodeBidi.h b/Source/WebCore/platform/text/UnicodeBidi.h index 14c88bc1f..7249cfefe 100644 --- a/Source/WebCore/platform/text/UnicodeBidi.h +++ b/Source/WebCore/platform/text/UnicodeBidi.h @@ -10,10 +10,10 @@ * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR diff --git a/Source/WebCore/platform/text/WritingMode.h b/Source/WebCore/platform/text/WritingMode.h index 9e5d28c7d..2310b43e7 100644 --- a/Source/WebCore/platform/text/WritingMode.h +++ b/Source/WebCore/platform/text/WritingMode.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2012, Google Inc. All rights reserved. + * Copyright (C) 2015, Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -33,26 +34,137 @@ namespace WebCore { +enum TextDirection { LTR, RTL }; + +inline bool isLeftToRightDirection(TextDirection direction) +{ + return direction == LTR; +} + enum WritingMode { - TopToBottomWritingMode, RightToLeftWritingMode, LeftToRightWritingMode, BottomToTopWritingMode + TopToBottomWritingMode = 0, // horizontal-tb + BottomToTopWritingMode = 1, // horizontal-bt + LeftToRightWritingMode = 2, // vertical-lr + RightToLeftWritingMode = 3, // vertical-rl +}; + +#define MAKE_TEXT_FLOW(writingMode, direction) ((writingMode) << 1 | (direction)) + +// Define the text flow in terms of the writing mode and the text direction. The first +// part is the line growing direction and the second part is the block growing direction. +enum TextFlow { + InlineEastBlockSouth = MAKE_TEXT_FLOW(TopToBottomWritingMode, LTR), + InlineWestBlockSouth = MAKE_TEXT_FLOW(TopToBottomWritingMode, RTL), + InlineEastBlockNorth = MAKE_TEXT_FLOW(BottomToTopWritingMode, LTR), + InlineWestBlockNorth = MAKE_TEXT_FLOW(BottomToTopWritingMode, RTL), + InlineSouthBlockEast = MAKE_TEXT_FLOW(LeftToRightWritingMode, LTR), + InlineSouthBlockWest = MAKE_TEXT_FLOW(LeftToRightWritingMode, RTL), + InlineNorthBlockEast = MAKE_TEXT_FLOW(RightToLeftWritingMode, LTR), + InlineNorthBlockWest = MAKE_TEXT_FLOW(RightToLeftWritingMode, RTL) }; +inline TextFlow makeTextFlow(WritingMode writingMode, TextDirection direction) +{ + return static_cast<TextFlow>(MAKE_TEXT_FLOW(writingMode, direction)); +} + +#undef MAKE_TEXT_FLOW + +const unsigned TextFlowReversedMask = 1; +const unsigned TextFlowFlippedMask = 2; +const unsigned TextFlowVerticalMask = 4; + +inline bool isReversedTextFlow(TextFlow textflow) +{ + return textflow & TextFlowReversedMask; +} + +inline bool isFlippedTextFlow(TextFlow textflow) +{ + return textflow & TextFlowFlippedMask; +} + +inline bool isVerticalTextFlow(TextFlow textflow) +{ + return textflow & TextFlowVerticalMask; +} + +// Lines have vertical orientation; modes vertical-lr or vertical-rl. +inline bool isVerticalWritingMode(WritingMode writingMode) +{ + return isVerticalTextFlow(makeTextFlow(writingMode, LTR)); +} + +// Block progression increases in the opposite direction to normal; modes vertical-rl or horizontal-bt. +inline bool isFlippedWritingMode(WritingMode writingMode) +{ + return isFlippedTextFlow(makeTextFlow(writingMode, LTR)); +} + // Lines have horizontal orientation; modes horizontal-tb or horizontal-bt. inline bool isHorizontalWritingMode(WritingMode writingMode) { - return writingMode == TopToBottomWritingMode || writingMode == BottomToTopWritingMode; + return !isVerticalWritingMode(writingMode); } // Bottom of the line occurs earlier in the block; modes vertical-lr or horizontal-bt. inline bool isFlippedLinesWritingMode(WritingMode writingMode) { - return writingMode == LeftToRightWritingMode || writingMode == BottomToTopWritingMode; + return isVerticalWritingMode(writingMode) != isFlippedWritingMode(writingMode); } -// Block progression increases in the opposite direction to normal; modes vertical-rl or horizontal-bt. -inline bool isFlippedBlocksWritingMode(WritingMode writingMode) +enum LogicalBoxSide { + BeforeSide, + EndSide, + AfterSide, + StartSide +}; + +enum PhysicalBoxSide { + NilSide = -1, + TopSide, + RightSide, + BottomSide, + LeftSide +}; + +inline bool isHorizontalPhysicalSide(PhysicalBoxSide physicalSide) +{ + return physicalSide == LeftSide || physicalSide == RightSide; +} + +inline PhysicalBoxSide mirrorPhysicalSide(PhysicalBoxSide physicalSide) +{ + // top <-> bottom and left <-> right conversion + return static_cast<PhysicalBoxSide>((static_cast<int>(physicalSide) + 2) % 4); +} + +inline PhysicalBoxSide rotatePhysicalSide(PhysicalBoxSide physicalSide) +{ + // top <-> left and right <-> bottom conversion + bool horizontalSide = isHorizontalPhysicalSide(physicalSide); + return static_cast<PhysicalBoxSide>((static_cast<int>(physicalSide) + (horizontalSide ? 1 : 3)) % 4); +} + +inline PhysicalBoxSide mapLogicalSideToPhysicalSide(TextFlow textflow, LogicalBoxSide logicalSide) +{ + PhysicalBoxSide physicalSide = static_cast<PhysicalBoxSide>(logicalSide); + bool horizontalSide = isHorizontalPhysicalSide(physicalSide); + + if (isVerticalTextFlow(textflow)) + physicalSide = rotatePhysicalSide(physicalSide); + + if ((horizontalSide && isReversedTextFlow(textflow)) || (!horizontalSide && isFlippedTextFlow(textflow))) + physicalSide = mirrorPhysicalSide(physicalSide); + + return physicalSide; +} + +inline PhysicalBoxSide mapLogicalSideToPhysicalSide(WritingMode writingMode, LogicalBoxSide logicalSide) { - return writingMode == RightToLeftWritingMode || writingMode == BottomToTopWritingMode; + // Set the direction such that side is mirrored if isFlippedWritingMode() is true + TextDirection direction = isFlippedWritingMode(writingMode) ? RTL : LTR; + return mapLogicalSideToPhysicalSide(makeTextFlow(writingMode, direction), logicalSide); } } // namespace WebCore diff --git a/Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp b/Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp index d2d2c6f04..638f76c46 100644 --- a/Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp +++ b/Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp @@ -24,7 +24,8 @@ #include <Language.h> #include <glib.h> -#include <text/TextBreakIterator.h> +#include <unicode/ubrk.h> +#include <wtf/text/TextBreakIterator.h> namespace WebCore { @@ -53,14 +54,14 @@ TextCheckerEnchant::~TextCheckerEnchant() void TextCheckerEnchant::ignoreWord(const String& word) { - for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter) - enchant_dict_add_to_session(*iter, word.utf8().data(), -1); + for (auto& dictionary : m_enchantDictionaries) + enchant_dict_add_to_session(dictionary, word.utf8().data(), -1); } void TextCheckerEnchant::learnWord(const String& word) { - for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter) - enchant_dict_add(*iter, word.utf8().data(), -1); + for (auto& dictionary : m_enchantDictionaries) + enchant_dict_add(dictionary, word.utf8().data(), -1); } void TextCheckerEnchant::checkSpellingOfWord(const CString& word, int start, int end, int& misspellingLocation, int& misspellingLength) @@ -69,8 +70,8 @@ void TextCheckerEnchant::checkSpellingOfWord(const CString& word, int start, int char* startPtr = g_utf8_offset_to_pointer(string, start); int numberOfBytes = static_cast<int>(g_utf8_offset_to_pointer(string, end) - startPtr); - for (Vector<EnchantDict*>::const_iterator dictIter = m_enchantDictionaries.begin(); dictIter != m_enchantDictionaries.end(); ++dictIter) { - if (!enchant_dict_check(*dictIter, startPtr, numberOfBytes)) { + for (auto& dictionary : m_enchantDictionaries) { + if (!enchant_dict_check(dictionary, startPtr, numberOfBytes)) { // Stop checking, this word is ok in at least one dict. misspellingLocation = -1; misspellingLength = 0; @@ -91,13 +92,13 @@ void TextCheckerEnchant::checkSpellingOfString(const String& string, int& misspe if (!hasDictionary()) return; - TextBreakIterator* iter = wordBreakIterator(string); + UBreakIterator* iter = wordBreakIterator(string); if (!iter) return; CString utf8String = string.utf8(); - int start = textBreakFirst(iter); - for (int end = textBreakNext(iter); end != TextBreakDone; end = textBreakNext(iter)) { + int start = ubrk_first(iter); + for (int end = ubrk_next(iter); end != UBRK_DONE; end = ubrk_next(iter)) { if (isWordTextBreak(iter)) { checkSpellingOfWord(utf8String, start, end, misspellingLocation, misspellingLength); // Stop checking the next words If the current word is misspelled, to do not overwrite its misspelled location and length. @@ -114,11 +115,11 @@ Vector<String> TextCheckerEnchant::getGuessesForWord(const String& word) if (!hasDictionary()) return guesses; - for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter) { + for (auto& dictionary : m_enchantDictionaries) { size_t numberOfSuggestions; size_t i; - char** suggestions = enchant_dict_suggest(*iter, word.utf8().data(), -1, &numberOfSuggestions); + char** suggestions = enchant_dict_suggest(dictionary, word.utf8().data(), -1, &numberOfSuggestions); if (numberOfSuggestions <= 0) continue; @@ -128,7 +129,7 @@ Vector<String> TextCheckerEnchant::getGuessesForWord(const String& word) for (i = 0; i < numberOfSuggestions; i++) guesses.append(String::fromUTF8(suggestions[i])); - enchant_dict_free_suggestions(*iter, suggestions); + enchant_dict_free_suggestions(dictionary, suggestions); } return guesses; @@ -139,8 +140,8 @@ void TextCheckerEnchant::updateSpellCheckingLanguages(const Vector<String>& lang Vector<EnchantDict*> spellDictionaries; if (!languages.isEmpty()) { - for (Vector<String>::const_iterator iter = languages.begin(); iter != languages.end(); ++iter) { - CString currentLanguage = iter->utf8(); + for (auto& language : languages) { + CString currentLanguage = language.utf8(); if (enchant_broker_dict_exists(m_broker, currentLanguage.data())) { EnchantDict* dict = enchant_broker_request_dict(m_broker, currentLanguage.data()); spellDictionaries.append(dict); @@ -175,11 +176,11 @@ Vector<String> TextCheckerEnchant::loadedSpellCheckingLanguages() const // Get a Vector<CString> with the list of languages in use. Vector<CString> currentDictionaries; - for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter) - enchant_dict_describe(*iter, enchantDictDescribeCallback, ¤tDictionaries); + for (auto& dictionary : m_enchantDictionaries) + enchant_dict_describe(dictionary, enchantDictDescribeCallback, ¤tDictionaries); - for (Vector<CString>::const_iterator iter = currentDictionaries.begin(); iter != currentDictionaries.end(); ++iter) - languages.append(String::fromUTF8(iter->data())); + for (auto& dictionary : currentDictionaries) + languages.append(String::fromUTF8(dictionary.data())); return languages; } @@ -190,16 +191,16 @@ Vector<String> TextCheckerEnchant::availableSpellCheckingLanguages() const enchant_broker_list_dicts(m_broker, enchantDictDescribeCallback, &allDictionaries); Vector<String> languages; - for (Vector<CString>::const_iterator iter = allDictionaries.begin(); iter != allDictionaries.end(); ++iter) - languages.append(String::fromUTF8(iter->data())); + for (auto& dictionary : allDictionaries) + languages.append(String::fromUTF8(dictionary.data())); return languages; } void TextCheckerEnchant::freeEnchantBrokerDictionaries() { - for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter) - enchant_broker_free_dict(m_broker, *iter); + for (auto& dictionary : m_enchantDictionaries) + enchant_broker_free_dict(m_broker, dictionary); } } // namespace WebCore diff --git a/Source/WebCore/platform/text/enchant/TextCheckerEnchant.h b/Source/WebCore/platform/text/enchant/TextCheckerEnchant.h index eb9be3e1f..3bd73c205 100644 --- a/Source/WebCore/platform/text/enchant/TextCheckerEnchant.h +++ b/Source/WebCore/platform/text/enchant/TextCheckerEnchant.h @@ -24,7 +24,6 @@ #include <enchant.h> #include <wtf/FastMalloc.h> -#include <wtf/PassOwnPtr.h> #include <wtf/Vector.h> #include <wtf/text/CString.h> #include <wtf/text/WTFString.h> @@ -35,7 +34,7 @@ class TextCheckerEnchant { WTF_MAKE_FAST_ALLOCATED; public: - static PassOwnPtr<TextCheckerEnchant> create() { return adoptPtr(new TextCheckerEnchant); } + TextCheckerEnchant(); virtual ~TextCheckerEnchant(); void ignoreWord(const String&); @@ -48,7 +47,6 @@ public: Vector<String> availableSpellCheckingLanguages() const; private: - TextCheckerEnchant(); void freeEnchantBrokerDictionaries(); void checkSpellingOfWord(const CString&, int start, int end, int& misspellingLocation, int& misspellingLength); diff --git a/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp b/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp deleted file mode 100644 index 35e5a05fa..000000000 --- a/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (C) 2007 Alp Toker <alp@atoker.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public License - * along with this library; see the file COPYING.LIB. If not, write to - * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, - * Boston, MA 02110-1301, USA. - */ - -#include "config.h" -#include "TextBreakIteratorInternalICU.h" - -namespace WebCore { - -const char* currentSearchLocaleID() -{ - // FIXME: Should use system locale. - return ""; -} - -const char* currentTextBreakLocaleID() -{ - // FIXME: Should use system locale. - return "en_us"; -} - -} diff --git a/Source/WebCore/platform/text/hyphen/HyphenationLibHyphen.cpp b/Source/WebCore/platform/text/hyphen/HyphenationLibHyphen.cpp new file mode 100644 index 000000000..6c206b8aa --- /dev/null +++ b/Source/WebCore/platform/text/hyphen/HyphenationLibHyphen.cpp @@ -0,0 +1,294 @@ +/* + * Copyright (C) 2010 Apple Inc. All rights reserved. + * Copyright (C) 2015 Igalia S.L. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "Hyphenation.h" + +#if USE(LIBHYPHEN) + +#include "FileSystem.h" +#include <hyphen.h> +#include <limits> +#include <stdlib.h> +#include <wtf/HashMap.h> +#include <wtf/NeverDestroyed.h> +#include <wtf/TinyLRUCache.h> +#include <wtf/text/AtomicStringHash.h> +#include <wtf/text/CString.h> +#include <wtf/text/StringView.h> + +#if PLATFORM(GTK) +#include "GtkUtilities.h" +#include <wtf/glib/GUniquePtr.h> +#endif + +namespace WebCore { + +static const char* const gDictionaryDirectories[] = { + "/usr/share/hyphen", + "/usr/local/share/hyphen", +}; + +static String extractLocaleFromDictionaryFilePath(const String& filePath) +{ + // Dictionary files always have the form "hyph_<locale name>.dic" + // so we strip everything except the locale. + String fileName = pathGetFileName(filePath); + static const int prefixLength = 5; + static const int suffixLength = 4; + return fileName.substring(prefixLength, fileName.length() - prefixLength - suffixLength); +} + +static void scanDirectoryForDicionaries(const char* directoryPath, HashMap<AtomicString, Vector<String>>& availableLocales) +{ + for (auto& filePath : listDirectory(directoryPath, "hyph_*.dic")) { + String locale = extractLocaleFromDictionaryFilePath(filePath).convertToASCIILowercase(); + + char normalizedPath[PATH_MAX]; + if (!realpath(fileSystemRepresentation(filePath).data(), normalizedPath)) + continue; + + filePath = stringFromFileSystemRepresentation(normalizedPath); + availableLocales.add(locale, Vector<String>()).iterator->value.append(filePath); + + String localeReplacingUnderscores = String(locale); + localeReplacingUnderscores.replace('_', '-'); + if (locale != localeReplacingUnderscores) + availableLocales.add(localeReplacingUnderscores, Vector<String>()).iterator->value.append(filePath); + + size_t dividerPosition = localeReplacingUnderscores.find('-'); + if (dividerPosition != notFound) { + localeReplacingUnderscores.truncate(dividerPosition); + availableLocales.add(localeReplacingUnderscores, Vector<String>()).iterator->value.append(filePath); + } + } +} + +#if ENABLE(DEVELOPER_MODE) +static void scanTestDictionariesDirectoryIfNecessary(HashMap<AtomicString, Vector<String>>& availableLocales) +{ + // It's unfortunate that we need to look for the dictionaries this way, but + // libhyphen doesn't have the concept of installed dictionaries. Instead, + // we have this special case for WebKit tests. +#if PLATFORM(GTK) + CString buildDirectory = webkitBuildDirectory(); + GUniquePtr<char> dictionariesPath(g_build_filename(buildDirectory.data(), "DependenciesGTK", "Root", "webkitgtk-test-dicts", nullptr)); + if (g_file_test(dictionariesPath.get(), static_cast<GFileTest>(G_FILE_TEST_IS_DIR))) { + scanDirectoryForDicionaries(dictionariesPath.get(), availableLocales); + return; + } + + // Try alternative dictionaries path for people not using JHBuild. + dictionariesPath.reset(g_build_filename(buildDirectory.data(), "webkitgtk-test-dicts", nullptr)); + scanDirectoryForDicionaries(dictionariesPath.get(), availableLocales); +#elif defined(TEST_HYPHENATAION_PATH) + scanDirectoryForDicionaries(TEST_HYPHENATAION_PATH, availableLocales); +#else + UNUSED_PARAM(availableLocales); +#endif +} +#endif + +static HashMap<AtomicString, Vector<String>>& availableLocales() +{ + static bool scannedLocales = false; + static HashMap<AtomicString, Vector<String>> availableLocales; + + if (!scannedLocales) { + for (size_t i = 0; i < WTF_ARRAY_LENGTH(gDictionaryDirectories); i++) + scanDirectoryForDicionaries(gDictionaryDirectories[i], availableLocales); + +#if ENABLE(DEVELOPER_MODE) + scanTestDictionariesDirectoryIfNecessary(availableLocales); +#endif + + scannedLocales = true; + } + + return availableLocales; +} + +bool canHyphenate(const AtomicString& localeIdentifier) +{ + if (localeIdentifier.isNull()) + return false; + if (availableLocales().contains(localeIdentifier)) + return true; + return availableLocales().contains(AtomicString(localeIdentifier.string().convertToASCIILowercase())); +} + +class HyphenationDictionary : public RefCounted<HyphenationDictionary> { + WTF_MAKE_NONCOPYABLE(HyphenationDictionary); + WTF_MAKE_FAST_ALLOCATED; +public: + typedef std::unique_ptr<HyphenDict, void(*)(HyphenDict*)> HyphenDictUniquePtr; + + virtual ~HyphenationDictionary() { } + static RefPtr<HyphenationDictionary> createNull() + { + return adoptRef(new HyphenationDictionary()); + } + + static RefPtr<HyphenationDictionary> create(const CString& dictPath) + { + return adoptRef(new HyphenationDictionary(dictPath)); + } + + HyphenDict* libhyphenDictionary() const + { + return m_libhyphenDictionary.get(); + } + +private: + HyphenationDictionary(const CString& dictPath) + : m_libhyphenDictionary(HyphenDictUniquePtr(hnj_hyphen_load(dictPath.data()), hnj_hyphen_free)) + { + } + + HyphenationDictionary() + : m_libhyphenDictionary(HyphenDictUniquePtr(nullptr, hnj_hyphen_free)) + { + } + + HyphenDictUniquePtr m_libhyphenDictionary; +}; + +} // namespace WebCore + +namespace WTF { + +template<> +class TinyLRUCachePolicy<AtomicString, RefPtr<WebCore::HyphenationDictionary>> +{ +public: + static TinyLRUCache<AtomicString, RefPtr<WebCore::HyphenationDictionary>, 32>& cache() + { + static NeverDestroyed<TinyLRUCache<AtomicString, RefPtr<WebCore::HyphenationDictionary>, 32>> cache; + return cache; + } + + static bool isKeyNull(const AtomicString& localeIdentifier) + { + return localeIdentifier.isNull(); + } + + static RefPtr<WebCore::HyphenationDictionary> createValueForNullKey() + { + return WebCore::HyphenationDictionary::createNull(); + } + + static RefPtr<WebCore::HyphenationDictionary> createValueForKey(const AtomicString& dictionaryPath) + { + return WebCore::HyphenationDictionary::create(WebCore::fileSystemRepresentation(dictionaryPath.string())); + } +}; + +} // namespace WTF + +namespace WebCore { + +static void countLeadingSpaces(const CString& utf8String, int32_t& pointerOffset, int32_t& characterOffset) +{ + pointerOffset = 0; + characterOffset = 0; + const char* stringData = utf8String.data(); + UChar32 character = 0; + while (static_cast<unsigned>(pointerOffset) < utf8String.length()) { + int32_t nextPointerOffset = pointerOffset; + U8_NEXT(stringData, nextPointerOffset, static_cast<int32_t>(utf8String.length()), character); + + if (character < 0 || !u_isUWhiteSpace(character)) + return; + + pointerOffset = nextPointerOffset; + characterOffset++; + } +} + +size_t lastHyphenLocation(StringView string, size_t beforeIndex, const AtomicString& localeIdentifier) +{ + // libhyphen accepts strings in UTF-8 format, but WebCore can only provide StringView + // which stores either UTF-16 or Latin1 data. This is unfortunate for performance + // reasons and we should consider switching to a more flexible hyphenation library + // if it is available. + CString utf8StringCopy = string.toStringWithoutCopying().utf8(); + + // WebCore often passes strings like " wordtohyphenate" to the platform layer. Since + // libhyphen isn't advanced enough to deal with leading spaces (presumably CoreFoundation + // can), we should find the appropriate indexes into the string to skip them. + int32_t leadingSpaceBytes; + int32_t leadingSpaceCharacters; + countLeadingSpaces(utf8StringCopy, leadingSpaceBytes, leadingSpaceCharacters); + + // The libhyphen documentation specifies that this array should be 5 bytes longer than + // the byte length of the input string. + Vector<char> hyphenArray(utf8StringCopy.length() - leadingSpaceBytes + 5); + char* hyphenArrayData = hyphenArray.data(); + + String lowercaseLocaleIdentifier = AtomicString(localeIdentifier.string().convertToASCIILowercase()); + + // Web content may specify strings for locales which do not exist or that we do not have. + if (!availableLocales().contains(lowercaseLocaleIdentifier)) + return 0; + + for (const auto& dictionaryPath : availableLocales().get(lowercaseLocaleIdentifier)) { + RefPtr<HyphenationDictionary> dictionary = WTF::TinyLRUCachePolicy<AtomicString, RefPtr<HyphenationDictionary>>::cache().get(AtomicString(dictionaryPath)); + + char** replacements = nullptr; + int* positions = nullptr; + int* removedCharacterCounts = nullptr; + hnj_hyphen_hyphenate2(dictionary->libhyphenDictionary(), + utf8StringCopy.data() + leadingSpaceBytes, + utf8StringCopy.length() - leadingSpaceBytes, + hyphenArrayData, + nullptr, /* output parameter for hyphenated word */ + &replacements, + &positions, + &removedCharacterCounts); + + if (replacements) { + for (unsigned i = 0; i < utf8StringCopy.length() - leadingSpaceBytes - 1; i++) + free(replacements[i]); + free(replacements); + } + + free(positions); + free(removedCharacterCounts); + + for (int i = beforeIndex - leadingSpaceCharacters - 2; i >= 0; i--) { + // libhyphen will put an odd number in hyphenArrayData at all + // hyphenation points. A number & 1 will be true for odd numbers. + if (hyphenArrayData[i] & 1) + return i + 1 + leadingSpaceCharacters; + } + } + + return 0; +} + +} // namespace WebCore + +#endif // USE(LIBHYPHEN) diff --git a/Source/WebCore/platform/text/icu/UTextProvider.cpp b/Source/WebCore/platform/text/icu/UTextProvider.cpp deleted file mode 100644 index fdaee81e6..000000000 --- a/Source/WebCore/platform/text/icu/UTextProvider.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2014 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "UTextProvider.h" - -#include <string.h> - -namespace WebCore { - -// Relocate pointer from source into destination as required. -static inline void fixPointer(const UText* source, UText* destination, const void*& pointer) -{ - if (pointer >= source->pExtra && pointer < static_cast<char*>(source->pExtra) + source->extraSize) { - // Pointer references source extra buffer. - pointer = static_cast<char*>(destination->pExtra) + (static_cast<const char*>(pointer) - static_cast<const char*>(source->pExtra)); - } else if (pointer >= source && pointer < reinterpret_cast<const char*>(source) + source->sizeOfStruct) { - // Pointer references source text structure, but not source extra buffer. - pointer = reinterpret_cast<char*>(destination) + (static_cast<const char*>(pointer) - reinterpret_cast<const char*>(source)); - } -} - -UText* uTextCloneImpl(UText* destination, const UText* source, UBool deep, UErrorCode* status) -{ - ASSERT_UNUSED(deep, !deep); - if (U_FAILURE(*status)) - return nullptr; - int32_t extraSize = source->extraSize; - destination = utext_setup(destination, extraSize, status); - if (U_FAILURE(*status)) - return destination; - void* extraNew = destination->pExtra; - int32_t flags = destination->flags; - int sizeToCopy = std::min(source->sizeOfStruct, destination->sizeOfStruct); - memcpy(destination, source, sizeToCopy); - destination->pExtra = extraNew; - destination->flags = flags; - memcpy(destination->pExtra, source->pExtra, extraSize); - fixPointer(source, destination, destination->context); - fixPointer(source, destination, destination->p); - fixPointer(source, destination, destination->q); - ASSERT(!destination->r); - const void* chunkContents = static_cast<const void*>(destination->chunkContents); - fixPointer(source, destination, chunkContents); - destination->chunkContents = static_cast<const UChar*>(chunkContents); - return destination; -} - -} // namespace WebCore diff --git a/Source/WebCore/platform/text/icu/UTextProvider.h b/Source/WebCore/platform/text/icu/UTextProvider.h deleted file mode 100644 index 81a025a98..000000000 --- a/Source/WebCore/platform/text/icu/UTextProvider.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (C) 2014 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef UTextProvider_h -#define UTextProvider_h - -#include <unicode/utext.h> -#include <wtf/unicode/Unicode.h> - -namespace WebCore { - -enum class UTextProviderContext { - NoContext, - PriorContext, - PrimaryContext -}; - -inline UTextProviderContext uTextProviderContext(const UText* text, int64_t nativeIndex, UBool forward) -{ - if (!text->b || nativeIndex > text->b) - return UTextProviderContext::PrimaryContext; - if (nativeIndex == text->b) - return forward ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext; - return UTextProviderContext::PriorContext; -} - -inline void initializeContextAwareUTextProvider(UText* text, const UTextFuncs* funcs, const void* string, unsigned length, const UChar* priorContext, int priorContextLength) -{ - text->pFuncs = funcs; - text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS; - text->context = string; - text->p = string; - text->a = length; - text->q = priorContext; - text->b = priorContextLength; -} - -// Shared implementation for the UTextClone function on UTextFuncs. - -UText* uTextCloneImpl(UText* destination, const UText* source, UBool deep, UErrorCode* status); - - -// Helpers for the UTextAccess function on UTextFuncs. - -inline int64_t uTextAccessPinIndex(int64_t& index, int64_t limit) -{ - if (index < 0) - index = 0; - else if (index > limit) - index = limit; - return index; -} - -inline bool uTextAccessInChunkOrOutOfRange(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward, UBool& isAccessible) -{ - if (forward) { - if (nativeIndex >= text->chunkNativeStart && nativeIndex < text->chunkNativeLimit) { - int64_t offset = nativeIndex - text->chunkNativeStart; - // Ensure chunk offset is well formed if computed offset exceeds int32_t range. - ASSERT(offset < std::numeric_limits<int32_t>::max()); - text->chunkOffset = offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0; - isAccessible = TRUE; - return true; - } - if (nativeIndex >= nativeLength && text->chunkNativeLimit == nativeLength) { - text->chunkOffset = text->chunkLength; - isAccessible = FALSE; - return true; - } - } else { - if (nativeIndex > text->chunkNativeStart && nativeIndex <= text->chunkNativeLimit) { - int64_t offset = nativeIndex - text->chunkNativeStart; - // Ensure chunk offset is well formed if computed offset exceeds int32_t range. - ASSERT(offset < std::numeric_limits<int32_t>::max()); - text->chunkOffset = offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0; - isAccessible = TRUE; - return true; - } - if (nativeIndex <= 0 && !text->chunkNativeStart) { - text->chunkOffset = 0; - isAccessible = FALSE; - return true; - } - } - return false; -} - -} // namespace WebCore - -#endif // UTextProvider_h diff --git a/Source/WebCore/platform/text/icu/UTextProviderLatin1.cpp b/Source/WebCore/platform/text/icu/UTextProviderLatin1.cpp deleted file mode 100644 index ee027637e..000000000 --- a/Source/WebCore/platform/text/icu/UTextProviderLatin1.cpp +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright (C) 2014 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "UTextProviderLatin1.h" - -#include "UTextProvider.h" -#include <wtf/text/StringImpl.h> - -namespace WebCore { - -// Latin1 provider - -static UText* uTextLatin1Clone(UText*, const UText*, UBool, UErrorCode*); -static int64_t uTextLatin1NativeLength(UText*); -static UBool uTextLatin1Access(UText*, int64_t, UBool); -static int32_t uTextLatin1Extract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*); -static int64_t uTextLatin1MapOffsetToNative(const UText*); -static int32_t uTextLatin1MapNativeIndexToUTF16(const UText*, int64_t); -static void uTextLatin1Close(UText*); - -static struct UTextFuncs uTextLatin1Funcs = { - sizeof(UTextFuncs), - 0, - 0, - 0, - uTextLatin1Clone, - uTextLatin1NativeLength, - uTextLatin1Access, - uTextLatin1Extract, - nullptr, - nullptr, - uTextLatin1MapOffsetToNative, - uTextLatin1MapNativeIndexToUTF16, - uTextLatin1Close, - nullptr, - nullptr, - nullptr -}; - -static UText* uTextLatin1Clone(UText* destination, const UText* source, UBool deep, UErrorCode* status) -{ - ASSERT_UNUSED(deep, !deep); - - if (U_FAILURE(*status)) - return 0; - - UText* result = utext_setup(destination, sizeof(UChar) * UTextWithBufferInlineCapacity, status); - if (U_FAILURE(*status)) - return destination; - - result->providerProperties = source->providerProperties; - - // Point at the same position, but with an empty buffer. - result->chunkNativeStart = source->chunkNativeStart; - result->chunkNativeLimit = source->chunkNativeStart; - result->nativeIndexingLimit = static_cast<int32_t>(source->chunkNativeStart); - result->chunkOffset = 0; - result->context = source->context; - result->a = source->a; - result->pFuncs = &uTextLatin1Funcs; - result->chunkContents = (UChar*)result->pExtra; - memset(const_cast<UChar*>(result->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity); - - return result; -} - -static int64_t uTextLatin1NativeLength(UText* uText) -{ - return uText->a; -} - -static UBool uTextLatin1Access(UText* uText, int64_t index, UBool forward) -{ - int64_t length = uText->a; - - if (forward) { - if (index < uText->chunkNativeLimit && index >= uText->chunkNativeStart) { - // Already inside the buffer. Set the new offset. - uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); - return TRUE; - } - if (index >= length && uText->chunkNativeLimit == length) { - // Off the end of the buffer, but we can't get it. - uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); - return FALSE; - } - } else { - if (index <= uText->chunkNativeLimit && index > uText->chunkNativeStart) { - // Already inside the buffer. Set the new offset. - uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); - return TRUE; - } - if (!index && !uText->chunkNativeStart) { - // Already at the beginning; can't go any farther. - uText->chunkOffset = 0; - return FALSE; - } - } - - if (forward) { - uText->chunkNativeStart = index; - uText->chunkNativeLimit = uText->chunkNativeStart + UTextWithBufferInlineCapacity; - if (uText->chunkNativeLimit > length) - uText->chunkNativeLimit = length; - - uText->chunkOffset = 0; - } else { - uText->chunkNativeLimit = index; - if (uText->chunkNativeLimit > length) - uText->chunkNativeLimit = length; - - uText->chunkNativeStart = uText->chunkNativeLimit - UTextWithBufferInlineCapacity; - if (uText->chunkNativeStart < 0) - uText->chunkNativeStart = 0; - - uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart); - } - uText->chunkLength = static_cast<int32_t>(uText->chunkNativeLimit - uText->chunkNativeStart); - - StringImpl::copyChars(const_cast<UChar*>(uText->chunkContents), static_cast<const LChar*>(uText->context) + uText->chunkNativeStart, static_cast<unsigned>(uText->chunkLength)); - - uText->nativeIndexingLimit = uText->chunkLength; - - return TRUE; -} - -static int32_t uTextLatin1Extract(UText* uText, int64_t start, int64_t limit, UChar* dest, int32_t destCapacity, UErrorCode* status) -{ - int64_t length = uText->a; - if (U_FAILURE(*status)) - return 0; - - if (destCapacity < 0 || (!dest && destCapacity > 0)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - if (start < 0 || start > limit || (limit - start) > INT32_MAX) { - *status = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - if (start > length) - start = length; - if (limit > length) - limit = length; - - length = limit - start; - - if (!length) - return 0; - - if (destCapacity > 0 && !dest) { - int32_t trimmedLength = static_cast<int32_t>(length); - if (trimmedLength > destCapacity) - trimmedLength = destCapacity; - - StringImpl::copyChars(dest, static_cast<const LChar*>(uText->context) + start, static_cast<unsigned>(trimmedLength)); - } - - if (length < destCapacity) { - dest[length] = 0; - if (*status == U_STRING_NOT_TERMINATED_WARNING) - *status = U_ZERO_ERROR; - } else if (length == destCapacity) - *status = U_STRING_NOT_TERMINATED_WARNING; - else - *status = U_BUFFER_OVERFLOW_ERROR; - - return static_cast<int32_t>(length); -} - -static int64_t uTextLatin1MapOffsetToNative(const UText* uText) -{ - return uText->chunkNativeStart + uText->chunkOffset; -} - -static int32_t uTextLatin1MapNativeIndexToUTF16(const UText* uText, int64_t nativeIndex) -{ - ASSERT_UNUSED(uText, uText->chunkNativeStart >= nativeIndex); - ASSERT_UNUSED(uText, nativeIndex < uText->chunkNativeLimit); - return static_cast<int32_t>(nativeIndex); -} - -static void uTextLatin1Close(UText* uText) -{ - uText->context = nullptr; -} - -UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status) -{ - if (U_FAILURE(*status)) - return nullptr; - if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status); - if (U_FAILURE(*status)) { - ASSERT(!text); - return nullptr; - } - - text->context = string; - text->a = length; - text->pFuncs = &uTextLatin1Funcs; - text->chunkContents = (UChar*)text->pExtra; - memset(const_cast<UChar*>(text->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity); - - return text; -} - - -// Latin1ContextAware provider - -static UText* uTextLatin1ContextAwareClone(UText*, const UText*, UBool, UErrorCode*); -static int64_t uTextLatin1ContextAwareNativeLength(UText*); -static UBool uTextLatin1ContextAwareAccess(UText*, int64_t, UBool); -static int32_t uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*); -static void uTextLatin1ContextAwareClose(UText*); - -static const struct UTextFuncs textLatin1ContextAwareFuncs = { - sizeof(UTextFuncs), - 0, - 0, - 0, - uTextLatin1ContextAwareClone, - uTextLatin1ContextAwareNativeLength, - uTextLatin1ContextAwareAccess, - uTextLatin1ContextAwareExtract, - nullptr, - nullptr, - nullptr, - nullptr, - uTextLatin1ContextAwareClose, - nullptr, - nullptr, - nullptr -}; - -static inline UTextProviderContext textLatin1ContextAwareGetCurrentContext(const UText* text) -{ - if (!text->chunkContents) - return UTextProviderContext::NoContext; - return text->chunkContents == text->pExtra ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext; -} - -static void textLatin1ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) -{ - ASSERT(text->chunkContents == text->pExtra); - if (forward) { - ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength); - text->chunkNativeStart = nativeIndex; - text->chunkNativeLimit = nativeIndex + text->extraSize / sizeof(UChar); - if (text->chunkNativeLimit > nativeLength) - text->chunkNativeLimit = nativeLength; - } else { - ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength); - text->chunkNativeLimit = nativeIndex; - text->chunkNativeStart = nativeIndex - text->extraSize / sizeof(UChar); - if (text->chunkNativeStart < text->b) - text->chunkNativeStart = text->b; - } - int64_t length = text->chunkNativeLimit - text->chunkNativeStart; - // Ensure chunk length is well defined if computed length exceeds int32_t range. - ASSERT(length < std::numeric_limits<int32_t>::max()); - text->chunkLength = length < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0; - text->nativeIndexingLimit = text->chunkLength; - text->chunkOffset = forward ? 0 : text->chunkLength; - StringImpl::copyChars(const_cast<UChar*>(text->chunkContents), static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b), static_cast<unsigned>(text->chunkLength)); -} - -static void textLatin1ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) -{ - ASSERT(!text->chunkContents || text->chunkContents == text->q); - text->chunkContents = static_cast<const UChar*>(text->pExtra); - textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); -} - -static void textLatin1ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) -{ - ASSERT(text->chunkContents == text->q); - ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b); - ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); - ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); - text->chunkNativeStart = 0; - text->chunkNativeLimit = text->b; - text->chunkLength = text->b; - text->nativeIndexingLimit = text->chunkLength; - int64_t offset = nativeIndex - text->chunkNativeStart; - // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length. - ASSERT(offset < std::numeric_limits<int32_t>::max()); - text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength); -} - -static void textLatin1ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) -{ - ASSERT(!text->chunkContents || text->chunkContents == text->pExtra); - text->chunkContents = static_cast<const UChar*>(text->q); - textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); -} - -static UText* uTextLatin1ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status) -{ - return uTextCloneImpl(destination, source, deep, status); -} - -static int64_t uTextLatin1ContextAwareNativeLength(UText* text) -{ - return text->a + text->b; -} - -static UBool uTextLatin1ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward) -{ - if (!text->context) - return FALSE; - int64_t nativeLength = uTextLatin1ContextAwareNativeLength(text); - UBool isAccessible; - if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible)) - return isAccessible; - nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength); - UTextProviderContext currentContext = textLatin1ContextAwareGetCurrentContext(text); - UTextProviderContext newContext = uTextProviderContext(text, nativeIndex, forward); - ASSERT(newContext != UTextProviderContext::NoContext); - if (newContext == currentContext) { - if (currentContext == UTextProviderContext::PrimaryContext) - textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); - else - textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); - } else if (newContext == UTextProviderContext::PrimaryContext) - textLatin1ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward); - else { - ASSERT(newContext == UTextProviderContext::PriorContext); - textLatin1ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward); - } - return TRUE; -} - -static int32_t uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode) -{ - // In the present context, this text provider is used only with ICU functions - // that do not perform an extract operation. - ASSERT_NOT_REACHED(); - *errorCode = U_UNSUPPORTED_ERROR; - return 0; -} - -static void uTextLatin1ContextAwareClose(UText* text) -{ - text->context = nullptr; -} - -UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status) -{ - if (U_FAILURE(*status)) - return 0; - if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status); - if (U_FAILURE(*status)) { - ASSERT(!text); - return 0; - } - - initializeContextAwareUTextProvider(text, &textLatin1ContextAwareFuncs, string, length, priorContext, priorContextLength); - return text; -} - -} // namespace WebCore diff --git a/Source/WebCore/platform/text/icu/UTextProviderUTF16.cpp b/Source/WebCore/platform/text/icu/UTextProviderUTF16.cpp deleted file mode 100644 index 7aaac48c8..000000000 --- a/Source/WebCore/platform/text/icu/UTextProviderUTF16.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright (C) 2014 Apple Inc. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - * THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "config.h" -#include "UTextProviderUTF16.h" - -#include "UTextProvider.h" - -namespace WebCore { - -// UTF16ContextAware provider - -static UText* uTextUTF16ContextAwareClone(UText*, const UText*, UBool, UErrorCode*); -static int64_t uTextUTF16ContextAwareNativeLength(UText*); -static UBool uTextUTF16ContextAwareAccess(UText*, int64_t, UBool); -static int32_t uTextUTF16ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*); -static void uTextUTF16ContextAwareClose(UText*); - -static const struct UTextFuncs textUTF16ContextAwareFuncs = { - sizeof(UTextFuncs), - 0, - 0, - 0, - uTextUTF16ContextAwareClone, - uTextUTF16ContextAwareNativeLength, - uTextUTF16ContextAwareAccess, - uTextUTF16ContextAwareExtract, - nullptr, - nullptr, - nullptr, - nullptr, - uTextUTF16ContextAwareClose, - nullptr, - nullptr, - nullptr -}; - -static inline UTextProviderContext textUTF16ContextAwareGetCurrentContext(const UText* text) -{ - if (!text->chunkContents) - return UTextProviderContext::NoContext; - return text->chunkContents == text->p ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext; -} - -static void textUTF16ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) -{ - ASSERT(text->chunkContents == text->p); - ASSERT_UNUSED(forward, forward ? nativeIndex >= text->b : nativeIndex > text->b); - ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); - text->chunkNativeStart = text->b; - text->chunkNativeLimit = nativeLength; - int64_t length = text->chunkNativeLimit - text->chunkNativeStart; - // Ensure chunk length is well defined if computed length exceeds int32_t range. - ASSERT(length < std::numeric_limits<int32_t>::max()); - text->chunkLength = length < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0; - text->nativeIndexingLimit = text->chunkLength; - int64_t offset = nativeIndex - text->chunkNativeStart; - // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length. - ASSERT(offset < std::numeric_limits<int32_t>::max()); - text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength); -} - -static void textUTF16ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) -{ - ASSERT(!text->chunkContents || text->chunkContents == text->q); - text->chunkContents = static_cast<const UChar*>(text->p); - textUTF16ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); -} - -static void textUTF16ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) -{ - ASSERT(text->chunkContents == text->q); - ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b); - ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); - ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength); - text->chunkNativeStart = 0; - text->chunkNativeLimit = text->b; - text->chunkLength = text->b; - text->nativeIndexingLimit = text->chunkLength; - int64_t offset = nativeIndex - text->chunkNativeStart; - // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length. - ASSERT(offset < std::numeric_limits<int32_t>::max()); - text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength); -} - -static void textUTF16ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward) -{ - ASSERT(!text->chunkContents || text->chunkContents == text->p); - text->chunkContents = static_cast<const UChar*>(text->q); - textUTF16ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); -} - -static UText* uTextUTF16ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status) -{ - return uTextCloneImpl(destination, source, deep, status); -} - -static inline int64_t uTextUTF16ContextAwareNativeLength(UText* text) -{ - return text->a + text->b; -} - -static UBool uTextUTF16ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward) -{ - if (!text->context) - return FALSE; - int64_t nativeLength = uTextUTF16ContextAwareNativeLength(text); - UBool isAccessible; - if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible)) - return isAccessible; - nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength); - UTextProviderContext currentContext = textUTF16ContextAwareGetCurrentContext(text); - UTextProviderContext newContext = uTextProviderContext(text, nativeIndex, forward); - ASSERT(newContext != UTextProviderContext::NoContext); - if (newContext == currentContext) { - if (currentContext == UTextProviderContext::PrimaryContext) - textUTF16ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward); - else - textUTF16ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward); - } else if (newContext == UTextProviderContext::PrimaryContext) - textUTF16ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward); - else { - ASSERT(newContext == UTextProviderContext::PriorContext); - textUTF16ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward); - } - return TRUE; -} - -static int32_t uTextUTF16ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode) -{ - // In the present context, this text provider is used only with ICU functions - // that do not perform an extract operation. - ASSERT_NOT_REACHED(); - *errorCode = U_UNSUPPORTED_ERROR; - return 0; -} - -static void uTextUTF16ContextAwareClose(UText* text) -{ - text->context = nullptr; -} - -UText* openUTF16ContextAwareUTextProvider(UText* text, const UChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status) -{ - if (U_FAILURE(*status)) - return 0; - if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - text = utext_setup(text, 0, status); - if (U_FAILURE(*status)) { - ASSERT(!text); - return 0; - } - - initializeContextAwareUTextProvider(text, &textUTF16ContextAwareFuncs, string, length, priorContext, priorContextLength); - return text; -} - -} // namespace WebCore diff --git a/Source/WebCore/platform/text/mac/make-charset-table.pl b/Source/WebCore/platform/text/mac/make-charset-table.pl deleted file mode 100755 index 16fd25ab1..000000000 --- a/Source/WebCore/platform/text/mac/make-charset-table.pl +++ /dev/null @@ -1,225 +0,0 @@ -#!/usr/bin/perl -w - -# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of -# its contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -use strict; - -my %aliasesFromCharsetsFile; -my %namesWritten; - -my $output = ""; - -my $error = 0; - -sub error ($) -{ - print STDERR @_, "\n"; - $error = 1; -} - -sub emit_line -{ - my ($name, $prefix, $encoding, $flags) = @_; - - error "$name shows up twice in output" if $namesWritten{$name}; - $namesWritten{$name} = 1; - - $output .= " { \"$name\", $prefix$encoding },\n"; -} - -sub process_platform_encodings -{ - my ($filename, $PlatformPrefix) = @_; - my $baseFilename = $filename; - $baseFilename =~ s|.*/||; - - my %seenPlatformNames; - my %seenIANANames; - - open PLATFORM_ENCODINGS, $filename or die; - - while (<PLATFORM_ENCODINGS>) { - chomp; - s/\#.*$//; - s/\s+$//; - if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) { - my %aliases; - - my $PlatformNameWithFlags = $PlatformName; - if ($flags) { - $PlatformNameWithFlags .= ", " . $flags; - } else { - $flags = "NoEncodingFlags"; - } - error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags}; - $seenPlatformNames{$PlatformNameWithFlags} = 1; - - # Build the aliases list. - # Also check that no two names are part of the same entry in the charsets file. - my @IANANames = split ", ", $IANANames; - my $firstName = ""; - my $canonicalFirstName = ""; - my $prevName = ""; - for my $name (@IANANames) { - if ($firstName eq "") { - if ($name !~ /^[-A-Za-z0-9_]+$/) { - error "$name, in $baseFilename, has illegal characters in it"; - next; - } - $firstName = $name; - } else { - if ($name !~ /^[a-z0-9]+$/) { - error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)"; - next; - } - if ($name le $prevName) { - error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order"; - } - $prevName = $name; - } - - my $canonicalName = lc $name; - $canonicalName =~ tr/-_//d; - - $canonicalFirstName = $canonicalName if $canonicalFirstName eq ""; - - error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName}; - $seenIANANames{$canonicalName} = 1; - - $aliases{$canonicalName} = 1; - next if !$aliasesFromCharsetsFile{$canonicalName}; - for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) { - $aliases{$alias} = 1; - } - for my $otherName (@IANANames) { - next if $canonicalName eq $otherName; - if ($aliasesFromCharsetsFile{$otherName} - && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName} - && $canonicalName le $otherName) { - error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt"; - } - } - } - - # write out - emit_line($firstName, $PlatformPrefix, $PlatformName, $flags); - for my $alias (sort keys %aliases) { - emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName; - } - } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) { - my $PlatformName = $1; - - error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName}; - $seenPlatformNames{$PlatformName} = 1; - } elsif (/./) { - error "syntax error in $baseFilename, line $."; - } - } - - close PLATFORM_ENCODINGS; -} - -sub process_iana_charset -{ - my ($canonical_name, @aliases) = @_; - - return if !$canonical_name; - - my @names = sort $canonical_name, @aliases; - - for my $name (@names) { - $aliasesFromCharsetsFile{$name} = \@names; - } -} - -sub process_iana_charsets -{ - my ($filename) = @_; - - open CHARSETS, $filename or die; - - my %seen; - - my $canonical_name; - my @aliases; - - my %exceptions = ( isoir91 => 1, isoir92 => 1 ); - - while (<CHARSETS>) { - chomp; - if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) { - $new_canonical_name = lc $new_canonical_name; - $new_canonical_name =~ tr/a-z0-9//cd; - - error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name}; - $seen{$new_canonical_name} = $new_canonical_name; - - process_iana_charset $canonical_name, @aliases; - - $canonical_name = $new_canonical_name; - @aliases = (); - } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) { - $new_alias = lc $new_alias; - $new_alias =~ tr/a-z0-9//cd; - - # do this after normalizing the alias, sometimes character-sets.txt - # has weird escape characters, e.g. \b after None - next if $new_alias eq "none"; - - error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias}; - push @aliases, $new_alias if !$seen{$new_alias}; - $seen{$new_alias} = $canonical_name; - } - } - - process_iana_charset $canonical_name, @aliases; - - close CHARSETS; -} - -# Program body - -process_iana_charsets($ARGV[0]); -process_platform_encodings($ARGV[1], $ARGV[2]); - -exit 1 if $error; - -print <<EOF -// File generated by make-charset-table.pl. Do not edit! - -#include "config.h" -#include "CharsetData.h" - -namespace WebCore { - - const CharsetEntry CharsetTable[] = { -$output - { 0, 0 } - }; - -} -EOF |