summaryrefslogtreecommitdiff
path: root/Source/WebCore/platform/text
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
commit1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WebCore/platform/text
parent32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
downloadWebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/WebCore/platform/text')
-rw-r--r--Source/WebCore/platform/text/BidiContext.cpp56
-rw-r--r--Source/WebCore/platform/text/BidiContext.h32
-rw-r--r--Source/WebCore/platform/text/BidiResolver.h316
-rw-r--r--Source/WebCore/platform/text/BidiRunList.h198
-rw-r--r--Source/WebCore/platform/text/CharacterProperties.h96
-rw-r--r--Source/WebCore/platform/text/DateTimeFormat.cpp12
-rw-r--r--Source/WebCore/platform/text/DecodeEscapeSequences.h80
-rw-r--r--Source/WebCore/platform/text/Hyphenation.cpp7
-rw-r--r--Source/WebCore/platform/text/Hyphenation.h11
-rw-r--r--Source/WebCore/platform/text/LineBreakIteratorPoolICU.h111
-rw-r--r--Source/WebCore/platform/text/LineEnding.cpp40
-rw-r--r--Source/WebCore/platform/text/LineEnding.h8
-rw-r--r--Source/WebCore/platform/text/LocaleICU.cpp376
-rw-r--r--Source/WebCore/platform/text/LocaleICU.h110
-rw-r--r--Source/WebCore/platform/text/LocaleNone.cpp31
-rw-r--r--Source/WebCore/platform/text/LocaleToScriptMapping.h4
-rw-r--r--Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp53
-rw-r--r--Source/WebCore/platform/text/LocaleToScriptMappingICU.cpp88
-rw-r--r--Source/WebCore/platform/text/NonCJKGlyphOrientation.h35
-rw-r--r--Source/WebCore/platform/text/PlatformLocale.cpp8
-rw-r--r--Source/WebCore/platform/text/PlatformLocale.h17
-rw-r--r--Source/WebCore/platform/text/SegmentedString.cpp401
-rw-r--r--Source/WebCore/platform/text/SegmentedString.h567
-rw-r--r--Source/WebCore/platform/text/StringWithDirection.h54
-rw-r--r--Source/WebCore/platform/text/SuffixTree.h2
-rw-r--r--Source/WebCore/platform/text/TextAllInOne.cpp (renamed from Source/WebCore/platform/text/TextDirection.h)32
-rw-r--r--Source/WebCore/platform/text/TextBoundaries.cpp70
-rw-r--r--Source/WebCore/platform/text/TextBoundaries.h19
-rw-r--r--Source/WebCore/platform/text/TextBreakIterator.cpp400
-rw-r--r--Source/WebCore/platform/text/TextBreakIterator.h191
-rw-r--r--Source/WebCore/platform/text/TextBreakIteratorInternalICU.h34
-rw-r--r--Source/WebCore/platform/text/TextCheckerClient.h24
-rw-r--r--Source/WebCore/platform/text/TextCodec.cpp6
-rw-r--r--Source/WebCore/platform/text/TextCodec.h11
-rw-r--r--Source/WebCore/platform/text/TextCodecASCIIFastPath.h4
-rw-r--r--Source/WebCore/platform/text/TextCodecICU.cpp315
-rw-r--r--Source/WebCore/platform/text/TextCodecICU.h15
-rw-r--r--Source/WebCore/platform/text/TextCodecLatin1.cpp66
-rw-r--r--Source/WebCore/platform/text/TextCodecLatin1.h10
-rw-r--r--Source/WebCore/platform/text/TextCodecReplacement.cpp (renamed from Source/WebCore/platform/text/icu/UTextProviderLatin1.h)53
-rw-r--r--Source/WebCore/platform/text/TextCodecReplacement.h (renamed from Source/WebCore/platform/text/icu/UTextProviderUTF16.h)27
-rw-r--r--Source/WebCore/platform/text/TextCodecUTF16.cpp15
-rw-r--r--Source/WebCore/platform/text/TextCodecUTF16.h10
-rw-r--r--Source/WebCore/platform/text/TextCodecUTF8.cpp16
-rw-r--r--Source/WebCore/platform/text/TextCodecUTF8.h17
-rw-r--r--Source/WebCore/platform/text/TextCodecUserDefined.cpp9
-rw-r--r--Source/WebCore/platform/text/TextCodecUserDefined.h8
-rw-r--r--Source/WebCore/platform/text/TextEncoding.cpp28
-rw-r--r--Source/WebCore/platform/text/TextEncoding.h102
-rw-r--r--Source/WebCore/platform/text/TextEncodingDetectorICU.cpp117
-rw-r--r--Source/WebCore/platform/text/TextEncodingDetectorNone.cpp44
-rw-r--r--Source/WebCore/platform/text/TextEncodingRegistry.cpp83
-rw-r--r--Source/WebCore/platform/text/TextEncodingRegistry.h12
-rw-r--r--Source/WebCore/platform/text/TextFlags.h408
-rw-r--r--Source/WebCore/platform/text/TextStream.cpp75
-rw-r--r--Source/WebCore/platform/text/TextStream.h131
-rw-r--r--Source/WebCore/platform/text/UnicodeBidi.h4
-rw-r--r--Source/WebCore/platform/text/WritingMode.h124
-rw-r--r--Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp47
-rw-r--r--Source/WebCore/platform/text/enchant/TextCheckerEnchant.h4
-rw-r--r--Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp37
-rw-r--r--Source/WebCore/platform/text/hyphen/HyphenationLibHyphen.cpp294
-rw-r--r--Source/WebCore/platform/text/icu/UTextProvider.cpp71
-rw-r--r--Source/WebCore/platform/text/icu/UTextProvider.h112
-rw-r--r--Source/WebCore/platform/text/icu/UTextProviderLatin1.cpp394
-rw-r--r--Source/WebCore/platform/text/icu/UTextProviderUTF16.cpp183
-rwxr-xr-xSource/WebCore/platform/text/mac/make-charset-table.pl225
67 files changed, 3128 insertions, 3432 deletions
diff --git a/Source/WebCore/platform/text/BidiContext.cpp b/Source/WebCore/platform/text/BidiContext.cpp
index db126c991..c9f0f813b 100644
--- a/Source/WebCore/platform/text/BidiContext.cpp
+++ b/Source/WebCore/platform/text/BidiContext.cpp
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2000 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved.
+ * Copyright (C) 2003-2017 Apple Inc. All right reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -21,6 +21,7 @@
#include "config.h"
#include "BidiContext.h"
+
#include <wtf/Vector.h>
namespace WebCore {
@@ -32,12 +33,21 @@ struct SameSizeAsBidiContext : public RefCounted<SameSizeAsBidiContext> {
COMPILE_ASSERT(sizeof(BidiContext) == sizeof(SameSizeAsBidiContext), BidiContext_should_stay_small);
-inline PassRefPtr<BidiContext> BidiContext::createUncached(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent)
+inline BidiContext::BidiContext(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent)
+ : m_level(level)
+ , m_direction(direction)
+ , m_override(override)
+ , m_source(source)
+ , m_parent(parent)
+{
+}
+
+inline Ref<BidiContext> BidiContext::createUncached(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent)
{
- return adoptRef(new BidiContext(level, direction, override, source, parent));
+ return adoptRef(*new BidiContext(level, direction, override, source, parent));
}
-PassRefPtr<BidiContext> BidiContext::create(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent)
+Ref<BidiContext> BidiContext::create(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent)
{
ASSERT(direction == (level % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT));
@@ -47,52 +57,48 @@ PassRefPtr<BidiContext> BidiContext::create(unsigned char level, UCharDirection
ASSERT(level <= 1);
if (!level) {
if (!override) {
- static BidiContext* ltrContext = createUncached(0, U_LEFT_TO_RIGHT, false, FromStyleOrDOM, 0).leakRef();
+ static BidiContext& ltrContext = createUncached(0, U_LEFT_TO_RIGHT, false, FromStyleOrDOM, 0).leakRef();
return ltrContext;
}
- static BidiContext* ltrOverrideContext = createUncached(0, U_LEFT_TO_RIGHT, true, FromStyleOrDOM, 0).leakRef();
+ static BidiContext& ltrOverrideContext = createUncached(0, U_LEFT_TO_RIGHT, true, FromStyleOrDOM, 0).leakRef();
return ltrOverrideContext;
}
if (!override) {
- static BidiContext* rtlContext = createUncached(1, U_RIGHT_TO_LEFT, false, FromStyleOrDOM, 0).leakRef();
+ static BidiContext& rtlContext = createUncached(1, U_RIGHT_TO_LEFT, false, FromStyleOrDOM, 0).leakRef();
return rtlContext;
}
- static BidiContext* rtlOverrideContext = createUncached(1, U_RIGHT_TO_LEFT, true, FromStyleOrDOM, 0).leakRef();
+ static BidiContext& rtlOverrideContext = createUncached(1, U_RIGHT_TO_LEFT, true, FromStyleOrDOM, 0).leakRef();
return rtlOverrideContext;
}
-static inline PassRefPtr<BidiContext> copyContextAndRebaselineLevel(BidiContext* context, BidiContext* parent)
+static inline Ref<BidiContext> copyContextAndRebaselineLevel(BidiContext& context, BidiContext* parent)
{
- ASSERT(context);
- unsigned char newLevel = parent ? parent->level() : 0;
- if (context->dir() == U_RIGHT_TO_LEFT)
+ auto newLevel = parent ? parent->level() : 0;
+ if (context.dir() == U_RIGHT_TO_LEFT)
newLevel = nextGreaterOddLevel(newLevel);
else if (parent)
newLevel = nextGreaterEvenLevel(newLevel);
-
- return BidiContext::create(newLevel, context->dir(), context->override(), context->source(), parent);
+ return BidiContext::create(newLevel, context.dir(), context.override(), context.source(), parent);
}
// The BidiContext stack must be immutable -- they're re-used for re-layout after
-// DOM modification/editing -- so we copy all the non-unicode contexts, and
+// DOM modification/editing -- so we copy all the non-Unicode contexts, and
// recalculate their levels.
-PassRefPtr<BidiContext> BidiContext::copyStackRemovingUnicodeEmbeddingContexts()
+Ref<BidiContext> BidiContext::copyStackRemovingUnicodeEmbeddingContexts()
{
Vector<BidiContext*, 64> contexts;
- for (BidiContext* iter = this; iter; iter = iter->parent()) {
- if (iter->source() != FromUnicode)
- contexts.append(iter);
+ for (auto* ancestor = this; ancestor; ancestor = ancestor->parent()) {
+ if (ancestor->source() != FromUnicode)
+ contexts.append(ancestor);
}
ASSERT(contexts.size());
-
- RefPtr<BidiContext> topContext = copyContextAndRebaselineLevel(contexts.last(), 0);
- for (int i = contexts.size() - 1; i > 0; --i)
- topContext = copyContextAndRebaselineLevel(contexts[i - 1], topContext.get());
-
- return topContext.release();
+ auto topContext = copyContextAndRebaselineLevel(*contexts.last(), nullptr);
+ for (unsigned i = contexts.size() - 1; i; --i)
+ topContext = copyContextAndRebaselineLevel(*contexts[i - 1], topContext.ptr());
+ return topContext;
}
bool operator==(const BidiContext& c1, const BidiContext& c2)
diff --git a/Source/WebCore/platform/text/BidiContext.h b/Source/WebCore/platform/text/BidiContext.h
index 371983055..d4c4e5708 100644
--- a/Source/WebCore/platform/text/BidiContext.h
+++ b/Source/WebCore/platform/text/BidiContext.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2000 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2003, 2004, 2006, 2007, 2009, 2010 Apple Inc. All right reserved.
+ * Copyright (C) 2003-2017 Apple Inc. All right reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -19,26 +19,20 @@
*
*/
-#ifndef BidiContext_h
-#define BidiContext_h
+#pragma once
-#include <wtf/Assertions.h>
-#include <wtf/PassRefPtr.h>
+#include <unicode/uchar.h>
#include <wtf/RefCounted.h>
#include <wtf/RefPtr.h>
-#include <wtf/unicode/Unicode.h>
namespace WebCore {
-enum BidiEmbeddingSource {
- FromStyleOrDOM,
- FromUnicode
-};
+enum BidiEmbeddingSource { FromStyleOrDOM, FromUnicode };
// Used to keep track of explicit embeddings.
class BidiContext : public RefCounted<BidiContext> {
public:
- static PassRefPtr<BidiContext> create(unsigned char level, UCharDirection, bool override = false, BidiEmbeddingSource = FromStyleOrDOM, BidiContext* parent = 0);
+ WEBCORE_EXPORT static Ref<BidiContext> create(unsigned char level, UCharDirection, bool override = false, BidiEmbeddingSource = FromStyleOrDOM, BidiContext* parent = nullptr);
BidiContext* parent() const { return m_parent.get(); }
unsigned char level() const { return m_level; }
@@ -46,18 +40,12 @@ public:
bool override() const { return m_override; }
BidiEmbeddingSource source() const { return static_cast<BidiEmbeddingSource>(m_source); }
- PassRefPtr<BidiContext> copyStackRemovingUnicodeEmbeddingContexts();
+ WEBCORE_EXPORT Ref<BidiContext> copyStackRemovingUnicodeEmbeddingContexts();
+
private:
- BidiContext(unsigned char level, UCharDirection direction, bool override, BidiEmbeddingSource source, BidiContext* parent)
- : m_level(level)
- , m_direction(direction)
- , m_override(override)
- , m_source(source)
- , m_parent(parent)
- {
- }
+ BidiContext(unsigned char level, UCharDirection, bool override, BidiEmbeddingSource, BidiContext* parent);
- static PassRefPtr<BidiContext> createUncached(unsigned char level, UCharDirection, bool override, BidiEmbeddingSource, BidiContext* parent);
+ static Ref<BidiContext> createUncached(unsigned char level, UCharDirection, bool override, BidiEmbeddingSource, BidiContext* parent);
unsigned m_level : 6; // The maximium bidi level is 62: http://unicode.org/reports/tr9/#Explicit_Levels_and_Directions
unsigned m_direction : 5; // Direction
@@ -79,5 +67,3 @@ inline unsigned char nextGreaterEvenLevel(unsigned char level)
bool operator==(const BidiContext&, const BidiContext&);
} // namespace WebCore
-
-#endif // BidiContext_h
diff --git a/Source/WebCore/platform/text/BidiResolver.h b/Source/WebCore/platform/text/BidiResolver.h
index 9b13941c7..1b13ef3b7 100644
--- a/Source/WebCore/platform/text/BidiResolver.h
+++ b/Source/WebCore/platform/text/BidiResolver.h
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2000 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved.
+ * Copyright (C) 2003-2017 Apple Inc. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
@@ -19,127 +19,103 @@
*
*/
-#ifndef BidiResolver_h
-#define BidiResolver_h
+#pragma once
#include "BidiContext.h"
#include "BidiRunList.h"
-#include "TextDirection.h"
+#include "WritingMode.h"
+#include <wtf/HashMap.h>
#include <wtf/Noncopyable.h>
-#include <wtf/PassRefPtr.h>
#include <wtf/Vector.h>
namespace WebCore {
class RenderObject;
-template <class Iterator> class MidpointState {
+template<typename Iterator> class WhitespaceCollapsingState {
public:
- MidpointState()
- {
- reset();
- }
-
void reset()
{
- m_numMidpoints = 0;
- m_currentMidpoint = 0;
- m_betweenMidpoints = false;
+ m_transitions.clear();
+ m_currentTransition = 0;
}
- void startIgnoringSpaces(const Iterator& midpoint)
+ void startIgnoringSpaces(const Iterator& transition)
{
- ASSERT(!(m_numMidpoints % 2));
- addMidpoint(midpoint);
+ ASSERT(!(m_transitions.size() % 2));
+ m_transitions.append(transition);
}
- void stopIgnoringSpaces(const Iterator& midpoint)
+ void stopIgnoringSpaces(const Iterator& transition)
{
- ASSERT(m_numMidpoints % 2);
- addMidpoint(midpoint);
+ ASSERT(m_transitions.size() % 2);
+ m_transitions.append(transition);
}
// When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or
// hard line breaks to ensure that they're not ignored.
- void ensureLineBoxInsideIgnoredSpaces(RenderObject* renderer)
+ void ensureLineBoxInsideIgnoredSpaces(RenderObject& renderer)
{
- Iterator midpoint(0, renderer, 0);
- stopIgnoringSpaces(midpoint);
- startIgnoringSpaces(midpoint);
+ Iterator transition(0, &renderer, 0);
+ stopIgnoringSpaces(transition);
+ startIgnoringSpaces(transition);
}
- Vector<Iterator>& midpoints() { return m_midpoints; }
- const unsigned& numMidpoints() const { return m_numMidpoints; }
- const unsigned& currentMidpoint() const { return m_currentMidpoint; }
- void incrementCurrentMidpoint() { ++m_currentMidpoint; }
- void decreaseNumMidpoints() { --m_numMidpoints; }
- const bool& betweenMidpoints() const { return m_betweenMidpoints; }
- void setBetweenMidpoints(bool betweenMidpoint) { m_betweenMidpoints = betweenMidpoint; }
-private:
- // The goal is to reuse the line state across multiple
- // lines so we just keep an array around for midpoints and never clear it across multiple
- // lines. We track the number of items and position using the two other variables.
- Vector<Iterator> m_midpoints;
- unsigned m_numMidpoints;
- unsigned m_currentMidpoint;
- bool m_betweenMidpoints;
-
- void addMidpoint(const Iterator& midpoint)
+ void decrementTransitionAt(size_t index)
{
- if (m_midpoints.size() <= m_numMidpoints)
- m_midpoints.grow(m_numMidpoints + 10);
-
- Iterator* midpointsIterator = m_midpoints.data();
- midpointsIterator[m_numMidpoints++] = midpoint;
+ m_transitions[index].fastDecrement();
}
+
+ const Vector<Iterator>& transitions() { return m_transitions; }
+ size_t numTransitions() const { return m_transitions.size(); }
+ size_t currentTransition() const { return m_currentTransition; }
+ void setCurrentTransition(size_t currentTransition) { m_currentTransition = currentTransition; }
+ void incrementCurrentTransition() { ++m_currentTransition; }
+ void decrementNumTransitions() { m_transitions.shrink(m_transitions.size() - 1); }
+ bool betweenTransitions() const { return m_currentTransition % 2; }
+private:
+ Vector<Iterator> m_transitions;
+ size_t m_currentTransition { 0 };
};
// The BidiStatus at a given position (typically the end of a line) can
// be cached and then used to restart bidi resolution at that position.
struct BidiStatus {
- BidiStatus()
- : eor(U_OTHER_NEUTRAL)
- , lastStrong(U_OTHER_NEUTRAL)
- , last(U_OTHER_NEUTRAL)
- {
- }
+ BidiStatus() = default;
// Creates a BidiStatus representing a new paragraph root with a default direction.
// Uses TextDirection as it only has two possibilities instead of UCharDirection which has at least 19.
- BidiStatus(TextDirection textDirection, bool isOverride)
+ BidiStatus(TextDirection direction, bool isOverride)
+ : eor(direction == LTR ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT)
+ , lastStrong(eor)
+ , last(eor)
+ , context(BidiContext::create(direction == LTR ? 0 : 1, eor, isOverride))
{
- UCharDirection direction = textDirection == LTR ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
- eor = lastStrong = last = direction;
- context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride);
}
- BidiStatus(UCharDirection eorDir, UCharDirection lastStrongDir, UCharDirection lastDir, PassRefPtr<BidiContext> bidiContext)
- : eor(eorDir)
- , lastStrong(lastStrongDir)
- , last(lastDir)
- , context(bidiContext)
+ BidiStatus(UCharDirection eor, UCharDirection lastStrong, UCharDirection last, RefPtr<BidiContext>&& context)
+ : eor(eor)
+ , lastStrong(lastStrong)
+ , last(last)
+ , context(WTFMove(context))
{
}
- UCharDirection eor;
- UCharDirection lastStrong;
- UCharDirection last;
+ UCharDirection eor { U_OTHER_NEUTRAL };
+ UCharDirection lastStrong { U_OTHER_NEUTRAL };
+ UCharDirection last { U_OTHER_NEUTRAL };
RefPtr<BidiContext> context;
};
-class BidiEmbedding {
-public:
+struct BidiEmbedding {
BidiEmbedding(UCharDirection direction, BidiEmbeddingSource source)
- : m_direction(direction)
- , m_source(source)
+ : direction(direction)
+ , source(source)
{
}
- UCharDirection direction() const { return m_direction; }
- BidiEmbeddingSource source() const { return m_source; }
-private:
- UCharDirection m_direction;
- BidiEmbeddingSource m_source;
+ UCharDirection direction;
+ BidiEmbeddingSource source;
};
inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
@@ -153,11 +129,12 @@ inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
}
struct BidiCharacterRun {
- BidiCharacterRun(int start, int stop, BidiContext* context, UCharDirection direction)
- : m_override(context->override())
- , m_next(0)
- , m_start(start)
+ WTF_MAKE_FAST_ALLOCATED;
+public:
+ BidiCharacterRun(unsigned start, unsigned stop, BidiContext* context, UCharDirection direction)
+ : m_start(start)
, m_stop(stop)
+ , m_override(context->override())
{
if (direction == U_OTHER_NEUTRAL)
direction = context->dir();
@@ -176,25 +153,33 @@ struct BidiCharacterRun {
}
}
- int start() const { return m_start; }
- int stop() const { return m_stop; }
+ ~BidiCharacterRun()
+ {
+ // Delete the linked list in a loop to prevent destructor recursion.
+ auto next = WTFMove(m_next);
+ while (next)
+ next = WTFMove(next->m_next);
+ }
+
+ unsigned start() const { return m_start; }
+ unsigned stop() const { return m_stop; }
unsigned char level() const { return m_level; }
bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
- BidiCharacterRun* next() const { return m_next; }
- void setNext(BidiCharacterRun* next) { m_next = next; }
+ BidiCharacterRun* next() const { return m_next.get(); }
+ std::unique_ptr<BidiCharacterRun> takeNext() { return WTFMove(m_next); }
+ void setNext(std::unique_ptr<BidiCharacterRun>&& next) { m_next = WTFMove(next); }
+
+private:
+ std::unique_ptr<BidiCharacterRun> m_next;
- // Do not add anything apart from bitfields until after m_next. See https://bugs.webkit.org/show_bug.cgi?id=100173
+public:
+ unsigned m_start;
+ unsigned m_stop;
+ unsigned char m_level;
bool m_override : 1;
bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit.
-#if ENABLE(CSS_SHAPES)
- bool m_startsSegment : 1; // Same comment as m_hasHyphen.
-#endif
- unsigned char m_level;
- BidiCharacterRun* m_next;
- int m_start;
- int m_stop;
};
enum VisualDirectionOverride {
@@ -205,21 +190,9 @@ enum VisualDirectionOverride {
// BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
// http://unicode.org/reports/tr9
-template <class Iterator, class Run> class BidiResolver {
- WTF_MAKE_NONCOPYABLE(BidiResolver);
+template<typename Iterator, typename Run, typename DerivedClass> class BidiResolverBase {
+ WTF_MAKE_NONCOPYABLE(BidiResolverBase);
public:
- BidiResolver()
- : m_direction(U_OTHER_NEUTRAL)
- , m_reachedEndOfLine(false)
- , m_emptyRun(true)
- , m_nestedIsolateCount(0)
- {
- }
-
-#ifndef NDEBUG
- ~BidiResolver();
-#endif
-
const Iterator& position() const { return m_current; }
void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
@@ -228,22 +201,22 @@ public:
m_nestedIsolateCount = nestedIsolatedCount;
}
- void increment() { m_current.increment(); }
+ void increment() { static_cast<DerivedClass&>(*this).incrementInternal(); }
BidiContext* context() const { return m_status.context.get(); }
- void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
+ void setContext(RefPtr<BidiContext>&& context) { m_status.context = WTFMove(context); }
void setLastDir(UCharDirection lastDir) { m_status.last = lastDir; }
void setLastStrongDir(UCharDirection lastStrongDir) { m_status.lastStrong = lastStrongDir; }
void setEorDir(UCharDirection eorDir) { m_status.eor = eorDir; }
UCharDirection dir() const { return m_direction; }
- void setDir(UCharDirection d) { m_direction = d; }
+ void setDir(UCharDirection direction) { m_direction = direction; }
const BidiStatus& status() const { return m_status; }
- void setStatus(const BidiStatus s) { m_status = s; }
+ void setStatus(BidiStatus status) { m_status = status; }
- MidpointState<Iterator>& midpointState() { return m_midpointState; }
+ WhitespaceCollapsingState<Iterator>& whitespaceCollapsingState() { return m_whitespaceCollapsingState; }
// The current algorithm handles nested isolates one layer of nesting at a time.
// But when we layout each isolated span, we will walk into (and ignore) all
@@ -263,12 +236,15 @@ public:
// It's unclear if this is still needed.
void markCurrentRunEmpty() { m_emptyRun = true; }
- Vector<Run*>& isolatedRuns() { return m_isolatedRuns; }
+ void setWhitespaceCollapsingTransitionForIsolatedRun(Run&, size_t);
+ unsigned whitespaceCollapsingTransitionForIsolatedRun(Run&);
protected:
+ BidiResolverBase() = default;
+
// FIXME: Instead of InlineBidiResolvers subclassing this method, we should
// pass in some sort of Traits object which knows how to create runs for appending.
- void appendRun();
+ void appendRun() { static_cast<DerivedClass&>(*this).appendRunInternal(); }
Iterator m_current;
// sor and eor are "start of run" and "end of run" respectively and correpond
@@ -277,20 +253,20 @@ protected:
Iterator m_eor; // Points to the last character in the current run.
Iterator m_last;
BidiStatus m_status;
- UCharDirection m_direction;
+ UCharDirection m_direction { U_OTHER_NEUTRAL };
Iterator endOfLine;
- bool m_reachedEndOfLine;
+ bool m_reachedEndOfLine { false };
Iterator m_lastBeforeET; // Before a U_EUROPEAN_NUMBER_TERMINATOR
- bool m_emptyRun;
+ bool m_emptyRun { true };
// FIXME: This should not belong to the resolver, but rather be passed
// into createBidiRunsForLine by the caller.
BidiRunList<Run> m_runs;
- MidpointState<Iterator> m_midpointState;
+ WhitespaceCollapsingState<Iterator> m_whitespaceCollapsingState;
- unsigned m_nestedIsolateCount;
- Vector<Run*> m_isolatedRuns;
+ unsigned m_nestedIsolateCount { 0 };
+ HashMap<Run*, unsigned> m_whitespaceCollapsingTransitionForIsolatedRun;
private:
void raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to);
@@ -299,21 +275,38 @@ private:
void updateStatusLastFromCurrentDirection(UCharDirection);
void reorderRunsFromLevels();
+ void incrementInternal() { m_current.increment(); }
+ void appendRunInternal();
Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
};
-#ifndef NDEBUG
-template <class Iterator, class Run>
-BidiResolver<Iterator, Run>::~BidiResolver()
+template<typename Iterator, typename Run>
+class BidiResolver : public BidiResolverBase<Iterator, Run, BidiResolver<Iterator, Run>> {
+};
+
+template<typename Iterator, typename Run, typename IsolateRun>
+class BidiResolverWithIsolate : public BidiResolverBase<Iterator, Run, BidiResolverWithIsolate<Iterator, Run, IsolateRun>> {
+public:
+ ~BidiResolverWithIsolate();
+
+ void incrementInternal();
+ void appendRunInternal();
+ Vector<IsolateRun>& isolatedRuns() { return m_isolatedRuns; }
+
+private:
+ Vector<IsolateRun> m_isolatedRuns;
+};
+
+template<typename Iterator, typename Run, typename IsolateRun>
+inline BidiResolverWithIsolate<Iterator, Run, IsolateRun>::~BidiResolverWithIsolate()
{
// The owner of this resolver should have handled the isolated runs.
ASSERT(m_isolatedRuns.isEmpty());
}
-#endif
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::appendRun()
+template<typename Iterator, typename Run, typename DerivedClass>
+void BidiResolverBase<Iterator, Run, DerivedClass>::appendRunInternal()
{
if (!m_emptyRun && !m_eor.atEnd()) {
unsigned startOffset = m_sor.offset();
@@ -325,7 +318,7 @@ void BidiResolver<Iterator, Run>::appendRun()
}
if (endOffset >= startOffset)
- m_runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
+ m_runs.appendRun(std::make_unique<Run>(startOffset, endOffset + 1, context(), m_direction));
m_eor.increment();
m_sor = m_eor;
@@ -335,8 +328,8 @@ void BidiResolver<Iterator, Run>::appendRun()
m_status.eor = U_OTHER_NEUTRAL;
}
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::embed(UCharDirection dir, BidiEmbeddingSource source)
+template<typename Iterator, typename Run, typename DerivedClass>
+void BidiResolverBase<Iterator, Run, DerivedClass>::embed(UCharDirection dir, BidiEmbeddingSource source)
{
// Isolated spans compute base directionality during their own UBA run.
// Do not insert fake embed characters once we enter an isolated span.
@@ -346,8 +339,8 @@ void BidiResolver<Iterator, Run>::embed(UCharDirection dir, BidiEmbeddingSource
m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
}
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
+template<typename Iterator, typename Run, typename DerivedClass>
+void BidiResolverBase<Iterator, Run, DerivedClass>::checkDirectionInLowerRaiseEmbeddingLevel()
{
ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd());
ASSERT(m_status.last != U_DIR_NON_SPACING_MARK
@@ -361,8 +354,8 @@ void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
}
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(UCharDirection from)
+template<typename Iterator, typename Run, typename DerivedClass>
+void BidiResolverBase<Iterator, Run, DerivedClass>::lowerExplicitEmbeddingLevel(UCharDirection from)
{
if (!m_emptyRun && m_eor != m_last) {
checkDirectionInLowerRaiseEmbeddingLevel();
@@ -397,8 +390,8 @@ void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(UCharDirection fro
m_eor = Iterator();
}
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to)
+template<typename Iterator, typename Run, typename DerivedClass>
+void BidiResolverBase<Iterator, Run, DerivedClass>::raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to)
{
if (!m_emptyRun && m_eor != m_last) {
checkDirectionInLowerRaiseEmbeddingLevel();
@@ -434,51 +427,50 @@ void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(UCharDirection fro
m_eor = Iterator();
}
-template <class Iterator, class Run>
-bool BidiResolver<Iterator, Run>::commitExplicitEmbedding()
+template<typename Iterator, typename Run, typename DerivedClass>
+bool BidiResolverBase<Iterator, Run, DerivedClass>::commitExplicitEmbedding()
{
// When we're "inIsolate()" we're resolving the parent context which
// ignores (skips over) the isolated content, including embedding levels.
// We should never accrue embedding levels while skipping over isolated content.
ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
- unsigned char fromLevel = context()->level();
+ auto fromLevel = context()->level();
RefPtr<BidiContext> toContext = context();
- for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
- BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i];
- if (embedding.direction() == U_POP_DIRECTIONAL_FORMAT) {
- if (BidiContext* parentContext = toContext->parent())
+ for (auto& embedding : m_currentExplicitEmbeddingSequence) {
+ if (embedding.direction == U_POP_DIRECTIONAL_FORMAT) {
+ if (auto* parentContext = toContext->parent())
toContext = parentContext;
} else {
- UCharDirection direction = (embedding.direction() == U_RIGHT_TO_LEFT_EMBEDDING || embedding.direction() == U_RIGHT_TO_LEFT_OVERRIDE) ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT;
- bool override = embedding.direction() == U_LEFT_TO_RIGHT_OVERRIDE || embedding.direction() == U_RIGHT_TO_LEFT_OVERRIDE;
+ UCharDirection direction = (embedding.direction == U_RIGHT_TO_LEFT_EMBEDDING || embedding.direction == U_RIGHT_TO_LEFT_OVERRIDE) ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT;
+ bool override = embedding.direction == U_LEFT_TO_RIGHT_OVERRIDE || embedding.direction == U_RIGHT_TO_LEFT_OVERRIDE;
unsigned char level = toContext->level();
if (direction == U_RIGHT_TO_LEFT)
level = nextGreaterOddLevel(level);
else
level = nextGreaterEvenLevel(level);
if (level < 61)
- toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get());
+ toContext = BidiContext::create(level, direction, override, embedding.source, toContext.get());
}
}
- unsigned char toLevel = toContext->level();
+ auto toLevel = toContext->level();
if (toLevel > fromLevel)
raiseExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT, toLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT);
else if (toLevel < fromLevel)
lowerExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT);
- setContext(toContext);
+ setContext(WTFMove(toContext));
m_currentExplicitEmbeddingSequence.clear();
return fromLevel != toLevel;
}
-template <class Iterator, class Run>
-inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(UCharDirection dirCurrent)
+template<typename Iterator, typename Run, typename DerivedClass>
+inline void BidiResolverBase<Iterator, Run, DerivedClass>::updateStatusLastFromCurrentDirection(UCharDirection dirCurrent)
{
switch (dirCurrent) {
case U_EUROPEAN_NUMBER_TERMINATOR:
@@ -518,8 +510,8 @@ inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(UC
}
}
-template <class Iterator, class Run>
-inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels()
+template<typename Iterator, typename Run, typename DerivedClass>
+inline void BidiResolverBase<Iterator, Run, DerivedClass>::reorderRunsFromLevels()
{
unsigned char levelLow = 128;
unsigned char levelHigh = 0;
@@ -555,8 +547,8 @@ inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels()
}
}
-template <class Iterator, class Run>
-void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak)
+template<typename Iterator, typename Run, typename DerivedClass>
+void BidiResolverBase<Iterator, Run, DerivedClass>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak)
{
ASSERT(m_direction == U_OTHER_NEUTRAL);
@@ -582,7 +574,7 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, Vis
m_last = m_current;
bool pastEnd = false;
- BidiResolver<Iterator, Run> stateAtEnd;
+ BidiResolverBase<Iterator, Run, DerivedClass> stateAtEnd;
while (true) {
UCharDirection dirCurrent;
@@ -617,9 +609,16 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, Vis
dirCurrent = m_status.last;
}
+#if PLATFORM(WIN)
+ // Our Windows build hasn't updated its headers from ICU 6.1, which doesn't have these symbols.
+ const UCharDirection U_FIRST_STRONG_ISOLATE = static_cast<UCharDirection>(19);
+ const UCharDirection U_LEFT_TO_RIGHT_ISOLATE = static_cast<UCharDirection>(20);
+ const UCharDirection U_RIGHT_TO_LEFT_ISOLATE = static_cast<UCharDirection>(21);
+ const UCharDirection U_POP_DIRECTIONAL_ISOLATE = static_cast<UCharDirection>(22);
+#endif
// We ignore all character directionality while in unicode-bidi: isolate spans.
// We'll handle ordering the isolated characters in a second pass.
- if (inIsolate())
+ if (inIsolate() || dirCurrent == U_FIRST_STRONG_ISOLATE || dirCurrent == U_LEFT_TO_RIGHT_ISOLATE || dirCurrent == U_RIGHT_TO_LEFT_ISOLATE || dirCurrent == U_POP_DIRECTIONAL_ISOLATE)
dirCurrent = U_OTHER_NEUTRAL;
ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd());
@@ -955,6 +954,17 @@ void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, Vis
endOfLine = Iterator();
}
-} // namespace WebCore
+template<typename Iterator, typename Run, typename DerivedClass>
+void BidiResolverBase<Iterator, Run, DerivedClass>::setWhitespaceCollapsingTransitionForIsolatedRun(Run& run, size_t transition)
+{ // Records |transition| for |run| in m_whitespaceCollapsingTransitionForIsolatedRun, keyed by the run's address.
+ ASSERT(!m_whitespaceCollapsingTransitionForIsolatedRun.contains(&run)); // A run is expected to be registered at most once.
+ m_whitespaceCollapsingTransitionForIsolatedRun.add(&run, transition);
+}
-#endif // BidiResolver_h
+template<typename Iterator, typename Run, typename DerivedClass>
+unsigned BidiResolverBase<Iterator, Run, DerivedClass>::whitespaceCollapsingTransitionForIsolatedRun(Run& run)
+{ // Returns the transition recorded for |run|. NOTE(review): the setter accepts size_t but this returns unsigned; confirm the map's value type.
+ return m_whitespaceCollapsingTransitionForIsolatedRun.take(&run); // take() also removes the entry, so each recorded transition can be read only once.
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/BidiRunList.h b/Source/WebCore/platform/text/BidiRunList.h
index fdbcc5681..66838a252 100644
--- a/Source/WebCore/platform/text/BidiRunList.h
+++ b/Source/WebCore/platform/text/BidiRunList.h
@@ -32,9 +32,8 @@ class BidiRunList {
WTF_MAKE_NONCOPYABLE(BidiRunList);
public:
BidiRunList()
- : m_firstRun(0)
- , m_lastRun(0)
- , m_logicallyLastRun(0)
+ : m_lastRun(nullptr)
+ , m_logicallyLastRun(nullptr)
, m_runCount(0)
{
}
@@ -42,18 +41,18 @@ public:
// FIXME: Once BidiResolver no longer owns the BidiRunList,
// then ~BidiRunList should call deleteRuns() automatically.
- Run* firstRun() const { return m_firstRun; }
+ Run* firstRun() const { return m_firstRun.get(); }
Run* lastRun() const { return m_lastRun; }
Run* logicallyLastRun() const { return m_logicallyLastRun; }
unsigned runCount() const { return m_runCount; }
- void addRun(Run*);
- void prependRun(Run*);
+ void appendRun(std::unique_ptr<Run>&&);
+ void prependRun(std::unique_ptr<Run>&&);
void moveRunToEnd(Run*);
void moveRunToBeginning(Run*);
- void deleteRuns();
+ void clear();
void reverseRuns(unsigned start, unsigned end);
void reorderRunsFromLevels();
@@ -62,35 +61,38 @@ public:
void replaceRunWithRuns(Run* toReplace, BidiRunList<Run>& newRuns);
private:
- void clearWithoutDestroyingRuns();
- Run* m_firstRun;
+ // The runs form a singly-linked-list, where the links (Run::m_next) imply ownership (and are of type std::unique_ptr).
+ // The raw pointers below point into the singly-linked-list.
+ std::unique_ptr<Run> m_firstRun; // The head of the list
Run* m_lastRun;
Run* m_logicallyLastRun;
unsigned m_runCount;
};
template <class Run>
-inline void BidiRunList<Run>::addRun(Run* run)
+inline void BidiRunList<Run>::appendRun(std::unique_ptr<Run>&& run) // Takes ownership of |run| and links it after the current tail.
{
- if (!m_firstRun)
- m_firstRun = run;
- else
- m_lastRun->m_next = run;
- m_lastRun = run;
+ if (!m_firstRun) { // Empty list: |run| becomes both the head and the tail.
+ m_firstRun = WTFMove(run);
+ m_lastRun = m_firstRun.get();
+ } else {
+ m_lastRun->setNext(WTFMove(run));
+ m_lastRun = m_lastRun->next(); // |run| has been moved from, so the new tail is read back through the link.
+ }
m_runCount++;
}
template <class Run>
-inline void BidiRunList<Run>::prependRun(Run* run)
+inline void BidiRunList<Run>::prependRun(std::unique_ptr<Run>&& run) // Takes ownership of |run| and makes it the new head of the list.
{
- ASSERT(!run->m_next);
+ ASSERT(!run->next()); // Only a single, unlinked run may be prepended.
if (!m_lastRun)
- m_lastRun = run;
+ m_lastRun = run.get(); // Empty list: the new head is also the tail.
else
- run->m_next = m_firstRun;
- m_firstRun = run;
+ run->setNext(WTFMove(m_firstRun)); // The previous head (and everything it owns) now hangs off |run|.
+ m_firstRun = WTFMove(run);
m_runCount++;
}
@@ -99,23 +101,25 @@ inline void BidiRunList<Run>::moveRunToEnd(Run* run)
{
+ // Unlinks |run| from its current position and re-links it after the current tail.
+ // |run| must already be in this list and must not be the tail (it must have a successor).
ASSERT(m_firstRun);
ASSERT(m_lastRun);
- ASSERT(run->m_next);
+ ASSERT(run->next());
- Run* current = 0;
- Run* next = m_firstRun;
- while (next != run) {
- current = next;
- next = current->next();
+ // Walk the list to find the node that owns |run| (|previous| stays null when |run| is the head).
+ Run* previous = nullptr;
+ Run* current = m_firstRun.get();
+ while (current != run) {
+ previous = current;
+ current = previous->next();
}
- if (!current)
- m_firstRun = run->next();
- else
- current->m_next = run->m_next;
-
- run->m_next = 0;
- m_lastRun->m_next = run;
- m_lastRun = run;
+ if (!previous) {
+ // |run| is the head: its successor becomes the new head, then the old head is handed to the tail.
+ ASSERT(m_firstRun.get() == run);
+ std::unique_ptr<Run> originalFirstRun = WTFMove(m_firstRun);
+ m_firstRun = originalFirstRun->takeNext();
+ m_lastRun->setNext(WTFMove(originalFirstRun));
+ } else {
+ // |run| is in the middle: splice it out of |previous| and hand it to the tail.
+ std::unique_ptr<Run> target = previous->takeNext();
+ previous->setNext(current->takeNext());
+ m_lastRun->setNext(WTFMove(target));
+ }
+
+ // |run| is now the last node. Keep the raw tail pointer in sync, as the replaced code did;
+ // otherwise lastRun() is stale and a later appendRun() would overwrite the link that owns |run|.
+ m_lastRun = run;
}
template <class Run>
@@ -123,21 +127,22 @@ inline void BidiRunList<Run>::moveRunToBeginning(Run* run)
{
ASSERT(m_firstRun);
ASSERT(m_lastRun);
- ASSERT(run != m_firstRun);
+ ASSERT(run != m_firstRun.get()); // The head is already at the beginning; callers must not pass it.
- Run* current = m_firstRun;
- Run* next = current->next();
- while (next != run) {
- current = next;
- next = current->next();
+ Run* previous = m_firstRun.get();
+ Run* current = previous->next();
+ while (current != run) { // Find the node whose link owns |run|.
+ previous = current;
+ current = previous->next();
}
- current->m_next = run->m_next;
+ std::unique_ptr<Run> target = previous->takeNext(); // |target| now owns |run|.
+ previous->setNext(run->takeNext()); // Close the gap left by removing |run|.
if (run == m_lastRun)
- m_lastRun = current;
+ m_lastRun = previous; // |run| was the tail, so its predecessor becomes the tail.
- run->m_next = m_firstRun;
- m_firstRun = run;
+ target->setNext(WTFMove(m_firstRun)); // The old head chain hangs off |run|...
+ m_firstRun = WTFMove(target); // ...and |run| becomes the new head.
}
template <class Run>
@@ -147,53 +152,39 @@ void BidiRunList<Run>::replaceRunWithRuns(Run* toReplace, BidiRunList<Run>& newR
ASSERT(m_firstRun);
ASSERT(toReplace);
- if (m_firstRun == toReplace)
- m_firstRun = newRuns.firstRun();
- else {
- // Find the run just before "toReplace" in the list of runs.
- Run* previousRun = m_firstRun;
- while (previousRun->next() != toReplace)
- previousRun = previousRun->next();
- ASSERT(previousRun);
- previousRun->setNext(newRuns.firstRun());
- }
-
- newRuns.lastRun()->setNext(toReplace->next());
+ m_runCount += newRuns.runCount() - 1; // We are adding the new runs and removing toReplace.
- // Fix up any of other pointers which may now be stale.
+ // Fix up any pointers which may end up stale.
if (m_lastRun == toReplace)
m_lastRun = newRuns.lastRun();
if (m_logicallyLastRun == toReplace)
m_logicallyLastRun = newRuns.logicallyLastRun();
- m_runCount += newRuns.runCount() - 1; // We added the new runs and removed toReplace.
- delete toReplace;
- newRuns.clearWithoutDestroyingRuns();
-}
+ if (m_firstRun.get() == toReplace) {
+ newRuns.m_lastRun->setNext(m_firstRun->takeNext());
+ m_firstRun = WTFMove(newRuns.m_firstRun);
+ } else {
+ // Find the run just before "toReplace" in the list of runs.
+ Run* previousRun = m_firstRun.get();
+ while (previousRun->next() != toReplace)
+ previousRun = previousRun->next();
+ ASSERT(previousRun);
-template <class Run>
-void BidiRunList<Run>::clearWithoutDestroyingRuns()
-{
- m_firstRun = 0;
- m_lastRun = 0;
- m_logicallyLastRun = 0;
- m_runCount = 0;
+ std::unique_ptr<Run> target = previousRun->takeNext();
+ previousRun->setNext(WTFMove(newRuns.m_firstRun));
+ newRuns.m_lastRun->setNext(target->takeNext());
+ }
+
+ newRuns.clear();
}
template <class Run>
-void BidiRunList<Run>::deleteRuns()
+void BidiRunList<Run>::clear() // Empties the list and resets all bookkeeping.
{
- if (!m_firstRun)
- return;
-
- Run* curr = m_firstRun;
- while (curr) {
- Run* s = curr->next();
- delete curr;
- curr = s;
- }
-
- clearWithoutDestroyingRuns();
+ m_firstRun = nullptr; // Releases the head run; per the member comment the links (Run::m_next) imply ownership, so presumably the rest of the chain is released with it.
+ m_lastRun = nullptr; // The raw pointers only pointed into the list, so they are simply reset.
+ m_logicallyLastRun = nullptr;
+ m_runCount = 0;
}
template <class Run>
@@ -206,44 +197,35 @@ void BidiRunList<Run>::reverseRuns(unsigned start, unsigned end)
// Get the item before the start of the runs to reverse and put it in
// |beforeStart|. |curr| should point to the first run to reverse.
- Run* curr = m_firstRun;
- Run* beforeStart = 0;
+ Run* curr = m_firstRun.get();
+ Run* beforeStart = nullptr;
unsigned i = 0;
- while (i < start) {
- i++;
+ for (; i < start; ++i) {
beforeStart = curr;
curr = curr->next();
}
-
Run* startRun = curr;
- while (i < end) {
- i++;
+
+ for (; i < end; ++i)
curr = curr->next();
- }
- Run* endRun = curr;
- Run* afterEnd = curr->next();
-
- i = start;
- curr = startRun;
- Run* newNext = afterEnd;
- while (i <= end) {
- // Do the reversal.
- Run* next = curr->next();
- curr->m_next = newNext;
- newNext = curr;
- curr = next;
- i++;
+
+ if (!curr->next())
+ m_lastRun = startRun;
+
+ // Standard "sliding window" of 3 pointers
+ std::unique_ptr<Run> previous = curr->takeNext();
+ std::unique_ptr<Run> current = beforeStart ? beforeStart->takeNext() : WTFMove(m_firstRun);
+ while (current) {
+ std::unique_ptr<Run> next = current->takeNext();
+ current->setNext(WTFMove(previous));
+ previous = WTFMove(current);
+ current = WTFMove(next);
}
- // Now hook up beforeStart and afterEnd to the startRun and endRun.
if (beforeStart)
- beforeStart->m_next = endRun;
+ beforeStart->setNext(WTFMove(previous));
else
- m_firstRun = endRun;
-
- startRun->m_next = afterEnd;
- if (!afterEnd)
- m_lastRun = startRun;
+ m_firstRun = WTFMove(previous);
}
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/CharacterProperties.h b/Source/WebCore/platform/text/CharacterProperties.h
new file mode 100644
index 000000000..5ba272b17
--- /dev/null
+++ b/Source/WebCore/platform/text/CharacterProperties.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2015 Apple, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+namespace WebCore {
+
+static inline bool isEmojiGroupCandidate(UChar32 character) // True when |character| is one of the code points listed below (single values and inclusive ranges). NOTE(review): presumably the emoji that may take part in a grouped (joined) sequence; confirm with callers.
+{
+ return character == 0x2640
+ || character == 0x2642
+ || character == 0x26F9
+ || (character >= 0x2695 && character <= 0x2696)
+ || character == 0x2708
+ || character == 0x2764
+ || character == 0x1F308
+ || character == 0x1F33E
+ || character == 0x1F373
+ || character == 0x1F393
+ || character == 0x1F3A4
+ || character == 0x1F3A8
+ || (character >= 0x1F3C2 && character <= 0x1F3C4)
+ || character == 0x1F3C7
+ || (character >= 0x1F3CA && character <= 0x1F3CC)
+ || character == 0x1F3EB
+ || character == 0x1F3ED
+ || character == 0x1F3F3
+ || character == 0x1F441
+ || (character >= 0x1F466 && character <= 0x1F469)
+ || (character >= 0x1F46E && character <= 0x1F46F)
+ || character == 0x1F471
+ || character == 0x1F473
+ || character == 0x1F477
+ || (character >= 0x1F481 && character <= 0x1F482)
+ || (character >= 0x1F486 && character <= 0x1F487)
+ || character == 0x1F48B
+ || (character >= 0x1F4BB && character <= 0x1F4BC)
+ || character == 0x1F527
+ || character == 0x1F52C
+ || (character >= 0x1F574 && character <= 0x1F575)
+ || character == 0x1F57A
+ || character == 0x1F5E8
+ || (character >= 0x1F645 && character <= 0x1F647)
+ || character == 0x1F64B
+ || (character >= 0x1F64D && character <= 0x1F64E)
+ || character == 0x1F680
+ || character == 0x1F692
+ || character == 0x1F6A3
+ || (character >= 0x1F6B4 && character <= 0x1F6B6)
+ || character == 0x1F6CC
+ || (character >= 0x1F919 && character <= 0x1F91E)
+ || character == 0x1F926
+ || character == 0x1F930
+ || (character >= 0x1F933 && character <= 0x1F939)
+ || (character >= 0x1F93C && character <= 0x1F93E);
+}
+
+static inline bool isEmojiFitzpatrickModifier(UChar32 character)
+{
+ // The five skin-tone modifiers are contiguous code points:
+ // U+1F3FB (EMOJI MODIFIER FITZPATRICK TYPE-1-2) through U+1F3FF (EMOJI MODIFIER FITZPATRICK TYPE-6).
+ constexpr UChar32 firstModifier = 0x1F3FB;
+ constexpr UChar32 lastModifier = 0x1F3FF;
+ return firstModifier <= character && character <= lastModifier;
+}
+
+inline bool isVariationSelector(UChar32 character)
+{
+ // Only the basic Variation Selectors block, U+FE00 through U+FE0F inclusive, is recognized here.
+ if (character < 0xFE00)
+ return false;
+ return character <= 0xFE0F;
+}
+
+}
diff --git a/Source/WebCore/platform/text/DateTimeFormat.cpp b/Source/WebCore/platform/text/DateTimeFormat.cpp
index bcbe1d578..c75d33d6f 100644
--- a/Source/WebCore/platform/text/DateTimeFormat.cpp
+++ b/Source/WebCore/platform/text/DateTimeFormat.cpp
@@ -257,21 +257,21 @@ void DateTimeFormat::quoteAndAppendLiteral(const String& literal, StringBuilder&
}
if (literal.find('\'') == notFound) {
- buffer.append("'");
+ buffer.append('\'');
buffer.append(literal);
- buffer.append("'");
+ buffer.append('\'');
return;
}
for (unsigned i = 0; i < literal.length(); ++i) {
if (literal[i] == '\'')
- buffer.append("''");
+ buffer.appendLiteral("''");
else {
String escaped = literal.substring(i);
- escaped.replace(ASCIILiteral("'"), ASCIILiteral("''"));
- buffer.append("'");
+ escaped.replace('\'', "''");
+ buffer.append('\'');
buffer.append(escaped);
- buffer.append("'");
+ buffer.append('\'');
return;
}
}
diff --git a/Source/WebCore/platform/text/DecodeEscapeSequences.h b/Source/WebCore/platform/text/DecodeEscapeSequences.h
index 2fa6e4658..957d3ef1b 100644
--- a/Source/WebCore/platform/text/DecodeEscapeSequences.h
+++ b/Source/WebCore/platform/text/DecodeEscapeSequences.h
@@ -40,8 +40,8 @@ namespace WebCore {
// See <http://en.wikipedia.org/wiki/Percent-encoding#Non-standard_implementations>.
struct Unicode16BitEscapeSequence {
enum { sequenceSize = 6 }; // e.g. %u26C4
- static size_t findInString(const String& string, size_t startPosition) { return string.find("%u", startPosition); }
- static size_t findEndOfRun(const String& string, size_t startPosition, size_t endPosition)
+ static size_t findInString(StringView string, size_t startPosition) { return string.find(StringView("%u"), startPosition); }
+ static size_t findEndOfRun(StringView string, size_t startPosition, size_t endPosition)
{
size_t runEnd = startPosition;
while (endPosition - runEnd >= sequenceSize && string[runEnd] == '%' && string[runEnd + 1] == 'u'
@@ -51,19 +51,19 @@ struct Unicode16BitEscapeSequence {
}
return runEnd;
}
- static String decodeRun(const UChar* run, size_t runLength, const TextEncoding&)
+ static String decodeRun(StringView run, const TextEncoding&)
{
// Each %u-escape sequence represents a UTF-16 code unit.
// See <http://www.w3.org/International/iri-edit/draft-duerst-iri.html#anchor29>.
// For 16-bit escape sequences, we know that findEndOfRun() has given us a contiguous run of sequences
// without any intervening characters, so decode the run without additional checks.
- size_t numberOfSequences = runLength / sequenceSize;
+ auto numberOfSequences = run.length() / sequenceSize;
StringBuilder builder;
builder.reserveCapacity(numberOfSequences);
while (numberOfSequences--) {
UChar codeUnit = (toASCIIHexValue(run[2]) << 12) | (toASCIIHexValue(run[3]) << 8) | (toASCIIHexValue(run[4]) << 4) | toASCIIHexValue(run[5]);
builder.append(codeUnit);
- run += sequenceSize;
+ run = run.substring(sequenceSize);
}
return builder.toString();
}
@@ -71,8 +71,8 @@ struct Unicode16BitEscapeSequence {
struct URLEscapeSequence {
enum { sequenceSize = 3 }; // e.g. %41
- static size_t findInString(const String& string, size_t startPosition) { return string.find('%', startPosition); }
- static size_t findEndOfRun(const String& string, size_t startPosition, size_t endPosition)
+ static size_t findInString(StringView string, size_t startPosition) { return string.find('%', startPosition); }
+ static size_t findEndOfRun(StringView string, size_t startPosition, size_t endPosition)
{
// Make the simplifying assumption that supported encodings may have up to two unescaped characters
// in the range 0x40 - 0x7F as the trailing bytes of their sequences which need to be passed into the
@@ -96,30 +96,39 @@ struct URLEscapeSequence {
}
return runEnd;
}
- static String decodeRun(const UChar* run, size_t runLength, const TextEncoding& encoding)
+
+ static Vector<char, 512> decodeRun(StringView run) // Unescapes |run| to raw bytes; interpreting those bytes in an encoding is left to the caller.
{
// For URL escape sequences, we know that findEndOfRun() has given us a run where every %-sign introduces
// a valid escape sequence, but there may be characters between the sequences.
Vector<char, 512> buffer;
- buffer.resize(runLength); // Unescaping hex sequences only makes the length smaller.
+ buffer.resize(run.length()); // Unescaping hex sequences only makes the length smaller.
char* p = buffer.data();
- const UChar* runEnd = run + runLength;
- while (run < runEnd) {
+ while (!run.isEmpty()) { // Consume |run| from the front, one escape sequence or one literal character at a time.
if (run[0] == '%') {
*p++ = (toASCIIHexValue(run[1]) << 4) | toASCIIHexValue(run[2]);
- run += sequenceSize;
+ run = run.substring(sequenceSize);
} else {
*p++ = run[0];
- run += 1;
+ run = run.substring(1);
}
}
ASSERT(buffer.size() >= static_cast<size_t>(p - buffer.data())); // Prove buffer not overrun.
- return (encoding.isValid() ? encoding : UTF8Encoding()).decode(buffer.data(), p - buffer.data());
+ buffer.shrink(p - buffer.data()); // Trim to the number of bytes actually written.
+ return buffer;
+ }
+
+ static String decodeRun(StringView run, const TextEncoding& encoding)
+ {
+ // Unescape to raw bytes first, then interpret them with |encoding|, using UTF-8 when |encoding| is not valid.
+ auto bytes = decodeRun(run);
+ return (encoding.isValid() ? encoding : UTF8Encoding()).decode(bytes.data(), bytes.size());
}
};
template<typename EscapeSequence>
-String decodeEscapeSequences(const String& string, const TextEncoding& encoding)
+String decodeEscapeSequences(StringView string, const TextEncoding& encoding)
{
StringBuilder result;
size_t length = string.length();
@@ -134,18 +143,53 @@ String decodeEscapeSequences(const String& string, const TextEncoding& encoding)
continue;
}
- String decoded = EscapeSequence::decodeRun(string.deprecatedCharacters() + encodedRunPosition, encodedRunEnd - encodedRunPosition, encoding);
+ String decoded = EscapeSequence::decodeRun(string.substring(encodedRunPosition, encodedRunEnd - encodedRunPosition), encoding);
if (decoded.isEmpty())
continue;
- result.append(string, decodedPosition, encodedRunPosition - decodedPosition);
+ result.append(string.substring(decodedPosition, encodedRunPosition - decodedPosition));
result.append(decoded);
decodedPosition = encodedRunEnd;
}
- result.append(string, decodedPosition, length - decodedPosition);
+ result.append(string.substring(decodedPosition, length - decodedPosition));
return result.toString();
}
+inline Vector<char> decodeURLEscapeSequencesAsData(StringView string, const TextEncoding& encoding) // Converts |string| to bytes: literal text is encoded with |encoding|, escape runs are unescaped to raw bytes.
+{
+ ASSERT(encoding.isValid());
+
+ Vector<char> result;
+ size_t decodedPosition = 0; // Start of the input that has not been emitted to |result| yet.
+ size_t searchPosition = 0; // Where the next search for an escape run begins.
+ while (true) {
+ size_t encodedRunPosition = URLEscapeSequence::findInString(string, searchPosition);
+ size_t encodedRunEnd = 0;
+ if (encodedRunPosition != notFound) {
+ encodedRunEnd = URLEscapeSequence::findEndOfRun(string, encodedRunPosition, string.length());
+ searchPosition = encodedRunEnd;
+ if (encodedRunEnd == encodedRunPosition) { // Empty run: presumably this '%' does not start a decodable sequence, so skip it and keep it as literal text.
+ ++searchPosition;
+ continue;
+ }
+ }
+ // The literal text before the run (or the whole remainder when no run was found) is encoded as requested.
+ auto stringFragment = string.substring(decodedPosition, encodedRunPosition - decodedPosition); // NOTE(review): when encodedRunPosition is notFound this length is huge; relies on substring() clamping it.
+ auto encodedStringFragment = encoding.encode(stringFragment, URLEncodedEntitiesForUnencodables);
+ result.append(encodedStringFragment.data(), encodedStringFragment.length());
+
+ if (encodedRunPosition == notFound)
+ return result;
+
+ // The unescaped bytes of the run are appended as-is, without going through |encoding|.
+ auto decodedEscapeSequence = URLEscapeSequence::decodeRun(string.substring(encodedRunPosition, encodedRunEnd - encodedRunPosition));
+ ASSERT(!decodedEscapeSequence.isEmpty());
+ result.appendVector(decodedEscapeSequence);
+
+ decodedPosition = encodedRunEnd;
+ }
+}
+
} // namespace WebCore
#endif // DecodeEscapeSequences_h
diff --git a/Source/WebCore/platform/text/Hyphenation.cpp b/Source/WebCore/platform/text/Hyphenation.cpp
index 89f64386a..ec07637e1 100644
--- a/Source/WebCore/platform/text/Hyphenation.cpp
+++ b/Source/WebCore/platform/text/Hyphenation.cpp
@@ -26,7 +26,10 @@
#include "config.h"
#include "Hyphenation.h"
+#if !USE(LIBHYPHEN)
+
#include "NotImplemented.h"
+#include <wtf/text/StringView.h>
namespace WebCore {
@@ -35,10 +38,12 @@ bool canHyphenate(const AtomicString& /* localeIdentifier */)
return false;
}
-size_t lastHyphenLocation(const UChar* /* characters */, size_t /* length */, size_t /* beforeIndex */, const AtomicString& /* localeIdentifier */)
+size_t lastHyphenLocation(StringView, size_t /* beforeIndex */, const AtomicString& /* localeIdentifier */)
{
ASSERT_NOT_REACHED();
return 0;
}
} // namespace WebCore
+
+#endif // !USE(LIBHYPHEN)
diff --git a/Source/WebCore/platform/text/Hyphenation.h b/Source/WebCore/platform/text/Hyphenation.h
index a99bff0cf..27f430591 100644
--- a/Source/WebCore/platform/text/Hyphenation.h
+++ b/Source/WebCore/platform/text/Hyphenation.h
@@ -26,13 +26,20 @@
#ifndef Hyphenation_h
#define Hyphenation_h
+#include <unicode/utypes.h>
#include <wtf/Forward.h>
-#include <wtf/unicode/Unicode.h>
namespace WebCore {
+inline static bool enoughWidthForHyphenation(float availableWidth, float fontPixelSize)
+{
+ // If the maximum width available for the prefix before the hyphen is small, then it is very unlikely
+ // that a hyphenation opportunity exists, so do not bother to look for it.
+ return availableWidth > fontPixelSize * 5 / 4; // "Small" here means at most 1.25 times the font pixel size.
+
+}
bool canHyphenate(const AtomicString& localeIdentifier);
-size_t lastHyphenLocation(const UChar*, size_t length, size_t beforeIndex, const AtomicString& localeIdentifier);
+size_t lastHyphenLocation(StringView, size_t beforeIndex, const AtomicString& localeIdentifier);
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h b/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h
deleted file mode 100644
index d2eb26d63..000000000
--- a/Source/WebCore/platform/text/LineBreakIteratorPoolICU.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2011 Apple Inc. All Rights Reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef LineBreakIteratorPoolICU_h
-#define LineBreakIteratorPoolICU_h
-
-#include "TextBreakIteratorInternalICU.h"
-#include <unicode/ubrk.h>
-#include <wtf/Assertions.h>
-#include <wtf/HashMap.h>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/ThreadSpecific.h>
-#include <wtf/text/AtomicString.h>
-#include <wtf/text/CString.h>
-
-namespace WebCore {
-
-class LineBreakIteratorPool {
- WTF_MAKE_NONCOPYABLE(LineBreakIteratorPool);
-public:
- static LineBreakIteratorPool& sharedPool()
- {
- static WTF::ThreadSpecific<LineBreakIteratorPool>* pool = new WTF::ThreadSpecific<LineBreakIteratorPool>;
- return **pool;
- }
-
- static PassOwnPtr<LineBreakIteratorPool> create() { return adoptPtr(new LineBreakIteratorPool); }
-
- UBreakIterator* take(const AtomicString& locale)
- {
- UBreakIterator* iterator = 0;
- for (size_t i = 0; i < m_pool.size(); ++i) {
- if (m_pool[i].first == locale) {
- iterator = m_pool[i].second;
- m_pool.remove(i);
- break;
- }
- }
-
- if (!iterator) {
- UErrorCode openStatus = U_ZERO_ERROR;
- bool localeIsEmpty = locale.isEmpty();
- iterator = ubrk_open(UBRK_LINE, localeIsEmpty ? currentTextBreakLocaleID() : locale.string().utf8().data(), 0, 0, &openStatus);
- // locale comes from a web page and it can be invalid, leading ICU
- // to fail, in which case we fall back to the default locale.
- if (!localeIsEmpty && U_FAILURE(openStatus)) {
- openStatus = U_ZERO_ERROR;
- iterator = ubrk_open(UBRK_LINE, currentTextBreakLocaleID(), 0, 0, &openStatus);
- }
-
- if (U_FAILURE(openStatus)) {
- LOG_ERROR("ubrk_open failed with status %d", openStatus);
- return 0;
- }
- }
-
- ASSERT(!m_vendedIterators.contains(iterator));
- m_vendedIterators.set(iterator, locale);
- return iterator;
- }
-
- void put(UBreakIterator* iterator)
- {
- ASSERT_ARG(iterator, m_vendedIterators.contains(iterator));
-
- if (m_pool.size() == capacity) {
- ubrk_close(m_pool[0].second);
- m_pool.remove(0);
- }
-
- m_pool.append(Entry(m_vendedIterators.take(iterator), iterator));
- }
-
-private:
- LineBreakIteratorPool() { }
-
- static const size_t capacity = 4;
-
- typedef std::pair<AtomicString, UBreakIterator*> Entry;
- typedef Vector<Entry, capacity> Pool;
- Pool m_pool;
- HashMap<UBreakIterator*, AtomicString> m_vendedIterators;
-
- friend WTF::ThreadSpecific<LineBreakIteratorPool>::operator LineBreakIteratorPool*();
-};
-
-}
-
-#endif
diff --git a/Source/WebCore/platform/text/LineEnding.cpp b/Source/WebCore/platform/text/LineEnding.cpp
index ae24fb097..3e0ea4df6 100644
--- a/Source/WebCore/platform/text/LineEnding.cpp
+++ b/Source/WebCore/platform/text/LineEnding.cpp
@@ -39,7 +39,7 @@ namespace {
class OutputBuffer {
public:
- virtual char* allocate(size_t size) = 0;
+ virtual uint8_t* allocate(size_t size) = 0;
virtual void copy(const CString&) = 0;
virtual ~OutputBuffer() { }
};
@@ -52,14 +52,14 @@ public:
}
virtual ~CStringBuffer() { }
- virtual char* allocate(size_t size)
+ uint8_t* allocate(size_t size) override
{
char* ptr;
m_buffer = CString::newUninitialized(size, ptr);
- return ptr;
+ return reinterpret_cast<uint8_t*>(ptr);
}
- virtual void copy(const CString& source)
+ void copy(const CString& source) override
{
m_buffer = source;
}
@@ -70,29 +70,31 @@ private:
CString m_buffer;
};
+#if OS(WINDOWS)
class VectorCharAppendBuffer : public OutputBuffer { // OutputBuffer that appends to a caller-owned byte vector (held by reference).
public:
- VectorCharAppendBuffer(Vector<char>& buffer)
+ VectorCharAppendBuffer(Vector<uint8_t>& buffer)
: m_buffer(buffer)
{
}
virtual ~VectorCharAppendBuffer() { }
- virtual char* allocate(size_t size)
+ uint8_t* allocate(size_t size) override // Grows the vector by |size| bytes and returns a pointer to the start of the new region.
{
size_t oldSize = m_buffer.size();
m_buffer.grow(oldSize + size);
return m_buffer.data() + oldSize; // Existing contents are kept; the caller writes after them.
}
- virtual void copy(const CString& source)
+ void copy(const CString& source) override // Appends the bytes of |source| unchanged.
{
m_buffer.append(source.data(), source.length());
}
private:
- Vector<char>& m_buffer;
+ Vector<uint8_t>& m_buffer;
};
+#endif
void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffer)
{
@@ -126,7 +128,7 @@ void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffe
}
p = from.data();
- char* q = buffer.allocate(newLen);
+ uint8_t* q = buffer.allocate(newLen);
// Make a copy of the string.
while (p < from.data() + from.length()) {
@@ -153,10 +155,8 @@ void internalNormalizeLineEndingsToCRLF(const CString& from, OutputBuffer& buffe
namespace WebCore {
-void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR);
-
// Normalize all line-endings to CR or LF.
-void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR)
+static void normalizeToCROrLF(const CString& from, Vector<uint8_t>& result, bool toCR)
{
// Compute the new length.
size_t newLen = 0;
@@ -181,7 +181,7 @@ void normalizeToCROrLF(const CString& from, Vector<char>& result, bool toCR)
p = from.data();
size_t oldResultSize = result.size();
result.grow(oldResultSize + newLen);
- char* q = result.data() + oldResultSize;
+ uint8_t* q = result.data() + oldResultSize;
// If no need to fix the string, just copy the string over.
if (!needFix) {
@@ -214,23 +214,13 @@ CString normalizeLineEndingsToCRLF(const CString& from)
return buffer.buffer();
}
-void normalizeLineEndingsToCR(const CString& from, Vector<char>& result)
-{
- normalizeToCROrLF(from, result, true);
-}
-
-void normalizeLineEndingsToLF(const CString& from, Vector<char>& result)
-{
- normalizeToCROrLF(from, result, false);
-}
-
-void normalizeLineEndingsToNative(const CString& from, Vector<char>& result)
+void normalizeLineEndingsToNative(const CString& from, Vector<uint8_t>& result) // Appends |from| to |result| with line endings normalized: CRLF on Windows, LF elsewhere.
{
#if OS(WINDOWS)
VectorCharAppendBuffer buffer(result);
internalNormalizeLineEndingsToCRLF(from, buffer);
#else
- normalizeLineEndingsToLF(from, result);
+ normalizeToCROrLF(from, result, false); // toCR == false selects LF, matching the removed normalizeLineEndingsToLF() wrapper.
#endif
}
diff --git a/Source/WebCore/platform/text/LineEnding.h b/Source/WebCore/platform/text/LineEnding.h
index 4306ce8a0..a41cb8099 100644
--- a/Source/WebCore/platform/text/LineEnding.h
+++ b/Source/WebCore/platform/text/LineEnding.h
@@ -40,15 +40,9 @@ namespace WebCore {
// Normalize all line-endings in the given string to CRLF.
CString normalizeLineEndingsToCRLF(const CString& from);
-// Normalize all line-endings in the given string to CR and append the result to the given buffer.
-void normalizeLineEndingsToCR(const CString& from, Vector<char>& result);
-
-// Normalize all line-endings in the given string to LF and append the result to the given buffer.
-void normalizeLineEndingsToLF(const CString& from, Vector<char>& result);
-
// Normalize all line-endings in the given string to the native line-endings and append the result to the given buffer.
// (Normalize to CRLF on Windows and normalize to LF on all other platforms.)
-void normalizeLineEndingsToNative(const CString& from, Vector<char>& result);
+void normalizeLineEndingsToNative(const CString& from, Vector<uint8_t>& result);
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/LocaleICU.cpp b/Source/WebCore/platform/text/LocaleICU.cpp
new file mode 100644
index 000000000..88151ea78
--- /dev/null
+++ b/Source/WebCore/platform/text/LocaleICU.cpp
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2011,2012 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "LocaleICU.h"
+
+#include "LocalizedStrings.h"
+#include <limits>
+#include <unicode/udatpg.h>
+#include <unicode/uloc.h>
+#include <wtf/DateMath.h>
+#include <wtf/text/StringBuilder.h>
+
+using namespace icu;
+using namespace std;
+
+namespace WebCore {
+
+// ICU-backed factory: converts the locale identifier to UTF-8 and wraps it in a LocaleICU.
+std::unique_ptr<Locale> Locale::create(const AtomicString& locale)
+{
+    CString localeUTF8 = locale.string().utf8();
+    return std::make_unique<LocaleICU>(localeUTF8.data());
+}
+
+// Stores a copy of the locale identifier in m_locale; no ICU object is opened here.
+LocaleICU::LocaleICU(const char* locale)
+ : m_locale(locale)
+{
+}
+
+// Closes the ICU formatter handles owned by this object.
+// Handles that were never opened are still nullptr at this point.
+// NOTE(review): presumably unum_close()/udat_close() tolerate null -- confirm against the ICU docs.
+LocaleICU::~LocaleICU()
+{
+#if !UCONFIG_NO_FORMATTING
+ unum_close(m_numberFormat);
+#endif
+#if ENABLE(DATE_AND_TIME_INPUT_TYPES)
+ udat_close(m_shortDateFormat);
+ udat_close(m_mediumTimeFormat);
+ udat_close(m_shortTimeFormat);
+#endif
+}
+
+#if !UCONFIG_NO_FORMATTING
+// Fetches one number-format symbol from m_numberFormat as a String.
+// Uses two calls: the first measures the symbol, the second fills a buffer of that size.
+// Returns a null String if either call fails.
+String LocaleICU::decimalSymbol(UNumberFormatSymbol symbol)
+{
+    UErrorCode measureStatus = U_ZERO_ERROR;
+    const int32_t symbolLength = unum_getSymbol(m_numberFormat, symbol, 0, 0, &measureStatus);
+    // A buffer-overflow status is the expected outcome of a measuring call.
+    const bool measured = U_SUCCESS(measureStatus) || measureStatus == U_BUFFER_OVERFLOW_ERROR;
+    ASSERT(measured);
+    if (!measured)
+        return String();
+
+    Vector<UChar> symbolChars(symbolLength);
+    UErrorCode fillStatus = U_ZERO_ERROR;
+    unum_getSymbol(m_numberFormat, symbol, symbolChars.data(), symbolLength, &fillStatus);
+    if (U_FAILURE(fillStatus))
+        return String();
+    return String::adopt(WTFMove(symbolChars));
+}
+
+// Fetches one text attribute (prefix/suffix) from m_numberFormat as a String.
+// Uses two calls: the first measures the attribute, the second fills a buffer of that size.
+// Returns a null String if either call fails.
+String LocaleICU::decimalTextAttribute(UNumberFormatTextAttribute tag)
+{
+    UErrorCode measureStatus = U_ZERO_ERROR;
+    const int32_t attributeLength = unum_getTextAttribute(m_numberFormat, tag, 0, 0, &measureStatus);
+    // A buffer-overflow status is the expected outcome of a measuring call.
+    const bool measured = U_SUCCESS(measureStatus) || measureStatus == U_BUFFER_OVERFLOW_ERROR;
+    ASSERT(measured);
+    if (!measured)
+        return String();
+
+    Vector<UChar> attributeChars(attributeLength);
+    UErrorCode fillStatus = U_ZERO_ERROR;
+    unum_getTextAttribute(m_numberFormat, tag, attributeChars.data(), attributeLength, &fillStatus);
+    ASSERT(U_SUCCESS(fillStatus));
+    if (U_FAILURE(fillStatus))
+        return String();
+    return String::adopt(WTFMove(attributeChars));
+}
+#endif
+
+// Opens the decimal number formatter once and hands its digit symbols, separators and
+// sign prefixes/suffixes to setLocaleData(). Later calls do nothing, including after a failed open.
+void LocaleICU::initializeLocaleData()
+{
+#if !UCONFIG_NO_FORMATTING
+    if (m_didCreateDecimalFormat)
+        return;
+    m_didCreateDecimalFormat = true;
+
+    UErrorCode openStatus = U_ZERO_ERROR;
+    m_numberFormat = unum_open(UNUM_DECIMAL, 0, 0, m_locale.data(), 0, &openStatus);
+    if (U_FAILURE(openStatus))
+        return;
+
+    // Order matters: digits zero through nine, then the decimal and grouping separators.
+    static const UNumberFormatSymbol orderedSymbols[] = {
+        UNUM_ZERO_DIGIT_SYMBOL,
+        UNUM_ONE_DIGIT_SYMBOL,
+        UNUM_TWO_DIGIT_SYMBOL,
+        UNUM_THREE_DIGIT_SYMBOL,
+        UNUM_FOUR_DIGIT_SYMBOL,
+        UNUM_FIVE_DIGIT_SYMBOL,
+        UNUM_SIX_DIGIT_SYMBOL,
+        UNUM_SEVEN_DIGIT_SYMBOL,
+        UNUM_EIGHT_DIGIT_SYMBOL,
+        UNUM_NINE_DIGIT_SYMBOL,
+        UNUM_DECIMAL_SEPARATOR_SYMBOL,
+        UNUM_GROUPING_SEPARATOR_SYMBOL,
+    };
+
+    Vector<String, DecimalSymbolsSize> symbols;
+    for (UNumberFormatSymbol symbolKind : orderedSymbols)
+        symbols.append(decimalSymbol(symbolKind));
+    ASSERT(symbols.size() == DecimalSymbolsSize);
+
+    setLocaleData(symbols,
+        decimalTextAttribute(UNUM_POSITIVE_PREFIX),
+        decimalTextAttribute(UNUM_POSITIVE_SUFFIX),
+        decimalTextAttribute(UNUM_NEGATIVE_PREFIX),
+        decimalTextAttribute(UNUM_NEGATIVE_SUFFIX));
+#endif
+}
+
+#if ENABLE(DATE_AND_TIME_INPUT_TYPES)
+// Opens the short date formatter on first use (a failed open is not retried).
+// Returns true when a formatter is available.
+bool LocaleICU::initializeShortDateFormat()
+{
+    if (!m_didCreateShortDateFormat) {
+        m_shortDateFormat = openDateFormat(UDAT_NONE, UDAT_SHORT);
+        m_didCreateShortDateFormat = true;
+    }
+    return m_shortDateFormat;
+}
+
+// Opens an ICU date formatter for this locale with the given styles, fixed to the "GMT" time zone.
+// The ICU status is not inspected; callers check the returned pointer instead.
+UDateFormat* LocaleICU::openDateFormat(UDateFormatStyle timeStyle, UDateFormatStyle dateStyle) const
+{
+    const UChar timeZoneID[3] = { 'G', 'M', 'T' };
+    UErrorCode ignoredStatus = U_ZERO_ERROR;
+    UDateFormat* formatter = udat_open(timeStyle, dateStyle, m_locale.data(), timeZoneID, WTF_ARRAY_LENGTH(timeZoneID), 0, -1, &ignoredStatus);
+    return formatter;
+}
+
+// Returns the localized pattern string of |dateFormat|.
+// Returns the empty string for a null formatter, an empty pattern, or any ICU failure.
+static String getDateFormatPattern(const UDateFormat* dateFormat)
+{
+    if (!dateFormat)
+        return emptyString();
+
+    // First call only measures; a usable pattern reports buffer overflow and a non-zero length.
+    UErrorCode measureStatus = U_ZERO_ERROR;
+    const int32_t patternLength = udat_toPattern(dateFormat, TRUE, 0, 0, &measureStatus);
+    const bool hasPattern = measureStatus == U_BUFFER_OVERFLOW_ERROR && patternLength;
+    if (!hasPattern)
+        return emptyString();
+
+    Vector<UChar> patternChars(patternLength);
+    UErrorCode fillStatus = U_ZERO_ERROR;
+    udat_toPattern(dateFormat, TRUE, patternChars.data(), patternLength, &fillStatus);
+    if (U_FAILURE(fillStatus))
+        return emptyString();
+    return String::adopt(WTFMove(patternChars));
+}
+
+// Collects |size| consecutive symbols of |type|, starting at |startIndex|, from |dateFormat|.
+//
+// Returns nullptr when the labels cannot be produced: a null formatter, a symbol count that
+// does not equal startIndex + size, or any ICU failure while reading a symbol. Callers test the
+// returned pointer to decide whether to use their fallback labels, so failure must be reported
+// as null rather than as an empty vector, which would hide the failure from them.
+std::unique_ptr<Vector<String>> LocaleICU::createLabelVector(const UDateFormat* dateFormat, UDateFormatSymbolType type, int32_t startIndex, int32_t size)
+{
+    if (!dateFormat)
+        return nullptr;
+    if (udat_countSymbols(dateFormat, type) != startIndex + size)
+        return nullptr;
+
+    auto labels = std::make_unique<Vector<String>>();
+    labels->reserveCapacity(size);
+    for (int32_t i = 0; i < size; ++i) {
+        // First call only measures the symbol; buffer overflow is the expected status.
+        UErrorCode status = U_ZERO_ERROR;
+        int32_t length = udat_getSymbols(dateFormat, type, startIndex + i, 0, 0, &status);
+        if (status != U_BUFFER_OVERFLOW_ERROR)
+            return nullptr;
+        Vector<UChar> buffer(length);
+        status = U_ZERO_ERROR;
+        udat_getSymbols(dateFormat, type, startIndex + i, buffer.data(), length, &status);
+        if (U_FAILURE(status))
+            return nullptr;
+        labels->append(String::adopt(WTFMove(buffer)));
+    }
+    return labels;
+}
+
+// Builds a month-label list from WTF::monthFullName, for use when ICU cannot supply one.
+static std::unique_ptr<Vector<String>> createFallbackMonthLabels()
+{
+    auto fallback = std::make_unique<Vector<String>>();
+    fallback->reserveCapacity(WTF_ARRAY_LENGTH(WTF::monthFullName));
+    for (const char* fullName : WTF::monthFullName)
+        fallback->append(fullName);
+    return fallback;
+}
+
+// Returns the cached full month names, computing them on first use.
+// Prefers ICU's labels and falls back to the built-in names if ICU yields none.
+const Vector<String>& LocaleICU::monthLabels()
+{
+    if (!m_monthLabels) {
+        if (initializeShortDateFormat())
+            m_monthLabels = createLabelVector(m_shortDateFormat, UDAT_MONTHS, UCAL_JANUARY, 12);
+        if (!m_monthLabels)
+            m_monthLabels = createFallbackMonthLabels();
+    }
+    return *m_monthLabels;
+}
+
+// Builds the hard-coded "AM"/"PM" label pair, for use when ICU cannot supply one.
+static std::unique_ptr<Vector<String>> createFallbackAMPMLabels()
+{
+    auto fallback = std::make_unique<Vector<String>>();
+    fallback->reserveCapacity(2);
+    fallback->append("AM");
+    fallback->append("PM");
+    return fallback;
+}
+
+// Populates the cached time and date-time patterns and the AM/PM labels on first use.
+// The medium and short time formatters stay open as members; the combined date-time
+// formatters are needed only for their patterns and are closed immediately.
+void LocaleICU::initializeDateTimeFormat()
+{
+    if (m_didCreateTimeFormat)
+        return;
+
+    // The ICU medium and short time patterns are assumed to be LDML-compatible, since the
+    // ICU-specific pattern character "V" appears in neither of them.
+    m_mediumTimeFormat = openDateFormat(UDAT_MEDIUM, UDAT_NONE);
+    m_timeFormatWithSeconds = getDateFormatPattern(m_mediumTimeFormat);
+
+    m_shortTimeFormat = openDateFormat(UDAT_SHORT, UDAT_NONE);
+    m_timeFormatWithoutSeconds = getDateFormatPattern(m_shortTimeFormat);
+
+    // Opens a throwaway formatter for the style pair, reads its pattern, and closes it again.
+    auto patternForStyles = [this](UDateFormatStyle timeStyle, UDateFormatStyle dateStyle) -> String {
+        UDateFormat* temporaryFormat = openDateFormat(timeStyle, dateStyle);
+        String pattern = getDateFormatPattern(temporaryFormat);
+        udat_close(temporaryFormat);
+        return pattern;
+    };
+    m_dateTimeFormatWithSeconds = patternForStyles(UDAT_MEDIUM, UDAT_SHORT);
+    m_dateTimeFormatWithoutSeconds = patternForStyles(UDAT_SHORT, UDAT_SHORT);
+
+    std::unique_ptr<Vector<String>> ampmLabels = createLabelVector(m_mediumTimeFormat, UDAT_AM_PMS, UCAL_AM, 2);
+    if (!ampmLabels)
+        ampmLabels = createFallbackAMPMLabels();
+    m_timeAMPMLabels = *ampmLabels;
+
+    m_didCreateTimeFormat = true;
+}
+
+// Returns the short date pattern, cached after the first successful lookup.
+// Returns "yyyy-MM-dd", without caching it, when no short date formatter could be opened.
+String LocaleICU::dateFormat()
+{
+    if (m_dateFormat.isNull()) {
+        if (!initializeShortDateFormat())
+            return ASCIILiteral("yyyy-MM-dd");
+        m_dateFormat = getDateFormatPattern(m_shortDateFormat);
+    }
+    return m_dateFormat;
+}
+
+// Asks ICU's date-time pattern generator for the best pattern matching |skeleton| in |locale|.
+// Returns "yyyy-MM" if the generator cannot be opened or produces no pattern.
+static String getFormatForSkeleton(const char* locale, const UChar* skeleton, int32_t skeletonLength)
+{
+    String bestPattern = ASCIILiteral("yyyy-MM");
+
+    UErrorCode openStatus = U_ZERO_ERROR;
+    UDateTimePatternGenerator* generator = udatpg_open(locale, &openStatus);
+    if (!generator)
+        return bestPattern;
+
+    // First call only measures; a usable pattern reports buffer overflow and a non-zero length.
+    UErrorCode measureStatus = U_ZERO_ERROR;
+    const int32_t patternLength = udatpg_getBestPattern(generator, skeleton, skeletonLength, 0, 0, &measureStatus);
+    if (measureStatus == U_BUFFER_OVERFLOW_ERROR && patternLength) {
+        Vector<UChar> patternChars(patternLength);
+        UErrorCode fillStatus = U_ZERO_ERROR;
+        udatpg_getBestPattern(generator, skeleton, skeletonLength, patternChars.data(), patternLength, &fillStatus);
+        if (U_SUCCESS(fillStatus))
+            bestPattern = String::adopt(WTFMove(patternChars));
+    }
+
+    udatpg_close(generator);
+    return bestPattern;
+}
+
+// Returns the year + full-month-name pattern for this locale, cached after first use.
+String LocaleICU::monthFormat()
+{
+    if (m_monthFormat.isNull()) {
+        // Gets a format for "MMMM" because Windows API always provides formats for
+        // "MMMM" in some locales.
+        const UChar yearAndFullMonth[] = { 'y', 'y', 'y', 'y', 'M', 'M', 'M', 'M' };
+        m_monthFormat = getFormatForSkeleton(m_locale.data(), yearAndFullMonth, WTF_ARRAY_LENGTH(yearAndFullMonth));
+    }
+    return m_monthFormat;
+}
+
+// Returns the year + abbreviated-month pattern ("yyyyMMM" skeleton), cached after first use.
+String LocaleICU::shortMonthFormat()
+{
+    if (m_shortMonthFormat.isNull()) {
+        const UChar yearAndShortMonth[] = { 'y', 'y', 'y', 'y', 'M', 'M', 'M' };
+        m_shortMonthFormat = getFormatForSkeleton(m_locale.data(), yearAndShortMonth, WTF_ARRAY_LENGTH(yearAndShortMonth));
+    }
+    return m_shortMonthFormat;
+}
+
+// Returns the pattern of the medium-style time formatter (the "with seconds" pattern),
+// initializing the cached date/time formats on first use.
+String LocaleICU::timeFormat()
+{
+ initializeDateTimeFormat();
+ return m_timeFormatWithSeconds;
+}
+
+// Returns the pattern of the short-style time formatter (the "without seconds" pattern),
+// initializing the cached date/time formats on first use.
+String LocaleICU::shortTimeFormat()
+{
+ initializeDateTimeFormat();
+ return m_timeFormatWithoutSeconds;
+}
+
+// Returns the combined pattern built from the medium time style and the short date style,
+// initializing the cached date/time formats on first use.
+String LocaleICU::dateTimeFormatWithSeconds()
+{
+ initializeDateTimeFormat();
+ return m_dateTimeFormatWithSeconds;
+}
+
+// Returns the combined pattern built from the short time style and the short date style,
+// initializing the cached date/time formats on first use.
+String LocaleICU::dateTimeFormatWithoutSeconds()
+{
+ initializeDateTimeFormat();
+ return m_dateTimeFormatWithoutSeconds;
+}
+
+// Returns the abbreviated month names, cached once non-empty.
+// Prefers ICU's labels and falls back to WTF::monthName if ICU yields none.
+const Vector<String>& LocaleICU::shortMonthLabels()
+{
+    if (!m_shortMonthLabels.isEmpty())
+        return m_shortMonthLabels;
+
+    std::unique_ptr<Vector<String>> icuLabels;
+    if (initializeShortDateFormat())
+        icuLabels = createLabelVector(m_shortDateFormat, UDAT_SHORT_MONTHS, UCAL_JANUARY, 12);
+    if (icuLabels) {
+        m_shortMonthLabels = *icuLabels;
+        return m_shortMonthLabels;
+    }
+
+    m_shortMonthLabels.reserveCapacity(WTF_ARRAY_LENGTH(WTF::monthName));
+    for (const char* abbreviatedName : WTF::monthName)
+        m_shortMonthLabels.append(abbreviatedName);
+    return m_shortMonthLabels;
+}
+
+// Returns the stand-alone full month names, cached once non-empty.
+// Prefers ICU's stand-alone labels and falls back to monthLabels() if ICU yields none.
+const Vector<String>& LocaleICU::standAloneMonthLabels()
+{
+    if (!m_standAloneMonthLabels.isEmpty())
+        return m_standAloneMonthLabels;
+
+    std::unique_ptr<Vector<String>> icuLabels;
+    if (initializeShortDateFormat())
+        icuLabels = createLabelVector(m_shortDateFormat, UDAT_STANDALONE_MONTHS, UCAL_JANUARY, 12);
+
+    if (icuLabels)
+        m_standAloneMonthLabels = *icuLabels;
+    else
+        m_standAloneMonthLabels = monthLabels();
+    return m_standAloneMonthLabels;
+}
+
+// Returns the stand-alone abbreviated month names, cached once non-empty.
+// Prefers ICU's stand-alone labels and falls back to shortMonthLabels() if ICU yields none.
+const Vector<String>& LocaleICU::shortStandAloneMonthLabels()
+{
+    if (!m_shortStandAloneMonthLabels.isEmpty())
+        return m_shortStandAloneMonthLabels;
+
+    std::unique_ptr<Vector<String>> icuLabels;
+    if (initializeShortDateFormat())
+        icuLabels = createLabelVector(m_shortDateFormat, UDAT_STANDALONE_SHORT_MONTHS, UCAL_JANUARY, 12);
+
+    if (icuLabels)
+        m_shortStandAloneMonthLabels = *icuLabels;
+    else
+        m_shortStandAloneMonthLabels = shortMonthLabels();
+    return m_shortStandAloneMonthLabels;
+}
+
+// Returns the AM/PM labels, initializing the cached date/time formats on first use.
+const Vector<String>& LocaleICU::timeAMPMLabels()
+{
+ initializeDateTimeFormat();
+ return m_timeAMPMLabels;
+}
+
+#endif
+
+} // namespace WebCore
+
diff --git a/Source/WebCore/platform/text/LocaleICU.h b/Source/WebCore/platform/text/LocaleICU.h
new file mode 100644
index 000000000..54b1fb3d0
--- /dev/null
+++ b/Source/WebCore/platform/text/LocaleICU.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2012 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LocaleICU_h
+#define LocaleICU_h
+
+#include "DateComponents.h"
+#include "PlatformLocale.h"
+#include <unicode/udat.h>
+#include <unicode/unum.h>
+#include <wtf/Forward.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+// We should use this class only for LocalizedNumberICU.cpp, LocalizedDateICU.cpp,
+// and LocalizedNumberICUTest.cpp.
+// Locale implementation backed by ICU's number- and date-formatting APIs.
+// ICU handles and the strings derived from them are created lazily and cached in the members below.
+class LocaleICU : public Locale {
+public:
+ // Takes the locale identifier as a C string; it is copied into m_locale.
+ explicit LocaleICU(const char*);
+ virtual ~LocaleICU();
+
+#if ENABLE(DATE_AND_TIME_INPUT_TYPES)
+ // Pattern strings and label lists for date/time input types.
+ String dateFormat() override;
+ String monthFormat() override;
+ String shortMonthFormat() override;
+ String timeFormat() override;
+ String shortTimeFormat() override;
+ String dateTimeFormatWithSeconds() override;
+ String dateTimeFormatWithoutSeconds() override;
+ const Vector<String>& monthLabels() override;
+ const Vector<String>& shortMonthLabels() override;
+ const Vector<String>& standAloneMonthLabels() override;
+ const Vector<String>& shortStandAloneMonthLabels() override;
+ const Vector<String>& timeAMPMLabels() override;
+#endif
+
+private:
+#if !UCONFIG_NO_FORMATTING
+ // Read a single symbol / text attribute from m_numberFormat.
+ String decimalSymbol(UNumberFormatSymbol);
+ String decimalTextAttribute(UNumberFormatTextAttribute);
+#endif
+ void initializeLocaleData() override;
+
+#if ENABLE(DATE_AND_TIME_INPUT_TYPES)
+ // Opens m_shortDateFormat on first use; returns whether a formatter is available.
+ bool initializeShortDateFormat();
+ UDateFormat* openDateFormat(UDateFormatStyle timeStyle, UDateFormatStyle dateStyle) const;
+
+ std::unique_ptr<Vector<String>> createLabelVector(const UDateFormat*, UDateFormatSymbolType, int32_t startIndex, int32_t size);
+ // Fills the time/date-time pattern members and m_timeAMPMLabels on first use.
+ void initializeDateTimeFormat();
+#endif
+
+ // Locale identifier passed to the ICU open functions.
+ CString m_locale;
+
+#if !UCONFIG_NO_FORMATTING
+ // Decimal formatter; stays nullptr until initializeLocaleData() runs.
+ UNumberFormat* m_numberFormat { nullptr };
+ bool m_didCreateDecimalFormat { false };
+#endif
+
+#if ENABLE(DATE_AND_TIME_INPUT_TYPES)
+ // Lazily computed caches; a null String / null pointer / empty vector means "not computed yet".
+ std::unique_ptr<Vector<String>> m_monthLabels;
+ String m_dateFormat;
+ String m_monthFormat;
+ String m_shortMonthFormat;
+ String m_timeFormatWithSeconds;
+ String m_timeFormatWithoutSeconds;
+ String m_dateTimeFormatWithSeconds;
+ String m_dateTimeFormatWithoutSeconds;
+ // ICU formatters owned by this object and closed in the destructor.
+ UDateFormat* m_shortDateFormat { nullptr };
+ UDateFormat* m_mediumTimeFormat { nullptr };
+ UDateFormat* m_shortTimeFormat { nullptr };
+ Vector<String> m_shortMonthLabels;
+ Vector<String> m_standAloneMonthLabels;
+ Vector<String> m_shortStandAloneMonthLabels;
+ Vector<String> m_timeAMPMLabels;
+ // Set once the corresponding open attempt has been made, successful or not.
+ bool m_didCreateShortDateFormat { false };
+ bool m_didCreateTimeFormat { false };
+#endif
+};
+
+} // namespace WebCore
+#endif
diff --git a/Source/WebCore/platform/text/LocaleNone.cpp b/Source/WebCore/platform/text/LocaleNone.cpp
index 961833e42..3a212ab46 100644
--- a/Source/WebCore/platform/text/LocaleNone.cpp
+++ b/Source/WebCore/platform/text/LocaleNone.cpp
@@ -26,7 +26,6 @@
#include "config.h"
#include "PlatformLocale.h"
#include <wtf/DateMath.h>
-#include <wtf/PassOwnPtr.h>
namespace WebCore {
@@ -35,20 +34,20 @@ public:
virtual ~LocaleNone();
private:
- virtual void initializeLocaleData() override final;
+ void initializeLocaleData() final;
#if ENABLE(DATE_AND_TIME_INPUT_TYPES)
- virtual String dateFormat() override;
- virtual String monthFormat() override;
- virtual String shortMonthFormat() override;
- virtual String timeFormat() override;
- virtual String shortTimeFormat() override;
- virtual String dateTimeFormatWithSeconds() override;
- virtual String dateTimeFormatWithoutSeconds() override;
- virtual const Vector<String>& monthLabels() override;
- virtual const Vector<String>& shortMonthLabels() override;
- virtual const Vector<String>& standAloneMonthLabels() override;
- virtual const Vector<String>& shortStandAloneMonthLabels() override;
- virtual const Vector<String>& timeAMPMLabels() override;
+ String dateFormat() override;
+ String monthFormat() override;
+ String shortMonthFormat() override;
+ String timeFormat() override;
+ String shortTimeFormat() override;
+ String dateTimeFormatWithSeconds() override;
+ String dateTimeFormatWithoutSeconds() override;
+ const Vector<String>& monthLabels() override;
+ const Vector<String>& shortMonthLabels() override;
+ const Vector<String>& standAloneMonthLabels() override;
+ const Vector<String>& shortStandAloneMonthLabels() override;
+ const Vector<String>& timeAMPMLabels() override;
Vector<String> m_timeAMPMLabels;
Vector<String> m_shortMonthLabels;
@@ -56,9 +55,9 @@ private:
#endif
};
-PassOwnPtr<Locale> Locale::create(const AtomicString&)
+std::unique_ptr<Locale> Locale::create(const AtomicString&)
{
- return adoptPtr(new LocaleNone());
+ return std::make_unique<LocaleNone>();
}
LocaleNone::~LocaleNone()
diff --git a/Source/WebCore/platform/text/LocaleToScriptMapping.h b/Source/WebCore/platform/text/LocaleToScriptMapping.h
index d5c8c7603..73006d235 100644
--- a/Source/WebCore/platform/text/LocaleToScriptMapping.h
+++ b/Source/WebCore/platform/text/LocaleToScriptMapping.h
@@ -31,13 +31,13 @@
#ifndef LocaleToScriptMapping_h
#define LocaleToScriptMapping_h
+#include <unicode/uscript.h>
#include <wtf/Forward.h>
-#include <wtf/unicode/Unicode.h>
namespace WebCore {
UScriptCode localeToScriptCodeForFontSelection(const String&);
-UScriptCode scriptNameToCode(const String&);
+WEBCORE_EXPORT UScriptCode scriptNameToCode(const String&);
}
diff --git a/Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp b/Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp
index 85d87b59f..461bf27f8 100644
--- a/Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp
+++ b/Source/WebCore/platform/text/LocaleToScriptMappingDefault.cpp
@@ -32,7 +32,7 @@
#include "LocaleToScriptMapping.h"
#include <wtf/HashMap.h>
-#include <wtf/HashSet.h>
+#include <wtf/NeverDestroyed.h>
#include <wtf/text/StringHash.h>
namespace WebCore {
@@ -156,21 +156,22 @@ static const ScriptNameCode scriptNameCodeList[] = {
};
struct ScriptNameCodeMapHashTraits : public HashTraits<String> {
- static const int minimumTableSize = WTF::HashTableCapacityForSize<sizeof(scriptNameCodeList) / sizeof(ScriptNameCode)>::value;
+ static const int minimumTableSize = WTF::HashTableCapacityForSize<WTF_ARRAY_LENGTH(scriptNameCodeList)>::value;
};
-typedef HashMap<String, UScriptCode, DefaultHash<String>::Hash, ScriptNameCodeMapHashTraits> ScriptNameCodeMap;
+typedef HashMap<String, UScriptCode, ASCIICaseInsensitiveHash, ScriptNameCodeMapHashTraits> ScriptNameCodeMap;
UScriptCode scriptNameToCode(const String& scriptName)
{
- DEFINE_STATIC_LOCAL(ScriptNameCodeMap, scriptNameCodeMap, ());
- if (scriptNameCodeMap.isEmpty()) {
- for (size_t i = 0; i < sizeof(scriptNameCodeList) / sizeof(ScriptNameCode); ++i)
- scriptNameCodeMap.set(ASCIILiteral(scriptNameCodeList[i].name), scriptNameCodeList[i].code);
- }
+ static NeverDestroyed<ScriptNameCodeMap> scriptNameCodeMap = []() {
+ ScriptNameCodeMap map;
+ for (auto& nameAndCode : scriptNameCodeList)
+ map.add(ASCIILiteral(nameAndCode.name), nameAndCode.code);
+ return map;
+ }();
- ScriptNameCodeMap::iterator it = scriptNameCodeMap.find(scriptName.lower());
- if (it != scriptNameCodeMap.end())
+ auto it = scriptNameCodeMap.get().find(scriptName);
+ if (it != scriptNameCodeMap.get().end())
return it->value;
return USCRIPT_INVALID_CODE;
}
@@ -375,38 +376,40 @@ static const LocaleScript localeScriptList[] = {
{ "yap", USCRIPT_LATIN },
{ "yo", USCRIPT_LATIN },
{ "za", USCRIPT_LATIN },
- { "zh", USCRIPT_SIMPLIFIED_HAN },
+ { "zh", USCRIPT_HAN },
{ "zh_hk", USCRIPT_TRADITIONAL_HAN },
{ "zh_tw", USCRIPT_TRADITIONAL_HAN },
{ "zu", USCRIPT_LATIN }
};
struct LocaleScriptMapHashTraits : public HashTraits<String> {
- static const int minimumTableSize = WTF::HashTableCapacityForSize<sizeof(localeScriptList) / sizeof(LocaleScript)>::value;
+ static const int minimumTableSize = WTF::HashTableCapacityForSize<WTF_ARRAY_LENGTH(localeScriptList)>::value;
};
-typedef HashMap<String, UScriptCode, DefaultHash<String>::Hash, LocaleScriptMapHashTraits> LocaleScriptMap;
+typedef HashMap<String, UScriptCode, ASCIICaseInsensitiveHash, LocaleScriptMapHashTraits> LocaleScriptMap;
UScriptCode localeToScriptCodeForFontSelection(const String& locale)
{
- DEFINE_STATIC_LOCAL(LocaleScriptMap, localeScriptMap, ());
- if (localeScriptMap.isEmpty()) {
- for (size_t i = 0; i < sizeof(localeScriptList) / sizeof(LocaleScript); ++i)
- localeScriptMap.set(ASCIILiteral(localeScriptList[i].locale), localeScriptList[i].script);
- }
+ static NeverDestroyed<LocaleScriptMap> localeScriptMap = []() {
+ LocaleScriptMap map;
+ for (auto& localeAndScript : localeScriptList)
+ map.add(ASCIILiteral(localeAndScript.locale), localeAndScript.script);
+ return map;
+ }();
- String canonicalLocale = locale.lower().replace('-', '_');
+ String canonicalLocale = locale;
+ canonicalLocale.replace('-', '_');
while (!canonicalLocale.isEmpty()) {
- LocaleScriptMap::iterator it = localeScriptMap.find(canonicalLocale);
- if (it != localeScriptMap.end())
+ auto it = localeScriptMap.get().find(canonicalLocale);
+ if (it != localeScriptMap.get().end())
return it->value;
- size_t pos = canonicalLocale.reverseFind('_');
- if (pos == notFound)
+ auto underscorePosition = canonicalLocale.reverseFind('_');
+ if (underscorePosition == notFound)
break;
- UScriptCode code = scriptNameToCode(canonicalLocale.substring(pos + 1));
+ UScriptCode code = scriptNameToCode(canonicalLocale.substring(underscorePosition + 1));
if (code != USCRIPT_INVALID_CODE && code != USCRIPT_UNKNOWN)
return code;
- canonicalLocale = canonicalLocale.substring(0, pos);
+ canonicalLocale = canonicalLocale.substring(0, underscorePosition);
}
return USCRIPT_COMMON;
}
diff --git a/Source/WebCore/platform/text/LocaleToScriptMappingICU.cpp b/Source/WebCore/platform/text/LocaleToScriptMappingICU.cpp
new file mode 100644
index 000000000..6c60d821e
--- /dev/null
+++ b/Source/WebCore/platform/text/LocaleToScriptMappingICU.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2011 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "LocaleToScriptMapping.h"
+
+#include <unicode/uloc.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+// Collapses related script codes into one so that Settings can assign a single per-script font:
+// Hiragana, Katakana and Japanese map to KATAKANA_OR_HIRAGANA, and Korean maps to Hangul.
+// Every other code is returned unchanged.
+static UScriptCode scriptCodeForFontSelection(UScriptCode scriptCode)
+{
+    const bool isJapaneseFamily = scriptCode == USCRIPT_HIRAGANA
+        || scriptCode == USCRIPT_KATAKANA
+        || scriptCode == USCRIPT_JAPANESE;
+    if (isJapaneseFamily)
+        return USCRIPT_KATAKANA_OR_HIRAGANA;
+    if (scriptCode == USCRIPT_KOREAN)
+        return USCRIPT_HANGUL;
+    return scriptCode;
+}
+
+// Maps a locale identifier to the script code used for per-script font selection.
+//
+// The locale is expanded with ICU's likely subtags, its script subtag is extracted and
+// converted to a UScriptCode, and the result is passed through scriptCodeForFontSelection().
+// Returns USCRIPT_COMMON for an empty locale or if any ICU step fails.
+UScriptCode localeToScriptCodeForFontSelection(const String& locale)
+{
+    if (locale.isEmpty())
+        return USCRIPT_COMMON;
+
+    // U_STRING_NOT_TERMINATED_WARNING is not a failure for U_FAILURE(), but it means the output
+    // exactly filled the buffer with no room for a terminating NUL. The buffers below are passed
+    // on as C strings, so an unterminated result must be rejected as well.
+    char maximizedLocale[ULOC_FULLNAME_CAPACITY];
+    UErrorCode status = U_ZERO_ERROR;
+    uloc_addLikelySubtags(locale.utf8().data(), maximizedLocale, sizeof(maximizedLocale), &status);
+    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+        return USCRIPT_COMMON;
+
+    char script[ULOC_SCRIPT_CAPACITY];
+    uloc_getScript(maximizedLocale, script, sizeof(script), &status);
+    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
+        return USCRIPT_COMMON;
+
+    UScriptCode scriptCode = USCRIPT_COMMON;
+    uscript_getCode(script, &scriptCode, 1, &status);
+    // Ignore error that multiple scripts could be returned, since we only want one script.
+    if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+        return USCRIPT_COMMON;
+
+    return scriptCodeForFontSelection(scriptCode);
+}
+
+// Looks up |name| as a value of ICU's UCHAR_SCRIPT property.
+// Returns USCRIPT_INVALID_CODE when the lookup result is outside [0, USCRIPT_CODE_LIMIT).
+UScriptCode scriptNameToCode(const String& name)
+{
+    const int32_t propertyValue = u_getPropertyValueEnum(UCHAR_SCRIPT, name.utf8().data());
+    const bool isKnownScript = propertyValue >= 0 && propertyValue < USCRIPT_CODE_LIMIT;
+    if (!isKnownScript)
+        return USCRIPT_INVALID_CODE;
+    return static_cast<UScriptCode>(propertyValue);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/NonCJKGlyphOrientation.h b/Source/WebCore/platform/text/NonCJKGlyphOrientation.h
deleted file mode 100644
index 515c23b86..000000000
--- a/Source/WebCore/platform/text/NonCJKGlyphOrientation.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2011 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef NonCJKGlyphOrientation_h
-#define NonCJKGlyphOrientation_h
-
-namespace WebCore {
-
-enum NonCJKGlyphOrientation { NonCJKGlyphOrientationVerticalRight, NonCJKGlyphOrientationUpright };
-
-}
-
-#endif // NonCJKGlyphOrientation_h
diff --git a/Source/WebCore/platform/text/PlatformLocale.cpp b/Source/WebCore/platform/text/PlatformLocale.cpp
index 5b7598101..de7e5f705 100644
--- a/Source/WebCore/platform/text/PlatformLocale.cpp
+++ b/Source/WebCore/platform/text/PlatformLocale.cpp
@@ -51,8 +51,8 @@ public:
private:
// DateTimeFormat::TokenHandler functions.
- virtual void visitField(DateTimeFormat::FieldType, int) override final;
- virtual void visitLiteral(const String&) override final;
+ void visitField(DateTimeFormat::FieldType, int) final;
+ void visitLiteral(const String&) final;
String zeroPadString(const String&, size_t width);
void appendNumber(int number, size_t width);
@@ -81,7 +81,7 @@ String DateTimeStringBuilder::zeroPadString(const String& string, size_t width)
StringBuilder zeroPaddedStringBuilder;
zeroPaddedStringBuilder.reserveCapacity(width);
for (size_t i = string.length(); i < width; ++i)
- zeroPaddedStringBuilder.append("0");
+ zeroPaddedStringBuilder.append('0');
zeroPaddedStringBuilder.append(string);
return zeroPaddedStringBuilder.toString();
}
@@ -311,7 +311,7 @@ String Locale::convertFromLocalizedNumber(const String& localized)
StringBuilder builder;
builder.reserveCapacity(input.length());
if (isNegative)
- builder.append("-");
+ builder.append('-');
for (unsigned i = startIndex; i < endIndex;) {
unsigned symbolIndex = matchedDecimalSymbolIndex(input, i);
if (symbolIndex >= DecimalSymbolsSize)
diff --git a/Source/WebCore/platform/text/PlatformLocale.h b/Source/WebCore/platform/text/PlatformLocale.h
index 9fc639cea..91198f271 100644
--- a/Source/WebCore/platform/text/PlatformLocale.h
+++ b/Source/WebCore/platform/text/PlatformLocale.h
@@ -28,21 +28,20 @@
#include "DateComponents.h"
#include "Language.h"
-#include <wtf/PassOwnPtr.h>
#include <wtf/text/WTFString.h>
namespace WebCore {
#if PLATFORM(IOS)
-class Font;
+class FontCascade;
#endif
class Locale {
- WTF_MAKE_NONCOPYABLE(Locale);
+ WTF_MAKE_NONCOPYABLE(Locale); WTF_MAKE_FAST_ALLOCATED;
public:
- static PassOwnPtr<Locale> create(const AtomicString& localeIdentifier);
- static PassOwnPtr<Locale> createDefault();
+ static std::unique_ptr<Locale> create(const AtomicString& localeIdentifier);
+ static std::unique_ptr<Locale> createDefault();
// Converts the specified number string to another number string localized
// for this Locale locale. The input string must conform to HTML
@@ -124,12 +123,6 @@ public:
#endif // !PLATFORM(IOS)
#endif
-#if PLATFORM(IOS)
- // FIXME: This code should be merged with Open Source in a way that is future compatible.
- // Maximum width for a formatted date string with a specified font.
- virtual float maximumWidthForDateType(DateComponents::Type, const Font&) = 0;
-#endif
-
virtual ~Locale();
protected:
@@ -156,7 +149,7 @@ private:
bool m_hasLocaleData;
};
-inline PassOwnPtr<Locale> Locale::createDefault()
+inline std::unique_ptr<Locale> Locale::createDefault()
{
return Locale::create(defaultLanguage());
}
diff --git a/Source/WebCore/platform/text/SegmentedString.cpp b/Source/WebCore/platform/text/SegmentedString.cpp
index b0dc3d3dd..ff31b2adb 100644
--- a/Source/WebCore/platform/text/SegmentedString.cpp
+++ b/Source/WebCore/platform/text/SegmentedString.cpp
@@ -1,5 +1,5 @@
/*
- Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ Copyright (C) 2004-2016 Apple Inc. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
@@ -20,330 +20,204 @@
#include "config.h"
#include "SegmentedString.h"
+#include <wtf/text/StringBuilder.h>
+#include <wtf/text/TextPosition.h>
+
namespace WebCore {
-SegmentedString::SegmentedString(const SegmentedString& other)
- : m_pushedChar1(other.m_pushedChar1)
- , m_pushedChar2(other.m_pushedChar2)
- , m_currentString(other.m_currentString)
- , m_numberOfCharactersConsumedPriorToCurrentString(other.m_numberOfCharactersConsumedPriorToCurrentString)
- , m_numberOfCharactersConsumedPriorToCurrentLine(other.m_numberOfCharactersConsumedPriorToCurrentLine)
- , m_currentLine(other.m_currentLine)
- , m_substrings(other.m_substrings)
- , m_closed(other.m_closed)
- , m_empty(other.m_empty)
- , m_fastPathFlags(other.m_fastPathFlags)
- , m_advanceFunc(other.m_advanceFunc)
- , m_advanceAndUpdateLineNumberFunc(other.m_advanceAndUpdateLineNumberFunc)
+inline void SegmentedString::Substring::appendTo(StringBuilder& builder) const
{
- if (m_pushedChar2)
- m_currentChar = m_pushedChar2;
- else if (m_pushedChar1)
- m_currentChar = m_pushedChar1;
- else
- m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
+ builder.append(string, string.length() - length, length);
}
-const SegmentedString& SegmentedString::operator=(const SegmentedString& other)
+SegmentedString& SegmentedString::operator=(SegmentedString&& other)
{
- m_pushedChar1 = other.m_pushedChar1;
- m_pushedChar2 = other.m_pushedChar2;
- m_currentString = other.m_currentString;
- m_substrings = other.m_substrings;
- if (m_pushedChar2)
- m_currentChar = m_pushedChar2;
- else if (m_pushedChar1)
- m_currentChar = m_pushedChar1;
- else
- m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
+ m_currentSubstring = WTFMove(other.m_currentSubstring);
+ m_otherSubstrings = WTFMove(other.m_otherSubstrings);
- m_closed = other.m_closed;
- m_empty = other.m_empty;
- m_fastPathFlags = other.m_fastPathFlags;
- m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString;
+ m_isClosed = other.m_isClosed;
+
+ m_currentCharacter = other.m_currentCharacter;
+
+ m_numberOfCharactersConsumedPriorToCurrentSubstring = other.m_numberOfCharactersConsumedPriorToCurrentSubstring;
m_numberOfCharactersConsumedPriorToCurrentLine = other.m_numberOfCharactersConsumedPriorToCurrentLine;
m_currentLine = other.m_currentLine;
- m_advanceFunc = other.m_advanceFunc;
- m_advanceAndUpdateLineNumberFunc = other.m_advanceAndUpdateLineNumberFunc;
+ m_fastPathFlags = other.m_fastPathFlags;
+ m_advanceWithoutUpdatingLineNumberFunction = other.m_advanceWithoutUpdatingLineNumberFunction;
+ m_advanceAndUpdateLineNumberFunction = other.m_advanceAndUpdateLineNumberFunction;
+
+ other.clear();
return *this;
}
unsigned SegmentedString::length() const
{
- unsigned length = m_currentString.m_length;
- if (m_pushedChar1) {
- ++length;
- if (m_pushedChar2)
- ++length;
- }
- if (isComposite()) {
- Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin();
- Deque<SegmentedSubstring>::const_iterator e = m_substrings.end();
- for (; it != e; ++it)
- length += it->m_length;
- }
+ unsigned length = m_currentSubstring.length;
+ for (auto& substring : m_otherSubstrings)
+ length += substring.length;
return length;
}
void SegmentedString::setExcludeLineNumbers()
{
- m_currentString.setExcludeLineNumbers();
- if (isComposite()) {
- Deque<SegmentedSubstring>::iterator it = m_substrings.begin();
- Deque<SegmentedSubstring>::iterator e = m_substrings.end();
- for (; it != e; ++it)
- it->setExcludeLineNumbers();
- }
+ if (!m_currentSubstring.doNotExcludeLineNumbers)
+ return;
+ m_currentSubstring.doNotExcludeLineNumbers = false;
+ for (auto& substring : m_otherSubstrings)
+ substring.doNotExcludeLineNumbers = false;
+ updateAdvanceFunctionPointers();
}
void SegmentedString::clear()
{
- m_pushedChar1 = 0;
- m_pushedChar2 = 0;
- m_currentChar = 0;
- m_currentString.clear();
- m_numberOfCharactersConsumedPriorToCurrentString = 0;
+ m_currentSubstring.length = 0;
+ m_otherSubstrings.clear();
+
+ m_isClosed = false;
+
+ m_currentCharacter = 0;
+
+ m_numberOfCharactersConsumedPriorToCurrentSubstring = 0;
m_numberOfCharactersConsumedPriorToCurrentLine = 0;
m_currentLine = 0;
- m_substrings.clear();
- m_closed = false;
- m_empty = true;
- m_fastPathFlags = NoFastPath;
- m_advanceFunc = &SegmentedString::advanceEmpty;
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
-}
-void SegmentedString::append(const SegmentedSubstring& s)
-{
- ASSERT(!m_closed);
- if (!s.m_length)
- return;
-
- if (!m_currentString.m_length) {
- m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
- m_currentString = s;
- updateAdvanceFunctionPointers();
- } else
- m_substrings.append(s);
- m_empty = false;
+ updateAdvanceFunctionPointersForEmptyString();
}
-void SegmentedString::prepend(const SegmentedSubstring& s)
+inline void SegmentedString::appendSubstring(Substring&& substring)
{
- ASSERT(!escaped());
- ASSERT(!s.numberOfCharactersConsumed());
- if (!s.m_length)
+ ASSERT(!m_isClosed);
+ if (!substring.length)
return;
-
- // FIXME: We're assuming that the prepend were originally consumed by
- // this SegmentedString. We're also ASSERTing that s is a fresh
- // SegmentedSubstring. These assumptions are sufficient for our
- // current use, but we might need to handle the more elaborate
- // cases in the future.
- m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
- m_numberOfCharactersConsumedPriorToCurrentString -= s.m_length;
- if (!m_currentString.m_length) {
- m_currentString = s;
- updateAdvanceFunctionPointers();
- } else {
- // Shift our m_currentString into our list.
- m_substrings.prepend(m_currentString);
- m_currentString = s;
+ if (m_currentSubstring.length)
+ m_otherSubstrings.append(WTFMove(substring));
+ else {
+ m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed();
+ m_currentSubstring = WTFMove(substring);
+ m_currentCharacter = m_currentSubstring.currentCharacter();
updateAdvanceFunctionPointers();
}
- m_empty = false;
}
-void SegmentedString::close()
+void SegmentedString::pushBack(String&& string)
{
- // Closing a stream twice is likely a coding mistake.
- ASSERT(!m_closed);
- m_closed = true;
+ // We never create a substring for an empty string.
+ ASSERT(string.length());
+
+ // The new substring we will create won't have the doNotExcludeLineNumbers set appropriately.
+ // That was lost when the characters were consumed before pushing them back. But this does
+ // not matter, because clients never use this for newlines. Catch that with this assertion.
+ ASSERT(!string.contains('\n'));
+
+ // The characters in the string must be previously consumed characters from this segmented string.
+ ASSERT(string.length() <= numberOfCharactersConsumed());
+
+ m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed();
+ if (m_currentSubstring.length)
+ m_otherSubstrings.prepend(WTFMove(m_currentSubstring));
+ m_currentSubstring = WTFMove(string);
+ m_numberOfCharactersConsumedPriorToCurrentSubstring -= m_currentSubstring.length;
+ m_currentCharacter = m_currentSubstring.currentCharacter();
+ updateAdvanceFunctionPointers();
}
-void SegmentedString::append(const SegmentedString& s)
+void SegmentedString::close()
{
- ASSERT(!m_closed);
- ASSERT(!s.escaped());
- append(s.m_currentString);
- if (s.isComposite()) {
- Deque<SegmentedSubstring>::const_iterator it = s.m_substrings.begin();
- Deque<SegmentedSubstring>::const_iterator e = s.m_substrings.end();
- for (; it != e; ++it)
- append(*it);
- }
- m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0);
+ ASSERT(!m_isClosed);
+ m_isClosed = true;
}
-void SegmentedString::prepend(const SegmentedString& s)
+void SegmentedString::append(const SegmentedString& string)
{
- ASSERT(!escaped());
- ASSERT(!s.escaped());
- if (s.isComposite()) {
- Deque<SegmentedSubstring>::const_reverse_iterator it = s.m_substrings.rbegin();
- Deque<SegmentedSubstring>::const_reverse_iterator e = s.m_substrings.rend();
- for (; it != e; ++it)
- prepend(*it);
- }
- prepend(s.m_currentString);
- m_currentChar = m_pushedChar1 ? m_pushedChar1 : (m_currentString.m_length ? m_currentString.getCurrentChar() : 0);
+ appendSubstring(Substring { string.m_currentSubstring });
+ for (auto& substring : string.m_otherSubstrings)
+ m_otherSubstrings.append(substring);
}
-void SegmentedString::advanceSubstring()
+void SegmentedString::append(SegmentedString&& string)
{
- if (isComposite()) {
- m_numberOfCharactersConsumedPriorToCurrentString += m_currentString.numberOfCharactersConsumed();
- m_currentString = m_substrings.takeFirst();
- // If we've previously consumed some characters of the non-current
- // string, we now account for those characters as part of the current
- // string, not as part of "prior to current string."
- m_numberOfCharactersConsumedPriorToCurrentString -= m_currentString.numberOfCharactersConsumed();
- updateAdvanceFunctionPointers();
- } else {
- m_currentString.clear();
- m_empty = true;
- m_fastPathFlags = NoFastPath;
- m_advanceFunc = &SegmentedString::advanceEmpty;
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
- }
+ appendSubstring(WTFMove(string.m_currentSubstring));
+ for (auto& substring : string.m_otherSubstrings)
+ m_otherSubstrings.append(WTFMove(substring));
}
-String SegmentedString::toString() const
+void SegmentedString::append(String&& string)
{
- StringBuilder result;
- if (m_pushedChar1) {
- result.append(m_pushedChar1);
- if (m_pushedChar2)
- result.append(m_pushedChar2);
- }
- m_currentString.appendTo(result);
- if (isComposite()) {
- Deque<SegmentedSubstring>::const_iterator it = m_substrings.begin();
- Deque<SegmentedSubstring>::const_iterator e = m_substrings.end();
- for (; it != e; ++it)
- it->appendTo(result);
- }
- return result.toString();
+ appendSubstring(WTFMove(string));
}
-void SegmentedString::advance(unsigned count, UChar* consumedCharacters)
+void SegmentedString::append(const String& string)
{
- ASSERT_WITH_SECURITY_IMPLICATION(count <= length());
- for (unsigned i = 0; i < count; ++i) {
- consumedCharacters[i] = currentChar();
- advance();
- }
+ appendSubstring(String { string });
}
-void SegmentedString::advance8()
+String SegmentedString::toString() const
{
- ASSERT(!m_pushedChar1);
- decrementAndCheckLength();
- m_currentChar = m_currentString.incrementAndGetCurrentChar8();
+ StringBuilder result;
+ m_currentSubstring.appendTo(result);
+ for (auto& substring : m_otherSubstrings)
+ substring.appendTo(result);
+ return result.toString();
}
-void SegmentedString::advance16()
+void SegmentedString::advanceWithoutUpdatingLineNumber16()
{
- ASSERT(!m_pushedChar1);
+ m_currentCharacter = *++m_currentSubstring.currentCharacter16;
decrementAndCheckLength();
- m_currentChar = m_currentString.incrementAndGetCurrentChar16();
-}
-
-void SegmentedString::advanceAndUpdateLineNumber8()
-{
- ASSERT(!m_pushedChar1);
- ASSERT(m_currentString.getCurrentChar() == m_currentChar);
- if (m_currentChar == '\n') {
- ++m_currentLine;
- m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
- }
- decrementAndCheckLength();
- m_currentChar = m_currentString.incrementAndGetCurrentChar8();
}
void SegmentedString::advanceAndUpdateLineNumber16()
{
- ASSERT(!m_pushedChar1);
- ASSERT(m_currentString.getCurrentChar() == m_currentChar);
- if (m_currentChar == '\n') {
- ++m_currentLine;
- m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
- }
+ ASSERT(m_currentSubstring.doNotExcludeLineNumbers);
+ processPossibleNewline();
+ m_currentCharacter = *++m_currentSubstring.currentCharacter16;
decrementAndCheckLength();
- m_currentChar = m_currentString.incrementAndGetCurrentChar16();
}
-void SegmentedString::advanceSlowCase()
+inline void SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber()
{
- if (m_pushedChar1) {
- m_pushedChar1 = m_pushedChar2;
- m_pushedChar2 = 0;
-
- if (m_pushedChar1) {
- m_currentChar = m_pushedChar1;
- return;
- }
-
- updateAdvanceFunctionPointers();
- } else if (m_currentString.m_length) {
- if (--m_currentString.m_length == 0)
- advanceSubstring();
- } else if (!isComposite()) {
- m_currentString.clear();
- m_empty = true;
- m_fastPathFlags = NoFastPath;
- m_advanceFunc = &SegmentedString::advanceEmpty;
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
+ ASSERT(m_currentSubstring.length == 1);
+ if (m_otherSubstrings.isEmpty()) {
+ m_currentSubstring.length = 0;
+ m_currentCharacter = 0;
+ updateAdvanceFunctionPointersForEmptyString();
+ return;
}
- m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
+ m_numberOfCharactersConsumedPriorToCurrentSubstring += m_currentSubstring.numberOfCharactersConsumed();
+ m_currentSubstring = m_otherSubstrings.takeFirst();
+ // If we've previously consumed some characters of the non-current string, we now account for those
+ // characters as part of the current string, not as part of "prior to current string."
+ m_numberOfCharactersConsumedPriorToCurrentSubstring -= m_currentSubstring.numberOfCharactersConsumed();
+ m_currentCharacter = m_currentSubstring.currentCharacter();
+ updateAdvanceFunctionPointers();
}
-void SegmentedString::advanceAndUpdateLineNumberSlowCase()
+void SegmentedString::advancePastSingleCharacterSubstring()
{
- if (m_pushedChar1) {
- m_pushedChar1 = m_pushedChar2;
- m_pushedChar2 = 0;
-
- if (m_pushedChar1) {
- m_currentChar = m_pushedChar1;
- return;
- }
-
- updateAdvanceFunctionPointers();
- } else if (m_currentString.m_length) {
- if (m_currentString.getCurrentChar() == '\n' && m_currentString.doNotExcludeLineNumbers()) {
- ++m_currentLine;
- // Plus 1 because numberOfCharactersConsumed value hasn't incremented yet; it does with m_length decrement below.
- m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
- }
- if (--m_currentString.m_length == 0)
- advanceSubstring();
- else
- m_currentString.incrementAndGetCurrentChar(); // Only need the ++
- } else if (!isComposite()) {
- m_currentString.clear();
- m_empty = true;
- m_fastPathFlags = NoFastPath;
- m_advanceFunc = &SegmentedString::advanceEmpty;
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
- }
-
- m_currentChar = m_currentString.m_length ? m_currentString.getCurrentChar() : 0;
+ ASSERT(m_currentSubstring.length == 1);
+ ASSERT(m_currentSubstring.doNotExcludeLineNumbers);
+ processPossibleNewline();
+ advancePastSingleCharacterSubstringWithoutUpdatingLineNumber();
}
void SegmentedString::advanceEmpty()
{
- ASSERT(!m_currentString.m_length && !isComposite());
- m_currentChar = 0;
+ ASSERT(!m_currentSubstring.length);
+ ASSERT(m_otherSubstrings.isEmpty());
+ ASSERT(!m_currentCharacter);
}
-void SegmentedString::updateSlowCaseFunctionPointers()
+void SegmentedString::updateAdvanceFunctionPointersForSingleCharacterSubstring()
{
+ ASSERT(m_currentSubstring.length == 1);
m_fastPathFlags = NoFastPath;
- m_advanceFunc = &SegmentedString::advanceSlowCase;
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumberSlowCase;
+ m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber;
+ if (m_currentSubstring.doNotExcludeLineNumbers)
+ m_advanceAndUpdateLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstring;
+ else
+ m_advanceAndUpdateLineNumberFunction = &SegmentedString::advancePastSingleCharacterSubstringWithoutUpdatingLineNumber;
}
OrdinalNumber SegmentedString::currentLine() const
@@ -353,8 +227,7 @@ OrdinalNumber SegmentedString::currentLine() const
OrdinalNumber SegmentedString::currentColumn() const
{
- int zeroBasedColumn = numberOfCharactersConsumed() - m_numberOfCharactersConsumedPriorToCurrentLine;
- return OrdinalNumber::fromZeroBasedInt(zeroBasedColumn);
+ return OrdinalNumber::fromZeroBasedInt(numberOfCharactersConsumed() - m_numberOfCharactersConsumedPriorToCurrentLine);
}
void SegmentedString::setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength)
@@ -363,4 +236,36 @@ void SegmentedString::setCurrentPosition(OrdinalNumber line, OrdinalNumber colum
m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + prologLength - columnAftreProlog.zeroBasedInt();
}
+SegmentedString::AdvancePastResult SegmentedString::advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase)
+{
+ constexpr unsigned maxLength = 10;
+ ASSERT(!strchr(literal, '\n'));
+ auto length = strlen(literal);
+ ASSERT(length <= maxLength);
+ if (length > this->length())
+ return NotEnoughCharacters;
+ UChar consumedCharacters[maxLength];
+ for (unsigned i = 0; i < length; ++i) {
+ auto character = m_currentCharacter;
+ if (characterMismatch(character, literal[i], lettersIgnoringASCIICase)) {
+ if (i)
+ pushBack(String { consumedCharacters, i });
+ return DidNotMatch;
+ }
+ advancePastNonNewline();
+ consumedCharacters[i] = character;
+ }
+ return DidMatch;
+}
+
+void SegmentedString::updateAdvanceFunctionPointersForEmptyString()
+{
+ ASSERT(!m_currentSubstring.length);
+ ASSERT(m_otherSubstrings.isEmpty());
+ ASSERT(!m_currentCharacter);
+ m_fastPathFlags = NoFastPath;
+ m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceEmpty;
+ m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceEmpty;
+}
+
}
diff --git a/Source/WebCore/platform/text/SegmentedString.h b/Source/WebCore/platform/text/SegmentedString.h
index d5fe367b3..fe7f19140 100644
--- a/Source/WebCore/platform/text/SegmentedString.h
+++ b/Source/WebCore/platform/text/SegmentedString.h
@@ -1,5 +1,5 @@
/*
- Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
+ Copyright (C) 2004-2016 Apple Inc. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
@@ -17,406 +17,307 @@
Boston, MA 02110-1301, USA.
*/
-#ifndef SegmentedString_h
-#define SegmentedString_h
+#pragma once
#include <wtf/Deque.h>
-#include <wtf/text/StringBuilder.h>
-#include <wtf/text/TextPosition.h>
#include <wtf/text/WTFString.h>
namespace WebCore {
-class SegmentedString;
+// FIXME: This should not start with "k".
+// FIXME: This is a shared tokenizer concept, not a SegmentedString concept, but this is the only common header for now.
+constexpr LChar kEndOfFileMarker = 0;
-class SegmentedSubstring {
+class SegmentedString {
public:
- SegmentedSubstring()
- : m_length(0)
- , m_doNotExcludeLineNumbers(true)
- , m_is8Bit(false)
- {
- m_data.string16Ptr = 0;
- }
+ SegmentedString() = default;
+ SegmentedString(String&&);
+ SegmentedString(const String&);
- SegmentedSubstring(const String& str)
- : m_length(str.length())
- , m_doNotExcludeLineNumbers(true)
- , m_string(str)
- {
- if (m_length) {
- if (m_string.is8Bit()) {
- m_is8Bit = true;
- m_data.string8Ptr = m_string.characters8();
- } else {
- m_is8Bit = false;
- m_data.string16Ptr = m_string.characters16();
- }
- } else
- m_is8Bit = false;
- }
+ SegmentedString(SegmentedString&&) = delete;
+ SegmentedString(const SegmentedString&) = delete;
- void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
-
- bool is8Bit() { return m_is8Bit; }
-
- bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
- bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
+ SegmentedString& operator=(SegmentedString&&);
+ SegmentedString& operator=(const SegmentedString&) = default;
- void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
+ void clear();
+ void close();
- int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
+ void append(SegmentedString&&);
+ void append(const SegmentedString&);
- void appendTo(StringBuilder& builder) const
- {
- int offset = m_string.length() - m_length;
+ void append(String&&);
+ void append(const String&);
- if (!offset) {
- if (m_length)
- builder.append(m_string);
- } else
- builder.append(m_string.substring(offset, m_length));
- }
+ void pushBack(String&&);
- UChar getCurrentChar8()
- {
- return *m_data.string8Ptr;
- }
+ void setExcludeLineNumbers();
- UChar getCurrentChar16()
- {
- return m_data.string16Ptr ? *m_data.string16Ptr : 0;
- }
+ bool isEmpty() const { return !m_currentSubstring.length; }
+ unsigned length() const;
- UChar incrementAndGetCurrentChar8()
- {
- ASSERT(m_data.string8Ptr);
- return *++m_data.string8Ptr;
- }
+ bool isClosed() const { return m_isClosed; }
- UChar incrementAndGetCurrentChar16()
- {
- ASSERT(m_data.string16Ptr);
- return *++m_data.string16Ptr;
- }
+ void advance();
+ void advancePastNonNewline(); // Faster than calling advance when we know the current character is not a newline.
+ void advancePastNewline(); // Faster than calling advance when we know the current character is a newline.
- String currentSubString(unsigned length)
- {
- int offset = m_string.length() - m_length;
- return m_string.substring(offset, length);
- }
+ enum AdvancePastResult { DidNotMatch, DidMatch, NotEnoughCharacters };
+ template<unsigned length> AdvancePastResult advancePast(const char (&literal)[length]) { return advancePast<length, false>(literal); }
+ template<unsigned length> AdvancePastResult advancePastLettersIgnoringASCIICase(const char (&literal)[length]) { return advancePast<length, true>(literal); }
- ALWAYS_INLINE UChar getCurrentChar()
- {
- ASSERT(m_length);
- if (is8Bit())
- return getCurrentChar8();
- return getCurrentChar16();
- }
-
- ALWAYS_INLINE UChar incrementAndGetCurrentChar()
- {
- ASSERT(m_length);
- if (is8Bit())
- return incrementAndGetCurrentChar8();
- return incrementAndGetCurrentChar16();
- }
+ unsigned numberOfCharactersConsumed() const;
-public:
- union {
- const LChar* string8Ptr;
- const UChar* string16Ptr;
- } m_data;
- int m_length;
-
-private:
- bool m_doNotExcludeLineNumbers;
- bool m_is8Bit;
- String m_string;
-};
+ String toString() const;
-class SegmentedString {
-public:
- SegmentedString()
- : m_pushedChar1(0)
- , m_pushedChar2(0)
- , m_currentChar(0)
- , m_numberOfCharactersConsumedPriorToCurrentString(0)
- , m_numberOfCharactersConsumedPriorToCurrentLine(0)
- , m_currentLine(0)
- , m_closed(false)
- , m_empty(true)
- , m_fastPathFlags(NoFastPath)
- , m_advanceFunc(&SegmentedString::advanceEmpty)
- , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
- {
- }
+ UChar currentCharacter() const { return m_currentCharacter; }
- SegmentedString(const String& str)
- : m_pushedChar1(0)
- , m_pushedChar2(0)
- , m_currentString(str)
- , m_currentChar(0)
- , m_numberOfCharactersConsumedPriorToCurrentString(0)
- , m_numberOfCharactersConsumedPriorToCurrentLine(0)
- , m_currentLine(0)
- , m_closed(false)
- , m_empty(!str.length())
- , m_fastPathFlags(NoFastPath)
- {
- if (m_currentString.m_length)
- m_currentChar = m_currentString.getCurrentChar();
- updateAdvanceFunctionPointers();
- }
+ OrdinalNumber currentColumn() const;
+ OrdinalNumber currentLine() const;
- SegmentedString(const SegmentedString&);
+ // Sets value of line/column variables. Column is specified indirectly by a parameter columnAfterProlog
+ // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
+ void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAfterProlog, int prologLength);
- const SegmentedString& operator=(const SegmentedString&);
+private:
+ struct Substring {
+ Substring() = default;
+ Substring(String&&);
+
+ UChar currentCharacter() const;
+ UChar currentCharacterPreIncrement();
+
+ unsigned numberOfCharactersConsumed() const;
+ void appendTo(StringBuilder&) const;
+
+ String string;
+ unsigned length { 0 };
+ bool is8Bit;
+ union {
+ const LChar* currentCharacter8;
+ const UChar* currentCharacter16;
+ };
+ bool doNotExcludeLineNumbers { true };
+ };
- void clear();
- void close();
+ enum FastPathFlags {
+ NoFastPath = 0,
+ Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
+ Use8BitAdvance = 1 << 1,
+ };
- void append(const SegmentedString&);
- void prepend(const SegmentedString&);
+ void appendSubstring(Substring&&);
- bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
- void setExcludeLineNumbers();
+ void processPossibleNewline();
+ void startNewLine();
- void push(UChar c)
- {
- if (!m_pushedChar1) {
- m_pushedChar1 = c;
- m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
- updateSlowCaseFunctionPointers();
- } else {
- ASSERT(!m_pushedChar2);
- m_pushedChar2 = c;
- }
- }
+ void advanceWithoutUpdatingLineNumber();
+ void advanceWithoutUpdatingLineNumber16();
+ void advanceAndUpdateLineNumber16();
+ void advancePastSingleCharacterSubstringWithoutUpdatingLineNumber();
+ void advancePastSingleCharacterSubstring();
+ void advanceEmpty();
- bool isEmpty() const { return m_empty; }
- unsigned length() const;
+ void updateAdvanceFunctionPointers();
+ void updateAdvanceFunctionPointersForEmptyString();
+ void updateAdvanceFunctionPointersForSingleCharacterSubstring();
- bool isClosed() const { return m_closed; }
+ void decrementAndCheckLength();
- enum LookAheadResult {
- DidNotMatch,
- DidMatch,
- NotEnoughCharacters,
- };
+ template<typename CharacterType> static bool characterMismatch(CharacterType, char, bool lettersIgnoringASCIICase);
+ template<unsigned length, bool lettersIgnoringASCIICase> AdvancePastResult advancePast(const char (&literal)[length]);
+ AdvancePastResult advancePastSlowCase(const char* literal, bool lettersIgnoringASCIICase);
- LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
- LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }
+ Substring m_currentSubstring;
+ Deque<Substring> m_otherSubstrings;
- void advance()
- {
- if (m_fastPathFlags & Use8BitAdvance) {
- ASSERT(!m_pushedChar1);
- bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
- m_currentChar = m_currentString.incrementAndGetCurrentChar8();
+ bool m_isClosed { false };
- if (!haveOneCharacterLeft)
- return;
+ UChar m_currentCharacter { 0 };
- updateSlowCaseFunctionPointers();
+ unsigned m_numberOfCharactersConsumedPriorToCurrentSubstring { 0 };
+ unsigned m_numberOfCharactersConsumedPriorToCurrentLine { 0 };
+ int m_currentLine { 0 };
- return;
- }
+ unsigned char m_fastPathFlags { NoFastPath };
+ void (SegmentedString::*m_advanceWithoutUpdatingLineNumberFunction)() { &SegmentedString::advanceEmpty };
+ void (SegmentedString::*m_advanceAndUpdateLineNumberFunction)() { &SegmentedString::advanceEmpty };
+};
- (this->*m_advanceFunc)();
+inline SegmentedString::Substring::Substring(String&& passedString)
+ : string(WTFMove(passedString))
+ , length(string.length())
+{
+ if (length) {
+ is8Bit = string.impl()->is8Bit();
+ if (is8Bit)
+ currentCharacter8 = string.impl()->characters8();
+ else
+ currentCharacter16 = string.impl()->characters16();
}
+}
- inline void advanceAndUpdateLineNumber()
- {
- if (m_fastPathFlags & Use8BitAdvance) {
- ASSERT(!m_pushedChar1);
-
- bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
- bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
+inline unsigned SegmentedString::Substring::numberOfCharactersConsumed() const
+{
+ return string.length() - length;
+}
- m_currentChar = m_currentString.incrementAndGetCurrentChar8();
+ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacter() const
+{
+ ASSERT(length);
+ return is8Bit ? *currentCharacter8 : *currentCharacter16;
+}
- if (!(haveNewLine | haveOneCharacterLeft))
- return;
+ALWAYS_INLINE UChar SegmentedString::Substring::currentCharacterPreIncrement()
+{
+ ASSERT(length);
+ return is8Bit ? *++currentCharacter8 : *++currentCharacter16;
+}
- if (haveNewLine) {
- ++m_currentLine;
- m_numberOfCharactersConsumedPriorToCurrentLine = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
- }
+inline SegmentedString::SegmentedString(String&& string)
+ : m_currentSubstring(WTFMove(string))
+{
+ if (m_currentSubstring.length) {
+ m_currentCharacter = m_currentSubstring.currentCharacter();
+ updateAdvanceFunctionPointers();
+ }
+}
- if (haveOneCharacterLeft)
- updateSlowCaseFunctionPointers();
+inline SegmentedString::SegmentedString(const String& string)
+ : SegmentedString(String { string })
+{
+}
- return;
- }
+ALWAYS_INLINE void SegmentedString::decrementAndCheckLength()
+{
+ ASSERT(m_currentSubstring.length > 1);
+ if (UNLIKELY(--m_currentSubstring.length == 1))
+ updateAdvanceFunctionPointersForSingleCharacterSubstring();
+}
- (this->*m_advanceAndUpdateLineNumberFunc)();
+ALWAYS_INLINE void SegmentedString::advanceWithoutUpdatingLineNumber()
+{
+ if (LIKELY(m_fastPathFlags & Use8BitAdvance)) {
+ m_currentCharacter = *++m_currentSubstring.currentCharacter8;
+ decrementAndCheckLength();
+ return;
}
- void advanceAndASSERT(UChar expectedCharacter)
- {
- ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
- advance();
- }
+ (this->*m_advanceWithoutUpdatingLineNumberFunction)();
+}
- void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
- {
- ASSERT_UNUSED(expectedCharacter, u_foldCase(currentChar(), U_FOLD_CASE_DEFAULT) == u_foldCase(expectedCharacter, U_FOLD_CASE_DEFAULT));
- advance();
- }
+inline void SegmentedString::startNewLine()
+{
+ ++m_currentLine;
+ m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed();
+}
- void advancePastNonNewline()
- {
- ASSERT(currentChar() != '\n');
- advance();
- }
+inline void SegmentedString::processPossibleNewline()
+{
+ if (m_currentCharacter == '\n')
+ startNewLine();
+}
- void advancePastNewlineAndUpdateLineNumber()
- {
- ASSERT(currentChar() == '\n');
- if (!m_pushedChar1 && m_currentString.m_length > 1) {
- int newLineFlag = m_currentString.doNotExcludeLineNumbers();
- m_currentLine += newLineFlag;
- if (newLineFlag)
- m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
- decrementAndCheckLength();
- m_currentChar = m_currentString.incrementAndGetCurrentChar();
+inline void SegmentedString::advance()
+{
+ if (LIKELY(m_fastPathFlags & Use8BitAdvance)) {
+ ASSERT(m_currentSubstring.length > 1);
+ bool lastCharacterWasNewline = m_currentCharacter == '\n';
+ m_currentCharacter = *++m_currentSubstring.currentCharacter8;
+ bool haveOneCharacterLeft = --m_currentSubstring.length == 1;
+ if (LIKELY(!(lastCharacterWasNewline | haveOneCharacterLeft)))
return;
- }
- advanceAndUpdateLineNumberSlowCase();
+ if (lastCharacterWasNewline & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers))
+ startNewLine();
+ if (haveOneCharacterLeft)
+ updateAdvanceFunctionPointersForSingleCharacterSubstring();
+ return;
}
- // Writes the consumed characters into consumedCharacters, which must
- // have space for at least |count| characters.
- void advance(unsigned count, UChar* consumedCharacters);
+ (this->*m_advanceAndUpdateLineNumberFunction)();
+}
- bool escaped() const { return m_pushedChar1; }
+ALWAYS_INLINE void SegmentedString::advancePastNonNewline()
+{
+ ASSERT(m_currentCharacter != '\n');
+ advanceWithoutUpdatingLineNumber();
+}
- int numberOfCharactersConsumed() const
- {
- int numberOfPushedCharacters = 0;
- if (m_pushedChar1) {
- ++numberOfPushedCharacters;
- if (m_pushedChar2)
- ++numberOfPushedCharacters;
- }
- return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
+inline void SegmentedString::advancePastNewline()
+{
+ ASSERT(m_currentCharacter == '\n');
+ if (m_currentSubstring.length > 1) {
+ if (m_currentSubstring.doNotExcludeLineNumbers)
+ startNewLine();
+ m_currentCharacter = m_currentSubstring.currentCharacterPreIncrement();
+ decrementAndCheckLength();
+ return;
}
- String toString() const;
-
- UChar currentChar() const { return m_currentChar; }
-
- // The method is moderately slow, comparing to currentLine method.
- OrdinalNumber currentColumn() const;
- OrdinalNumber currentLine() const;
- // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
- // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
- void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
-
-private:
- enum FastPathFlags {
- NoFastPath = 0,
- Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
- Use8BitAdvance = 1 << 1,
- };
+ (this->*m_advanceAndUpdateLineNumberFunction)();
+}
- void append(const SegmentedSubstring&);
- void prepend(const SegmentedSubstring&);
+inline unsigned SegmentedString::numberOfCharactersConsumed() const
+{
+ return m_numberOfCharactersConsumedPriorToCurrentSubstring + m_currentSubstring.numberOfCharactersConsumed();
+}
- void advance8();
- void advance16();
- void advanceAndUpdateLineNumber8();
- void advanceAndUpdateLineNumber16();
- void advanceSlowCase();
- void advanceAndUpdateLineNumberSlowCase();
- void advanceEmpty();
- void advanceSubstring();
-
- void updateSlowCaseFunctionPointers();
-
- void decrementAndCheckLength()
- {
- ASSERT(m_currentString.m_length > 1);
- if (--m_currentString.m_length == 1)
- updateSlowCaseFunctionPointers();
- }
+template<typename CharacterType> ALWAYS_INLINE bool SegmentedString::characterMismatch(CharacterType a, char b, bool lettersIgnoringASCIICase)
+{
+ return lettersIgnoringASCIICase ? !isASCIIAlphaCaselessEqual(a, b) : a != b;
+}
- void updateAdvanceFunctionPointers()
- {
- if ((m_currentString.m_length > 1) && !m_pushedChar1) {
- if (m_currentString.is8Bit()) {
- m_advanceFunc = &SegmentedString::advance8;
- m_fastPathFlags = Use8BitAdvance;
- if (m_currentString.doNotExcludeLineNumbers()) {
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
- m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
- } else
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
- return;
+template<unsigned lengthIncludingTerminator, bool lettersIgnoringASCIICase> SegmentedString::AdvancePastResult SegmentedString::advancePast(const char (&literal)[lengthIncludingTerminator])
+{
+ constexpr unsigned length = lengthIncludingTerminator - 1;
+ ASSERT(!literal[length]);
+ ASSERT(!strchr(literal, '\n'));
+ if (length + 1 < m_currentSubstring.length) {
+ if (m_currentSubstring.is8Bit) {
+ for (unsigned i = 0; i < length; ++i) {
+ if (characterMismatch(m_currentSubstring.currentCharacter8[i], literal[i], lettersIgnoringASCIICase))
+ return DidNotMatch;
}
-
- m_advanceFunc = &SegmentedString::advance16;
- m_fastPathFlags = NoFastPath;
- if (m_currentString.doNotExcludeLineNumbers())
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
- else
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
- return;
- }
-
- if (!m_currentString.m_length && !isComposite()) {
- m_advanceFunc = &SegmentedString::advanceEmpty;
- m_fastPathFlags = NoFastPath;
- m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
+ m_currentSubstring.currentCharacter8 += length;
+ m_currentCharacter = *m_currentSubstring.currentCharacter8;
+ } else {
+ for (unsigned i = 0; i < length; ++i) {
+ if (characterMismatch(m_currentSubstring.currentCharacter16[i], literal[i], lettersIgnoringASCIICase))
+ return DidNotMatch;
+ }
+ m_currentSubstring.currentCharacter16 += length;
+ m_currentCharacter = *m_currentSubstring.currentCharacter16;
}
-
- updateSlowCaseFunctionPointers();
+ m_currentSubstring.length -= length;
+ return DidMatch;
}
+ return advancePastSlowCase(literal, lettersIgnoringASCIICase);
+}
- inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
- {
- if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
- String currentSubstring = m_currentString.currentSubString(string.length());
- if (currentSubstring.startsWith(string, caseSensitive))
- return DidMatch;
- return DidNotMatch;
+inline void SegmentedString::updateAdvanceFunctionPointers()
+{
+ if (m_currentSubstring.length > 1) {
+ if (m_currentSubstring.is8Bit) {
+ m_fastPathFlags = Use8BitAdvance;
+ if (m_currentSubstring.doNotExcludeLineNumbers)
+ m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
+ return;
}
- return lookAheadSlowCase(string, caseSensitive);
- }
-
- LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
- {
- unsigned count = string.length();
- if (count > length())
- return NotEnoughCharacters;
- UChar* consumedCharacters;
- String consumedString = String::createUninitialized(count, consumedCharacters);
- advance(count, consumedCharacters);
- LookAheadResult result = DidNotMatch;
- if (consumedString.startsWith(string, caseSensitive))
- result = DidMatch;
- prepend(SegmentedString(consumedString));
- return result;
+ m_fastPathFlags = NoFastPath;
+ m_advanceWithoutUpdatingLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16;
+ if (m_currentSubstring.doNotExcludeLineNumbers)
+ m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceAndUpdateLineNumber16;
+ else
+ m_advanceAndUpdateLineNumberFunction = &SegmentedString::advanceWithoutUpdatingLineNumber16;
+ return;
}
- bool isComposite() const { return !m_substrings.isEmpty(); }
-
- UChar m_pushedChar1;
- UChar m_pushedChar2;
- SegmentedSubstring m_currentString;
- UChar m_currentChar;
- int m_numberOfCharactersConsumedPriorToCurrentString;
- int m_numberOfCharactersConsumedPriorToCurrentLine;
- int m_currentLine;
- Deque<SegmentedSubstring> m_substrings;
- bool m_closed;
- bool m_empty;
- unsigned char m_fastPathFlags;
- void (SegmentedString::*m_advanceFunc)();
- void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
-};
+ if (!m_currentSubstring.length) {
+ updateAdvanceFunctionPointersForEmptyString();
+ return;
+ }
+ updateAdvanceFunctionPointersForSingleCharacterSubstring();
}
-#endif
+}
diff --git a/Source/WebCore/platform/text/StringWithDirection.h b/Source/WebCore/platform/text/StringWithDirection.h
index 3833b1cdc..6dc87d765 100644
--- a/Source/WebCore/platform/text/StringWithDirection.h
+++ b/Source/WebCore/platform/text/StringWithDirection.h
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2011 Google Inc. All rights reserved.
+ * Copyright (C) 2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -28,10 +29,9 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef StringWithDirection_h
-#define StringWithDirection_h
+#pragma once
-#include "TextDirection.h"
+#include "WritingMode.h"
#include <wtf/text/WTFString.h>
namespace WebCore {
@@ -44,36 +44,30 @@ namespace WebCore {
// Note that is explicitly *not* the direction of the string as learned
// from the characters of the string; it's extra metadata we have external
// to the string.
-class StringWithDirection {
-public:
- StringWithDirection()
- : m_direction(LTR)
- {
- }
- StringWithDirection(const String& string, TextDirection dir)
- : m_string(string)
- , m_direction(dir)
- {
- }
-
- const String& string() const { return m_string; }
- TextDirection direction() const { return m_direction; }
-
- bool isEmpty() const { return m_string.isEmpty(); }
- bool isNull() const { return m_string.isNull(); }
+struct StringWithDirection {
+ StringWithDirection() = default;
+ StringWithDirection(const String& string, TextDirection direction) : string { string }, direction { direction } { }
+ StringWithDirection(String&& string, TextDirection direction) : string { WTFMove(string) }, direction { direction } { }
+ String string;
+ TextDirection direction { LTR };
+};
- bool operator==(const StringWithDirection& other) const
- {
- return other.m_string == m_string && other.m_direction == m_direction;
- }
- bool operator!=(const StringWithDirection& other) const { return !((*this) == other); }
+inline bool operator==(const StringWithDirection& a, const StringWithDirection& b)
+{
+ return a.string == b.string && a.direction == b.direction;
+}
-private:
- String m_string;
- TextDirection m_direction;
-};
+inline bool operator!=(const StringWithDirection& a, const StringWithDirection& b)
+{
+ return !(a == b);
+}
+inline StringWithDirection truncateFromEnd(const StringWithDirection& string, unsigned maxLength)
+{
+ if (string.direction == LTR)
+ return StringWithDirection(string.string.left(maxLength), LTR);
+ return StringWithDirection(string.string.right(maxLength), RTL);
}
-#endif // StringWithDirection_h
+}
diff --git a/Source/WebCore/platform/text/SuffixTree.h b/Source/WebCore/platform/text/SuffixTree.h
index 1f05c363e..f51f2c72f 100644
--- a/Source/WebCore/platform/text/SuffixTree.h
+++ b/Source/WebCore/platform/text/SuffixTree.h
@@ -13,7 +13,7 @@
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
diff --git a/Source/WebCore/platform/text/TextDirection.h b/Source/WebCore/platform/text/TextAllInOne.cpp
index 9a074c85d..2c1d5b4e6 100644
--- a/Source/WebCore/platform/text/TextDirection.h
+++ b/Source/WebCore/platform/text/TextAllInOne.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2003, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2012 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -10,28 +10,30 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextDirection_h
-#define TextDirection_h
+// This all-in-one cpp file cuts down on template bloat to allow us to build our Windows release build.
-namespace WebCore {
-
-enum TextDirection { RTL, LTR };
-
-inline bool isLeftToRightDirection(TextDirection direction) { return direction == LTR; }
-
-}
-
-#endif
+#include "TextBoundaries.cpp"
+#include "TextCodec.cpp"
+#include "TextCodecICU.cpp"
+#include "TextCodecLatin1.cpp"
+#include "TextCodecReplacement.cpp"
+#include "TextCodecUTF16.cpp"
+#include "TextCodecUTF8.cpp"
+#include "TextCodecUserDefined.cpp"
+#include "TextEncoding.cpp"
+#include "TextEncodingDetectorICU.cpp"
+#include "TextEncodingRegistry.cpp"
+#include "TextStream.cpp"
diff --git a/Source/WebCore/platform/text/TextBoundaries.cpp b/Source/WebCore/platform/text/TextBoundaries.cpp
index f5676e2df..e780ff682 100644
--- a/Source/WebCore/platform/text/TextBoundaries.cpp
+++ b/Source/WebCore/platform/text/TextBoundaries.cpp
@@ -11,10 +11,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -27,84 +27,86 @@
#include "config.h"
#include "TextBoundaries.h"
-#include "TextBreakIterator.h"
+#include <unicode/ubrk.h>
#include <wtf/text/StringImpl.h>
-#include <wtf/unicode/Unicode.h>
+#include <wtf/text/TextBreakIterator.h>
namespace WebCore {
-int endOfFirstWordBoundaryContext(const UChar* characters, int length)
+unsigned endOfFirstWordBoundaryContext(StringView text)
{
- for (int i = 0; i < length; ) {
- int first = i;
+ unsigned length = text.length();
+ for (unsigned i = 0; i < length; ) {
+ unsigned first = i;
UChar32 ch;
- U16_NEXT(characters, i, length, ch);
+ U16_NEXT(text, i, length, ch);
if (!requiresContextForWordBoundary(ch))
return first;
}
return length;
}
-int startOfLastWordBoundaryContext(const UChar* characters, int length)
+unsigned startOfLastWordBoundaryContext(StringView text)
{
- for (int i = length; i > 0; ) {
- int last = i;
+ unsigned length = text.length();
+ for (unsigned i = length; i > 0; ) {
+ unsigned last = i;
UChar32 ch;
- U16_PREV(characters, 0, i, ch);
+ U16_PREV(text, 0, i, ch);
if (!requiresContextForWordBoundary(ch))
return last;
}
return 0;
}
-#if !PLATFORM(MAC)
+#if !PLATFORM(COCOA)
-int findNextWordFromIndex(const UChar* chars, int len, int position, bool forward)
+int findNextWordFromIndex(StringView text, int position, bool forward)
{
- TextBreakIterator* it = wordBreakIterator(StringView(chars, len));
+ UBreakIterator* it = wordBreakIterator(text);
if (forward) {
- position = textBreakFollowing(it, position);
- while (position != TextBreakDone) {
+ position = ubrk_following(it, position);
+ while (position != UBRK_DONE) {
// We stop searching when the character preceeding the break is alphanumeric.
- if (position < len && u_isalnum(chars[position - 1]))
+ if (static_cast<unsigned>(position) < text.length() && u_isalnum(text[position - 1]))
return position;
- position = textBreakFollowing(it, position);
+ position = ubrk_following(it, position);
}
- return len;
+ return text.length();
} else {
- position = textBreakPreceding(it, position);
- while (position != TextBreakDone) {
+ position = ubrk_preceding(it, position);
+ while (position != UBRK_DONE) {
// We stop searching when the character following the break is alphanumeric.
- if (position > 0 && u_isalnum(chars[position]))
+ if (position && u_isalnum(text[position]))
return position;
- position = textBreakPreceding(it, position);
+ position = ubrk_preceding(it, position);
}
return 0;
}
}
-void findWordBoundary(const UChar* chars, int len, int position, int* start, int* end)
+void findWordBoundary(StringView text, int position, int* start, int* end)
{
- TextBreakIterator* it = wordBreakIterator(StringView(chars, len));
- *end = textBreakFollowing(it, position);
+ UBreakIterator* it = wordBreakIterator(text);
+ *end = ubrk_following(it, position);
if (*end < 0)
- *end = textBreakLast(it);
- *start = textBreakPrevious(it);
+ *end = ubrk_last(it);
+ *start = ubrk_previous(it);
}
-void findEndWordBoundary(const UChar* chars, int len, int position, int* end)
+void findEndWordBoundary(StringView text, int position, int* end)
{
- TextBreakIterator* it = wordBreakIterator(StringView(chars, len));
- *end = textBreakFollowing(it, position);
+ UBreakIterator* it = wordBreakIterator(text);
+ *end = ubrk_following(it, position);
if (*end < 0)
- *end = textBreakLast(it);
+ *end = ubrk_last(it);
}
-#endif // !PLATFORM(MAC)
+#endif // !PLATFORM(COCOA)
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextBoundaries.h b/Source/WebCore/platform/text/TextBoundaries.h
index 6c83b3bbf..9a873af4d 100644
--- a/Source/WebCore/platform/text/TextBoundaries.h
+++ b/Source/WebCore/platform/text/TextBoundaries.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -26,7 +26,8 @@
#ifndef TextBoundaries_h
#define TextBoundaries_h
-#include <wtf/unicode/Unicode.h>
+#include <unicode/uchar.h>
+#include <wtf/Forward.h>
namespace WebCore {
@@ -41,12 +42,12 @@ namespace WebCore {
return lineBreak == U_LB_COMPLEX_CONTEXT || lineBreak == WK_U_LB_CONDITIONAL_JAPANESE_STARTER || lineBreak == U_LB_IDEOGRAPHIC;
}
- int endOfFirstWordBoundaryContext(const UChar* characters, int length);
- int startOfLastWordBoundaryContext(const UChar* characters, int length);
+ unsigned endOfFirstWordBoundaryContext(StringView);
+ unsigned startOfLastWordBoundaryContext(StringView);
- void findWordBoundary(const UChar*, int len, int position, int* start, int* end);
- void findEndWordBoundary(const UChar*, int len, int position, int* end);
- int findNextWordFromIndex(const UChar*, int len, int position, bool forward);
+ void findWordBoundary(StringView, int position, int* start, int* end);
+ void findEndWordBoundary(StringView, int position, int* end);
+ int findNextWordFromIndex(StringView, int position, bool forward);
}
diff --git a/Source/WebCore/platform/text/TextBreakIterator.cpp b/Source/WebCore/platform/text/TextBreakIterator.cpp
deleted file mode 100644
index 9d94066b4..000000000
--- a/Source/WebCore/platform/text/TextBreakIterator.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * (C) 1999 Lars Knoll (knoll@kde.org)
- * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
- * Copyright (C) 2007-2009 Torch Mobile, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-#include "TextBreakIterator.h"
-
-#include "LineBreakIteratorPoolICU.h"
-#include "UTextProviderLatin1.h"
-#include "UTextProviderUTF16.h"
-#include <mutex>
-#include <wtf/Atomics.h>
-#include <wtf/text/StringView.h>
-#include <wtf/text/WTFString.h>
-
-namespace WebCore {
-
-// Iterator initialization
-
-static TextBreakIterator* initializeIterator(UBreakIteratorType type, const char* locale = currentTextBreakLocaleID())
-{
- UErrorCode openStatus = U_ZERO_ERROR;
- TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(ubrk_open(type, locale, 0, 0, &openStatus));
- ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
- return iterator;
-}
-
-#if !PLATFORM(IOS)
-static TextBreakIterator* initializeIteratorWithRules(const char* breakRules)
-{
- UParseError parseStatus;
- UErrorCode openStatus = U_ZERO_ERROR;
- String rules(breakRules);
- TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(ubrk_openRules(rules.deprecatedCharacters(), rules.length(), 0, 0, &parseStatus, &openStatus));
- ASSERT_WITH_MESSAGE(U_SUCCESS(openStatus), "ICU could not open a break iterator: %s (%d)", u_errorName(openStatus), openStatus);
- return iterator;
-}
-#endif // !PLATFORM(IOS)
-
-
-// Iterator text setting
-
-static TextBreakIterator* setTextForIterator(TextBreakIterator& iterator, StringView string)
-{
- if (string.is8Bit()) {
- UTextWithBuffer textLocal;
- textLocal.text = UTEXT_INITIALIZER;
- textLocal.text.extraSize = sizeof(textLocal.buffer);
- textLocal.text.pExtra = textLocal.buffer;
-
- UErrorCode openStatus = U_ZERO_ERROR;
- UText* text = openLatin1UTextProvider(&textLocal, string.characters8(), string.length(), &openStatus);
- if (U_FAILURE(openStatus)) {
- LOG_ERROR("uTextOpenLatin1 failed with status %d", openStatus);
- return nullptr;
- }
-
- UErrorCode setTextStatus = U_ZERO_ERROR;
- ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus);
- if (U_FAILURE(setTextStatus)) {
- LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
- return nullptr;
- }
-
- utext_close(text);
- } else {
- UErrorCode setTextStatus = U_ZERO_ERROR;
- ubrk_setText(reinterpret_cast<UBreakIterator*>(&iterator), string.characters16(), string.length(), &setTextStatus);
- if (U_FAILURE(setTextStatus))
- return nullptr;
- }
-
- return &iterator;
-}
-
-static TextBreakIterator* setContextAwareTextForIterator(TextBreakIterator& iterator, StringView string, const UChar* priorContext, unsigned priorContextLength)
-{
- if (string.is8Bit()) {
- UTextWithBuffer textLocal;
- textLocal.text = UTEXT_INITIALIZER;
- textLocal.text.extraSize = sizeof(textLocal.buffer);
- textLocal.text.pExtra = textLocal.buffer;
-
- UErrorCode openStatus = U_ZERO_ERROR;
- UText* text = openLatin1ContextAwareUTextProvider(&textLocal, string.characters8(), string.length(), priorContext, priorContextLength, &openStatus);
- if (U_FAILURE(openStatus)) {
- LOG_ERROR("openLatin1ContextAwareUTextProvider failed with status %d", openStatus);
- return nullptr;
- }
-
- UErrorCode setTextStatus = U_ZERO_ERROR;
- ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus);
- if (U_FAILURE(setTextStatus)) {
- LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
- return nullptr;
- }
-
- utext_close(text);
- } else {
- UText textLocal = UTEXT_INITIALIZER;
-
- UErrorCode openStatus = U_ZERO_ERROR;
- UText* text = openUTF16ContextAwareUTextProvider(&textLocal, string.characters16(), string.length(), priorContext, priorContextLength, &openStatus);
- if (U_FAILURE(openStatus)) {
- LOG_ERROR("openUTF16ContextAwareUTextProvider failed with status %d", openStatus);
- return 0;
- }
-
- UErrorCode setTextStatus = U_ZERO_ERROR;
- ubrk_setUText(reinterpret_cast<UBreakIterator*>(&iterator), text, &setTextStatus);
- if (U_FAILURE(setTextStatus)) {
- LOG_ERROR("ubrk_setUText failed with status %d", setTextStatus);
- return nullptr;
- }
-
- utext_close(text);
- }
-
- return &iterator;
-}
-
-
-// Static iterators
-
-TextBreakIterator* wordBreakIterator(StringView string)
-{
- static TextBreakIterator* staticWordBreakIterator = initializeIterator(UBRK_WORD);
- if (!staticWordBreakIterator)
- return nullptr;
-
- return setTextForIterator(*staticWordBreakIterator, string);
-}
-
-TextBreakIterator* sentenceBreakIterator(StringView string)
-{
- static TextBreakIterator* staticSentenceBreakIterator = initializeIterator(UBRK_SENTENCE);
- if (!staticSentenceBreakIterator)
- return nullptr;
-
- return setTextForIterator(*staticSentenceBreakIterator, string);
-}
-
-TextBreakIterator* cursorMovementIterator(StringView string)
-{
-#if !PLATFORM(IOS)
- // This rule set is based on character-break iterator rules of ICU 4.0
- // <http://source.icu-project.org/repos/icu/icu/tags/release-4-0/source/data/brkitr/char.txt>.
- // The major differences from the original ones are listed below:
- // * Replaced '[\p{Grapheme_Cluster_Break = SpacingMark}]' with '[\p{General_Category = Spacing Mark} - $Extend]' for ICU 3.8 or earlier;
- // * Removed rules that prevent a cursor from moving after prepend characters (Bug 24342);
- // * Added rules that prevent a cursor from moving after virama signs of Indic languages except Tamil (Bug 15790), and;
- // * Added rules that prevent a cursor from moving before Japanese half-width katakara voiced marks.
- // * Added rules for regional indicator symbols.
- static const char* kRules =
- "$CR = [\\p{Grapheme_Cluster_Break = CR}];"
- "$LF = [\\p{Grapheme_Cluster_Break = LF}];"
- "$Control = [\\p{Grapheme_Cluster_Break = Control}];"
- "$VoiceMarks = [\\uFF9E\\uFF9F];" // Japanese half-width katakana voiced marks
- "$Extend = [\\p{Grapheme_Cluster_Break = Extend} $VoiceMarks - [\\u0E30 \\u0E32 \\u0E45 \\u0EB0 \\u0EB2]];"
- "$SpacingMark = [[\\p{General_Category = Spacing Mark}] - $Extend];"
- "$L = [\\p{Grapheme_Cluster_Break = L}];"
- "$V = [\\p{Grapheme_Cluster_Break = V}];"
- "$T = [\\p{Grapheme_Cluster_Break = T}];"
- "$LV = [\\p{Grapheme_Cluster_Break = LV}];"
- "$LVT = [\\p{Grapheme_Cluster_Break = LVT}];"
- "$Hin0 = [\\u0905-\\u0939];" // Devanagari Letter A,...,Ha
- "$HinV = \\u094D;" // Devanagari Sign Virama
- "$Hin1 = [\\u0915-\\u0939];" // Devanagari Letter Ka,...,Ha
- "$Ben0 = [\\u0985-\\u09B9];" // Bengali Letter A,...,Ha
- "$BenV = \\u09CD;" // Bengali Sign Virama
- "$Ben1 = [\\u0995-\\u09B9];" // Bengali Letter Ka,...,Ha
- "$Pan0 = [\\u0A05-\\u0A39];" // Gurmukhi Letter A,...,Ha
- "$PanV = \\u0A4D;" // Gurmukhi Sign Virama
- "$Pan1 = [\\u0A15-\\u0A39];" // Gurmukhi Letter Ka,...,Ha
- "$Guj0 = [\\u0A85-\\u0AB9];" // Gujarati Letter A,...,Ha
- "$GujV = \\u0ACD;" // Gujarati Sign Virama
- "$Guj1 = [\\u0A95-\\u0AB9];" // Gujarati Letter Ka,...,Ha
- "$Ori0 = [\\u0B05-\\u0B39];" // Oriya Letter A,...,Ha
- "$OriV = \\u0B4D;" // Oriya Sign Virama
- "$Ori1 = [\\u0B15-\\u0B39];" // Oriya Letter Ka,...,Ha
- "$Tel0 = [\\u0C05-\\u0C39];" // Telugu Letter A,...,Ha
- "$TelV = \\u0C4D;" // Telugu Sign Virama
- "$Tel1 = [\\u0C14-\\u0C39];" // Telugu Letter Ka,...,Ha
- "$Kan0 = [\\u0C85-\\u0CB9];" // Kannada Letter A,...,Ha
- "$KanV = \\u0CCD;" // Kannada Sign Virama
- "$Kan1 = [\\u0C95-\\u0CB9];" // Kannada Letter A,...,Ha
- "$Mal0 = [\\u0D05-\\u0D39];" // Malayalam Letter A,...,Ha
- "$MalV = \\u0D4D;" // Malayalam Sign Virama
- "$Mal1 = [\\u0D15-\\u0D39];" // Malayalam Letter A,...,Ha
- "$RI = [\\U0001F1E6-\\U0001F1FF];" // Emoji regional indicators
- "!!chain;"
- "!!forward;"
- "$CR $LF;"
- "$L ($L | $V | $LV | $LVT);"
- "($LV | $V) ($V | $T);"
- "($LVT | $T) $T;"
- "[^$Control $CR $LF] $Extend;"
- "[^$Control $CR $LF] $SpacingMark;"
- "$RI $RI / $RI;"
- "$RI $RI;"
- "$Hin0 $HinV $Hin1;" // Devanagari Virama (forward)
- "$Ben0 $BenV $Ben1;" // Bengali Virama (forward)
- "$Pan0 $PanV $Pan1;" // Gurmukhi Virama (forward)
- "$Guj0 $GujV $Guj1;" // Gujarati Virama (forward)
- "$Ori0 $OriV $Ori1;" // Oriya Virama (forward)
- "$Tel0 $TelV $Tel1;" // Telugu Virama (forward)
- "$Kan0 $KanV $Kan1;" // Kannada Virama (forward)
- "$Mal0 $MalV $Mal1;" // Malayalam Virama (forward)
- "!!reverse;"
- "$LF $CR;"
- "($L | $V | $LV | $LVT) $L;"
- "($V | $T) ($LV | $V);"
- "$T ($LVT | $T);"
- "$Extend [^$Control $CR $LF];"
- "$SpacingMark [^$Control $CR $LF];"
- "$RI $RI / $RI $RI;"
- "$RI $RI;"
- "$Hin1 $HinV $Hin0;" // Devanagari Virama (backward)
- "$Ben1 $BenV $Ben0;" // Bengali Virama (backward)
- "$Pan1 $PanV $Pan0;" // Gurmukhi Virama (backward)
- "$Guj1 $GujV $Guj0;" // Gujarati Virama (backward)
- "$Ori1 $OriV $Ori0;" // Gujarati Virama (backward)
- "$Tel1 $TelV $Tel0;" // Telugu Virama (backward)
- "$Kan1 $KanV $Kan0;" // Kannada Virama (backward)
- "$Mal1 $MalV $Mal0;" // Malayalam Virama (backward)
- "!!safe_reverse;"
- "!!safe_forward;";
- static TextBreakIterator* staticCursorMovementIterator = initializeIteratorWithRules(kRules);
-#else // PLATFORM(IOS)
- // Use the special Thai character break iterator for all locales
- static TextBreakIterator* staticCursorMovementIterator = initializeIterator(UBRK_CHARACTER, "th");
-#endif // !PLATFORM(IOS)
-
- if (!staticCursorMovementIterator)
- return nullptr;
-
- return setTextForIterator(*staticCursorMovementIterator, string);
-}
-
-TextBreakIterator* acquireLineBreakIterator(StringView string, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength)
-{
- TextBreakIterator* iterator = reinterpret_cast<TextBreakIterator*>(LineBreakIteratorPool::sharedPool().take(locale));
- if (!iterator)
- return nullptr;
-
- return setContextAwareTextForIterator(*iterator, string, priorContext, priorContextLength);
-}
-
-void releaseLineBreakIterator(TextBreakIterator* iterator)
-{
- ASSERT_ARG(iterator, iterator);
-
- LineBreakIteratorPool::sharedPool().put(reinterpret_cast<UBreakIterator*>(iterator));
-}
-
-static TextBreakIterator* nonSharedCharacterBreakIterator;
-
-static inline bool compareAndSwapNonSharedCharacterBreakIterator(TextBreakIterator* expected, TextBreakIterator* newValue)
-{
-#if ENABLE(COMPARE_AND_SWAP)
- return WTF::weakCompareAndSwap(reinterpret_cast<void**>(&nonSharedCharacterBreakIterator), expected, newValue);
-#else
- DEFINE_STATIC_LOCAL(std::mutex, nonSharedCharacterBreakIteratorMutex, ());
- std::lock_guard<std::mutex> locker(nonSharedCharacterBreakIteratorMutex);
- if (nonSharedCharacterBreakIterator != expected)
- return false;
- nonSharedCharacterBreakIterator = newValue;
- return true;
-#endif
-}
-
-NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(StringView string)
-{
- m_iterator = nonSharedCharacterBreakIterator;
-
- bool createdIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);
- if (!createdIterator)
- m_iterator = initializeIterator(UBRK_CHARACTER);
- if (!m_iterator)
- return;
-
- m_iterator = setTextForIterator(*m_iterator, string);
-}
-
-NonSharedCharacterBreakIterator::~NonSharedCharacterBreakIterator()
-{
- if (!compareAndSwapNonSharedCharacterBreakIterator(0, m_iterator))
- ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator));
-}
-
-
-// Iterator implemenation.
-
-int textBreakFirst(TextBreakIterator* iterator)
-{
- return ubrk_first(reinterpret_cast<UBreakIterator*>(iterator));
-}
-
-int textBreakLast(TextBreakIterator* iterator)
-{
- return ubrk_last(reinterpret_cast<UBreakIterator*>(iterator));
-}
-
-int textBreakNext(TextBreakIterator* iterator)
-{
- return ubrk_next(reinterpret_cast<UBreakIterator*>(iterator));
-}
-
-int textBreakPrevious(TextBreakIterator* iterator)
-{
- return ubrk_previous(reinterpret_cast<UBreakIterator*>(iterator));
-}
-
-int textBreakPreceding(TextBreakIterator* iterator, int pos)
-{
- return ubrk_preceding(reinterpret_cast<UBreakIterator*>(iterator), pos);
-}
-
-int textBreakFollowing(TextBreakIterator* iterator, int pos)
-{
- return ubrk_following(reinterpret_cast<UBreakIterator*>(iterator), pos);
-}
-
-int textBreakCurrent(TextBreakIterator* iterator)
-{
- return ubrk_current(reinterpret_cast<UBreakIterator*>(iterator));
-}
-
-bool isTextBreak(TextBreakIterator* iterator, int position)
-{
- return ubrk_isBoundary(reinterpret_cast<UBreakIterator*>(iterator), position);
-}
-
-bool isWordTextBreak(TextBreakIterator* iterator)
-{
- int ruleStatus = ubrk_getRuleStatus(reinterpret_cast<UBreakIterator*>(iterator));
- return ruleStatus != UBRK_WORD_NONE;
-}
-
-unsigned numGraphemeClusters(const String& s)
-{
- unsigned stringLength = s.length();
-
- if (!stringLength)
- return 0;
-
- // The only Latin-1 Extended Grapheme Cluster is CR LF
- if (s.is8Bit() && !s.contains('\r'))
- return stringLength;
-
- NonSharedCharacterBreakIterator it(s);
- if (!it)
- return stringLength;
-
- unsigned num = 0;
- while (textBreakNext(it) != TextBreakDone)
- ++num;
- return num;
-}
-
-unsigned numCharactersInGraphemeClusters(const String& s, unsigned numGraphemeClusters)
-{
- unsigned stringLength = s.length();
-
- if (!stringLength)
- return 0;
-
- // The only Latin-1 Extended Grapheme Cluster is CR LF
- if (s.is8Bit() && !s.contains('\r'))
- return std::min(stringLength, numGraphemeClusters);
-
- NonSharedCharacterBreakIterator it(s);
- if (!it)
- return std::min(stringLength, numGraphemeClusters);
-
- for (unsigned i = 0; i < numGraphemeClusters; ++i) {
- if (textBreakNext(it) == TextBreakDone)
- return stringLength;
- }
- return textBreakCurrent(it);
-}
-
-} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextBreakIterator.h b/Source/WebCore/platform/text/TextBreakIterator.h
deleted file mode 100644
index eaf00ecb1..000000000
--- a/Source/WebCore/platform/text/TextBreakIterator.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
- * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef TextBreakIterator_h
-#define TextBreakIterator_h
-
-#include <wtf/text/AtomicString.h>
-#include <wtf/text/StringView.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
-class TextBreakIterator;
-
-// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator.
-
-// This is similar to character break iterator in most cases, but is subject to
-// platform UI conventions. One notable example where this can be different
-// from character break iterator is Thai prepend characters, see bug 24342.
-// Use this for insertion point and selection manipulations.
-TextBreakIterator* cursorMovementIterator(StringView);
-
-TextBreakIterator* wordBreakIterator(StringView);
-TextBreakIterator* sentenceBreakIterator(StringView);
-
-TextBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength);
-void releaseLineBreakIterator(TextBreakIterator*);
-
-int textBreakFirst(TextBreakIterator*);
-int textBreakLast(TextBreakIterator*);
-int textBreakNext(TextBreakIterator*);
-int textBreakPrevious(TextBreakIterator*);
-int textBreakCurrent(TextBreakIterator*);
-int textBreakPreceding(TextBreakIterator*, int);
-int textBreakFollowing(TextBreakIterator*, int);
-bool isTextBreak(TextBreakIterator*, int);
-bool isWordTextBreak(TextBreakIterator*);
-
-const int TextBreakDone = -1;
-
-class LazyLineBreakIterator {
-public:
- LazyLineBreakIterator()
- : m_iterator(0)
- , m_cachedPriorContext(0)
- , m_cachedPriorContextLength(0)
- {
- resetPriorContext();
- }
-
- LazyLineBreakIterator(String string, const AtomicString& locale = AtomicString())
- : m_string(string)
- , m_locale(locale)
- , m_iterator(0)
- , m_cachedPriorContext(0)
- , m_cachedPriorContextLength(0)
- {
- resetPriorContext();
- }
-
- ~LazyLineBreakIterator()
- {
- if (m_iterator)
- releaseLineBreakIterator(m_iterator);
- }
-
- String string() const { return m_string; }
-
- UChar lastCharacter() const
- {
- COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
- return m_priorContext[1];
- }
- UChar secondToLastCharacter() const
- {
- COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
- return m_priorContext[0];
- }
- void setPriorContext(UChar last, UChar secondToLast)
- {
- COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
- m_priorContext[0] = secondToLast;
- m_priorContext[1] = last;
- }
- void updatePriorContext(UChar last)
- {
- COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
- m_priorContext[0] = m_priorContext[1];
- m_priorContext[1] = last;
- }
- void resetPriorContext()
- {
- COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
- m_priorContext[0] = 0;
- m_priorContext[1] = 0;
- }
- unsigned priorContextLength() const
- {
- unsigned priorContextLength = 0;
- COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
- if (m_priorContext[1]) {
- ++priorContextLength;
- if (m_priorContext[0])
- ++priorContextLength;
- }
- return priorContextLength;
- }
- // Obtain text break iterator, possibly previously cached, where this iterator is (or has been)
- // initialized to use the previously stored string as the primary breaking context and using
- // previously stored prior context if non-empty.
- TextBreakIterator* get(unsigned priorContextLength)
- {
- ASSERT(priorContextLength <= priorContextCapacity);
- const UChar* priorContext = priorContextLength ? &m_priorContext[priorContextCapacity - priorContextLength] : 0;
- if (!m_iterator) {
- m_iterator = acquireLineBreakIterator(m_string, m_locale, priorContext, priorContextLength);
- m_cachedPriorContext = priorContext;
- m_cachedPriorContextLength = priorContextLength;
- } else if (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength) {
- this->resetStringAndReleaseIterator(m_string, m_locale);
- return this->get(priorContextLength);
- }
- return m_iterator;
- }
- void resetStringAndReleaseIterator(String string, const AtomicString& locale)
- {
- if (m_iterator)
- releaseLineBreakIterator(m_iterator);
- m_string = string;
- m_locale = locale;
- m_iterator = 0;
- m_cachedPriorContext = 0;
- m_cachedPriorContextLength = 0;
- }
-
-private:
- static const unsigned priorContextCapacity = 2;
- String m_string;
- AtomicString m_locale;
- TextBreakIterator* m_iterator;
- UChar m_priorContext[priorContextCapacity];
- const UChar* m_cachedPriorContext;
- unsigned m_cachedPriorContextLength;
-};
-
-// Iterates over "extended grapheme clusters", as defined in UAX #29.
-// Note that platform implementations may be less sophisticated - e.g. ICU prior to
-// version 4.0 only supports "legacy grapheme clusters".
-// Use this for general text processing, e.g. string truncation.
-
-class NonSharedCharacterBreakIterator {
- WTF_MAKE_NONCOPYABLE(NonSharedCharacterBreakIterator);
-public:
- NonSharedCharacterBreakIterator(StringView);
- ~NonSharedCharacterBreakIterator();
-
- operator TextBreakIterator*() const { return m_iterator; }
-
-private:
- TextBreakIterator* m_iterator;
-};
-
-// Counts the number of grapheme clusters. A surrogate pair or a sequence
-// of a non-combining character and following combining characters is
-// counted as 1 grapheme cluster.
-unsigned numGraphemeClusters(const String&);
-// Returns the number of characters which will be less than or equal to
-// the specified grapheme cluster length.
-unsigned numCharactersInGraphemeClusters(const String&, unsigned);
-
-}
-
-#endif
diff --git a/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h b/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h
deleted file mode 100644
index 68b7003c3..000000000
--- a/Source/WebCore/platform/text/TextBreakIteratorInternalICU.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright (C) 2007 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef TextBreakIteratorInternalICU_h
-#define TextBreakIteratorInternalICU_h
-
-// FIXME: Now that this handles locales for ICU, not just for text breaking,
-// this file and the various implementation files should be renamed.
-
-namespace WebCore {
-
- const char* currentSearchLocaleID();
- const char* currentTextBreakLocaleID();
-
-}
-
-#endif
diff --git a/Source/WebCore/platform/text/TextCheckerClient.h b/Source/WebCore/platform/text/TextCheckerClient.h
index 054fb11ec..39e3211e8 100644
--- a/Source/WebCore/platform/text/TextCheckerClient.h
+++ b/Source/WebCore/platform/text/TextCheckerClient.h
@@ -25,40 +25,34 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextCheckerClient_h
-#define TextCheckerClient_h
+#pragma once
#include "TextChecking.h"
-#include <wtf/Forward.h>
-#include <wtf/PassRefPtr.h>
-#include <wtf/Vector.h>
-#include <wtf/text/WTFString.h>
-
namespace WebCore {
+class VisibleSelection;
+
class TextCheckerClient {
public:
- virtual ~TextCheckerClient() {}
+ virtual ~TextCheckerClient() { }
virtual bool shouldEraseMarkersAfterChangeSelection(TextCheckingType) const = 0;
virtual void ignoreWordInSpellDocument(const String&) = 0;
virtual void learnWord(const String&) = 0;
- virtual void checkSpellingOfString(const UChar*, int length, int* misspellingLocation, int* misspellingLength) = 0;
+ virtual void checkSpellingOfString(StringView, int* misspellingLocation, int* misspellingLength) = 0;
virtual String getAutoCorrectSuggestionForMisspelledWord(const String& misspelledWord) = 0;
- virtual void checkGrammarOfString(const UChar*, int length, Vector<GrammarDetail>&, int* badGrammarLocation, int* badGrammarLength) = 0;
+ virtual void checkGrammarOfString(StringView, Vector<GrammarDetail>&, int* badGrammarLocation, int* badGrammarLength) = 0;
#if USE(UNIFIED_TEXT_CHECKING)
- virtual Vector<TextCheckingResult> checkTextOfParagraph(StringView, TextCheckingTypeMask checkingTypes) = 0;
+ virtual Vector<TextCheckingResult> checkTextOfParagraph(StringView, TextCheckingTypeMask checkingTypes, const VisibleSelection& currentSelection) = 0;
#endif
// For spellcheckers that support multiple languages, it's often important to be able to identify the language in order to
// provide more accurate correction suggestions. Caller can pass in more text in "context" to aid such spellcheckers on language
// identification. Noramlly it's the text surrounding the "word" for which we are getting correction suggestions.
- virtual void getGuessesForWord(const String& word, const String& context, Vector<String>& guesses) = 0;
- virtual void requestCheckingOfString(PassRefPtr<TextCheckingRequest>) = 0;
+ virtual void getGuessesForWord(const String& word, const String& context, const VisibleSelection& currentSelection, Vector<String>& guesses) = 0;
+ virtual void requestCheckingOfString(TextCheckingRequest&, const VisibleSelection& currentSelection) = 0;
};
}
-
-#endif // TextCheckerClient_h
diff --git a/Source/WebCore/platform/text/TextCodec.cpp b/Source/WebCore/platform/text/TextCodec.cpp
index b0a5720bc..47e3b5ac0 100644
--- a/Source/WebCore/platform/text/TextCodec.cpp
+++ b/Source/WebCore/platform/text/TextCodec.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
*
* Redistribution and use in source and binary forms, with or without
@@ -11,10 +11,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
diff --git a/Source/WebCore/platform/text/TextCodec.h b/Source/WebCore/platform/text/TextCodec.h
index e4c9571c6..f30cd1261 100644
--- a/Source/WebCore/platform/text/TextCodec.h
+++ b/Source/WebCore/platform/text/TextCodec.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
*
* Redistribution and use in source and binary forms, with or without
@@ -11,10 +11,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -30,10 +30,7 @@
#include <memory>
#include <wtf/Forward.h>
#include <wtf/Noncopyable.h>
-#include <wtf/PassOwnPtr.h>
-#include <wtf/Vector.h>
#include <wtf/text/WTFString.h>
-#include <wtf/unicode/Unicode.h>
namespace WebCore {
class TextEncoding;
@@ -79,7 +76,7 @@ namespace WebCore {
typedef void (*EncodingNameRegistrar)(const char* alias, const char* name);
- typedef PassOwnPtr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData);
+ typedef std::unique_ptr<TextCodec> (*NewTextCodecFunction)(const TextEncoding&, const void* additionalData);
typedef void (*TextCodecRegistrar)(const char* name, NewTextCodecFunction, const void* additionalData);
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextCodecASCIIFastPath.h b/Source/WebCore/platform/text/TextCodecASCIIFastPath.h
index 7d57677fc..08f4edfc4 100644
--- a/Source/WebCore/platform/text/TextCodecASCIIFastPath.h
+++ b/Source/WebCore/platform/text/TextCodecASCIIFastPath.h
@@ -11,10 +11,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
diff --git a/Source/WebCore/platform/text/TextCodecICU.cpp b/Source/WebCore/platform/text/TextCodecICU.cpp
index f6d16cbf1..40212fe3d 100644
--- a/Source/WebCore/platform/text/TextCodecICU.cpp
+++ b/Source/WebCore/platform/text/TextCodecICU.cpp
@@ -11,10 +11,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -66,132 +66,121 @@ static UConverter*& cachedConverterICU()
return threadGlobalData().cachedConverterICU().converter;
}
-PassOwnPtr<TextCodec> TextCodecICU::create(const TextEncoding& encoding, const void* additionalData)
+std::unique_ptr<TextCodec> TextCodecICU::create(const TextEncoding& encoding, const void* additionalData)
{
// Name strings are persistently kept in TextEncodingRegistry maps, so they are never deleted.
- return adoptPtr(new TextCodecICU(encoding.name(), static_cast<const char*>(additionalData)));
+ return std::make_unique<TextCodecICU>(encoding.name(), static_cast<const char*>(additionalData));
}
+#define DECLARE_ALIASES(encoding, ...) \
+ static const char* const encoding##_aliases[] { __VA_ARGS__ }
+
+// From https://encoding.spec.whatwg.org.
+DECLARE_ALIASES(IBM866, "866", "cp866", "csibm866");
+DECLARE_ALIASES(ISO_8859_2, "csisolatin2", "iso-ir-101", "iso8859-2", "iso88592", "iso_8859-2", "iso_8859-2:1987", "l2", "latin2");
+DECLARE_ALIASES(ISO_8859_3, "csisolatin3", "iso-ir-109", "iso8859-3", "iso88593", "iso_8859-3", "iso_8859-3:1988", "l3", "latin3");
+DECLARE_ALIASES(ISO_8859_4, "csisolatin4", "iso-ir-110", "iso8859-4", "iso88594", "iso_8859-4", "iso_8859-4:1988", "l4", "latin4");
+DECLARE_ALIASES(ISO_8859_5, "csisolatincyrillic", "cyrillic", "iso-ir-144", "iso8859-5", "iso88595", "iso_8859-5", "iso_8859-5:1988");
+DECLARE_ALIASES(ISO_8859_6, "arabic", "asmo-708", "csiso88596e", "csiso88596i", "csisolatinarabic", "ecma-114", "iso-8859-6-e", "iso-8859-6-i", "iso-ir-127", "iso8859-6", "iso88596", "iso_8859-6", "iso_8859-6:1987");
+DECLARE_ALIASES(ISO_8859_7, "csisolatingreek", "ecma-118", "elot_928", "greek", "greek8", "iso-ir-126", "iso8859-7", "iso88597", "iso_8859-7", "iso_8859-7:1987", "sun_eu_greek");
+DECLARE_ALIASES(ISO_8859_8, "csiso88598e", "csisolatinhebrew", "hebrew", "iso-8859-8-e", "iso-ir-138", "iso8859-8", "iso88598", "iso_8859-8", "iso_8859-8:1988", "visual");
+DECLARE_ALIASES(ISO_8859_8_I, "csiso88598i", "logical");
+DECLARE_ALIASES(ISO_8859_10, "csisolatin6", "iso-ir-157", "iso8859-10", "iso885910", "l6", "latin6");
+DECLARE_ALIASES(ISO_8859_13, "iso8859-13", "iso885913");
+DECLARE_ALIASES(ISO_8859_14, "iso8859-14", "iso885914");
+DECLARE_ALIASES(ISO_8859_15, "csisolatin9", "iso8859-15", "iso885915", "iso_8859-15", "l9");
+DECLARE_ALIASES(KOI8_R, "cskoi8r", "koi", "koi8", "koi8_r");
+DECLARE_ALIASES(KOI8_U, "koi8-ru");
+DECLARE_ALIASES(macintosh, "csmacintosh", "mac", "x-mac-roman", "macroman", "x-macroman");
+DECLARE_ALIASES(windows_874, "dos-874", "iso-8859-11", "iso8859-11", "iso885911", "tis-620");
+DECLARE_ALIASES(windows_949, "euc-kr", "cseuckr", "csksc56011987", "iso-ir-149", "korean", "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601", "ms949", "x-KSC5601", "x-windows-949", "x-uhc");
+DECLARE_ALIASES(windows_1250, "cp1250", "x-cp1250", "winlatin2");
+DECLARE_ALIASES(windows_1251, "cp1251", "wincyrillic", "x-cp1251");
+DECLARE_ALIASES(windows_1253, "wingreek", "cp1253", "x-cp1253");
+DECLARE_ALIASES(windows_1254, "winturkish", "cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148", "iso8859-9", "iso88599", "iso_8859-9", "iso_8859-9:1989", "l5", "latin5", "x-cp1254");
+DECLARE_ALIASES(windows_1255, "winhebrew", "cp1255", "x-cp1255");
+DECLARE_ALIASES(windows_1256, "winarabic", "cp1256", "x-cp1256");
+DECLARE_ALIASES(windows_1257, "winbaltic", "cp1257", "x-cp1257");
+DECLARE_ALIASES(windows_1258, "winvietnamese", "cp1258", "x-cp1258");
+DECLARE_ALIASES(x_mac_cyrillic, "maccyrillic", "x-mac-ukrainian", "windows-10007", "mac-cyrillic", "maccy", "x-MacCyrillic", "x-MacUkraine");
+DECLARE_ALIASES(GBK, "cn-gb", "csgb231280", "x-euc-cn", "chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "iso-ir-58", "x-gbk", "euc-cn", "cp936", "ms936", "gb2312-1980", "windows-936", "windows-936-2000");
+DECLARE_ALIASES(gb18030, "ibm-1392", "windows-54936");
+DECLARE_ALIASES(Big5, "cn-big5", "x-x-big5", "csbig5", "windows-950", "windows-950-2000", "ms950", "x-windows-950", "x-big5");
+DECLARE_ALIASES(EUC_JP, "x-euc", "cseucpkdfmtjapanese", "x-euc-jp");
+DECLARE_ALIASES(ISO_2022_JP, "jis7", "csiso2022jp");
+DECLARE_ALIASES(Shift_JIS, "shift-jis", "csshiftjis", "ms932", "ms_kanji", "sjis", "windows-31j", "x-sjis");
+// Encodings below are not in the standard.
+DECLARE_ALIASES(UTF_32, "ISO-10646-UCS-4", "ibm-1236", "ibm-1237", "csUCS4", "ucs-4");
+DECLARE_ALIASES(UTF_32LE, "UTF32_LittleEndian", "ibm-1234", "ibm-1235");
+DECLARE_ALIASES(UTF_32BE, "UTF32_BigEndian", "ibm-1232", "ibm-1233", "ibm-9424");
+DECLARE_ALIASES(x_mac_greek, "windows-10006", "macgr", "x-MacGreek");
+DECLARE_ALIASES(x_mac_centraleurroman, "windows-10029", "x-mac-ce", "macce", "maccentraleurope", "x-MacCentralEurope");
+DECLARE_ALIASES(x_mac_turkish, "windows-10081", "mactr", "x-MacTurkish");
+DECLARE_ALIASES(Big5_HKSCS, "big5hk", "HKSCS-BIG5", "ibm-1375", "ibm-1375_P100-2008");
+
+#define DECLARE_ENCODING_NAME(encoding, alias_array) \
+ { encoding, WTF_ARRAY_LENGTH(alias_array##_aliases), alias_array##_aliases }
+
+#define DECLARE_ENCODING_NAME_NO_ALIASES(encoding) \
+ { encoding, 0, nullptr }
+
+static const struct EncodingName {
+ const char* name;
+ unsigned aliasCount;
+ const char* const * aliases;
+} encodingNames[] = {
+ DECLARE_ENCODING_NAME("IBM866", IBM866),
+ DECLARE_ENCODING_NAME("ISO-8859-2", ISO_8859_2),
+ DECLARE_ENCODING_NAME("ISO-8859-3", ISO_8859_3),
+ DECLARE_ENCODING_NAME("ISO-8859-4", ISO_8859_4),
+ DECLARE_ENCODING_NAME("ISO-8859-5", ISO_8859_5),
+ DECLARE_ENCODING_NAME("ISO-8859-6", ISO_8859_6),
+ DECLARE_ENCODING_NAME("ISO-8859-7", ISO_8859_7),
+ DECLARE_ENCODING_NAME("ISO-8859-8", ISO_8859_8),
+ DECLARE_ENCODING_NAME("ISO-8859-8-I", ISO_8859_8_I),
+ DECLARE_ENCODING_NAME("ISO-8859-10", ISO_8859_10),
+ DECLARE_ENCODING_NAME("ISO-8859-13", ISO_8859_13),
+ DECLARE_ENCODING_NAME("ISO-8859-14", ISO_8859_14),
+ DECLARE_ENCODING_NAME("ISO-8859-15", ISO_8859_15),
+ DECLARE_ENCODING_NAME_NO_ALIASES("ISO-8859-16"),
+ DECLARE_ENCODING_NAME("KOI8-R", KOI8_R),
+ DECLARE_ENCODING_NAME("KOI8-U", KOI8_U),
+ DECLARE_ENCODING_NAME("macintosh", macintosh),
+ DECLARE_ENCODING_NAME("windows-874", windows_874),
+ DECLARE_ENCODING_NAME("windows-949", windows_949),
+ DECLARE_ENCODING_NAME("windows-1250", windows_1250),
+ DECLARE_ENCODING_NAME("windows-1251", windows_1251),
+ DECLARE_ENCODING_NAME("windows-1253", windows_1253),
+ DECLARE_ENCODING_NAME("windows-1254", windows_1254),
+ DECLARE_ENCODING_NAME("windows-1255", windows_1255),
+ DECLARE_ENCODING_NAME("windows-1256", windows_1256),
+ DECLARE_ENCODING_NAME("windows-1257", windows_1257),
+ DECLARE_ENCODING_NAME("windows-1258", windows_1258),
+ DECLARE_ENCODING_NAME("x-mac-cyrillic", x_mac_cyrillic),
+ DECLARE_ENCODING_NAME("GBK", GBK),
+ DECLARE_ENCODING_NAME("gb18030", gb18030),
+ DECLARE_ENCODING_NAME("Big5", Big5),
+ DECLARE_ENCODING_NAME("EUC-JP", EUC_JP),
+ DECLARE_ENCODING_NAME("ISO-2022-JP", ISO_2022_JP),
+ DECLARE_ENCODING_NAME("Shift_JIS", Shift_JIS),
+ // Encodings below are not in the standard.
+ DECLARE_ENCODING_NAME("UTF-32", UTF_32),
+ DECLARE_ENCODING_NAME("UTF-32LE", UTF_32LE),
+ DECLARE_ENCODING_NAME("UTF-32BE", UTF_32BE),
+ DECLARE_ENCODING_NAME("x-mac-greek", x_mac_greek),
+ DECLARE_ENCODING_NAME("x-mac-centraleurroman", x_mac_centraleurroman),
+ DECLARE_ENCODING_NAME("x-mac-turkish", x_mac_turkish),
+ DECLARE_ENCODING_NAME("Big5-HKSCS", Big5_HKSCS),
+};
+
void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
{
- // We register Hebrew with logical ordering using a separate name.
- // Otherwise, this would share the same canonical name as the
- // visual ordering case, and then TextEncoding could not tell them
- // apart; ICU treats these names as synonyms.
- registrar("ISO-8859-8-I", "ISO-8859-8-I");
-
- int32_t numConverters = ucnv_countAvailable();
- for (int32_t i = 0; i < numConverters; ++i) {
- const char* canonicalConverterName = ucnv_getAvailableName(i);
- UErrorCode error = U_ZERO_ERROR;
- // Try MIME before trying IANA to pick up commonly used names like
- // 'EUC-JP' instead of horrendously long names like
- // 'Extended_UNIX_Code_Packed_Format_for_Japanese'.
- const char* webStandardName = ucnv_getStandardName(canonicalConverterName, "MIME", &error);
- if (!U_SUCCESS(error) || !webStandardName) {
- error = U_ZERO_ERROR;
- // Try IANA to pick up 'windows-12xx' and other names
- // which are not preferred MIME names but are widely used.
- webStandardName = ucnv_getStandardName(canonicalConverterName, "IANA", &error);
- if (!U_SUCCESS(error) || !webStandardName)
- continue;
- }
-
- // Any standard encoding overrides should match checks in registerCodecs() below.
-
- // 1. Treat GB2312 encoding as GBK (its more modern superset), to match other browsers.
- // 2. On the Web, GB2312 is encoded as EUC-CN or HZ, while ICU provides a native encoding
- // for encoding GB_2312-80 and several others. So, we need to override this behavior, too.
- if (strcmp(webStandardName, "GB2312") == 0 || strcmp(webStandardName, "GB_2312-80") == 0)
- webStandardName = "GBK";
- // Similarly, EUC-KR encodings all map to an extended version.
- else if (strcmp(webStandardName, "KSC_5601") == 0 || strcmp(webStandardName, "EUC-KR") == 0 || strcmp(webStandardName, "cp1363") == 0)
- webStandardName = "windows-949";
- // And so on.
- // FIXME: strcasecmp is locale sensitive, we should not be using it.
- else if (strcasecmp(webStandardName, "iso-8859-9") == 0) // This name is returned in different case by ICU 3.2 and 3.6.
- webStandardName = "windows-1254";
- else if (strcmp(webStandardName, "TIS-620") == 0)
- webStandardName = "windows-874";
-
- registrar(webStandardName, webStandardName);
-
- uint16_t numAliases = ucnv_countAliases(canonicalConverterName, &error);
- ASSERT(U_SUCCESS(error));
- if (U_SUCCESS(error))
- for (uint16_t j = 0; j < numAliases; ++j) {
- error = U_ZERO_ERROR;
- const char* alias = ucnv_getAlias(canonicalConverterName, j, &error);
- ASSERT(U_SUCCESS(error));
- if (U_SUCCESS(error) && alias != webStandardName)
- registrar(alias, webStandardName);
- }
+ for (auto& encodingName : encodingNames) {
+ registrar(encodingName.name, encodingName.name);
+ for (size_t i = 0; i < encodingName.aliasCount; ++i)
+ registrar(encodingName.aliases[i], encodingName.name);
}
- // Additional aliases.
- // macroman is present in modern versions of ICU, but not in ICU 3.2 (shipped with Mac OS X 10.4).
- // FIXME: Do any ports still use such old versions?
- registrar("macroman", "macintosh");
-
- // Additional aliases that historically were present in the encoding
- // table in WebKit on Macintosh that don't seem to be present in ICU.
- // Perhaps we can prove these are not used on the web and remove them.
- // Or perhaps we can get them added to ICU.
- registrar("x-mac-roman", "macintosh");
- registrar("maccyrillic", "x-mac-cyrillic");
- registrar("x-mac-ukrainian", "x-mac-cyrillic");
- registrar("cn-big5", "Big5");
- registrar("x-x-big5", "Big5");
- registrar("cn-gb", "GBK");
- registrar("csgb231280", "GBK");
- registrar("x-euc-cn", "GBK");
- registrar("x-gbk", "GBK");
- registrar("csISO88598I", "ISO-8859-8-I");
- registrar("koi", "KOI8-R");
- registrar("logical", "ISO-8859-8-I");
- registrar("visual", "ISO-8859-8");
- registrar("winarabic", "windows-1256");
- registrar("winbaltic", "windows-1257");
- registrar("wincyrillic", "windows-1251");
- registrar("iso-8859-11", "windows-874");
- registrar("iso8859-11", "windows-874");
- registrar("dos-874", "windows-874");
- registrar("wingreek", "windows-1253");
- registrar("winhebrew", "windows-1255");
- registrar("winlatin2", "windows-1250");
- registrar("winturkish", "windows-1254");
- registrar("winvietnamese", "windows-1258");
- registrar("x-cp1250", "windows-1250");
- registrar("x-cp1251", "windows-1251");
- registrar("x-euc", "EUC-JP");
- registrar("x-windows-949", "windows-949");
- registrar("KSC5601", "windows-949");
- registrar("x-uhc", "windows-949");
- registrar("shift-jis", "Shift_JIS");
-
- // These aliases are present in modern versions of ICU, but use different codecs, and have no standard names.
- // They are not present in ICU 3.2.
- registrar("dos-720", "cp864");
- registrar("jis7", "ISO-2022-JP");
-
- // Alternative spelling of ISO encoding names.
- registrar("ISO8859-1", "ISO-8859-1");
- registrar("ISO8859-2", "ISO-8859-2");
- registrar("ISO8859-3", "ISO-8859-3");
- registrar("ISO8859-4", "ISO-8859-4");
- registrar("ISO8859-5", "ISO-8859-5");
- registrar("ISO8859-6", "ISO-8859-6");
- registrar("ISO8859-7", "ISO-8859-7");
- registrar("ISO8859-8", "ISO-8859-8");
- registrar("ISO8859-8-I", "ISO-8859-8-I");
- registrar("ISO8859-9", "windows-1254");
- registrar("ISO8859-10", "ISO-8859-10");
- registrar("ISO8859-13", "ISO-8859-13");
- registrar("ISO8859-14", "ISO-8859-14");
- registrar("ISO8859-15", "ISO-8859-15");
- // Not registering ISO8859-16, because Firefox (as of version 3.6.6) doesn't know this particular alias,
- // and because older versions of ICU don't support ISO-8859-16 encoding at all.
-
#if PLATFORM(IOS)
// A.B. adding a few more Mac encodings missing 'cause we don't have TextCodecMac right now
// luckily, they are supported in ICU, just need to alias them.
@@ -218,40 +207,40 @@ void TextCodecICU::registerEncodingNames(EncodingNameRegistrar registrar)
void TextCodecICU::registerCodecs(TextCodecRegistrar registrar)
{
- // See comment above in registerEncodingNames.
- UErrorCode error = U_ZERO_ERROR;
- const char* canonicalConverterName = ucnv_getCanonicalName("ISO-8859-8-I", "IANA", &error);
- ASSERT(U_SUCCESS(error));
- registrar("ISO-8859-8-I", create, canonicalConverterName);
-
- int32_t numConverters = ucnv_countAvailable();
- for (int32_t i = 0; i < numConverters; ++i) {
- canonicalConverterName = ucnv_getAvailableName(i);
- error = U_ZERO_ERROR;
- const char* webStandardName = ucnv_getStandardName(canonicalConverterName, "MIME", &error);
- if (!U_SUCCESS(error) || !webStandardName) {
- error = U_ZERO_ERROR;
- webStandardName = ucnv_getStandardName(canonicalConverterName, "IANA", &error);
- if (!U_SUCCESS(error) || !webStandardName)
- continue;
+ for (auto& encodingName : encodingNames) {
+ // These encodings currently don't have standard names, so we need to register encoders manually.
+ // http://demo.icu-project.org/icu-bin/convexp
+ if (!strcmp(encodingName.name, "windows-874")) {
+ registrar(encodingName.name, create, "windows-874-2000");
+ continue;
}
-
- // Don't register codecs for overridden encodings.
- if (strcmp(webStandardName, "GB2312") == 0 || strcmp(webStandardName, "GB_2312-80") == 0
- || strcmp(webStandardName, "KSC_5601") == 0 || strcmp(webStandardName, "EUC-KR") == 0
- || strcmp(webStandardName, "cp1363") == 0
- || strcasecmp(webStandardName, "iso-8859-9") == 0
- || strcmp(webStandardName, "TIS-620") == 0)
+ if (!strcmp(encodingName.name, "windows-949")) {
+ registrar(encodingName.name, create, "windows-949-2000");
continue;
+ }
+ if (!strcmp(encodingName.name, "x-mac-cyrillic")) {
+ registrar(encodingName.name, create, "macos-7_3-10.2");
+ continue;
+ }
+ if (!strcmp(encodingName.name, "x-mac-greek")) {
+ registrar(encodingName.name, create, "macos-6_2-10.4");
+ continue;
+ }
+ if (!strcmp(encodingName.name, "x-mac-centraleurroman")) {
+ registrar(encodingName.name, create, "macos-29-10.2");
+ continue;
+ }
+ if (!strcmp(encodingName.name, "x-mac-turkish")) {
+ registrar(encodingName.name, create, "macos-35-10.2");
+ continue;
+ }
- registrar(webStandardName, create, fastStrDup(canonicalConverterName));
+ UErrorCode error = U_ZERO_ERROR;
+ const char* canonicalConverterName = ucnv_getCanonicalName(encodingName.name, "IANA", &error);
+ ASSERT(U_SUCCESS(error));
+ registrar(encodingName.name, create, canonicalConverterName);
}
- // These encodings currently don't have standard names, so we need to register encoders manually.
- // FIXME: Is there a good way to determine the most up to date variant programmatically?
- registrar("windows-874", create, "windows-874-2000");
- registrar("windows-949", create, "windows-949-2000");
-
#if PLATFORM(IOS)
// See comment above in registerEncodingNames().
int32_t i = 0;
@@ -491,18 +480,26 @@ CString TextCodecICU::encode(const UChar* characters, size_t length, Unencodable
// FIXME: We should see if there is "force ASCII range" mode in ICU;
// until then, we change the backslash into a yen sign.
// Encoding will change the yen sign back into a backslash.
- String copy;
- const UChar* source;
- const UChar* sourceLimit;
+ Vector<UChar> copy;
+ const UChar* source = characters;
if (shouldShowBackslashAsCurrencySymbolIn(m_encodingName)) {
- copy.append(characters, length);
- copy.replace('\\', 0xA5);
- source = copy.deprecatedCharacters();
- sourceLimit = source + copy.length();
- } else {
- source = characters;
- sourceLimit = source + length;
+ for (size_t i = 0; i < length; ++i) {
+ if (characters[i] == '\\') {
+ copy.reserveInitialCapacity(length);
+ for (size_t j = 0; j < i; ++j)
+ copy.uncheckedAppend(characters[j]);
+ for (size_t j = i; j < length; ++j) {
+ UChar character = characters[j];
+ if (character == '\\')
+ character = yenSign;
+ copy.uncheckedAppend(character);
+ }
+ source = copy.data();
+ break;
+ }
+ }
}
+ const UChar* sourceLimit = source + length;
UErrorCode err = U_ZERO_ERROR;
diff --git a/Source/WebCore/platform/text/TextCodecICU.h b/Source/WebCore/platform/text/TextCodecICU.h
index 3803df1a7..0c1798c9b 100644
--- a/Source/WebCore/platform/text/TextCodecICU.h
+++ b/Source/WebCore/platform/text/TextCodecICU.h
@@ -11,10 +11,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -36,17 +36,18 @@ namespace WebCore {
class TextCodecICU : public TextCodec {
public:
+ static std::unique_ptr<TextCodec> create(const TextEncoding&, const void* additionalData);
+
+ TextCodecICU(const char* encoding, const char* canonicalConverterName);
+
static void registerEncodingNames(EncodingNameRegistrar);
static void registerCodecs(TextCodecRegistrar);
virtual ~TextCodecICU();
private:
- TextCodecICU(const char* encoding, const char* canonicalConverterName);
- static PassOwnPtr<TextCodec> create(const TextEncoding&, const void* additionalData);
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
+ CString encode(const UChar*, size_t length, UnencodableHandling) override;
void createICUConverter() const;
void releaseICUConverter() const;
diff --git a/Source/WebCore/platform/text/TextCodecLatin1.cpp b/Source/WebCore/platform/text/TextCodecLatin1.cpp
index da86f8ff4..f2e8afa72 100644
--- a/Source/WebCore/platform/text/TextCodecLatin1.cpp
+++ b/Source/WebCore/platform/text/TextCodecLatin1.cpp
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -27,7 +27,6 @@
#include "TextCodecLatin1.h"
#include "TextCodecASCIIFastPath.h"
-#include <wtf/PassOwnPtr.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuffer.h>
#include <wtf/text/WTFString.h>
@@ -73,49 +72,34 @@ static const UChar table[256] = {
void TextCodecLatin1::registerEncodingNames(EncodingNameRegistrar registrar)
{
+ // From https://encoding.spec.whatwg.org.
registrar("windows-1252", "windows-1252");
- registrar("ISO-8859-1", "ISO-8859-1");
- registrar("US-ASCII", "US-ASCII");
-
- registrar("WinLatin1", "windows-1252");
- registrar("ibm-1252", "windows-1252");
- registrar("ibm-1252_P100-2000", "windows-1252");
-
- registrar("CP819", "ISO-8859-1");
- registrar("IBM819", "ISO-8859-1");
- registrar("csISOLatin1", "ISO-8859-1");
- registrar("iso-ir-100", "ISO-8859-1");
- registrar("iso_8859-1:1987", "ISO-8859-1");
- registrar("l1", "ISO-8859-1");
- registrar("latin1", "ISO-8859-1");
-
- registrar("ANSI_X3.4-1968", "US-ASCII");
- registrar("ANSI_X3.4-1986", "US-ASCII");
- registrar("ASCII", "US-ASCII");
- registrar("IBM367", "US-ASCII");
- registrar("ISO646-US", "US-ASCII");
- registrar("ISO_646.irv:1991", "US-ASCII");
- registrar("cp367", "US-ASCII");
- registrar("csASCII", "US-ASCII");
- registrar("ibm-367_P100-1995", "US-ASCII");
- registrar("iso-ir-6", "US-ASCII");
- registrar("iso-ir-6-us", "US-ASCII");
- registrar("us", "US-ASCII");
- registrar("x-ansi", "US-ASCII");
+ registrar("ansi_x3.4-1968", "windows-1252");
+ registrar("ascii", "windows-1252");
+ registrar("cp1252", "windows-1252");
+ registrar("cp819", "windows-1252");
+ registrar("csisolatin1", "windows-1252");
+ registrar("ibm819", "windows-1252");
+ registrar("iso-8859-1", "windows-1252");
+ registrar("iso-ir-100", "windows-1252");
+ registrar("iso8859-1", "windows-1252");
+ registrar("iso88591", "windows-1252");
+ registrar("iso_8859-1", "windows-1252");
+ registrar("iso_8859-1:1987", "windows-1252");
+ registrar("l1", "windows-1252");
+ registrar("latin1", "windows-1252");
+ registrar("us-ascii", "windows-1252");
+ registrar("x-cp1252", "windows-1252");
}
-static PassOwnPtr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*)
+static std::unique_ptr<TextCodec> newStreamingTextDecoderWindowsLatin1(const TextEncoding&, const void*)
{
- return adoptPtr(new TextCodecLatin1);
+ return std::make_unique<TextCodecLatin1>();
}
void TextCodecLatin1::registerCodecs(TextCodecRegistrar registrar)
{
registrar("windows-1252", newStreamingTextDecoderWindowsLatin1, 0);
-
- // ASCII and Latin-1 both decode as Windows Latin-1 although they retain unique identities.
- registrar("ISO-8859-1", newStreamingTextDecoderWindowsLatin1, 0);
- registrar("US-ASCII", newStreamingTextDecoderWindowsLatin1, 0);
}
String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
@@ -147,6 +131,10 @@ String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, boo
if (source == end)
break;
+
+ // *source may not be ASCII anymore if source moves inside the loop of the fast code path
+ if (!isASCII(*source))
+ goto useLookupTable;
}
*destination = *source;
} else {
@@ -198,6 +186,10 @@ upConvertTo16Bit:
if (source == end)
break;
+
+ // *source may not be ASCII anymore if source moves inside the loop of the fast code path
+ if (!isASCII(*source))
+ goto useLookupTable16;
}
*destination16 = *source;
} else {
diff --git a/Source/WebCore/platform/text/TextCodecLatin1.h b/Source/WebCore/platform/text/TextCodecLatin1.h
index f035d01da..9d08aa3f5 100644
--- a/Source/WebCore/platform/text/TextCodecLatin1.h
+++ b/Source/WebCore/platform/text/TextCodecLatin1.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -35,8 +35,8 @@ namespace WebCore {
static void registerEncodingNames(EncodingNameRegistrar);
static void registerCodecs(TextCodecRegistrar);
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
+ CString encode(const UChar*, size_t length, UnencodableHandling) override;
};
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/icu/UTextProviderLatin1.h b/Source/WebCore/platform/text/TextCodecReplacement.cpp
index 51347c21b..fc39b18b8 100644
--- a/Source/WebCore/platform/text/icu/UTextProviderLatin1.h
+++ b/Source/WebCore/platform/text/TextCodecReplacement.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014 Apple Inc. All rights reserved.
+ * Copyright (C) 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -23,24 +23,49 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef UTextProviderLatin1_h
-#define UTextProviderLatin1_h
+#include "config.h"
+#include "TextCodecReplacement.h"
-#include <unicode/utext.h>
-#include <wtf/unicode/Unicode.h>
+#include <wtf/unicode/CharacterNames.h>
namespace WebCore {
-const int UTextWithBufferInlineCapacity = 16;
+std::unique_ptr<TextCodec> TextCodecReplacement::create(const TextEncoding&, const void*)
+{
+ return std::make_unique<TextCodecReplacement>();
+}
-struct UTextWithBuffer {
- UText text;
- UChar buffer[UTextWithBufferInlineCapacity];
-};
+TextCodecReplacement::TextCodecReplacement()
+{
+}
-UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status);
-UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status);
+void TextCodecReplacement::registerEncodingNames(EncodingNameRegistrar registrar)
+{
+ // The 'replacement' itself is not a valid label. It is the name of
+ // a group of legacy encoding labels. Hence, it cannot be used directly.
+ registrar("replacement", "replacement");
-} // namespace WebCore
+ // The labels
+ registrar("csiso2022kr", "replacement");
+ registrar("hz-gb-2312", "replacement");
+ registrar("iso-2022-cn", "replacement");
+ registrar("iso-2022-cn-ext", "replacement");
+ registrar("iso-2022-kr", "replacement");
+}
+
+void TextCodecReplacement::registerCodecs(TextCodecRegistrar registrar)
+{
+ registrar("replacement", create, 0);
+}
+
+String TextCodecReplacement::decode(const char*, size_t, bool, bool, bool& sawError)
+{
+ sawError = true;
+ if (m_sentEOF)
+ return emptyString();
-#endif // UTextProviderLatin1_h
+ m_sentEOF = true;
+ return String(&replacementCharacter, 1);
+}
+
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/icu/UTextProviderUTF16.h b/Source/WebCore/platform/text/TextCodecReplacement.h
index 564a37a81..7a67d516d 100644
--- a/Source/WebCore/platform/text/icu/UTextProviderUTF16.h
+++ b/Source/WebCore/platform/text/TextCodecReplacement.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014 Apple Inc. All rights reserved.
+ * Copyright (C) 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -23,16 +23,29 @@
* THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef UTextProviderUTF16_h
-#define UTextProviderUTF16_h
+#ifndef TextCodecReplacement_h
+#define TextCodecReplacement_h
-#include <unicode/utext.h>
-#include <wtf/unicode/Unicode.h>
+#include "TextCodecUTF8.h"
namespace WebCore {
-UText* openUTF16ContextAwareUTextProvider(UText*, const UChar*, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode*);
+class TextCodecReplacement : public TextCodecUTF8 {
+public:
+ static std::unique_ptr<TextCodec> create(const TextEncoding&, const void*);
+
+ TextCodecReplacement();
+
+ static void registerEncodingNames(EncodingNameRegistrar);
+ static void registerCodecs(TextCodecRegistrar);
+
+private:
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
+
+ bool m_sentEOF { false };
+
+};
} // namespace WebCore
-#endif // UTextProviderUTF16_h
+#endif /* TextCodecReplacement_h */
diff --git a/Source/WebCore/platform/text/TextCodecUTF16.cpp b/Source/WebCore/platform/text/TextCodecUTF16.cpp
index 673f73813..0e39de128 100644
--- a/Source/WebCore/platform/text/TextCodecUTF16.cpp
+++ b/Source/WebCore/platform/text/TextCodecUTF16.cpp
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -26,7 +26,6 @@
#include "config.h"
#include "TextCodecUTF16.h"
-#include <wtf/PassOwnPtr.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuffer.h>
#include <wtf/text/WTFString.h>
@@ -48,14 +47,14 @@ void TextCodecUTF16::registerEncodingNames(EncodingNameRegistrar registrar)
registrar("unicodeFFFE", "UTF-16BE");
}
-static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*)
+static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16LE(const TextEncoding&, const void*)
{
- return adoptPtr(new TextCodecUTF16(true));
+ return std::make_unique<TextCodecUTF16>(true);
}
-static PassOwnPtr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*)
+static std::unique_ptr<TextCodec> newStreamingTextDecoderUTF16BE(const TextEncoding&, const void*)
{
- return adoptPtr(new TextCodecUTF16(false));
+ return std::make_unique<TextCodecUTF16>(false);
}
void TextCodecUTF16::registerCodecs(TextCodecRegistrar registrar)
@@ -112,7 +111,7 @@ String TextCodecUTF16::decode(const char* bytes, size_t length, bool, bool, bool
buffer.shrink(q - buffer.characters());
- return String::adopt(buffer);
+ return String::adopt(WTFMove(buffer));
}
CString TextCodecUTF16::encode(const UChar* characters, size_t length, UnencodableHandling)
diff --git a/Source/WebCore/platform/text/TextCodecUTF16.h b/Source/WebCore/platform/text/TextCodecUTF16.h
index 8ce947611..28760cdee 100644
--- a/Source/WebCore/platform/text/TextCodecUTF16.h
+++ b/Source/WebCore/platform/text/TextCodecUTF16.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004, 2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (C) 2004, 2006 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -37,8 +37,8 @@ namespace WebCore {
TextCodecUTF16(bool littleEndian) : m_littleEndian(littleEndian), m_haveBufferedByte(false) { }
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
+ CString encode(const UChar*, size_t length, UnencodableHandling) override;
private:
bool m_littleEndian;
diff --git a/Source/WebCore/platform/text/TextCodecUTF8.cpp b/Source/WebCore/platform/text/TextCodecUTF8.cpp
index 6abf43174..508edee9a 100644
--- a/Source/WebCore/platform/text/TextCodecUTF8.cpp
+++ b/Source/WebCore/platform/text/TextCodecUTF8.cpp
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -38,14 +38,17 @@ namespace WebCore {
const int nonCharacter = -1;
-PassOwnPtr<TextCodec> TextCodecUTF8::create(const TextEncoding&, const void*)
+std::unique_ptr<TextCodec> TextCodecUTF8::create(const TextEncoding&, const void*)
{
- return adoptPtr(new TextCodecUTF8);
+ return std::make_unique<TextCodecUTF8>();
}
void TextCodecUTF8::registerEncodingNames(EncodingNameRegistrar registrar)
{
+ // From https://encoding.spec.whatwg.org.
registrar("UTF-8", "UTF-8");
+ registrar("utf8", "UTF-8");
+ registrar("unicode-1-1-utf-8", "UTF-8");
// Additional aliases that originally were present in the encoding
// table in WebKit on Macintosh, and subsequently added by
@@ -53,7 +56,6 @@ void TextCodecUTF8::registerEncodingNames(EncodingNameRegistrar registrar)
// and remove them.
registrar("unicode11utf8", "UTF-8");
registrar("unicode20utf8", "UTF-8");
- registrar("utf8", "UTF-8");
registrar("x-unicode20utf8", "UTF-8");
}
@@ -341,7 +343,7 @@ String TextCodecUTF8::decode(const char* bytes, size_t length, bool flush, bool
buffer.shrink(destination - buffer.characters());
- return String::adopt(buffer);
+ return String::adopt(WTFMove(buffer));
upConvertTo16Bit:
StringBuffer<UChar> buffer16(m_partialSequenceSize + length);
@@ -417,7 +419,7 @@ upConvertTo16Bit:
buffer16.shrink(destination16 - buffer16.characters());
- return String::adopt(buffer16);
+ return String::adopt(WTFMove(buffer16));
}
CString TextCodecUTF8::encode(const UChar* characters, size_t length, UnencodableHandling)
diff --git a/Source/WebCore/platform/text/TextCodecUTF8.h b/Source/WebCore/platform/text/TextCodecUTF8.h
index 270cf298f..590fd5c0d 100644
--- a/Source/WebCore/platform/text/TextCodecUTF8.h
+++ b/Source/WebCore/platform/text/TextCodecUTF8.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -32,15 +32,18 @@ namespace WebCore {
class TextCodecUTF8 : public TextCodec {
public:
+ static std::unique_ptr<TextCodec> create(const TextEncoding&, const void*);
+ TextCodecUTF8()
+ : m_partialSequenceSize(0)
+ {
+ }
+
static void registerEncodingNames(EncodingNameRegistrar);
static void registerCodecs(TextCodecRegistrar);
private:
- static PassOwnPtr<TextCodec> create(const TextEncoding&, const void*);
- TextCodecUTF8() : m_partialSequenceSize(0) { }
-
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
+ CString encode(const UChar*, size_t length, UnencodableHandling) override;
template <typename CharType>
bool handlePartialSequence(CharType*& destination, const uint8_t*& source, const uint8_t* end, bool flush, bool stopOnError, bool& sawError);
diff --git a/Source/WebCore/platform/text/TextCodecUserDefined.cpp b/Source/WebCore/platform/text/TextCodecUserDefined.cpp
index 47d682110..e319214e4 100644
--- a/Source/WebCore/platform/text/TextCodecUserDefined.cpp
+++ b/Source/WebCore/platform/text/TextCodecUserDefined.cpp
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -27,7 +27,6 @@
#include "TextCodecUserDefined.h"
#include <stdio.h>
-#include <wtf/PassOwnPtr.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuffer.h>
#include <wtf/text/StringBuilder.h>
@@ -40,9 +39,9 @@ void TextCodecUserDefined::registerEncodingNames(EncodingNameRegistrar registrar
registrar("x-user-defined", "x-user-defined");
}
-static PassOwnPtr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*)
+static std::unique_ptr<TextCodec> newStreamingTextDecoderUserDefined(const TextEncoding&, const void*)
{
- return adoptPtr(new TextCodecUserDefined);
+ return std::make_unique<TextCodecUserDefined>();
}
void TextCodecUserDefined::registerCodecs(TextCodecRegistrar registrar)
diff --git a/Source/WebCore/platform/text/TextCodecUserDefined.h b/Source/WebCore/platform/text/TextCodecUserDefined.h
index d1b31601a..5821ca378 100644
--- a/Source/WebCore/platform/text/TextCodecUserDefined.h
+++ b/Source/WebCore/platform/text/TextCodecUserDefined.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -35,8 +35,8 @@ namespace WebCore {
static void registerEncodingNames(EncodingNameRegistrar);
static void registerCodecs(TextCodecRegistrar);
- virtual String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError);
- virtual CString encode(const UChar*, size_t length, UnencodableHandling);
+ String decode(const char*, size_t length, bool flush, bool stopOnError, bool& sawError) override;
+ CString encode(const UChar*, size_t length, UnencodableHandling) override;
};
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextEncoding.cpp b/Source/WebCore/platform/text/TextEncoding.cpp
index d27082670..7daba67e2 100644
--- a/Source/WebCore/platform/text/TextEncoding.cpp
+++ b/Source/WebCore/platform/text/TextEncoding.cpp
@@ -12,10 +12,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -31,10 +31,9 @@
#include "TextCodec.h"
#include "TextEncodingRegistry.h"
#include <unicode/unorm.h>
-#include <wtf/OwnPtr.h>
#include <wtf/StdLibExtras.h>
#include <wtf/text/CString.h>
-#include <wtf/text/WTFString.h>
+#include <wtf/text/StringView.h>
namespace WebCore {
@@ -48,12 +47,18 @@ TextEncoding::TextEncoding(const char* name)
: m_name(atomicCanonicalTextEncodingName(name))
, m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
{
+ // Aliases are valid, but not "replacement" itself.
+ if (m_name && isReplacementEncoding(name))
+ m_name = nullptr;
}
TextEncoding::TextEncoding(const String& name)
: m_name(atomicCanonicalTextEncodingName(name))
, m_backslashAsCurrencySymbol(backslashAsCurrencySymbol())
{
+ // Aliases are valid, but not "replacement" itself.
+ if (m_name && isReplacementEncoding(name))
+ m_name = nullptr;
}
String TextEncoding::decode(const char* data, size_t length, bool stopOnError, bool& sawError) const
@@ -64,20 +69,22 @@ String TextEncoding::decode(const char* data, size_t length, bool stopOnError, b
return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);
}
-CString TextEncoding::encode(const UChar* characters, size_t length, UnencodableHandling handling) const
+CString TextEncoding::encode(StringView text, UnencodableHandling handling) const
{
if (!m_name)
return CString();
- if (!length)
+ if (text.isEmpty())
return "";
// FIXME: What's the right place to do normalization?
// It's a little strange to do it inside the encode function.
// Perhaps normalization should be an explicit step done before calling encode.
- const UChar* source = characters;
- size_t sourceLength = length;
+ auto upconvertedCharacters = text.upconvertedCharacters();
+
+ const UChar* source = upconvertedCharacters;
+ size_t sourceLength = text.length();
Vector<UChar> normalizedCharacters;
@@ -85,17 +92,18 @@ CString TextEncoding::encode(const UChar* characters, size_t length, Unencodable
if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) {
// First try using the length of the original string, since normalization to NFC rarely increases length.
normalizedCharacters.grow(sourceLength);
- int32_t normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), length, &err);
+ int32_t normalizedLength = unorm_normalize(source, sourceLength, UNORM_NFC, 0, normalizedCharacters.data(), sourceLength, &err);
if (err == U_BUFFER_OVERFLOW_ERROR) {
err = U_ZERO_ERROR;
normalizedCharacters.resize(normalizedLength);
- normalizedLength = unorm_normalize(source, length, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
+ normalizedLength = unorm_normalize(source, sourceLength, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
}
ASSERT(U_SUCCESS(err));
source = normalizedCharacters.data();
sourceLength = normalizedLength;
}
+
return newTextCodec(*this)->encode(source, sourceLength, handling);
}
diff --git a/Source/WebCore/platform/text/TextEncoding.h b/Source/WebCore/platform/text/TextEncoding.h
index 68e397814..a349c3828 100644
--- a/Source/WebCore/platform/text/TextEncoding.h
+++ b/Source/WebCore/platform/text/TextEncoding.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -23,77 +23,59 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef TextEncoding_h
-#define TextEncoding_h
+#pragma once
#include "TextCodec.h"
#include <wtf/Forward.h>
-#include <wtf/unicode/Unicode.h>
namespace WebCore {
- class TextEncoding {
- public:
- TextEncoding() : m_name(0) { }
- TextEncoding(const char* name);
- TextEncoding(const String& name);
+class TextEncoding {
+public:
+ TextEncoding() = default;
+ TextEncoding(const char* name);
+ WEBCORE_EXPORT TextEncoding(const String& name);
- bool isValid() const { return m_name; }
- const char* name() const { return m_name; }
- const char* domName() const; // name exposed via DOM
- bool usesVisualOrdering() const;
- bool isJapanese() const;
-
- PassRefPtr<StringImpl> displayString(PassRefPtr<StringImpl> str) const
- {
- if (m_backslashAsCurrencySymbol == '\\' || !str)
- return str;
- return str->replace('\\', m_backslashAsCurrencySymbol);
- }
- template <typename CharacterType>
- void displayBuffer(CharacterType* characters, unsigned len) const
- {
- if (m_backslashAsCurrencySymbol == '\\')
- return;
- for (unsigned i = 0; i < len; ++i) {
- if (characters[i] == '\\')
- characters[i] = m_backslashAsCurrencySymbol;
- }
- }
+ bool isValid() const { return m_name; }
+ const char* name() const { return m_name; }
+ WEBCORE_EXPORT const char* domName() const; // name exposed via DOM
+ bool usesVisualOrdering() const;
+ bool isJapanese() const;
- const TextEncoding& closestByteBasedEquivalent() const;
- const TextEncoding& encodingForFormSubmission() const;
+ const TextEncoding& closestByteBasedEquivalent() const;
+ const TextEncoding& encodingForFormSubmission() const;
- String decode(const char* str, size_t length) const
- {
- bool ignored;
- return decode(str, length, false, ignored);
- }
- String decode(const char*, size_t length, bool stopOnError, bool& sawError) const;
- CString encode(const UChar*, size_t length, UnencodableHandling) const;
+ WEBCORE_EXPORT String decode(const char*, size_t length, bool stopOnError, bool& sawError) const;
+ String decode(const char*, size_t length) const;
+ CString encode(StringView, UnencodableHandling) const;
- UChar backslashAsCurrencySymbol() const;
+ UChar backslashAsCurrencySymbol() const;
+ bool isByteBasedEncoding() const { return !isNonByteBasedEncoding(); }
- private:
- bool isNonByteBasedEncoding() const;
- bool isUTF7Encoding() const;
+private:
+ bool isNonByteBasedEncoding() const;
+ bool isUTF7Encoding() const;
- const char* m_name;
- UChar m_backslashAsCurrencySymbol;
- };
+ const char* m_name { nullptr };
+ UChar m_backslashAsCurrencySymbol;
+};
- inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); }
- inline bool operator!=(const TextEncoding& a, const TextEncoding& b) { return a.name() != b.name(); }
+inline bool operator==(const TextEncoding& a, const TextEncoding& b) { return a.name() == b.name(); }
+inline bool operator!=(const TextEncoding& a, const TextEncoding& b) { return a.name() != b.name(); }
- const TextEncoding& ASCIIEncoding();
- const TextEncoding& Latin1Encoding();
- const TextEncoding& UTF16BigEndianEncoding();
- const TextEncoding& UTF16LittleEndianEncoding();
- const TextEncoding& UTF32BigEndianEncoding();
- const TextEncoding& UTF32LittleEndianEncoding();
- const TextEncoding& UTF8Encoding();
- const TextEncoding& WindowsLatin1Encoding();
+const TextEncoding& ASCIIEncoding();
+const TextEncoding& Latin1Encoding();
+const TextEncoding& UTF16BigEndianEncoding();
+const TextEncoding& UTF16LittleEndianEncoding();
+const TextEncoding& UTF32BigEndianEncoding();
+const TextEncoding& UTF32LittleEndianEncoding();
+WEBCORE_EXPORT const TextEncoding& UTF8Encoding();
+WEBCORE_EXPORT const TextEncoding& WindowsLatin1Encoding();
-} // namespace WebCore
+// Convenience overload of decode(): never stops on a decoding error and
+// discards the "saw an error" flag reported by the four-argument overload.
+inline String TextEncoding::decode(const char* characters, size_t length) const
+{
+    const bool stopOnError = false;
+    bool sawErrorIgnored;
+    return decode(characters, length, stopOnError, sawErrorIgnored);
+}
-#endif // TextEncoding_h
+} // namespace WebCore
diff --git a/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp b/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp
new file mode 100644
index 000000000..8153b75bd
--- /dev/null
+++ b/Source/WebCore/platform/text/TextEncodingDetectorICU.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2008, 2009 Google Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "TextEncodingDetector.h"
+
+#include "TextEncoding.h"
+#include <unicode/ucnv.h>
+#include <unicode/ucsdet.h>
+
+namespace WebCore {
+
+// Guesses the encoding of |data| (|len| bytes) using ICU's charset detector.
+//
+// |hintEncodingName| may be null. When given, the first ICU match whose
+// confidence is at least the threshold and whose name resolves to the same
+// TextEncoding as the hint wins; otherwise the top ICU match is used.
+//
+// |detectedEncoding| is reset to a default-constructed TextEncoding on entry
+// and is only overwritten on success. Returns true when the ICU calls
+// succeeded, false otherwise.
+// NOTE(review): when ICU reports zero matches this still returns true with
+// |detectedEncoding| built from a null name (presumably an invalid encoding);
+// callers appear to be expected to check isValid() - confirm.
+bool detectTextEncoding(const char* data, size_t len,
+    const char* hintEncodingName,
+    TextEncoding* detectedEncoding)
+{
+    *detectedEncoding = TextEncoding();
+    int matchesCount = 0;
+    UErrorCode status = U_ZERO_ERROR;
+    UCharsetDetector* detector = ucsdet_open(&status);
+    if (U_FAILURE(status))
+        return false;
+    ucsdet_enableInputFilter(detector, true);
+    ucsdet_setText(detector, data, static_cast<int32_t>(len), &status);
+    if (U_FAILURE(status)) {
+        // The detector was opened successfully above, so it must be released
+        // on this early exit as well; otherwise it is leaked.
+        ucsdet_close(detector);
+        return false;
+    }
+
+    // FIXME: A few things we can do other than improving
+    // the ICU detector itself.
+    // 1. Use ucsdet_detectAll and pick the most likely one given
+    // "the context" (parent-encoding, referrer encoding, etc).
+    // 2. 'Emulate' Firefox/IE's non-Universal detectors (e.g.
+    // Chinese, Japanese, Russian, Korean and Hebrew) by picking the
+    // encoding with the highest confidence among the detector-specific
+    // limited set of candidate encodings.
+    // Below is a partial implementation of the first part of what's outlined
+    // above.
+    const UCharsetMatch** matches = ucsdet_detectAll(detector, &matchesCount, &status);
+    if (U_FAILURE(status)) {
+        ucsdet_close(detector);
+        return false;
+    }
+
+    const char* encoding = 0;
+    if (hintEncodingName) {
+        TextEncoding hintEncoding(hintEncodingName);
+        // 10 is the minimum confidence value consistent with the codepoint
+        // allocation in a given encoding. The size of a chunk passed to
+        // us varies even for the same html file (apparently depending on
+        // the network load). When we're given a rather short chunk, we
+        // don't have a sufficiently reliable signal other than the fact that
+        // the chunk is consistent with a set of encodings. So, instead of
+        // setting an arbitrary threshold, we have to scan all the encodings
+        // consistent with the data.
+        const int32_t kThreshold = 10;
+        for (int i = 0; i < matchesCount; ++i) {
+            int32_t confidence = ucsdet_getConfidence(matches[i], &status);
+            if (U_FAILURE(status)) {
+                // A failure on one match is not fatal: clear it and try the next.
+                status = U_ZERO_ERROR;
+                continue;
+            }
+            // Stop at the first match below the threshold.
+            if (confidence < kThreshold)
+                break;
+            const char* matchEncoding = ucsdet_getName(matches[i], &status);
+            if (U_FAILURE(status)) {
+                status = U_ZERO_ERROR;
+                continue;
+            }
+            if (TextEncoding(matchEncoding) == hintEncoding) {
+                encoding = hintEncodingName;
+                break;
+            }
+        }
+    }
+    // If no match is found so far, just pick the top match.
+    // This can happen, say, when a parent frame in EUC-JP refers to
+    // a child frame in Shift_JIS and both frames do NOT specify the encoding
+    // making us resort to auto-detection (when it IS turned on).
+    if (!encoding && matchesCount > 0)
+        encoding = ucsdet_getName(matches[0], &status);
+    if (U_SUCCESS(status)) {
+        // Build the TextEncoding before closing the detector: |encoding| may
+        // point at a name string obtained from one of the detector's matches.
+        *detectedEncoding = TextEncoding(encoding);
+        ucsdet_close(detector);
+        return true;
+    }
+    ucsdet_close(detector);
+    return false;
+}
+
+}
diff --git a/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp b/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp
deleted file mode 100644
index 3b62bc5b0..000000000
--- a/Source/WebCore/platform/text/TextEncodingDetectorNone.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (C) 2009 Google Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Google Inc. nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "TextEncodingDetector.h"
-
-#include "TextEncoding.h"
-
-namespace WebCore {
-
-bool detectTextEncoding(const char*, size_t, const char*, TextEncoding* detectedEncoding)
-{
- *detectedEncoding = TextEncoding();
- return false;
-}
-
-}
diff --git a/Source/WebCore/platform/text/TextEncodingRegistry.cpp b/Source/WebCore/platform/text/TextEncodingRegistry.cpp
index d66f82b8f..eace643a3 100644
--- a/Source/WebCore/platform/text/TextEncodingRegistry.cpp
+++ b/Source/WebCore/platform/text/TextEncodingRegistry.cpp
@@ -11,10 +11,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -29,6 +29,7 @@
#include "TextCodecICU.h"
#include "TextCodecLatin1.h"
+#include "TextCodecReplacement.h"
#include "TextCodecUserDefined.h"
#include "TextCodecUTF16.h"
#include "TextCodecUTF8.h"
@@ -37,12 +38,16 @@
#include <wtf/ASCIICType.h>
#include <wtf/HashMap.h>
#include <wtf/HashSet.h>
+#include <wtf/Lock.h>
#include <wtf/MainThread.h>
-#include <wtf/NeverDestroyed.h>
#include <wtf/StdLibExtras.h>
#include <wtf/StringExtras.h>
-#if PLATFORM(MAC) && !PLATFORM(IOS)
+#if PLATFORM(COCOA)
+#include "WebCoreSystemInterface.h"
+#endif
+
+#if PLATFORM(MAC)
#include "TextCodecMac.h"
#endif
@@ -102,14 +107,7 @@ struct TextCodecFactory {
typedef HashMap<const char*, const char*, TextEncodingNameHash> TextEncodingNameMap;
typedef HashMap<const char*, TextCodecFactory> TextCodecMap;
-static std::mutex& encodingRegistryMutex()
-{
- // We don't have to construct this mutex in a thread safe way because this function
- // is called on the main thread for any page before it is used in worker threads.
- static NeverDestroyed<std::mutex> mutex;
-
- return mutex;
-}
+static StaticLock encodingRegistryMutex;
static TextEncodingNameMap* textEncodingNameMap;
static TextCodecMap* textCodecMap;
@@ -117,7 +115,7 @@ static bool didExtendTextCodecMaps;
static HashSet<const char*>* japaneseEncodings;
static HashSet<const char*>* nonBackslashEncodings;
-static const char* const textEncodingNameBlacklist[] = { "UTF-7" };
+static const char* const textEncodingNameBlacklist[] = { "UTF-7", "BOCU-1", "SCSU" };
#if ERROR_DISABLED
@@ -199,9 +197,8 @@ static void pruneBlacklistedCodecs()
}
}
-static void buildBaseTextCodecMaps()
+static void buildBaseTextCodecMaps(const std::lock_guard<StaticLock>&)
{
- ASSERT(isMainThread());
ASSERT(!textCodecMap);
ASSERT(!textEncodingNameMap);
@@ -269,6 +266,22 @@ bool isJapaneseEncoding(const char* canonicalEncodingName)
return canonicalEncodingName && japaneseEncodings && japaneseEncodings->contains(canonicalEncodingName);
}
+// Returns true when |alias| is the string "replacement", compared without
+// regard to case. A null |alias| is never a match.
+bool isReplacementEncoding(const char* alias)
+{
+    // strcasecmp() reports equality only for strings of the same length, so
+    // an 11-character length check is implied by the comparison itself.
+    return alias && !strcasecmp(alias, "replacement");
+}
+
+// String overload: reports whether |alias| matches "replacement" as decided by
+// equalLettersIgnoringASCIICase() (the literal passed in is already lowercase).
+bool isReplacementEncoding(const String& alias)
+{
+ return equalLettersIgnoringASCIICase(alias, "replacement");
+}
+
bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName)
{
return canonicalEncodingName && nonBackslashEncodings && nonBackslashEncodings->contains(canonicalEncodingName);
@@ -276,10 +289,13 @@ bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName)
static void extendTextCodecMaps()
{
+ TextCodecReplacement::registerEncodingNames(addToTextEncodingNameMap);
+ TextCodecReplacement::registerCodecs(addToTextCodecMap);
+
TextCodecICU::registerEncodingNames(addToTextEncodingNameMap);
TextCodecICU::registerCodecs(addToTextCodecMap);
-#if PLATFORM(MAC) && !PLATFORM(IOS)
+#if PLATFORM(MAC)
TextCodecMac::registerEncodingNames(addToTextEncodingNameMap);
TextCodecMac::registerCodecs(addToTextCodecMap);
#endif
@@ -288,9 +304,9 @@ static void extendTextCodecMaps()
buildQuirksSets();
}
-PassOwnPtr<TextCodec> newTextCodec(const TextEncoding& encoding)
+std::unique_ptr<TextCodec> newTextCodec(const TextEncoding& encoding)
{
- std::lock_guard<std::mutex> lock(encodingRegistryMutex());
+ std::lock_guard<StaticLock> lock(encodingRegistryMutex);
ASSERT(textCodecMap);
TextCodecFactory factory = textCodecMap->get(encoding.name());
@@ -303,10 +319,10 @@ const char* atomicCanonicalTextEncodingName(const char* name)
if (!name || !name[0])
return nullptr;
- if (!textEncodingNameMap)
- buildBaseTextCodecMaps();
+ std::lock_guard<StaticLock> lock(encodingRegistryMutex);
- std::lock_guard<std::mutex> lock(encodingRegistryMutex());
+ if (!textEncodingNameMap)
+ buildBaseTextCodecMaps(lock);
if (const char* atomicName = textEncodingNameMap->get(name))
return atomicName;
@@ -336,12 +352,12 @@ const char* atomicCanonicalTextEncodingName(const CharacterType* characters, siz
const char* atomicCanonicalTextEncodingName(const String& alias)
{
if (!alias.length())
- return 0;
+ return nullptr;
if (alias.is8Bit())
- return atomicCanonicalTextEncodingName<LChar>(alias.characters8(), alias.length());
+ return atomicCanonicalTextEncodingName(alias.characters8(), alias.length());
- return atomicCanonicalTextEncodingName<UChar>(alias.deprecatedCharacters(), alias.length());
+ return atomicCanonicalTextEncodingName(alias.characters16(), alias.length());
}
bool noExtendedTextEncodingNameUsed()
@@ -350,13 +366,30 @@ bool noExtendedTextEncodingNameUsed()
return !didExtendTextCodecMaps;
}
+// Returns the name of the text encoding to use by default.
+// Non-Cocoa platforms always get "ISO-8859-1"; Cocoa platforms ask the system
+// for its default CFString encoding and translate it to an IANA charset name.
+String defaultTextEncodingNameForSystemLanguage()
+{
+#if !PLATFORM(COCOA)
+    return ASCIILiteral("ISO-8859-1");
+#else
+    String encodingName = CFStringConvertEncodingToIANACharSetName(wkGetWebDefaultCFStringEncoding());
+
+    // CFStringConvertEncodingToIANACharSetName() returns cp949 for kTextEncodingDOSKorean AKA "extended EUC-KR" AKA windows-949.
+    // ICU uses this name for a different encoding, so we need to change the name to a value that actually gives us windows-949.
+    // In addition, this value must match what is used in Safari, see <rdar://problem/5579292>.
+    // On some OS versions, the result is CP949 (uppercase).
+    if (!equalLettersIgnoringASCIICase(encodingName, "cp949"))
+        return encodingName;
+    return ASCIILiteral("ks_c_5601-1987");
+#endif
+}
+
#ifndef NDEBUG
void dumpTextEncodingNameMap()
{
unsigned size = textEncodingNameMap->size();
fprintf(stderr, "Dumping %u entries in WebCore::textEncodingNameMap...\n", size);
- std::lock_guard<std::mutex> lock(encodingRegistryMutex());
+ std::lock_guard<StaticLock> lock(encodingRegistryMutex);
TextEncodingNameMap::const_iterator it = textEncodingNameMap->begin();
TextEncodingNameMap::const_iterator end = textEncodingNameMap->end();
diff --git a/Source/WebCore/platform/text/TextEncodingRegistry.h b/Source/WebCore/platform/text/TextEncodingRegistry.h
index 1895df737..0c5ba5116 100644
--- a/Source/WebCore/platform/text/TextEncodingRegistry.h
+++ b/Source/WebCore/platform/text/TextEncodingRegistry.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -27,9 +27,7 @@
#define TextEncodingRegistry_h
#include <memory>
-#include <wtf/PassOwnPtr.h>
#include <wtf/text/WTFString.h>
-#include <wtf/unicode/Unicode.h>
namespace WebCore {
@@ -38,7 +36,7 @@ namespace WebCore {
// Use TextResourceDecoder::decode to decode resources, since it handles BOMs.
// Use TextEncoding::encode to encode, since it takes care of normalization.
- PassOwnPtr<TextCodec> newTextCodec(const TextEncoding&);
+ std::unique_ptr<TextCodec> newTextCodec(const TextEncoding&);
// Only TextEncoding should use the following functions directly.
const char* atomicCanonicalTextEncodingName(const char* alias);
@@ -48,6 +46,10 @@ namespace WebCore {
bool noExtendedTextEncodingNameUsed();
bool isJapaneseEncoding(const char* canonicalEncodingName);
bool shouldShowBackslashAsCurrencySymbolIn(const char* canonicalEncodingName);
+ bool isReplacementEncoding(const char* alias);
+ bool isReplacementEncoding(const String& alias);
+
+ WEBCORE_EXPORT String defaultTextEncodingNameForSystemLanguage();
#ifndef NDEBUG
void dumpTextEncodingNameMap();
diff --git a/Source/WebCore/platform/text/TextFlags.h b/Source/WebCore/platform/text/TextFlags.h
new file mode 100644
index 000000000..76445976d
--- /dev/null
+++ b/Source/WebCore/platform/text/TextFlags.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2003, 2006 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TextFlags_h
+#define TextFlags_h
+
+namespace WebCore {
+
+enum TextRenderingMode { AutoTextRendering, OptimizeSpeed, OptimizeLegibility, GeometricPrecision };
+
+enum FontSmoothingMode { AutoSmoothing, NoSmoothing, Antialiased, SubpixelAntialiased };
+
+// This setting is used to provide ways of switching between multiple rendering modes that may have different
+// metrics. It is used to switch between CG and GDI text on Windows.
+enum class FontRenderingMode { Normal, Alternate };
+
+enum FontOrientation { Horizontal, Vertical };
+
+enum class NonCJKGlyphOrientation { Mixed, Upright };
+
+// Here, "Leading" and "Trailing" are relevant after the line has been rearranged for bidi.
+// ("Leading" means "left" and "Trailing" means "right.")
+// Two independent 2-bit fields packed into one value:
+// bits 0-1 describe trailing expansion, bits 2-3 describe leading expansion.
+// Within each field 0 = forbid, 1 = allow, 2 = force; the *Mask enumerators
+// (value 3 shifted into place) select the whole field.
+enum ExpansionBehaviorFlags {
+ ForbidTrailingExpansion = 0 << 0,
+ AllowTrailingExpansion = 1 << 0,
+ ForceTrailingExpansion = 2 << 0,
+ TrailingExpansionMask = 3 << 0,
+
+ ForbidLeadingExpansion = 0 << 2,
+ AllowLeadingExpansion = 1 << 2,
+ ForceLeadingExpansion = 2 << 2,
+ LeadingExpansionMask = 3 << 2,
+
+ // Default: trailing expansion allowed, leading expansion forbidden.
+ DefaultExpansion = AllowTrailingExpansion | ForbidLeadingExpansion,
+};
+// Integer type used to carry a combination of ExpansionBehaviorFlags values.
+typedef unsigned ExpansionBehavior;
+
+// Single-bit flags (0x1, 0x2, 0x4) that can be OR-ed together; FontSynthesisNone
+// is the empty combination.
+enum FontSynthesisValues {
+ FontSynthesisNone = 0x0,
+ FontSynthesisWeight = 0x1,
+ FontSynthesisStyle = 0x2,
+ FontSynthesisSmallCaps = 0x4
+};
+// Integer type used to carry a combination of FontSynthesisValues flags.
+typedef unsigned FontSynthesis;
+// NOTE(review): presumably the number of bits needed to store a FontSynthesis
+// value (three flags above) - confirm against the bit-field that uses it.
+const unsigned FontSynthesisWidth = 3;
+
+enum class FontVariantLigatures {
+ Normal,
+ Yes,
+ No
+};
+
+enum class FontVariantPosition {
+ Normal,
+ Subscript,
+ Superscript
+};
+
+enum class FontVariantCaps {
+ Normal,
+ Small,
+ AllSmall,
+ Petite,
+ AllPetite,
+ Unicase,
+ Titling
+};
+
+enum class FontVariantNumericFigure {
+ Normal,
+ LiningNumbers,
+ OldStyleNumbers
+};
+
+enum class FontVariantNumericSpacing {
+ Normal,
+ ProportionalNumbers,
+ TabularNumbers
+};
+
+enum class FontVariantNumericFraction {
+ Normal,
+ DiagonalFractions,
+ StackedFractions
+};
+
+enum class FontVariantNumericOrdinal {
+ Normal,
+ Yes
+};
+
+enum class FontVariantNumericSlashedZero {
+ Normal,
+ Yes
+};
+
+enum class FontVariantAlternates {
+ Normal,
+ HistoricalForms
+};
+
+enum class FontVariantEastAsianVariant {
+ Normal,
+ Jis78,
+ Jis83,
+ Jis90,
+ Jis04,
+ Simplified,
+ Traditional
+};
+
+enum class FontVariantEastAsianWidth {
+ Normal,
+ Full,
+ Proportional
+};
+
+enum class FontVariantEastAsianRuby {
+ Normal,
+ Yes
+};
+
+// Value type bundling the fifteen FontVariant* enum settings declared above.
+struct FontVariantSettings {
+    // Every member starts out as its enum's Normal value (see the default
+    // member initializers at the bottom of the struct).
+    FontVariantSettings() = default;
+
+    // Member-wise constructor; each argument initializes the member of the same name.
+    FontVariantSettings(
+        FontVariantLigatures commonLigatures,
+        FontVariantLigatures discretionaryLigatures,
+        FontVariantLigatures historicalLigatures,
+        FontVariantLigatures contextualAlternates,
+        FontVariantPosition position,
+        FontVariantCaps caps,
+        FontVariantNumericFigure numericFigure,
+        FontVariantNumericSpacing numericSpacing,
+        FontVariantNumericFraction numericFraction,
+        FontVariantNumericOrdinal numericOrdinal,
+        FontVariantNumericSlashedZero numericSlashedZero,
+        FontVariantAlternates alternates,
+        FontVariantEastAsianVariant eastAsianVariant,
+        FontVariantEastAsianWidth eastAsianWidth,
+        FontVariantEastAsianRuby eastAsianRuby)
+        : commonLigatures(commonLigatures)
+        , discretionaryLigatures(discretionaryLigatures)
+        , historicalLigatures(historicalLigatures)
+        , contextualAlternates(contextualAlternates)
+        , position(position)
+        , caps(caps)
+        , numericFigure(numericFigure)
+        , numericSpacing(numericSpacing)
+        , numericFraction(numericFraction)
+        , numericOrdinal(numericOrdinal)
+        , numericSlashedZero(numericSlashedZero)
+        , alternates(alternates)
+        , eastAsianVariant(eastAsianVariant)
+        , eastAsianWidth(eastAsianWidth)
+        , eastAsianRuby(eastAsianRuby)
+    {
+    }
+
+    // True when every member equals its Normal value, i.e. when this object
+    // compares equal to a default-constructed FontVariantSettings.
+    bool isAllNormal() const { return *this == FontVariantSettings(); }
+
+    // Member-wise equality over all fifteen settings.
+    bool operator==(const FontVariantSettings& other) const
+    {
+        return commonLigatures == other.commonLigatures
+            && discretionaryLigatures == other.discretionaryLigatures
+            && historicalLigatures == other.historicalLigatures
+            && contextualAlternates == other.contextualAlternates
+            && position == other.position
+            && caps == other.caps
+            && numericFigure == other.numericFigure
+            && numericSpacing == other.numericSpacing
+            && numericFraction == other.numericFraction
+            && numericOrdinal == other.numericOrdinal
+            && numericSlashedZero == other.numericSlashedZero
+            && alternates == other.alternates
+            && eastAsianVariant == other.eastAsianVariant
+            && eastAsianWidth == other.eastAsianWidth
+            && eastAsianRuby == other.eastAsianRuby;
+    }
+
+    bool operator!=(const FontVariantSettings& other) const { return !(*this == other); }
+
+    // Packs all settings into disjoint bit fields of a single unsigned.
+    // The shift amounts leave each member as many bits as its enum needs
+    // (1 bit for two enumerators, 2 bits for three, 3 bits for seven).
+    unsigned uniqueValue() const
+    {
+        return static_cast<unsigned>(commonLigatures) << 26 // bits 26-27
+            | static_cast<unsigned>(discretionaryLigatures) << 24 // bits 24-25
+            | static_cast<unsigned>(historicalLigatures) << 22 // bits 22-23
+            | static_cast<unsigned>(contextualAlternates) << 20 // bits 20-21
+            | static_cast<unsigned>(position) << 18 // bits 18-19
+            | static_cast<unsigned>(caps) << 15 // bits 15-17
+            | static_cast<unsigned>(numericFigure) << 13 // bits 13-14
+            | static_cast<unsigned>(numericSpacing) << 11 // bits 11-12
+            | static_cast<unsigned>(numericFraction) << 9 // bits 9-10
+            | static_cast<unsigned>(numericOrdinal) << 8 // bit 8
+            | static_cast<unsigned>(numericSlashedZero) << 7 // bit 7
+            | static_cast<unsigned>(alternates) << 6 // bit 6
+            | static_cast<unsigned>(eastAsianVariant) << 3 // bits 3-5
+            | static_cast<unsigned>(eastAsianWidth) << 1 // bits 1-2
+            | static_cast<unsigned>(eastAsianRuby) << 0; // bit 0
+    }
+
+    FontVariantLigatures commonLigatures { FontVariantLigatures::Normal };
+    FontVariantLigatures discretionaryLigatures { FontVariantLigatures::Normal };
+    FontVariantLigatures historicalLigatures { FontVariantLigatures::Normal };
+    FontVariantLigatures contextualAlternates { FontVariantLigatures::Normal };
+    FontVariantPosition position { FontVariantPosition::Normal };
+    FontVariantCaps caps { FontVariantCaps::Normal };
+    FontVariantNumericFigure numericFigure { FontVariantNumericFigure::Normal };
+    FontVariantNumericSpacing numericSpacing { FontVariantNumericSpacing::Normal };
+    FontVariantNumericFraction numericFraction { FontVariantNumericFraction::Normal };
+    FontVariantNumericOrdinal numericOrdinal { FontVariantNumericOrdinal::Normal };
+    FontVariantNumericSlashedZero numericSlashedZero { FontVariantNumericSlashedZero::Normal };
+    FontVariantAlternates alternates { FontVariantAlternates::Normal };
+    FontVariantEastAsianVariant eastAsianVariant { FontVariantEastAsianVariant::Normal };
+    FontVariantEastAsianWidth eastAsianWidth { FontVariantEastAsianWidth::Normal };
+    FontVariantEastAsianRuby eastAsianRuby { FontVariantEastAsianRuby::Normal };
+};
+
+// Plain holder for the four FontVariantLigatures settings; member names match
+// the ligature-related members of FontVariantSettings. There is no default
+// constructor: all four values must be supplied.
+struct FontVariantLigaturesValues {
+ FontVariantLigaturesValues(
+ FontVariantLigatures commonLigatures,
+ FontVariantLigatures discretionaryLigatures,
+ FontVariantLigatures historicalLigatures,
+ FontVariantLigatures contextualAlternates)
+ : commonLigatures(commonLigatures)
+ , discretionaryLigatures(discretionaryLigatures)
+ , historicalLigatures(historicalLigatures)
+ , contextualAlternates(contextualAlternates)
+ {
+ }
+
+ FontVariantLigatures commonLigatures;
+ FontVariantLigatures discretionaryLigatures;
+ FontVariantLigatures historicalLigatures;
+ FontVariantLigatures contextualAlternates;
+};
+
+// Plain holder for the five FontVariantNumeric* settings (figure, spacing,
+// fraction, ordinal, slashed zero). There is no default constructor: all five
+// values must be supplied.
+struct FontVariantNumericValues {
+ FontVariantNumericValues(
+ FontVariantNumericFigure figure,
+ FontVariantNumericSpacing spacing,
+ FontVariantNumericFraction fraction,
+ FontVariantNumericOrdinal ordinal,
+ FontVariantNumericSlashedZero slashedZero)
+ : figure(figure)
+ , spacing(spacing)
+ , fraction(fraction)
+ , ordinal(ordinal)
+ , slashedZero(slashedZero)
+ {
+ }
+
+ FontVariantNumericFigure figure;
+ FontVariantNumericSpacing spacing;
+ FontVariantNumericFraction fraction;
+ FontVariantNumericOrdinal ordinal;
+ FontVariantNumericSlashedZero slashedZero;
+};
+
+// Plain holder for the three FontVariantEastAsian* settings (variant, width,
+// ruby). There is no default constructor: all three values must be supplied.
+struct FontVariantEastAsianValues {
+ FontVariantEastAsianValues(
+ FontVariantEastAsianVariant variant,
+ FontVariantEastAsianWidth width,
+ FontVariantEastAsianRuby ruby)
+ : variant(variant)
+ , width(width)
+ , ruby(ruby)
+ {
+ }
+
+ FontVariantEastAsianVariant variant;
+ FontVariantEastAsianWidth width;
+ FontVariantEastAsianRuby ruby;
+};
+
+// Width variants, numbered 0-3. LastFontWidthVariant aliases the highest
+// enumerator so the assertion below can check the storage width.
+enum FontWidthVariant {
+ RegularWidth,
+ HalfWidth,
+ ThirdWidth,
+ QuarterWidth,
+ LastFontWidthVariant = QuarterWidth
+};
+
+// Number of bits a FontWidthVariant value is expected to fit in.
+const unsigned FontWidthVariantWidth = 2;
+
+// Fails the build if the largest enumerator has any bit set at or above
+// position FontWidthVariantWidth, i.e. if it no longer fits in that many bits.
+COMPILE_ASSERT(!(LastFontWidthVariant >> FontWidthVariantWidth), FontWidthVariantWidth_is_correct);
+
+enum FontWeight {
+ FontWeight100,
+ FontWeight200,
+ FontWeight300,
+ FontWeight400,
+ FontWeight500,
+ FontWeight600,
+ FontWeight700,
+ FontWeight800,
+ FontWeight900,
+ FontWeightNormal = FontWeight400,
+ FontWeightBold = FontWeight700
+};
+
+enum FontItalic {
+ FontItalicOff = 0,
+ FontItalicOn = 1
+};
+
+enum FontSmallCaps {
+ FontSmallCapsOff = 0,
+ FontSmallCapsOn = 1
+};
+
+// Bit positions used to build FontTraitsMask below. The enumerators count up
+// from 0, so the final one, FontTraitsMaskWidth, equals the number of bit
+// positions defined (11).
+enum {
+ FontStyleNormalBit = 0,
+ FontStyleItalicBit,
+ FontWeight100Bit,
+ FontWeight200Bit,
+ FontWeight300Bit,
+ FontWeight400Bit,
+ FontWeight500Bit,
+ FontWeight600Bit,
+ FontWeight700Bit,
+ FontWeight800Bit,
+ FontWeight900Bit,
+ FontTraitsMaskWidth
+};
+
+// One single-bit mask per bit position above, plus two combined masks:
+// FontStyleMask covers both style bits and FontWeightMask covers all nine
+// weight bits.
+enum FontTraitsMask {
+ FontStyleNormalMask = 1 << FontStyleNormalBit,
+ FontStyleItalicMask = 1 << FontStyleItalicBit,
+ FontStyleMask = FontStyleNormalMask | FontStyleItalicMask,
+
+ FontWeight100Mask = 1 << FontWeight100Bit,
+ FontWeight200Mask = 1 << FontWeight200Bit,
+ FontWeight300Mask = 1 << FontWeight300Bit,
+ FontWeight400Mask = 1 << FontWeight400Bit,
+ FontWeight500Mask = 1 << FontWeight500Bit,
+ FontWeight600Mask = 1 << FontWeight600Bit,
+ FontWeight700Mask = 1 << FontWeight700Bit,
+ FontWeight800Mask = 1 << FontWeight800Bit,
+ FontWeight900Mask = 1 << FontWeight900Bit,
+ FontWeightMask = FontWeight100Mask | FontWeight200Mask | FontWeight300Mask | FontWeight400Mask | FontWeight500Mask | FontWeight600Mask | FontWeight700Mask | FontWeight800Mask | FontWeight900Mask
+};
+
+enum class Kerning {
+ Auto,
+ Normal,
+ NoShift
+};
+
+}
+
+#endif
diff --git a/Source/WebCore/platform/text/TextStream.cpp b/Source/WebCore/platform/text/TextStream.cpp
index d07bffd09..32fe3c757 100644
--- a/Source/WebCore/platform/text/TextStream.cpp
+++ b/Source/WebCore/platform/text/TextStream.cpp
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -27,10 +27,9 @@
#include "TextStream.h"
#include "FloatPoint.h"
-#include "FloatRect.h"
#include "IntPoint.h"
-#include "IntRect.h"
#include "LayoutRect.h"
+#include "LayoutUnit.h"
#include <wtf/MathExtras.h>
#include <wtf/StringExtras.h>
#include <wtf/text/WTFString.h>
@@ -90,13 +89,19 @@ TextStream& TextStream::operator<<(unsigned long long i)
TextStream& TextStream::operator<<(float f)
{
- m_text.append(String::numberToStringFixedWidth(f, 2));
+ if (m_formattingFlags & Formatting::NumberRespectingIntegers)
+ return *this << FormatNumberRespectingIntegers(f);
+
+ m_text.appendFixedWidthNumber(f, 2);
return *this;
}
TextStream& TextStream::operator<<(double d)
{
- m_text.append(String::numberToStringFixedWidth(d, 2));
+ if (m_formattingFlags & Formatting::NumberRespectingIntegers)
+ return *this << FormatNumberRespectingIntegers(d);
+
+ m_text.appendFixedWidthNumber(d, 2);
return *this;
}
@@ -121,52 +126,62 @@ TextStream& TextStream::operator<<(const String& string)
TextStream& TextStream::operator<<(const FormatNumberRespectingIntegers& numberToFormat)
{
- if (hasFractions(numberToFormat.value))
- return *this << numberToFormat.value;
+ if (hasFractions(numberToFormat.value)) {
+ m_text.appendFixedWidthNumber(numberToFormat.value, 2);
+ return *this;
+ }
m_text.appendNumber(static_cast<int>(numberToFormat.value));
return *this;
}
-TextStream& TextStream::operator<<(const IntPoint& p)
+TextStream& TextStream::operator<<(LayoutUnit v)
{
- return *this << "(" << p.x() << "," << p.y() << ")";
+ return *this << TextStream::FormatNumberRespectingIntegers(v.toFloat());
}
-TextStream& TextStream::operator<<(const IntRect& r)
+String TextStream::release()
{
- return *this << "at (" << r.x() << "," << r.y() << ") size " << r.width() << "x" << r.height();
+ String result = m_text.toString();
+ m_text.clear();
+ return result;
}
-TextStream& TextStream::operator<<(const FloatPoint& p)
+void TextStream::startGroup()
{
- return *this << "(" << TextStream::FormatNumberRespectingIntegers(p.x())
- << "," << TextStream::FormatNumberRespectingIntegers(p.y()) << ")";
-}
+ TextStream& ts = *this;
-TextStream& TextStream::operator<<(const FloatSize& s)
-{
- return *this << "width=" << TextStream::FormatNumberRespectingIntegers(s.width())
- << " height=" << TextStream::FormatNumberRespectingIntegers(s.height());
+ if (m_multiLineMode) {
+ ts << "\n";
+ ts.writeIndent();
+ ts << "(";
+ ts.increaseIndent();
+ } else
+ ts << " (";
}
-TextStream& TextStream::operator<<(const LayoutPoint& p)
+void TextStream::endGroup()
{
- // FIXME: These should be printed as floats. Keeping them ints for consistency with pervious test expectations.
- return *this << "(" << p.x().toInt() << "," << p.y().toInt() << ")";
+ TextStream& ts = *this;
+ ts << ")";
+ if (m_multiLineMode)
+ ts.decreaseIndent();
}
-TextStream& TextStream::operator<<(const LayoutRect& r)
+void TextStream::nextLine()
{
- // FIXME: These should be printed as floats. Keeping them ints for consistency with previous test expectations.
- return *this << pixelSnappedIntRect(r);
+ TextStream& ts = *this;
+ if (m_multiLineMode) {
+ ts << "\n";
+ ts.writeIndent();
+ } else
+ ts << " ";
}
-String TextStream::release()
+void TextStream::writeIndent()
{
- String result = m_text.toString();
- m_text.clear();
- return result;
+ if (m_multiLineMode)
+ WebCore::writeIndent(*this, m_indent);
}
void writeIndent(TextStream& ts, int indent)
diff --git a/Source/WebCore/platform/text/TextStream.h b/Source/WebCore/platform/text/TextStream.h
index 053cb60de..be9f5d10b 100644
--- a/Source/WebCore/platform/text/TextStream.h
+++ b/Source/WebCore/platform/text/TextStream.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -28,16 +28,10 @@
#include <wtf/Forward.h>
#include <wtf/text/StringBuilder.h>
-#include <wtf/unicode/Unicode.h>
namespace WebCore {
-class IntPoint;
-class IntRect;
-class FloatPoint;
-class FloatSize;
-class LayoutPoint;
-class LayoutRect;
+class LayoutUnit;
class TextStream {
public:
@@ -45,49 +39,102 @@ public:
FormatNumberRespectingIntegers(double number) : value(number) { }
double value;
};
-
- TextStream& operator<<(bool);
- TextStream& operator<<(int);
- TextStream& operator<<(unsigned);
- TextStream& operator<<(long);
- TextStream& operator<<(unsigned long);
- TextStream& operator<<(long long);
- TextStream& operator<<(unsigned long long);
- TextStream& operator<<(float);
- TextStream& operator<<(double);
- TextStream& operator<<(const char*);
- TextStream& operator<<(const void*);
- TextStream& operator<<(const String&);
- TextStream& operator<<(const FormatNumberRespectingIntegers&);
-
- TextStream& operator<<(const IntPoint&);
- TextStream& operator<<(const IntRect&);
- TextStream& operator<<(const FloatPoint&);
- TextStream& operator<<(const FloatSize&);
- TextStream& operator<<(const LayoutPoint&);
- TextStream& operator<<(const LayoutRect&);
-
- template<typename Item>
- TextStream& operator<<(const Vector<Item>& vector)
+
+ enum Formatting {
+ SVGStyleRect = 1 << 0, // "at (0,0) size 10x10"
+ NumberRespectingIntegers = 1 << 1,
+ LayoutUnitsAsIntegers = 1 << 2,
+ };
+
+ using FormattingFlags = unsigned;
+
+ enum class LineMode { SingleLine, MultipleLine };
+ TextStream(LineMode lineMode = LineMode::MultipleLine, FormattingFlags formattingFlags = 0)
+ : m_formattingFlags(formattingFlags)
+ , m_multiLineMode(lineMode == LineMode::MultipleLine)
{
- *this << "[";
+ }
- unsigned size = vector.size();
- for (unsigned i = 0; i < size; ++i) {
- *this << vector[i];
- if (i < size - 1)
- *this << ", ";
- }
+ WEBCORE_EXPORT TextStream& operator<<(bool);
+ WEBCORE_EXPORT TextStream& operator<<(int);
+ WEBCORE_EXPORT TextStream& operator<<(unsigned);
+ WEBCORE_EXPORT TextStream& operator<<(long);
+ WEBCORE_EXPORT TextStream& operator<<(unsigned long);
+ WEBCORE_EXPORT TextStream& operator<<(long long);
+ WEBCORE_EXPORT TextStream& operator<<(LayoutUnit);
+
+ WEBCORE_EXPORT TextStream& operator<<(unsigned long long);
+ WEBCORE_EXPORT TextStream& operator<<(float);
+ WEBCORE_EXPORT TextStream& operator<<(double);
+ WEBCORE_EXPORT TextStream& operator<<(const char*);
+ WEBCORE_EXPORT TextStream& operator<<(const void*);
+ WEBCORE_EXPORT TextStream& operator<<(const String&);
+ // Deprecated. Use the NumberRespectingIntegers FormattingFlag instead.
+ WEBCORE_EXPORT TextStream& operator<<(const FormatNumberRespectingIntegers&);
- return *this << "]";
+ FormattingFlags formattingFlags() const { return m_formattingFlags; }
+ void setFormattingFlags(FormattingFlags flags) { m_formattingFlags = flags; }
+
+ bool hasFormattingFlag(Formatting flag) const { return m_formattingFlags & flag; }
+
+ template<typename T>
+ void dumpProperty(const String& name, const T& value)
+ {
+ TextStream& ts = *this;
+ ts.startGroup();
+ ts << name << " " << value;
+ ts.endGroup();
}
- String release();
+ WEBCORE_EXPORT String release();
+
+ WEBCORE_EXPORT void startGroup();
+ WEBCORE_EXPORT void endGroup();
+ WEBCORE_EXPORT void nextLine(); // Output newline and indent.
+
+ void increaseIndent(int amount = 1) { m_indent += amount; }
+ void decreaseIndent(int amount = 1) { m_indent -= amount; ASSERT(m_indent >= 0); }
+
+ WEBCORE_EXPORT void writeIndent();
+
+ class GroupScope {
+ public:
+ GroupScope(TextStream& ts)
+ : m_stream(ts)
+ {
+ m_stream.startGroup();
+ }
+ ~GroupScope()
+ {
+ m_stream.endGroup();
+ }
+
+ private:
+ TextStream& m_stream;
+ };
private:
StringBuilder m_text;
+ FormattingFlags m_formattingFlags { 0 };
+ int m_indent { 0 };
+ bool m_multiLineMode { true };
};
+template<typename Item>
+TextStream& operator<<(TextStream& ts, const Vector<Item>& vector)
+{
+ ts << "[";
+
+ unsigned size = vector.size();
+ for (unsigned i = 0; i < size; ++i) {
+ ts << vector[i];
+ if (i < size - 1)
+ ts << ", ";
+ }
+
+ return ts << "]";
+}
+
void writeIndent(TextStream&, int indent);
}
diff --git a/Source/WebCore/platform/text/UnicodeBidi.h b/Source/WebCore/platform/text/UnicodeBidi.h
index 14c88bc1f..7249cfefe 100644
--- a/Source/WebCore/platform/text/UnicodeBidi.h
+++ b/Source/WebCore/platform/text/UnicodeBidi.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
diff --git a/Source/WebCore/platform/text/WritingMode.h b/Source/WebCore/platform/text/WritingMode.h
index 9e5d28c7d..2310b43e7 100644
--- a/Source/WebCore/platform/text/WritingMode.h
+++ b/Source/WebCore/platform/text/WritingMode.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2012, Google Inc. All rights reserved.
+ * Copyright (C) 2015, Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
@@ -33,26 +34,137 @@
namespace WebCore {
+// Inline text direction. LTR == 0 and RTL == 1, so a TextDirection fits in the
+// single low bit that MAKE_TEXT_FLOW (below) ORs it into.
+enum TextDirection { LTR, RTL };
+
+// Returns true when |direction| is LTR.
+inline bool isLeftToRightDirection(TextDirection direction)
+{
+ return direction == LTR;
+}
+
enum WritingMode {
- TopToBottomWritingMode, RightToLeftWritingMode, LeftToRightWritingMode, BottomToTopWritingMode
+ TopToBottomWritingMode = 0, // horizontal-tb
+ BottomToTopWritingMode = 1, // horizontal-bt
+ LeftToRightWritingMode = 2, // vertical-lr
+ RightToLeftWritingMode = 3, // vertical-rl
+};
+
+// Packs a writing mode and a text direction into one integer: the direction
+// occupies bit 0 and the writing mode is shifted up by one bit.
+#define MAKE_TEXT_FLOW(writingMode, direction) ((writingMode) << 1 | (direction))
+
+// Define the text flow in terms of the writing mode and the text direction. The first
+// part is the line growing direction and the second part is the block growing direction.
+// NOTE(review): if bit 0 is the inline direction, InlineSouthBlockWest
+// (LeftToRightWritingMode + RTL) and InlineNorthBlockEast (RightToLeftWritingMode + LTR)
+// look swapped relative to what their names say - confirm before relying on the names.
+enum TextFlow {
+ InlineEastBlockSouth = MAKE_TEXT_FLOW(TopToBottomWritingMode, LTR),
+ InlineWestBlockSouth = MAKE_TEXT_FLOW(TopToBottomWritingMode, RTL),
+ InlineEastBlockNorth = MAKE_TEXT_FLOW(BottomToTopWritingMode, LTR),
+ InlineWestBlockNorth = MAKE_TEXT_FLOW(BottomToTopWritingMode, RTL),
+ InlineSouthBlockEast = MAKE_TEXT_FLOW(LeftToRightWritingMode, LTR),
+ InlineSouthBlockWest = MAKE_TEXT_FLOW(LeftToRightWritingMode, RTL),
+ InlineNorthBlockEast = MAKE_TEXT_FLOW(RightToLeftWritingMode, LTR),
+ InlineNorthBlockWest = MAKE_TEXT_FLOW(RightToLeftWritingMode, RTL)
+};
+
+// Run-time counterpart of the enumerator initializers above: combines
+// |writingMode| and |direction| with the same macro and casts to TextFlow.
+inline TextFlow makeTextFlow(WritingMode writingMode, TextDirection direction)
+{
+ return static_cast<TextFlow>(MAKE_TEXT_FLOW(writingMode, direction));
+}
+
+// The macro is an implementation detail of this header; do not leak it to includers.
+#undef MAKE_TEXT_FLOW
+
+// Bit masks over a TextFlow value. Bit 0 holds the text direction; bits 1 and 2
+// hold the two bits of the writing mode after it was shifted left by one.
+const unsigned TextFlowReversedMask = 1;
+const unsigned TextFlowFlippedMask = 2;
+const unsigned TextFlowVerticalMask = 4;
+
+// True when the direction bit (bit 0) of |textflow| is set.
+inline bool isReversedTextFlow(TextFlow textflow)
+{
+ return textflow & TextFlowReversedMask;
+}
+
+// True when bit 1 of |textflow| is set.
+inline bool isFlippedTextFlow(TextFlow textflow)
+{
+ return textflow & TextFlowFlippedMask;
+}
+
+// True when bit 2 of |textflow| is set.
+inline bool isVerticalTextFlow(TextFlow textflow)
+{
+ return textflow & TextFlowVerticalMask;
+}
+
+// True for the writing modes whose lines run vertically (vertical-lr and vertical-rl).
+inline bool isVerticalWritingMode(WritingMode writingMode)
+{
+    // The direction passed here is irrelevant: only the writing-mode bits are inspected.
+    TextFlow flow = makeTextFlow(writingMode, LTR);
+    return isVerticalTextFlow(flow);
+}
+
+// True when blocks progress opposite to the normal direction (vertical-rl and horizontal-bt).
+inline bool isFlippedWritingMode(WritingMode writingMode)
+{
+    // The direction passed here is irrelevant: only the writing-mode bits are inspected.
+    TextFlow flow = makeTextFlow(writingMode, LTR);
+    return isFlippedTextFlow(flow);
+}
+
// Lines have horizontal orientation; modes horizontal-tb or horizontal-bt.
inline bool isHorizontalWritingMode(WritingMode writingMode)
{
- return writingMode == TopToBottomWritingMode || writingMode == BottomToTopWritingMode;
+ return !isVerticalWritingMode(writingMode);
}
// Bottom of the line occurs earlier in the block; modes vertical-lr or horizontal-bt.
inline bool isFlippedLinesWritingMode(WritingMode writingMode)
{
- return writingMode == LeftToRightWritingMode || writingMode == BottomToTopWritingMode;
+ return isVerticalWritingMode(writingMode) != isFlippedWritingMode(writingMode);
}
-// Block progression increases in the opposite direction to normal; modes vertical-rl or horizontal-bt.
-inline bool isFlippedBlocksWritingMode(WritingMode writingMode)
+// Flow-relative box sides, numbered 0..3. mapLogicalSideToPhysicalSide() casts
+// these values straight to PhysicalBoxSide, so the order here must stay aligned
+// with TopSide..LeftSide below (Before/Top, End/Right, After/Bottom, Start/Left).
+enum LogicalBoxSide {
+ BeforeSide,
+ EndSide,
+ AfterSide,
+ StartSide
+};
+
+// Physical box sides in clockwise order starting at the top (TopSide == 0).
+// NilSide (-1) lies outside the 0..3 range the side arithmetic helpers work in.
+enum PhysicalBoxSide {
+ NilSide = -1,
+ TopSide,
+ RightSide,
+ BottomSide,
+ LeftSide
+};
+
+// Returns true for LeftSide and RightSide, false for every other value (including NilSide).
+inline bool isHorizontalPhysicalSide(PhysicalBoxSide physicalSide)
+{
+    switch (physicalSide) {
+    case LeftSide:
+    case RightSide:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Swaps a side with the one opposite to it: top <-> bottom and left <-> right.
+// NilSide gets no special treatment: (-1 + 2) % 4 evaluates to RightSide.
+inline PhysicalBoxSide mirrorPhysicalSide(PhysicalBoxSide physicalSide)
+{
+    // Opposite sides sit two enumerators apart, modulo the four sides.
+    int sideIndex = static_cast<int>(physicalSide);
+    int oppositeIndex = (sideIndex + 2) % 4;
+    return static_cast<PhysicalBoxSide>(oppositeIndex);
+}
+
+// Exchanges the two axes: top <-> left and right <-> bottom.
+inline PhysicalBoxSide rotatePhysicalSide(PhysicalBoxSide physicalSide)
+{
+    // Left/right sides advance by one enumerator, top/bottom sides by three,
+    // both modulo the four sides.
+    int step = 3;
+    if (isHorizontalPhysicalSide(physicalSide))
+        step = 1;
+
+    return static_cast<PhysicalBoxSide>((static_cast<int>(physicalSide) + step) % 4);
+}
+
+// Converts a flow-relative side into the physical side it lands on for |textflow|.
+// The logical side is first read as the physical side with the same numeric value,
+// then rotated for vertical flows and mirrored for reversed/flipped flows.
+inline PhysicalBoxSide mapLogicalSideToPhysicalSide(TextFlow textflow, LogicalBoxSide logicalSide)
+{
+    PhysicalBoxSide side = static_cast<PhysicalBoxSide>(logicalSide);
+
+    // Whether to mirror is decided from the side *before* rotation: left/right
+    // sides follow the reversed bit, top/bottom sides follow the flipped bit.
+    bool needsMirroring = isHorizontalPhysicalSide(side) ? isReversedTextFlow(textflow) : isFlippedTextFlow(textflow);
+
+    if (isVerticalTextFlow(textflow))
+        side = rotatePhysicalSide(side);
+
+    return needsMirroring ? mirrorPhysicalSide(side) : side;
+}
+
+inline PhysicalBoxSide mapLogicalSideToPhysicalSide(WritingMode writingMode, LogicalBoxSide logicalSide)
{
- return writingMode == RightToLeftWritingMode || writingMode == BottomToTopWritingMode;
+ // Set the direction such that side is mirrored if isFlippedWritingMode() is true
+ TextDirection direction = isFlippedWritingMode(writingMode) ? RTL : LTR;
+ return mapLogicalSideToPhysicalSide(makeTextFlow(writingMode, direction), logicalSide);
}
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp b/Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp
index d2d2c6f04..638f76c46 100644
--- a/Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp
+++ b/Source/WebCore/platform/text/enchant/TextCheckerEnchant.cpp
@@ -24,7 +24,8 @@
#include <Language.h>
#include <glib.h>
-#include <text/TextBreakIterator.h>
+#include <unicode/ubrk.h>
+#include <wtf/text/TextBreakIterator.h>
namespace WebCore {
@@ -53,14 +54,14 @@ TextCheckerEnchant::~TextCheckerEnchant()
void TextCheckerEnchant::ignoreWord(const String& word)
{
- for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter)
- enchant_dict_add_to_session(*iter, word.utf8().data(), -1);
+ for (auto& dictionary : m_enchantDictionaries)
+ enchant_dict_add_to_session(dictionary, word.utf8().data(), -1);
}
void TextCheckerEnchant::learnWord(const String& word)
{
- for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter)
- enchant_dict_add(*iter, word.utf8().data(), -1);
+ for (auto& dictionary : m_enchantDictionaries)
+ enchant_dict_add(dictionary, word.utf8().data(), -1);
}
void TextCheckerEnchant::checkSpellingOfWord(const CString& word, int start, int end, int& misspellingLocation, int& misspellingLength)
@@ -69,8 +70,8 @@ void TextCheckerEnchant::checkSpellingOfWord(const CString& word, int start, int
char* startPtr = g_utf8_offset_to_pointer(string, start);
int numberOfBytes = static_cast<int>(g_utf8_offset_to_pointer(string, end) - startPtr);
- for (Vector<EnchantDict*>::const_iterator dictIter = m_enchantDictionaries.begin(); dictIter != m_enchantDictionaries.end(); ++dictIter) {
- if (!enchant_dict_check(*dictIter, startPtr, numberOfBytes)) {
+ for (auto& dictionary : m_enchantDictionaries) {
+ if (!enchant_dict_check(dictionary, startPtr, numberOfBytes)) {
// Stop checking, this word is ok in at least one dict.
misspellingLocation = -1;
misspellingLength = 0;
@@ -91,13 +92,13 @@ void TextCheckerEnchant::checkSpellingOfString(const String& string, int& misspe
if (!hasDictionary())
return;
- TextBreakIterator* iter = wordBreakIterator(string);
+ UBreakIterator* iter = wordBreakIterator(string);
if (!iter)
return;
CString utf8String = string.utf8();
- int start = textBreakFirst(iter);
- for (int end = textBreakNext(iter); end != TextBreakDone; end = textBreakNext(iter)) {
+ int start = ubrk_first(iter);
+ for (int end = ubrk_next(iter); end != UBRK_DONE; end = ubrk_next(iter)) {
if (isWordTextBreak(iter)) {
checkSpellingOfWord(utf8String, start, end, misspellingLocation, misspellingLength);
// Stop checking the next words If the current word is misspelled, to do not overwrite its misspelled location and length.
@@ -114,11 +115,11 @@ Vector<String> TextCheckerEnchant::getGuessesForWord(const String& word)
if (!hasDictionary())
return guesses;
- for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter) {
+ for (auto& dictionary : m_enchantDictionaries) {
size_t numberOfSuggestions;
size_t i;
- char** suggestions = enchant_dict_suggest(*iter, word.utf8().data(), -1, &numberOfSuggestions);
+ char** suggestions = enchant_dict_suggest(dictionary, word.utf8().data(), -1, &numberOfSuggestions);
if (numberOfSuggestions <= 0)
continue;
@@ -128,7 +129,7 @@ Vector<String> TextCheckerEnchant::getGuessesForWord(const String& word)
for (i = 0; i < numberOfSuggestions; i++)
guesses.append(String::fromUTF8(suggestions[i]));
- enchant_dict_free_suggestions(*iter, suggestions);
+ enchant_dict_free_suggestions(dictionary, suggestions);
}
return guesses;
@@ -139,8 +140,8 @@ void TextCheckerEnchant::updateSpellCheckingLanguages(const Vector<String>& lang
Vector<EnchantDict*> spellDictionaries;
if (!languages.isEmpty()) {
- for (Vector<String>::const_iterator iter = languages.begin(); iter != languages.end(); ++iter) {
- CString currentLanguage = iter->utf8();
+ for (auto& language : languages) {
+ CString currentLanguage = language.utf8();
if (enchant_broker_dict_exists(m_broker, currentLanguage.data())) {
EnchantDict* dict = enchant_broker_request_dict(m_broker, currentLanguage.data());
spellDictionaries.append(dict);
@@ -175,11 +176,11 @@ Vector<String> TextCheckerEnchant::loadedSpellCheckingLanguages() const
// Get a Vector<CString> with the list of languages in use.
Vector<CString> currentDictionaries;
- for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter)
- enchant_dict_describe(*iter, enchantDictDescribeCallback, &currentDictionaries);
+ for (auto& dictionary : m_enchantDictionaries)
+ enchant_dict_describe(dictionary, enchantDictDescribeCallback, &currentDictionaries);
- for (Vector<CString>::const_iterator iter = currentDictionaries.begin(); iter != currentDictionaries.end(); ++iter)
- languages.append(String::fromUTF8(iter->data()));
+ for (auto& dictionary : currentDictionaries)
+ languages.append(String::fromUTF8(dictionary.data()));
return languages;
}
@@ -190,16 +191,16 @@ Vector<String> TextCheckerEnchant::availableSpellCheckingLanguages() const
enchant_broker_list_dicts(m_broker, enchantDictDescribeCallback, &allDictionaries);
Vector<String> languages;
- for (Vector<CString>::const_iterator iter = allDictionaries.begin(); iter != allDictionaries.end(); ++iter)
- languages.append(String::fromUTF8(iter->data()));
+ for (auto& dictionary : allDictionaries)
+ languages.append(String::fromUTF8(dictionary.data()));
return languages;
}
void TextCheckerEnchant::freeEnchantBrokerDictionaries()
{
- for (Vector<EnchantDict*>::const_iterator iter = m_enchantDictionaries.begin(); iter != m_enchantDictionaries.end(); ++iter)
- enchant_broker_free_dict(m_broker, *iter);
+ for (auto& dictionary : m_enchantDictionaries)
+ enchant_broker_free_dict(m_broker, dictionary);
}
} // namespace WebCore
diff --git a/Source/WebCore/platform/text/enchant/TextCheckerEnchant.h b/Source/WebCore/platform/text/enchant/TextCheckerEnchant.h
index eb9be3e1f..3bd73c205 100644
--- a/Source/WebCore/platform/text/enchant/TextCheckerEnchant.h
+++ b/Source/WebCore/platform/text/enchant/TextCheckerEnchant.h
@@ -24,7 +24,6 @@
#include <enchant.h>
#include <wtf/FastMalloc.h>
-#include <wtf/PassOwnPtr.h>
#include <wtf/Vector.h>
#include <wtf/text/CString.h>
#include <wtf/text/WTFString.h>
@@ -35,7 +34,7 @@ class TextCheckerEnchant {
WTF_MAKE_FAST_ALLOCATED;
public:
- static PassOwnPtr<TextCheckerEnchant> create() { return adoptPtr(new TextCheckerEnchant); }
+ TextCheckerEnchant();
virtual ~TextCheckerEnchant();
void ignoreWord(const String&);
@@ -48,7 +47,6 @@ public:
Vector<String> availableSpellCheckingLanguages() const;
private:
- TextCheckerEnchant();
void freeEnchantBrokerDictionaries();
void checkSpellingOfWord(const CString&, int start, int end, int& misspellingLocation, int& misspellingLength);
diff --git a/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp b/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
deleted file mode 100644
index 35e5a05fa..000000000
--- a/Source/WebCore/platform/text/gtk/TextBreakIteratorInternalICUGtk.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2007 Alp Toker <alp@atoker.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "config.h"
-#include "TextBreakIteratorInternalICU.h"
-
-namespace WebCore {
-
-const char* currentSearchLocaleID()
-{
- // FIXME: Should use system locale.
- return "";
-}
-
-const char* currentTextBreakLocaleID()
-{
- // FIXME: Should use system locale.
- return "en_us";
-}
-
-}
diff --git a/Source/WebCore/platform/text/hyphen/HyphenationLibHyphen.cpp b/Source/WebCore/platform/text/hyphen/HyphenationLibHyphen.cpp
new file mode 100644
index 000000000..6c206b8aa
--- /dev/null
+++ b/Source/WebCore/platform/text/hyphen/HyphenationLibHyphen.cpp
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2010 Apple Inc. All rights reserved.
+ * Copyright (C) 2015 Igalia S.L.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "Hyphenation.h"
+
+#if USE(LIBHYPHEN)
+
+#include "FileSystem.h"
+#include <hyphen.h>
+#include <limits>
+#include <stdlib.h>
+#include <wtf/HashMap.h>
+#include <wtf/NeverDestroyed.h>
+#include <wtf/TinyLRUCache.h>
+#include <wtf/text/AtomicStringHash.h>
+#include <wtf/text/CString.h>
+#include <wtf/text/StringView.h>
+
+#if PLATFORM(GTK)
+#include "GtkUtilities.h"
+#include <wtf/glib/GUniquePtr.h>
+#endif
+
+namespace WebCore {
+
+static const char* const gDictionaryDirectories[] = {
+ "/usr/share/hyphen",
+ "/usr/local/share/hyphen",
+};
+
+// Returns the locale part of a dictionary file path. The file name is expected
+// to have the form "hyph_<locale name>.dic"; the prefix and suffix are removed
+// purely by length, without checking what they contain.
+static String extractLocaleFromDictionaryFilePath(const String& filePath)
+{
+    static const int prefixLength = 5; // Length of "hyph_".
+    static const int suffixLength = 4; // Length of ".dic".
+
+    String fileName = pathGetFileName(filePath);
+    unsigned localeLength = fileName.length() - prefixLength - suffixLength;
+    return fileName.substring(prefixLength, localeLength);
+}
+
+// Registers every "hyph_*.dic" file found in |directoryPath| in |availableLocales|.
+// Each dictionary is filed under up to three lowercase keys: the locale as spelled
+// in the file name, the same locale with '_' replaced by '-', and the part before
+// the first '-'. Files whose path cannot be resolved by realpath() are skipped.
+static void scanDirectoryForDicionaries(const char* directoryPath, HashMap<AtomicString, Vector<String>>& availableLocales)
+{
+    for (auto& filePath : listDirectory(directoryPath, "hyph_*.dic")) {
+        // The locale comes from the name as listed, before the path is resolved.
+        String locale = extractLocaleFromDictionaryFilePath(filePath).convertToASCIILowercase();
+
+        char normalizedPath[PATH_MAX];
+        if (!realpath(fileSystemRepresentation(filePath).data(), normalizedPath))
+            continue;
+        String resolvedPath = stringFromFileSystemRepresentation(normalizedPath);
+
+        // Appends the resolved path to the list stored under |key|, creating the list on first use.
+        auto registerDictionary = [&availableLocales, &resolvedPath](const String& key) {
+            availableLocales.add(key, Vector<String>()).iterator->value.append(resolvedPath);
+        };
+
+        registerDictionary(locale);
+
+        String dashedLocale = locale;
+        dashedLocale.replace('_', '-');
+        if (dashedLocale != locale)
+            registerDictionary(dashedLocale);
+
+        size_t dividerPosition = dashedLocale.find('-');
+        if (dividerPosition == notFound)
+            continue;
+
+        // Also expose the dictionary under the part before the first '-', e.g. "en" for "en-us".
+        dashedLocale.truncate(dividerPosition);
+        registerDictionary(dashedLocale);
+    }
+}
+
+#if ENABLE(DEVELOPER_MODE)
+// Adds the dictionaries shipped for WebKit tests to |availableLocales|.
+// GTK: scans <build dir>/DependenciesGTK/Root/webkitgtk-test-dicts when that
+// directory exists, otherwise <build dir>/webkitgtk-test-dicts.
+// Other ports: scans the directory named by the TEST_HYPHENATAION_PATH macro
+// when it is defined, and does nothing otherwise.
+// NOTE(review): "HYPHENATAION" looks like a misspelling; the macro is defined
+// outside this file, so confirm with the build system before renaming it.
+static void scanTestDictionariesDirectoryIfNecessary(HashMap<AtomicString, Vector<String>>& availableLocales)
+{
+ // It's unfortunate that we need to look for the dictionaries this way, but
+ // libhyphen doesn't have the concept of installed dictionaries. Instead,
+ // we have this special case for WebKit tests.
+#if PLATFORM(GTK)
+ CString buildDirectory = webkitBuildDirectory();
+ GUniquePtr<char> dictionariesPath(g_build_filename(buildDirectory.data(), "DependenciesGTK", "Root", "webkitgtk-test-dicts", nullptr));
+ if (g_file_test(dictionariesPath.get(), static_cast<GFileTest>(G_FILE_TEST_IS_DIR))) {
+ scanDirectoryForDicionaries(dictionariesPath.get(), availableLocales);
+ return;
+ }
+
+ // Try alternative dictionaries path for people not using JHBuild.
+ dictionariesPath.reset(g_build_filename(buildDirectory.data(), "webkitgtk-test-dicts", nullptr));
+ scanDirectoryForDicionaries(dictionariesPath.get(), availableLocales);
+#elif defined(TEST_HYPHENATAION_PATH)
+ scanDirectoryForDicionaries(TEST_HYPHENATAION_PATH, availableLocales);
+#else
+ UNUSED_PARAM(availableLocales);
+#endif
+}
+#endif
+
+// Returns the map from lowercase locale identifier to the dictionary files that
+// serve it. The map is filled on the first call by scanning the directories in
+// gDictionaryDirectories (plus the test dictionaries in developer builds) and is
+// returned as-is on every later call.
+static HashMap<AtomicString, Vector<String>>& availableLocales()
+{
+    static bool scannedLocales = false;
+    static HashMap<AtomicString, Vector<String>> availableLocales;
+
+    if (scannedLocales)
+        return availableLocales;
+
+    for (const char* directory : gDictionaryDirectories)
+        scanDirectoryForDicionaries(directory, availableLocales);
+
+#if ENABLE(DEVELOPER_MODE)
+    scanTestDictionariesDirectoryIfNecessary(availableLocales);
+#endif
+
+    scannedLocales = true;
+    return availableLocales;
+}
+
+// Reports whether a hyphenation dictionary is known for |localeIdentifier|.
+// A null identifier never matches. Otherwise the identifier is looked up as
+// given and then, if that fails, in ASCII-lowercased form.
+bool canHyphenate(const AtomicString& localeIdentifier)
+{
+    if (localeIdentifier.isNull())
+        return false;
+
+    auto& locales = availableLocales();
+    if (locales.contains(localeIdentifier))
+        return true;
+
+    AtomicString lowercaseIdentifier(localeIdentifier.string().convertToASCIILowercase());
+    return locales.contains(lowercaseIdentifier);
+}
+
+// Ref-counted owner of a libhyphen dictionary handle. The handle lives in a
+// std::unique_ptr whose deleter is hnj_hyphen_free, so it is released together
+// with the last reference to this object.
+class HyphenationDictionary : public RefCounted<HyphenationDictionary> {
+ WTF_MAKE_NONCOPYABLE(HyphenationDictionary);
+ WTF_MAKE_FAST_ALLOCATED;
+public:
+ typedef std::unique_ptr<HyphenDict, void(*)(HyphenDict*)> HyphenDictUniquePtr;
+
+ virtual ~HyphenationDictionary() { }
+ // Creates an instance that holds no libhyphen handle (libhyphenDictionary() returns null).
+ static RefPtr<HyphenationDictionary> createNull()
+ {
+ return adoptRef(new HyphenationDictionary());
+ }
+
+ // Creates an instance holding whatever hnj_hyphen_load() returns for |dictPath|.
+ // NOTE(review): presumably that is null when the file cannot be loaded - callers
+ // should check libhyphenDictionary() before use; confirm against libhyphen.
+ static RefPtr<HyphenationDictionary> create(const CString& dictPath)
+ {
+ return adoptRef(new HyphenationDictionary(dictPath));
+ }
+
+ // Borrowed pointer to the wrapped handle; ownership stays with this object.
+ HyphenDict* libhyphenDictionary() const
+ {
+ return m_libhyphenDictionary.get();
+ }
+
+private:
+ HyphenationDictionary(const CString& dictPath)
+ : m_libhyphenDictionary(HyphenDictUniquePtr(hnj_hyphen_load(dictPath.data()), hnj_hyphen_free))
+ {
+ }
+
+ HyphenationDictionary()
+ : m_libhyphenDictionary(HyphenDictUniquePtr(nullptr, hnj_hyphen_free))
+ {
+ }
+
+ HyphenDictUniquePtr m_libhyphenDictionary;
+};
+
+} // namespace WebCore
+
+namespace WTF {
+
+// TinyLRUCache policy for loaded hyphenation dictionaries. Although isKeyNull()
+// names its parameter localeIdentifier, the cache is keyed by dictionary file
+// path: lastHyphenLocation() looks entries up by path and createValueForKey()
+// loads the file at that path.
+template<>
+class TinyLRUCachePolicy<AtomicString, RefPtr<WebCore::HyphenationDictionary>>
+{
+public:
+ // Process-wide cache holding up to 32 entries; never destroyed.
+ static TinyLRUCache<AtomicString, RefPtr<WebCore::HyphenationDictionary>, 32>& cache()
+ {
+ static NeverDestroyed<TinyLRUCache<AtomicString, RefPtr<WebCore::HyphenationDictionary>, 32>> cache;
+ return cache;
+ }
+
+ static bool isKeyNull(const AtomicString& localeIdentifier)
+ {
+ return localeIdentifier.isNull();
+ }
+
+ // Value used for a null key: a dictionary object without a libhyphen handle.
+ static RefPtr<WebCore::HyphenationDictionary> createValueForNullKey()
+ {
+ return WebCore::HyphenationDictionary::createNull();
+ }
+
+ // Loads the dictionary file at |dictionaryPath| (converted to its file-system representation).
+ static RefPtr<WebCore::HyphenationDictionary> createValueForKey(const AtomicString& dictionaryPath)
+ {
+ return WebCore::HyphenationDictionary::create(WebCore::fileSystemRepresentation(dictionaryPath.string()));
+ }
+};
+
+} // namespace WTF
+
+namespace WebCore {
+
+// Measures the run of white space at the start of |utf8String|.
+// On return |pointerOffset| is the length of that run in bytes and
+// |characterOffset| its length in code points. Scanning stops at the first
+// code point that is not white space or cannot be decoded.
+static void countLeadingSpaces(const CString& utf8String, int32_t& pointerOffset, int32_t& characterOffset)
+{
+    pointerOffset = 0;
+    characterOffset = 0;
+
+    const char* bytes = utf8String.data();
+    int32_t cursor = 0;
+    while (static_cast<unsigned>(cursor) < utf8String.length()) {
+        // U8_NEXT advances |cursor| past the code point it decodes and yields a
+        // negative value for an ill-formed sequence.
+        UChar32 codePoint = 0;
+        U8_NEXT(bytes, cursor, static_cast<int32_t>(utf8String.length()), codePoint);
+        if (codePoint < 0 || !u_isUWhiteSpace(codePoint))
+            break;
+
+        // Commit the offsets only once the code point is known to be white space.
+        pointerOffset = cursor;
+        ++characterOffset;
+    }
+}
+
+// Returns the index of the last hyphenation opportunity in |string| that lies
+// before |beforeIndex|, or 0 when none is found. 0 is also returned when the
+// locale is null or unknown, or when |string| holds nothing but leading white space.
+//
+// The dictionaries registered for the ASCII-lowercased |localeIdentifier| are
+// tried in order; the first one that yields a hyphenation point wins.
+// Dictionaries whose libhyphen handle is null are skipped.
+//
+// NOTE(review): the hyphen array filled in by libhyphen is indexed relative to
+// the UTF-8 word while |beforeIndex| and the result are offsets into |string|;
+// the two only coincide for ASCII text - confirm behavior for non-ASCII input.
+size_t lastHyphenLocation(StringView string, size_t beforeIndex, const AtomicString& localeIdentifier)
+{
+    // Same guard as canHyphenate(): a null locale can never match a dictionary.
+    if (localeIdentifier.isNull())
+        return 0;
+
+    // Web content may specify strings for locales which do not exist or that we do not have.
+    // A single find() also avoids copying the path list, which contains() + get() would do.
+    AtomicString lowercaseLocaleIdentifier(localeIdentifier.string().convertToASCIILowercase());
+    auto& locales = availableLocales();
+    auto localeEntry = locales.find(lowercaseLocaleIdentifier);
+    if (localeEntry == locales.end())
+        return 0;
+
+    // libhyphen accepts strings in UTF-8 format, but WebCore can only provide StringView
+    // which stores either UTF-16 or Latin1 data. This is unfortunate for performance
+    // reasons and we should consider switching to a more flexible hyphenation library
+    // if it is available.
+    CString utf8StringCopy = string.toStringWithoutCopying().utf8();
+
+    // WebCore often passes strings like " wordtohyphenate" to the platform layer. Since
+    // libhyphen isn't advanced enough to deal with leading spaces (presumably CoreFoundation
+    // can), we should find the appropriate indexes into the string to skip them.
+    int32_t leadingSpaceBytes;
+    int32_t leadingSpaceCharacters;
+    countLeadingSpaces(utf8StringCopy, leadingSpaceBytes, leadingSpaceCharacters);
+
+    // Nothing to hyphenate. Returning here also keeps the unsigned
+    // "wordByteLength - 1" bound below from wrapping around.
+    size_t wordByteLength = utf8StringCopy.length() - leadingSpaceBytes;
+    if (!wordByteLength)
+        return 0;
+    const char* word = utf8StringCopy.data() + leadingSpaceBytes;
+
+    // The libhyphen documentation specifies that this array should be 5 bytes longer than
+    // the byte length of the input string.
+    Vector<char> hyphenArray(wordByteLength + 5);
+    char* hyphenArrayData = hyphenArray.data();
+
+    for (const auto& dictionaryPath : localeEntry->value) {
+        RefPtr<HyphenationDictionary> dictionary = WTF::TinyLRUCachePolicy<AtomicString, RefPtr<HyphenationDictionary>>::cache().get(AtomicString(dictionaryPath));
+
+        // A dictionary object may hold no libhyphen handle; never hand a null handle to libhyphen.
+        if (!dictionary || !dictionary->libhyphenDictionary())
+            continue;
+
+        char** replacements = nullptr;
+        int* positions = nullptr;
+        int* removedCharacterCounts = nullptr;
+        hnj_hyphen_hyphenate2(dictionary->libhyphenDictionary(),
+            word,
+            static_cast<int>(wordByteLength),
+            hyphenArrayData,
+            nullptr, /* output parameter for hyphenated word */
+            &replacements,
+            &positions,
+            &removedCharacterCounts);
+
+        // Only the hyphen array is consulted; the other output buffers are released right away.
+        if (replacements) {
+            for (unsigned i = 0; i < wordByteLength - 1; i++)
+                free(replacements[i]);
+            free(replacements);
+        }
+
+        free(positions);
+        free(removedCharacterCounts);
+
+        for (int i = beforeIndex - leadingSpaceCharacters - 2; i >= 0; i--) {
+            // libhyphen will put an odd number in hyphenArrayData at all
+            // hyphenation points. A number & 1 will be true for odd numbers.
+            if (hyphenArrayData[i] & 1)
+                return i + 1 + leadingSpaceCharacters;
+        }
+    }
+
+    return 0;
+}
+
+} // namespace WebCore
+
+#endif // USE(LIBHYPHEN)
diff --git a/Source/WebCore/platform/text/icu/UTextProvider.cpp b/Source/WebCore/platform/text/icu/UTextProvider.cpp
deleted file mode 100644
index fdaee81e6..000000000
--- a/Source/WebCore/platform/text/icu/UTextProvider.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Copyright (C) 2014 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "UTextProvider.h"
-
-#include <string.h>
-
-namespace WebCore {
-
-// Relocate pointer from source into destination as required.
-static inline void fixPointer(const UText* source, UText* destination, const void*& pointer)
-{
- if (pointer >= source->pExtra && pointer < static_cast<char*>(source->pExtra) + source->extraSize) {
- // Pointer references source extra buffer.
- pointer = static_cast<char*>(destination->pExtra) + (static_cast<const char*>(pointer) - static_cast<const char*>(source->pExtra));
- } else if (pointer >= source && pointer < reinterpret_cast<const char*>(source) + source->sizeOfStruct) {
- // Pointer references source text structure, but not source extra buffer.
- pointer = reinterpret_cast<char*>(destination) + (static_cast<const char*>(pointer) - reinterpret_cast<const char*>(source));
- }
-}
-
-UText* uTextCloneImpl(UText* destination, const UText* source, UBool deep, UErrorCode* status)
-{
- ASSERT_UNUSED(deep, !deep);
- if (U_FAILURE(*status))
- return nullptr;
- int32_t extraSize = source->extraSize;
- destination = utext_setup(destination, extraSize, status);
- if (U_FAILURE(*status))
- return destination;
- void* extraNew = destination->pExtra;
- int32_t flags = destination->flags;
- int sizeToCopy = std::min(source->sizeOfStruct, destination->sizeOfStruct);
- memcpy(destination, source, sizeToCopy);
- destination->pExtra = extraNew;
- destination->flags = flags;
- memcpy(destination->pExtra, source->pExtra, extraSize);
- fixPointer(source, destination, destination->context);
- fixPointer(source, destination, destination->p);
- fixPointer(source, destination, destination->q);
- ASSERT(!destination->r);
- const void* chunkContents = static_cast<const void*>(destination->chunkContents);
- fixPointer(source, destination, chunkContents);
- destination->chunkContents = static_cast<const UChar*>(chunkContents);
- return destination;
-}
-
-} // namespace WebCore
diff --git a/Source/WebCore/platform/text/icu/UTextProvider.h b/Source/WebCore/platform/text/icu/UTextProvider.h
deleted file mode 100644
index 81a025a98..000000000
--- a/Source/WebCore/platform/text/icu/UTextProvider.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (C) 2014 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef UTextProvider_h
-#define UTextProvider_h
-
-#include <unicode/utext.h>
-#include <wtf/unicode/Unicode.h>
-
-namespace WebCore {
-
-enum class UTextProviderContext {
- NoContext,
- PriorContext,
- PrimaryContext
-};
-
-inline UTextProviderContext uTextProviderContext(const UText* text, int64_t nativeIndex, UBool forward)
-{
- if (!text->b || nativeIndex > text->b)
- return UTextProviderContext::PrimaryContext;
- if (nativeIndex == text->b)
- return forward ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext;
- return UTextProviderContext::PriorContext;
-}
-
-inline void initializeContextAwareUTextProvider(UText* text, const UTextFuncs* funcs, const void* string, unsigned length, const UChar* priorContext, int priorContextLength)
-{
- text->pFuncs = funcs;
- text->providerProperties = 1 << UTEXT_PROVIDER_STABLE_CHUNKS;
- text->context = string;
- text->p = string;
- text->a = length;
- text->q = priorContext;
- text->b = priorContextLength;
-}
-
-// Shared implementation for the UTextClone function on UTextFuncs.
-
-UText* uTextCloneImpl(UText* destination, const UText* source, UBool deep, UErrorCode* status);
-
-
-// Helpers for the UTextAccess function on UTextFuncs.
-
-inline int64_t uTextAccessPinIndex(int64_t& index, int64_t limit)
-{
- if (index < 0)
- index = 0;
- else if (index > limit)
- index = limit;
- return index;
-}
-
-inline bool uTextAccessInChunkOrOutOfRange(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward, UBool& isAccessible)
-{
- if (forward) {
- if (nativeIndex >= text->chunkNativeStart && nativeIndex < text->chunkNativeLimit) {
- int64_t offset = nativeIndex - text->chunkNativeStart;
- // Ensure chunk offset is well formed if computed offset exceeds int32_t range.
- ASSERT(offset < std::numeric_limits<int32_t>::max());
- text->chunkOffset = offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0;
- isAccessible = TRUE;
- return true;
- }
- if (nativeIndex >= nativeLength && text->chunkNativeLimit == nativeLength) {
- text->chunkOffset = text->chunkLength;
- isAccessible = FALSE;
- return true;
- }
- } else {
- if (nativeIndex > text->chunkNativeStart && nativeIndex <= text->chunkNativeLimit) {
- int64_t offset = nativeIndex - text->chunkNativeStart;
- // Ensure chunk offset is well formed if computed offset exceeds int32_t range.
- ASSERT(offset < std::numeric_limits<int32_t>::max());
- text->chunkOffset = offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0;
- isAccessible = TRUE;
- return true;
- }
- if (nativeIndex <= 0 && !text->chunkNativeStart) {
- text->chunkOffset = 0;
- isAccessible = FALSE;
- return true;
- }
- }
- return false;
-}
-
-} // namespace WebCore
-
-#endif // UTextProvider_h
diff --git a/Source/WebCore/platform/text/icu/UTextProviderLatin1.cpp b/Source/WebCore/platform/text/icu/UTextProviderLatin1.cpp
deleted file mode 100644
index ee027637e..000000000
--- a/Source/WebCore/platform/text/icu/UTextProviderLatin1.cpp
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- * Copyright (C) 2014 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "UTextProviderLatin1.h"
-
-#include "UTextProvider.h"
-#include <wtf/text/StringImpl.h>
-
-namespace WebCore {
-
-// Latin1 provider
-
-static UText* uTextLatin1Clone(UText*, const UText*, UBool, UErrorCode*);
-static int64_t uTextLatin1NativeLength(UText*);
-static UBool uTextLatin1Access(UText*, int64_t, UBool);
-static int32_t uTextLatin1Extract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*);
-static int64_t uTextLatin1MapOffsetToNative(const UText*);
-static int32_t uTextLatin1MapNativeIndexToUTF16(const UText*, int64_t);
-static void uTextLatin1Close(UText*);
-
-static struct UTextFuncs uTextLatin1Funcs = {
- sizeof(UTextFuncs),
- 0,
- 0,
- 0,
- uTextLatin1Clone,
- uTextLatin1NativeLength,
- uTextLatin1Access,
- uTextLatin1Extract,
- nullptr,
- nullptr,
- uTextLatin1MapOffsetToNative,
- uTextLatin1MapNativeIndexToUTF16,
- uTextLatin1Close,
- nullptr,
- nullptr,
- nullptr
-};
-
-static UText* uTextLatin1Clone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
-{
- ASSERT_UNUSED(deep, !deep);
-
- if (U_FAILURE(*status))
- return 0;
-
- UText* result = utext_setup(destination, sizeof(UChar) * UTextWithBufferInlineCapacity, status);
- if (U_FAILURE(*status))
- return destination;
-
- result->providerProperties = source->providerProperties;
-
- // Point at the same position, but with an empty buffer.
- result->chunkNativeStart = source->chunkNativeStart;
- result->chunkNativeLimit = source->chunkNativeStart;
- result->nativeIndexingLimit = static_cast<int32_t>(source->chunkNativeStart);
- result->chunkOffset = 0;
- result->context = source->context;
- result->a = source->a;
- result->pFuncs = &uTextLatin1Funcs;
- result->chunkContents = (UChar*)result->pExtra;
- memset(const_cast<UChar*>(result->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity);
-
- return result;
-}
-
-static int64_t uTextLatin1NativeLength(UText* uText)
-{
- return uText->a;
-}
-
-static UBool uTextLatin1Access(UText* uText, int64_t index, UBool forward)
-{
- int64_t length = uText->a;
-
- if (forward) {
- if (index < uText->chunkNativeLimit && index >= uText->chunkNativeStart) {
- // Already inside the buffer. Set the new offset.
- uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
- return TRUE;
- }
- if (index >= length && uText->chunkNativeLimit == length) {
- // Off the end of the buffer, but we can't get it.
- uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
- return FALSE;
- }
- } else {
- if (index <= uText->chunkNativeLimit && index > uText->chunkNativeStart) {
- // Already inside the buffer. Set the new offset.
- uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
- return TRUE;
- }
- if (!index && !uText->chunkNativeStart) {
- // Already at the beginning; can't go any farther.
- uText->chunkOffset = 0;
- return FALSE;
- }
- }
-
- if (forward) {
- uText->chunkNativeStart = index;
- uText->chunkNativeLimit = uText->chunkNativeStart + UTextWithBufferInlineCapacity;
- if (uText->chunkNativeLimit > length)
- uText->chunkNativeLimit = length;
-
- uText->chunkOffset = 0;
- } else {
- uText->chunkNativeLimit = index;
- if (uText->chunkNativeLimit > length)
- uText->chunkNativeLimit = length;
-
- uText->chunkNativeStart = uText->chunkNativeLimit - UTextWithBufferInlineCapacity;
- if (uText->chunkNativeStart < 0)
- uText->chunkNativeStart = 0;
-
- uText->chunkOffset = static_cast<int32_t>(index - uText->chunkNativeStart);
- }
- uText->chunkLength = static_cast<int32_t>(uText->chunkNativeLimit - uText->chunkNativeStart);
-
- StringImpl::copyChars(const_cast<UChar*>(uText->chunkContents), static_cast<const LChar*>(uText->context) + uText->chunkNativeStart, static_cast<unsigned>(uText->chunkLength));
-
- uText->nativeIndexingLimit = uText->chunkLength;
-
- return TRUE;
-}
-
-static int32_t uTextLatin1Extract(UText* uText, int64_t start, int64_t limit, UChar* dest, int32_t destCapacity, UErrorCode* status)
-{
- int64_t length = uText->a;
- if (U_FAILURE(*status))
- return 0;
-
- if (destCapacity < 0 || (!dest && destCapacity > 0)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- if (start < 0 || start > limit || (limit - start) > INT32_MAX) {
- *status = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- if (start > length)
- start = length;
- if (limit > length)
- limit = length;
-
- length = limit - start;
-
- if (!length)
- return 0;
-
- if (destCapacity > 0 && !dest) {
- int32_t trimmedLength = static_cast<int32_t>(length);
- if (trimmedLength > destCapacity)
- trimmedLength = destCapacity;
-
- StringImpl::copyChars(dest, static_cast<const LChar*>(uText->context) + start, static_cast<unsigned>(trimmedLength));
- }
-
- if (length < destCapacity) {
- dest[length] = 0;
- if (*status == U_STRING_NOT_TERMINATED_WARNING)
- *status = U_ZERO_ERROR;
- } else if (length == destCapacity)
- *status = U_STRING_NOT_TERMINATED_WARNING;
- else
- *status = U_BUFFER_OVERFLOW_ERROR;
-
- return static_cast<int32_t>(length);
-}
-
-static int64_t uTextLatin1MapOffsetToNative(const UText* uText)
-{
- return uText->chunkNativeStart + uText->chunkOffset;
-}
-
-static int32_t uTextLatin1MapNativeIndexToUTF16(const UText* uText, int64_t nativeIndex)
-{
- ASSERT_UNUSED(uText, uText->chunkNativeStart >= nativeIndex);
- ASSERT_UNUSED(uText, nativeIndex < uText->chunkNativeLimit);
- return static_cast<int32_t>(nativeIndex);
-}
-
-static void uTextLatin1Close(UText* uText)
-{
- uText->context = nullptr;
-}
-
-UText* openLatin1UTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, UErrorCode* status)
-{
- if (U_FAILURE(*status))
- return nullptr;
- if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status);
- if (U_FAILURE(*status)) {
- ASSERT(!text);
- return nullptr;
- }
-
- text->context = string;
- text->a = length;
- text->pFuncs = &uTextLatin1Funcs;
- text->chunkContents = (UChar*)text->pExtra;
- memset(const_cast<UChar*>(text->chunkContents), 0, sizeof(UChar) * UTextWithBufferInlineCapacity);
-
- return text;
-}
-
-
-// Latin1ContextAware provider
-
-static UText* uTextLatin1ContextAwareClone(UText*, const UText*, UBool, UErrorCode*);
-static int64_t uTextLatin1ContextAwareNativeLength(UText*);
-static UBool uTextLatin1ContextAwareAccess(UText*, int64_t, UBool);
-static int32_t uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*);
-static void uTextLatin1ContextAwareClose(UText*);
-
-static const struct UTextFuncs textLatin1ContextAwareFuncs = {
- sizeof(UTextFuncs),
- 0,
- 0,
- 0,
- uTextLatin1ContextAwareClone,
- uTextLatin1ContextAwareNativeLength,
- uTextLatin1ContextAwareAccess,
- uTextLatin1ContextAwareExtract,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- uTextLatin1ContextAwareClose,
- nullptr,
- nullptr,
- nullptr
-};
-
-static inline UTextProviderContext textLatin1ContextAwareGetCurrentContext(const UText* text)
-{
- if (!text->chunkContents)
- return UTextProviderContext::NoContext;
- return text->chunkContents == text->pExtra ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext;
-}
-
-static void textLatin1ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
-{
- ASSERT(text->chunkContents == text->pExtra);
- if (forward) {
- ASSERT(nativeIndex >= text->b && nativeIndex < nativeLength);
- text->chunkNativeStart = nativeIndex;
- text->chunkNativeLimit = nativeIndex + text->extraSize / sizeof(UChar);
- if (text->chunkNativeLimit > nativeLength)
- text->chunkNativeLimit = nativeLength;
- } else {
- ASSERT(nativeIndex > text->b && nativeIndex <= nativeLength);
- text->chunkNativeLimit = nativeIndex;
- text->chunkNativeStart = nativeIndex - text->extraSize / sizeof(UChar);
- if (text->chunkNativeStart < text->b)
- text->chunkNativeStart = text->b;
- }
- int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
- // Ensure chunk length is well defined if computed length exceeds int32_t range.
- ASSERT(length < std::numeric_limits<int32_t>::max());
- text->chunkLength = length < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0;
- text->nativeIndexingLimit = text->chunkLength;
- text->chunkOffset = forward ? 0 : text->chunkLength;
- StringImpl::copyChars(const_cast<UChar*>(text->chunkContents), static_cast<const LChar*>(text->p) + (text->chunkNativeStart - text->b), static_cast<unsigned>(text->chunkLength));
-}
-
-static void textLatin1ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
-{
- ASSERT(!text->chunkContents || text->chunkContents == text->q);
- text->chunkContents = static_cast<const UChar*>(text->pExtra);
- textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
-}
-
-static void textLatin1ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
-{
- ASSERT(text->chunkContents == text->q);
- ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
- ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
- ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
- text->chunkNativeStart = 0;
- text->chunkNativeLimit = text->b;
- text->chunkLength = text->b;
- text->nativeIndexingLimit = text->chunkLength;
- int64_t offset = nativeIndex - text->chunkNativeStart;
- // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
- ASSERT(offset < std::numeric_limits<int32_t>::max());
- text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
-}
-
-static void textLatin1ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
-{
- ASSERT(!text->chunkContents || text->chunkContents == text->pExtra);
- text->chunkContents = static_cast<const UChar*>(text->q);
- textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward);
-}
-
-static UText* uTextLatin1ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
-{
- return uTextCloneImpl(destination, source, deep, status);
-}
-
-static int64_t uTextLatin1ContextAwareNativeLength(UText* text)
-{
- return text->a + text->b;
-}
-
-static UBool uTextLatin1ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward)
-{
- if (!text->context)
- return FALSE;
- int64_t nativeLength = uTextLatin1ContextAwareNativeLength(text);
- UBool isAccessible;
- if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible))
- return isAccessible;
- nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength);
- UTextProviderContext currentContext = textLatin1ContextAwareGetCurrentContext(text);
- UTextProviderContext newContext = uTextProviderContext(text, nativeIndex, forward);
- ASSERT(newContext != UTextProviderContext::NoContext);
- if (newContext == currentContext) {
- if (currentContext == UTextProviderContext::PrimaryContext)
- textLatin1ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
- else
- textLatin1ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward);
- } else if (newContext == UTextProviderContext::PrimaryContext)
- textLatin1ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward);
- else {
- ASSERT(newContext == UTextProviderContext::PriorContext);
- textLatin1ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward);
- }
- return TRUE;
-}
-
-static int32_t uTextLatin1ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode)
-{
- // In the present context, this text provider is used only with ICU functions
- // that do not perform an extract operation.
- ASSERT_NOT_REACHED();
- *errorCode = U_UNSUPPORTED_ERROR;
- return 0;
-}
-
-static void uTextLatin1ContextAwareClose(UText* text)
-{
- text->context = nullptr;
-}
-
-UText* openLatin1ContextAwareUTextProvider(UTextWithBuffer* utWithBuffer, const LChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status)
-{
- if (U_FAILURE(*status))
- return 0;
- if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- UText* text = utext_setup(&utWithBuffer->text, sizeof(utWithBuffer->buffer), status);
- if (U_FAILURE(*status)) {
- ASSERT(!text);
- return 0;
- }
-
- initializeContextAwareUTextProvider(text, &textLatin1ContextAwareFuncs, string, length, priorContext, priorContextLength);
- return text;
-}
-
-} // namespace WebCore
diff --git a/Source/WebCore/platform/text/icu/UTextProviderUTF16.cpp b/Source/WebCore/platform/text/icu/UTextProviderUTF16.cpp
deleted file mode 100644
index 7aaac48c8..000000000
--- a/Source/WebCore/platform/text/icu/UTextProviderUTF16.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (C) 2014 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "UTextProviderUTF16.h"
-
-#include "UTextProvider.h"
-
-namespace WebCore {
-
-// UTF16ContextAware provider
-
-static UText* uTextUTF16ContextAwareClone(UText*, const UText*, UBool, UErrorCode*);
-static int64_t uTextUTF16ContextAwareNativeLength(UText*);
-static UBool uTextUTF16ContextAwareAccess(UText*, int64_t, UBool);
-static int32_t uTextUTF16ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode*);
-static void uTextUTF16ContextAwareClose(UText*);
-
-static const struct UTextFuncs textUTF16ContextAwareFuncs = {
- sizeof(UTextFuncs),
- 0,
- 0,
- 0,
- uTextUTF16ContextAwareClone,
- uTextUTF16ContextAwareNativeLength,
- uTextUTF16ContextAwareAccess,
- uTextUTF16ContextAwareExtract,
- nullptr,
- nullptr,
- nullptr,
- nullptr,
- uTextUTF16ContextAwareClose,
- nullptr,
- nullptr,
- nullptr
-};
-
-static inline UTextProviderContext textUTF16ContextAwareGetCurrentContext(const UText* text)
-{
- if (!text->chunkContents)
- return UTextProviderContext::NoContext;
- return text->chunkContents == text->p ? UTextProviderContext::PrimaryContext : UTextProviderContext::PriorContext;
-}
-
-static void textUTF16ContextAwareMoveInPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
-{
- ASSERT(text->chunkContents == text->p);
- ASSERT_UNUSED(forward, forward ? nativeIndex >= text->b : nativeIndex > text->b);
- ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
- text->chunkNativeStart = text->b;
- text->chunkNativeLimit = nativeLength;
- int64_t length = text->chunkNativeLimit - text->chunkNativeStart;
- // Ensure chunk length is well defined if computed length exceeds int32_t range.
- ASSERT(length < std::numeric_limits<int32_t>::max());
- text->chunkLength = length < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(length) : 0;
- text->nativeIndexingLimit = text->chunkLength;
- int64_t offset = nativeIndex - text->chunkNativeStart;
- // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
- ASSERT(offset < std::numeric_limits<int32_t>::max());
- text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
-}
-
-static void textUTF16ContextAwareSwitchToPrimaryContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
-{
- ASSERT(!text->chunkContents || text->chunkContents == text->q);
- text->chunkContents = static_cast<const UChar*>(text->p);
- textUTF16ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
-}
-
-static void textUTF16ContextAwareMoveInPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
-{
- ASSERT(text->chunkContents == text->q);
- ASSERT(forward ? nativeIndex < text->b : nativeIndex <= text->b);
- ASSERT_UNUSED(nativeLength, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
- ASSERT_UNUSED(forward, forward ? nativeIndex < nativeLength : nativeIndex <= nativeLength);
- text->chunkNativeStart = 0;
- text->chunkNativeLimit = text->b;
- text->chunkLength = text->b;
- text->nativeIndexingLimit = text->chunkLength;
- int64_t offset = nativeIndex - text->chunkNativeStart;
- // Ensure chunk offset is well defined if computed offset exceeds int32_t range or chunk length.
- ASSERT(offset < std::numeric_limits<int32_t>::max());
- text->chunkOffset = std::min(offset < std::numeric_limits<int32_t>::max() ? static_cast<int32_t>(offset) : 0, text->chunkLength);
-}
-
-static void textUTF16ContextAwareSwitchToPriorContext(UText* text, int64_t nativeIndex, int64_t nativeLength, UBool forward)
-{
- ASSERT(!text->chunkContents || text->chunkContents == text->p);
- text->chunkContents = static_cast<const UChar*>(text->q);
- textUTF16ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward);
-}
-
-static UText* uTextUTF16ContextAwareClone(UText* destination, const UText* source, UBool deep, UErrorCode* status)
-{
- return uTextCloneImpl(destination, source, deep, status);
-}
-
-static inline int64_t uTextUTF16ContextAwareNativeLength(UText* text)
-{
- return text->a + text->b;
-}
-
-static UBool uTextUTF16ContextAwareAccess(UText* text, int64_t nativeIndex, UBool forward)
-{
- if (!text->context)
- return FALSE;
- int64_t nativeLength = uTextUTF16ContextAwareNativeLength(text);
- UBool isAccessible;
- if (uTextAccessInChunkOrOutOfRange(text, nativeIndex, nativeLength, forward, isAccessible))
- return isAccessible;
- nativeIndex = uTextAccessPinIndex(nativeIndex, nativeLength);
- UTextProviderContext currentContext = textUTF16ContextAwareGetCurrentContext(text);
- UTextProviderContext newContext = uTextProviderContext(text, nativeIndex, forward);
- ASSERT(newContext != UTextProviderContext::NoContext);
- if (newContext == currentContext) {
- if (currentContext == UTextProviderContext::PrimaryContext)
- textUTF16ContextAwareMoveInPrimaryContext(text, nativeIndex, nativeLength, forward);
- else
- textUTF16ContextAwareMoveInPriorContext(text, nativeIndex, nativeLength, forward);
- } else if (newContext == UTextProviderContext::PrimaryContext)
- textUTF16ContextAwareSwitchToPrimaryContext(text, nativeIndex, nativeLength, forward);
- else {
- ASSERT(newContext == UTextProviderContext::PriorContext);
- textUTF16ContextAwareSwitchToPriorContext(text, nativeIndex, nativeLength, forward);
- }
- return TRUE;
-}
-
-static int32_t uTextUTF16ContextAwareExtract(UText*, int64_t, int64_t, UChar*, int32_t, UErrorCode* errorCode)
-{
- // In the present context, this text provider is used only with ICU functions
- // that do not perform an extract operation.
- ASSERT_NOT_REACHED();
- *errorCode = U_UNSUPPORTED_ERROR;
- return 0;
-}
-
-static void uTextUTF16ContextAwareClose(UText* text)
-{
- text->context = nullptr;
-}
-
-UText* openUTF16ContextAwareUTextProvider(UText* text, const UChar* string, unsigned length, const UChar* priorContext, int priorContextLength, UErrorCode* status)
-{
- if (U_FAILURE(*status))
- return 0;
- if (!string || length > static_cast<unsigned>(std::numeric_limits<int32_t>::max())) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- text = utext_setup(text, 0, status);
- if (U_FAILURE(*status)) {
- ASSERT(!text);
- return 0;
- }
-
- initializeContextAwareUTextProvider(text, &textUTF16ContextAwareFuncs, string, length, priorContext, priorContextLength);
- return text;
-}
-
-} // namespace WebCore
diff --git a/Source/WebCore/platform/text/mac/make-charset-table.pl b/Source/WebCore/platform/text/mac/make-charset-table.pl
deleted file mode 100755
index 16fd25ab1..000000000
--- a/Source/WebCore/platform/text/mac/make-charset-table.pl
+++ /dev/null
@@ -1,225 +0,0 @@
-#!/usr/bin/perl -w
-
-# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
-# its contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
-# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
-# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-use strict;
-
-my %aliasesFromCharsetsFile;
-my %namesWritten;
-
-my $output = "";
-
-my $error = 0;
-
-sub error ($)
-{
- print STDERR @_, "\n";
- $error = 1;
-}
-
-sub emit_line
-{
- my ($name, $prefix, $encoding, $flags) = @_;
-
- error "$name shows up twice in output" if $namesWritten{$name};
- $namesWritten{$name} = 1;
-
- $output .= " { \"$name\", $prefix$encoding },\n";
-}
-
-sub process_platform_encodings
-{
- my ($filename, $PlatformPrefix) = @_;
- my $baseFilename = $filename;
- $baseFilename =~ s|.*/||;
-
- my %seenPlatformNames;
- my %seenIANANames;
-
- open PLATFORM_ENCODINGS, $filename or die;
-
- while (<PLATFORM_ENCODINGS>) {
- chomp;
- s/\#.*$//;
- s/\s+$//;
- if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
- my %aliases;
-
- my $PlatformNameWithFlags = $PlatformName;
- if ($flags) {
- $PlatformNameWithFlags .= ", " . $flags;
- } else {
- $flags = "NoEncodingFlags";
- }
- error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
- $seenPlatformNames{$PlatformNameWithFlags} = 1;
-
- # Build the aliases list.
- # Also check that no two names are part of the same entry in the charsets file.
- my @IANANames = split ", ", $IANANames;
- my $firstName = "";
- my $canonicalFirstName = "";
- my $prevName = "";
- for my $name (@IANANames) {
- if ($firstName eq "") {
- if ($name !~ /^[-A-Za-z0-9_]+$/) {
- error "$name, in $baseFilename, has illegal characters in it";
- next;
- }
- $firstName = $name;
- } else {
- if ($name !~ /^[a-z0-9]+$/) {
- error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
- next;
- }
- if ($name le $prevName) {
- error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
- }
- $prevName = $name;
- }
-
- my $canonicalName = lc $name;
- $canonicalName =~ tr/-_//d;
-
- $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
-
- error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
- $seenIANANames{$canonicalName} = 1;
-
- $aliases{$canonicalName} = 1;
- next if !$aliasesFromCharsetsFile{$canonicalName};
- for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
- $aliases{$alias} = 1;
- }
- for my $otherName (@IANANames) {
- next if $canonicalName eq $otherName;
- if ($aliasesFromCharsetsFile{$otherName}
- && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName}
- && $canonicalName le $otherName) {
- error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
- }
- }
- }
-
- # write out
- emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
- for my $alias (sort keys %aliases) {
- emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
- }
- } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
- my $PlatformName = $1;
-
- error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
- $seenPlatformNames{$PlatformName} = 1;
- } elsif (/./) {
- error "syntax error in $baseFilename, line $.";
- }
- }
-
- close PLATFORM_ENCODINGS;
-}
-
-sub process_iana_charset
-{
- my ($canonical_name, @aliases) = @_;
-
- return if !$canonical_name;
-
- my @names = sort $canonical_name, @aliases;
-
- for my $name (@names) {
- $aliasesFromCharsetsFile{$name} = \@names;
- }
-}
-
-sub process_iana_charsets
-{
- my ($filename) = @_;
-
- open CHARSETS, $filename or die;
-
- my %seen;
-
- my $canonical_name;
- my @aliases;
-
- my %exceptions = ( isoir91 => 1, isoir92 => 1 );
-
- while (<CHARSETS>) {
- chomp;
- if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
- $new_canonical_name = lc $new_canonical_name;
- $new_canonical_name =~ tr/a-z0-9//cd;
-
- error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name};
- $seen{$new_canonical_name} = $new_canonical_name;
-
- process_iana_charset $canonical_name, @aliases;
-
- $canonical_name = $new_canonical_name;
- @aliases = ();
- } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
- $new_alias = lc $new_alias;
- $new_alias =~ tr/a-z0-9//cd;
-
- # do this after normalizing the alias, sometimes character-sets.txt
- # has weird escape characters, e.g. \b after None
- next if $new_alias eq "none";
-
- error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
- push @aliases, $new_alias if !$seen{$new_alias};
- $seen{$new_alias} = $canonical_name;
- }
- }
-
- process_iana_charset $canonical_name, @aliases;
-
- close CHARSETS;
-}
-
-# Program body
-
-process_iana_charsets($ARGV[0]);
-process_platform_encodings($ARGV[1], $ARGV[2]);
-
-exit 1 if $error;
-
-print <<EOF
-// File generated by make-charset-table.pl. Do not edit!
-
-#include "config.h"
-#include "CharsetData.h"
-
-namespace WebCore {
-
- const CharsetEntry CharsetTable[] = {
-$output
- { 0, 0 }
- };
-
-}
-EOF