summaryrefslogtreecommitdiff
path: root/Source/WTF/wtf/unicode
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
committerLorry Tar Creator <lorry-tar-importer@lorry>2017-06-27 06:07:23 +0000
commit1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/WTF/wtf/unicode
parent32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
downloadWebKitGtk-tarball-master.tar.gz
Diffstat (limited to 'Source/WTF/wtf/unicode')
-rw-r--r--Source/WTF/wtf/unicode/CharacterNames.h36
-rw-r--r--Source/WTF/wtf/unicode/Collator.h53
-rw-r--r--Source/WTF/wtf/unicode/CollatorDefault.cpp48
-rw-r--r--Source/WTF/wtf/unicode/ScriptCodesFromICU.h153
-rw-r--r--Source/WTF/wtf/unicode/UTF8.cpp42
-rw-r--r--Source/WTF/wtf/unicode/UTF8.h11
-rw-r--r--Source/WTF/wtf/unicode/Unicode.h35
-rw-r--r--Source/WTF/wtf/unicode/UnicodeMacrosFromICU.h100
-rw-r--r--Source/WTF/wtf/unicode/icu/CollatorICU.cpp267
-rw-r--r--Source/WTF/wtf/unicode/icu/UnicodeIcu.h32
10 files changed, 307 insertions, 470 deletions
diff --git a/Source/WTF/wtf/unicode/CharacterNames.h b/Source/WTF/wtf/unicode/CharacterNames.h
index 8fc2fc28f..1c415be34 100644
--- a/Source/WTF/wtf/unicode/CharacterNames.h
+++ b/Source/WTF/wtf/unicode/CharacterNames.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -26,7 +26,7 @@
#ifndef CharacterNames_h
#define CharacterNames_h
-#include <wtf/unicode/Unicode.h>
+#include <unicode/utypes.h>
namespace WTF {
namespace Unicode {
@@ -37,8 +37,9 @@ namespace Unicode {
// more convenient for WebCore code that mostly uses UTF-16.
const UChar AppleLogo = 0xF8FF;
-const UChar32 aegeanWordSeparatorLine = 0x10100;
+const UChar HiraganaLetterSmallA = 0x3041;
const UChar32 aegeanWordSeparatorDot = 0x10101;
+const UChar32 aegeanWordSeparatorLine = 0x10100;
const UChar apostrophe = 0x0027;
const UChar blackCircle = 0x25CF;
const UChar blackSquare = 0x25A0;
@@ -48,11 +49,10 @@ const UChar bullseye = 0x25CE;
const UChar carriageReturn = 0x000D;
const UChar ethiopicPrefaceColon = 0x1366;
const UChar ethiopicWordspace = 0x1361;
+const UChar firstStrongIsolate = 0x2068;
const UChar fisheye = 0x25C9;
-const UChar quotationMark = 0x0022;
const UChar hebrewPunctuationGeresh = 0x05F3;
const UChar hebrewPunctuationGershayim = 0x05F4;
-const UChar HiraganaLetterSmallA = 0x3041;
const UChar horizontalEllipsis = 0x2026;
const UChar hyphen = 0x2010;
const UChar hyphenMinus = 0x002D;
@@ -62,25 +62,31 @@ const UChar ideographicSpace = 0x3000;
const UChar leftDoubleQuotationMark = 0x201C;
const UChar leftSingleQuotationMark = 0x2018;
const UChar leftToRightEmbed = 0x202A;
+const UChar leftToRightIsolate = 0x2066;
const UChar leftToRightMark = 0x200E;
const UChar leftToRightOverride = 0x202D;
const UChar minusSign = 0x2212;
+const UChar narrowNoBreakSpace = 0x202F;
+const UChar narrowNonBreakingSpace = 0x202F;
const UChar newlineCharacter = 0x000A;
const UChar noBreakSpace = 0x00A0;
const UChar objectReplacementCharacter = 0xFFFC;
const UChar popDirectionalFormatting = 0x202C;
+const UChar popDirectionalIsolate = 0x2069;
+const UChar quotationMark = 0x0022;
const UChar replacementCharacter = 0xFFFD;
const UChar rightDoubleQuotationMark = 0x201D;
const UChar rightSingleQuotationMark = 0x2019;
const UChar rightToLeftEmbed = 0x202B;
+const UChar rightToLeftIsolate = 0x2067;
const UChar rightToLeftMark = 0x200F;
const UChar rightToLeftOverride = 0x202E;
const UChar sesameDot = 0xFE45;
const UChar smallLetterSharpS = 0x00DF;
const UChar softHyphen = 0x00AD;
const UChar space = 0x0020;
-const UChar tibetanMarkIntersyllabicTsheg = 0x0F0B;
const UChar tibetanMarkDelimiterTshegBstar = 0x0F0C;
+const UChar tibetanMarkIntersyllabicTsheg = 0x0F0B;
const UChar32 ugariticWordDivider = 0x1039F;
const UChar whiteBullet = 0x25E6;
const UChar whiteCircle = 0x25CB;
@@ -88,16 +94,17 @@ const UChar whiteSesameDot = 0xFE46;
const UChar whiteUpPointingTriangle = 0x25B3;
const UChar yenSign = 0x00A5;
const UChar zeroWidthJoiner = 0x200D;
+const UChar zeroWidthNoBreakSpace = 0xFEFF;
const UChar zeroWidthNonJoiner = 0x200C;
const UChar zeroWidthSpace = 0x200B;
-const UChar zeroWidthNoBreakSpace = 0xFEFF;
} // namespace Unicode
} // namespace WTF
using WTF::Unicode::AppleLogo;
-using WTF::Unicode::aegeanWordSeparatorLine;
+using WTF::Unicode::HiraganaLetterSmallA;
using WTF::Unicode::aegeanWordSeparatorDot;
+using WTF::Unicode::aegeanWordSeparatorLine;
using WTF::Unicode::blackCircle;
using WTF::Unicode::blackSquare;
using WTF::Unicode::blackUpPointingTriangle;
@@ -106,10 +113,10 @@ using WTF::Unicode::bullseye;
using WTF::Unicode::carriageReturn;
using WTF::Unicode::ethiopicPrefaceColon;
using WTF::Unicode::ethiopicWordspace;
+using WTF::Unicode::firstStrongIsolate;
using WTF::Unicode::fisheye;
using WTF::Unicode::hebrewPunctuationGeresh;
using WTF::Unicode::hebrewPunctuationGershayim;
-using WTF::Unicode::HiraganaLetterSmallA;
using WTF::Unicode::horizontalEllipsis;
using WTF::Unicode::hyphen;
using WTF::Unicode::hyphenMinus;
@@ -119,24 +126,29 @@ using WTF::Unicode::ideographicSpace;
using WTF::Unicode::leftDoubleQuotationMark;
using WTF::Unicode::leftSingleQuotationMark;
using WTF::Unicode::leftToRightEmbed;
+using WTF::Unicode::leftToRightIsolate;
using WTF::Unicode::leftToRightMark;
using WTF::Unicode::leftToRightOverride;
using WTF::Unicode::minusSign;
+using WTF::Unicode::narrowNoBreakSpace;
+using WTF::Unicode::narrowNonBreakingSpace;
using WTF::Unicode::newlineCharacter;
using WTF::Unicode::noBreakSpace;
using WTF::Unicode::objectReplacementCharacter;
using WTF::Unicode::popDirectionalFormatting;
+using WTF::Unicode::popDirectionalIsolate;
using WTF::Unicode::replacementCharacter;
using WTF::Unicode::rightDoubleQuotationMark;
using WTF::Unicode::rightSingleQuotationMark;
using WTF::Unicode::rightToLeftEmbed;
+using WTF::Unicode::rightToLeftIsolate;
using WTF::Unicode::rightToLeftMark;
using WTF::Unicode::rightToLeftOverride;
using WTF::Unicode::sesameDot;
using WTF::Unicode::softHyphen;
using WTF::Unicode::space;
-using WTF::Unicode::tibetanMarkIntersyllabicTsheg;
using WTF::Unicode::tibetanMarkDelimiterTshegBstar;
+using WTF::Unicode::tibetanMarkIntersyllabicTsheg;
using WTF::Unicode::ugariticWordDivider;
using WTF::Unicode::whiteBullet;
using WTF::Unicode::whiteCircle;
@@ -144,8 +156,8 @@ using WTF::Unicode::whiteSesameDot;
using WTF::Unicode::whiteUpPointingTriangle;
using WTF::Unicode::yenSign;
using WTF::Unicode::zeroWidthJoiner;
+using WTF::Unicode::zeroWidthNoBreakSpace;
using WTF::Unicode::zeroWidthNonJoiner;
using WTF::Unicode::zeroWidthSpace;
-using WTF::Unicode::zeroWidthNoBreakSpace;
#endif // CharacterNames_h
diff --git a/Source/WTF/wtf/unicode/Collator.h b/Source/WTF/wtf/unicode/Collator.h
index 67486c7d0..75ff477a5 100644
--- a/Source/WTF/wtf/unicode/Collator.h
+++ b/Source/WTF/wtf/unicode/Collator.h
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * 3. Neither the name of Apple Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -29,36 +29,49 @@
#ifndef WTF_Collator_h
#define WTF_Collator_h
+#include <unicode/uconfig.h>
#include <wtf/Noncopyable.h>
-#include <wtf/OwnPtr.h>
-#include <wtf/unicode/Unicode.h>
+struct UCharIterator;
struct UCollator;
namespace WTF {
- class Collator {
- WTF_MAKE_NONCOPYABLE(Collator); WTF_MAKE_FAST_ALLOCATED;
- public:
- enum Result { Equal = 0, Greater = 1, Less = -1 };
+class StringView;
- WTF_EXPORT_PRIVATE Collator(const char* locale); // Parsing is lenient; e.g. language identifiers (such as "en-US") are accepted, too.
- WTF_EXPORT_PRIVATE ~Collator();
- WTF_EXPORT_PRIVATE void setOrderLowerFirst(bool);
+#if UCONFIG_NO_COLLATION
- WTF_EXPORT_PRIVATE static std::unique_ptr<Collator> userDefault();
+class Collator {
+public:
+ explicit Collator(const char* = nullptr, bool = false) { }
- WTF_EXPORT_PRIVATE Result collate(const ::UChar*, size_t, const ::UChar*, size_t) const;
+ WTF_EXPORT_PRIVATE static int collate(StringView, StringView);
+ WTF_EXPORT_PRIVATE static int collateUTF8(const char*, const char*);
+};
+
+#else
+
+class Collator {
+ WTF_MAKE_NONCOPYABLE(Collator);
+public:
+ // The value nullptr is a special one meaning the system default locale.
+ // Locale name parsing is lenient; e.g. language identifiers (such as "en-US") are accepted, too.
+ WTF_EXPORT_PRIVATE explicit Collator(const char* locale = nullptr, bool shouldSortLowercaseFirst = false);
+ WTF_EXPORT_PRIVATE ~Collator();
+
+ WTF_EXPORT_PRIVATE int collate(StringView, StringView) const;
+ WTF_EXPORT_PRIVATE int collateUTF8(const char*, const char*) const;
+
+private:
+ char* m_locale;
+ bool m_shouldSortLowercaseFirst;
+ UCollator* m_collator;
+};
+
+WTF_EXPORT_PRIVATE UCharIterator createIterator(StringView);
- private:
-#if !UCONFIG_NO_COLLATION
- void createCollator() const;
- void releaseCollator();
- mutable UCollator* m_collator;
#endif
- char* m_locale;
- bool m_lowerFirst;
- };
+
}
using WTF::Collator;
diff --git a/Source/WTF/wtf/unicode/CollatorDefault.cpp b/Source/WTF/wtf/unicode/CollatorDefault.cpp
index d56bb5a38..eab171bf2 100644
--- a/Source/WTF/wtf/unicode/CollatorDefault.cpp
+++ b/Source/WTF/wtf/unicode/CollatorDefault.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2008, 2014 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * 3. Neither the name of Apple Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -33,41 +33,27 @@
namespace WTF {
-Collator::Collator(const char*)
+int Collator::collate(StringView a, StringView b) const
{
-}
-
-Collator::~Collator()
-{
-}
+ unsigned commonLength = std::min(a.length(), b.length());
+ for (unsigned i = 0; i < commonLength; ++i) {
+ if (a[i] < b[i])
+ return -1;
+ if (a[i] > b[i])
+ return 1;
+ }
-void Collator::setOrderLowerFirst(bool)
-{
-}
+ if (a.length() < b.length())
+ return -1;
+ if (a.length() > b.length())
+ return 1;
-std::unique_ptr<Collator> Collator::userDefault()
-{
- return std::make_unique<Collator>(nullptr);
+ return 0;
}
-// A default implementation for platforms that lack Unicode-aware collation.
-Collator::Result Collator::collate(const UChar* lhs, size_t lhsLength, const UChar* rhs, size_t rhsLength) const
+int Collator::collateUTF8(const char* a, const char* b) const
{
- int lmin = lhsLength < rhsLength ? lhsLength : rhsLength;
- int l = 0;
- while (l < lmin && *lhs == *rhs) {
- lhs++;
- rhs++;
- l++;
- }
-
- if (l < lmin)
- return (*lhs > *rhs) ? Greater : Less;
-
- if (lhsLength == rhsLength)
- return Equal;
-
- return (lhsLength > rhsLength) ? Greater : Less;
+ return collate(String::fromUTF8(a), String::fromUTF8(b));
}
}
diff --git a/Source/WTF/wtf/unicode/ScriptCodesFromICU.h b/Source/WTF/wtf/unicode/ScriptCodesFromICU.h
deleted file mode 100644
index 4760399a1..000000000
--- a/Source/WTF/wtf/unicode/ScriptCodesFromICU.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (C) 1997-2006, International Business Machines
- * Corporation and others. All Rights Reserved.
- */
-
-#ifndef WTF_ScriptCodesFromICU_h
-#define WTF_ScriptCodesFromICU_h
-
-/**
- * Constants for ISO 15924 script codes.
- *
- * Many of these script codes - those from Unicode's ScriptNames.txt -
- * are character property values for Unicode's Script property.
- * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/).
- *
- * Starting with ICU 3.6, constants for most ISO 15924 script codes
- * are included (currently excluding private-use codes Qaaa..Qabx).
- * For scripts for which there are codes in ISO 15924 but which are not
- * used in the Unicode Character Database (UCD), there are no Unicode characters
- * associated with those scripts.
- *
- * For example, there are no characters that have a UCD script code of
- * Hans or Hant. All Han ideographs have the Hani script code.
- * The Hans and Hant script codes are used with CLDR data.
- *
- * ISO 15924 script codes are included for use with CLDR and similar.
- *
- * @stable ICU 2.2
- */
-typedef enum UScriptCode {
- USCRIPT_INVALID_CODE = -1,
- USCRIPT_COMMON = 0 , /* Zyyy */
- USCRIPT_INHERITED = 1, /* Qaai */
- USCRIPT_ARABIC = 2, /* Arab */
- USCRIPT_ARMENIAN = 3, /* Armn */
- USCRIPT_BENGALI = 4, /* Beng */
- USCRIPT_BOPOMOFO = 5, /* Bopo */
- USCRIPT_CHEROKEE = 6, /* Cher */
- USCRIPT_COPTIC = 7, /* Copt */
- USCRIPT_CYRILLIC = 8, /* Cyrl */
- USCRIPT_DESERET = 9, /* Dsrt */
- USCRIPT_DEVANAGARI = 10, /* Deva */
- USCRIPT_ETHIOPIC = 11, /* Ethi */
- USCRIPT_GEORGIAN = 12, /* Geor */
- USCRIPT_GOTHIC = 13, /* Goth */
- USCRIPT_GREEK = 14, /* Grek */
- USCRIPT_GUJARATI = 15, /* Gujr */
- USCRIPT_GURMUKHI = 16, /* Guru */
- USCRIPT_HAN = 17, /* Hani */
- USCRIPT_HANGUL = 18, /* Hang */
- USCRIPT_HEBREW = 19, /* Hebr */
- USCRIPT_HIRAGANA = 20, /* Hira */
- USCRIPT_KANNADA = 21, /* Knda */
- USCRIPT_KATAKANA = 22, /* Kana */
- USCRIPT_KHMER = 23, /* Khmr */
- USCRIPT_LAO = 24, /* Laoo */
- USCRIPT_LATIN = 25, /* Latn */
- USCRIPT_MALAYALAM = 26, /* Mlym */
- USCRIPT_MONGOLIAN = 27, /* Mong */
- USCRIPT_MYANMAR = 28, /* Mymr */
- USCRIPT_OGHAM = 29, /* Ogam */
- USCRIPT_OLD_ITALIC = 30, /* Ital */
- USCRIPT_ORIYA = 31, /* Orya */
- USCRIPT_RUNIC = 32, /* Runr */
- USCRIPT_SINHALA = 33, /* Sinh */
- USCRIPT_SYRIAC = 34, /* Syrc */
- USCRIPT_TAMIL = 35, /* Taml */
- USCRIPT_TELUGU = 36, /* Telu */
- USCRIPT_THAANA = 37, /* Thaa */
- USCRIPT_THAI = 38, /* Thai */
- USCRIPT_TIBETAN = 39, /* Tibt */
- /** Canadian_Aboriginal script. @stable ICU 2.6 */
- USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
- /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
- USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
- USCRIPT_YI = 41, /* Yiii */
- USCRIPT_TAGALOG = 42, /* Tglg */
- USCRIPT_HANUNOO = 43, /* Hano */
- USCRIPT_BUHID = 44, /* Buhd */
- USCRIPT_TAGBANWA = 45, /* Tagb */
-
- /* New scripts in Unicode 4 @stable ICU 2.6 */
- USCRIPT_BRAILLE = 46, /* Brai */
- USCRIPT_CYPRIOT = 47, /* Cprt */
- USCRIPT_LIMBU = 48, /* Limb */
- USCRIPT_LINEAR_B = 49, /* Linb */
- USCRIPT_OSMANYA = 50, /* Osma */
- USCRIPT_SHAVIAN = 51, /* Shaw */
- USCRIPT_TAI_LE = 52, /* Tale */
- USCRIPT_UGARITIC = 53, /* Ugar */
-
- /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
- USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
-
-#ifndef U_HIDE_DRAFT_API
- /* New scripts in Unicode 4.1 @draft ICU 3.4 */
- USCRIPT_BUGINESE = 55, /* Bugi */
- USCRIPT_GLAGOLITIC = 56, /* Glag */
- USCRIPT_KHAROSHTHI = 57, /* Khar */
- USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
- USCRIPT_NEW_TAI_LUE = 59, /* Talu */
- USCRIPT_TIFINAGH = 60, /* Tfng */
- USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
-
- /* New script codes from ISO 15924 @draft ICU 3.6 */
- USCRIPT_BALINESE = 62, /* Bali */
- USCRIPT_BATAK = 63, /* Batk */
- USCRIPT_BLISSYMBOLS = 64, /* Blis */
- USCRIPT_BRAHMI = 65, /* Brah */
- USCRIPT_CHAM = 66, /* Cham */
- USCRIPT_CIRTH = 67, /* Cirt */
- USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
- USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
- USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
- USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
- USCRIPT_KHUTSURI = 72, /* Geok */
- USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
- USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
- USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
- USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
- USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
- USCRIPT_JAVANESE = 78, /* Java */
- USCRIPT_KAYAH_LI = 79, /* Kali */
- USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
- USCRIPT_LATIN_GAELIC = 81, /* Latg */
- USCRIPT_LEPCHA = 82, /* Lepc */
- USCRIPT_LINEAR_A = 83, /* Lina */
- USCRIPT_MANDAEAN = 84, /* Mand */
- USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
- USCRIPT_MEROITIC = 86, /* Mero */
- USCRIPT_NKO = 87, /* Nkoo */
- USCRIPT_ORKHON = 88, /* Orkh */
- USCRIPT_OLD_PERMIC = 89, /* Perm */
- USCRIPT_PHAGS_PA = 90, /* Phag */
- USCRIPT_PHOENICIAN = 91, /* Phnx */
- USCRIPT_PHONETIC_POLLARD = 92, /* Plrd */
- USCRIPT_RONGORONGO = 93, /* Roro */
- USCRIPT_SARATI = 94, /* Sara */
- USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
- USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
- USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
- USCRIPT_TENGWAR = 98, /* Teng */
- USCRIPT_VAI = 99, /* Vaii */
- USCRIPT_VISIBLE_SPEECH = 100, /* Visp */
- USCRIPT_CUNEIFORM = 101,/* Xsux */
- USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
- USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
- /* Private use codes from Qaaa - Qabx are not supported*/
-#endif /* U_HIDE_DRAFT_API */
- USCRIPT_CODE_LIMIT = 104
-} UScriptCode;
-
-#endif
diff --git a/Source/WTF/wtf/unicode/UTF8.cpp b/Source/WTF/wtf/unicode/UTF8.cpp
index 1a12e1241..5407401e6 100644
--- a/Source/WTF/wtf/unicode/UTF8.cpp
+++ b/Source/WTF/wtf/unicode/UTF8.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007 Apple Inc. All rights reserved.
+ * Copyright (C) 2007, 2014 Apple Inc. All rights reserved.
* Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
*
* Redistribution and use in source and binary forms, with or without
@@ -11,10 +11,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -28,7 +28,7 @@
#include "UTF8.h"
#include "ASCIICType.h"
-#include <wtf/StringHasher.h>
+#include <wtf/Hasher.h>
#include <wtf/unicode/CharacterNames.h>
namespace WTF {
@@ -420,10 +420,10 @@ unsigned calculateStringHashAndLengthFromUTF8MaskingTop8Bits(const char* data, c
return stringHasher.hashWithTop8BitsMasked();
}
-bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd)
+bool equalUTF16WithUTF8(const UChar* a, const char* b, const char* bEnd)
{
while (b < bEnd) {
- if (isASCII(*b)) {
+ if (isASCII(*a) || isASCII(*b)) {
if (*a++ != *b++)
return false;
continue;
@@ -435,7 +435,7 @@ bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const
return false;
if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(b), utf8SequenceLength))
- return 0;
+ return false;
UChar32 character = readUTF8Sequence(b, utf8SequenceLength);
ASSERT(!isASCII(character));
@@ -455,7 +455,33 @@ bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const
return false;
}
- return a == aEnd;
+ return true;
+}
+
+bool equalLatin1WithUTF8(const LChar* a, const char* b, const char* bEnd)
+{
+ while (b < bEnd) {
+ if (isASCII(*a) || isASCII(*b)) {
+ if (*a++ != *b++)
+ return false;
+ continue;
+ }
+
+ if (b + 1 == bEnd)
+ return false;
+
+ if ((b[0] & 0xE0) != 0xC0 || (b[1] & 0xC0) != 0x80)
+ return false;
+
+ LChar character = ((b[0] & 0x1F) << 6) | (b[1] & 0x3F);
+
+ b += 2;
+
+ if (*a++ != character)
+ return false;
+ }
+
+ return true;
}
} // namespace Unicode
diff --git a/Source/WTF/wtf/unicode/UTF8.h b/Source/WTF/wtf/unicode/UTF8.h
index e95cc1288..354aad71e 100644
--- a/Source/WTF/wtf/unicode/UTF8.h
+++ b/Source/WTF/wtf/unicode/UTF8.h
@@ -10,10 +10,10 @@
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
@@ -26,7 +26,8 @@
#ifndef WTF_UTF8_h
#define WTF_UTF8_h
-#include <wtf/unicode/Unicode.h>
+#include <unicode/utypes.h>
+#include <wtf/text/LChar.h>
namespace WTF {
namespace Unicode {
@@ -76,7 +77,9 @@ namespace Unicode {
WTF_EXPORT_PRIVATE unsigned calculateStringHashAndLengthFromUTF8MaskingTop8Bits(const char* data, const char* dataEnd, unsigned& dataLength, unsigned& utf16Length);
- WTF_EXPORT_PRIVATE bool equalUTF16WithUTF8(const UChar* a, const UChar* aEnd, const char* b, const char* bEnd);
+ // The caller of these functions already knows that the lengths are the same, so we omit an end argument for UTF-16 and Latin-1.
+ bool equalUTF16WithUTF8(const UChar* stringInUTF16, const char* stringInUTF8, const char* stringInUTF8End);
+ bool equalLatin1WithUTF8(const LChar* stringInLatin1, const char* stringInUTF8, const char* stringInUTF8End);
} // namespace Unicode
} // namespace WTF
diff --git a/Source/WTF/wtf/unicode/Unicode.h b/Source/WTF/wtf/unicode/Unicode.h
deleted file mode 100644
index 84f777342..000000000
--- a/Source/WTF/wtf/unicode/Unicode.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2006 George Staikos <staikos@kde.org>
- * Copyright (C) 2006, 2008, 2009 Apple Inc. All rights reserved.
- * Copyright (C) 2007-2009 Torch Mobile, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef WTF_UNICODE_H
-#define WTF_UNICODE_H
-
-#include <wtf/Assertions.h>
-
-// Define platform neutral 8 bit character type (L is for Latin-1).
-typedef unsigned char LChar;
-
-#include <wtf/unicode/icu/UnicodeIcu.h>
-
-static_assert(sizeof(UChar) == 2, "UChar must be two bytes!");
-
-#endif // WTF_UNICODE_H
diff --git a/Source/WTF/wtf/unicode/UnicodeMacrosFromICU.h b/Source/WTF/wtf/unicode/UnicodeMacrosFromICU.h
deleted file mode 100644
index 09a7036e3..000000000
--- a/Source/WTF/wtf/unicode/UnicodeMacrosFromICU.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (C) 1999-2004, International Business Machines Corporation and others. All Rights Reserved.
- *
- */
-
-#ifndef UnicodeMacrosFromICU_h
-#define UnicodeMacrosFromICU_h
-
-// some defines from ICU
-
-#define U_IS_BMP(c) ((UChar32)(c)<=0xffff)
-#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
-#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
-#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
-#define U16_GET_SUPPLEMENTARY(lead, trail) \
- (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
-
-#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
-#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
-#define U16_LENGTH(c) ((uint32_t)(c) <= 0xffff ? 1 : 2)
-
-#define U_IS_SUPPLEMENTARY(c) ((UChar32)((c)-0x10000)<=0xfffff)
-#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
-#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
-#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
-#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
-
-#define U16_GET(s, start, i, length, c) { \
- (c)=(s)[i]; \
- if(U16_IS_SURROGATE(c)) { \
- uint16_t __c2; \
- if(U16_IS_SURROGATE_LEAD(c)) { \
- if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
- (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
- } \
- } else { \
- if((i)-1>=(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
- (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
- } \
- } \
- } \
-}
-
-#define U16_PREV(s, start, i, c) { \
- (c)=(s)[--(i)]; \
- if(U16_IS_TRAIL(c)) { \
- uint16_t __c2; \
- if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
- --(i); \
- (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
- } \
- } \
-}
-
-#define U16_BACK_1(s, start, i) { \
- if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
- --(i); \
- } \
-}
-
-#define U16_NEXT(s, i, length, c) { \
- (c)=(s)[(i)++]; \
- if(U16_IS_LEAD(c)) { \
- uint16_t __c2; \
- if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
- ++(i); \
- (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
- } \
- } \
-}
-
-#define U16_FWD_1(s, i, length) { \
- if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \
- ++(i); \
- } \
-}
-
-#define U_MASK(x) ((uint32_t)1<<(x))
-
-#define U8_MAX_LENGTH 4
-
-#define U8_APPEND_UNSAFE(s, i, c) { \
- if((uint32_t)(c)<=0x7f) { \
- (s)[(i)++]=(uint8_t)(c); \
- } else { \
- if((uint32_t)(c)<=0x7ff) { \
- (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
- } else { \
- if((uint32_t)(c)<=0xffff) { \
- (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
- } else { \
- (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
- (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
- } \
- (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
- } \
- (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
- } \
-}
-#endif
diff --git a/Source/WTF/wtf/unicode/icu/CollatorICU.cpp b/Source/WTF/wtf/unicode/icu/CollatorICU.cpp
index e4059bcdc..26a4d8386 100644
--- a/Source/WTF/wtf/unicode/icu/CollatorICU.cpp
+++ b/Source/WTF/wtf/unicode/icu/CollatorICU.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2008 Apple Inc. All rights reserved.
+ * Copyright (C) 2008, 2014 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -10,7 +10,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+ * 3. Neither the name of Apple Inc. ("Apple") nor the names of
* its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -29,129 +29,246 @@
#include "config.h"
#include <wtf/unicode/Collator.h>
+// FIXME: Merge this with CollatorDefault.cpp into a single Collator.cpp source file.
+
#if !UCONFIG_NO_COLLATION
#include <mutex>
-#include <wtf/Assertions.h>
-#include <wtf/StringExtras.h>
#include <unicode/ucol.h>
-#include <string.h>
+#include <wtf/Lock.h>
+#include <wtf/StringExtras.h>
+#include <wtf/text/StringView.h>
-#if OS(DARWIN)
-#include <wtf/RetainPtr.h>
+#if OS(DARWIN) && USE(CF)
#include <CoreFoundation/CoreFoundation.h>
+#include <wtf/RetainPtr.h>
#endif
namespace WTF {
static UCollator* cachedCollator;
+static char* cachedCollatorLocale;
+static bool cachedCollatorShouldSortLowercaseFirst;
-static std::mutex& cachedCollatorMutex()
-{
- static std::once_flag onceFlag;
- static std::mutex* mutex;
- std::call_once(onceFlag, []{
- mutex = std::make_unique<std::mutex>().release();
- });
+static StaticLock cachedCollatorMutex;
+
+#if !(OS(DARWIN) && USE(CF))
- return *mutex;
+static inline const char* resolveDefaultLocale(const char* locale)
+{
+ return locale;
}
-Collator::Collator(const char* locale)
- : m_collator(0)
- , m_locale(locale ? strdup(locale) : 0)
- , m_lowerFirst(false)
+#else
+
+static inline char* copyShortASCIIString(CFStringRef string)
{
+ // OK to have a fixed size buffer and to only handle ASCII since we only use this for locale names.
+ char buffer[256];
+ if (!string || !CFStringGetCString(string, buffer, sizeof(buffer), kCFStringEncodingASCII))
+ return strdup("");
+ return strdup(buffer);
}
-std::unique_ptr<Collator> Collator::userDefault()
+static char* copyDefaultLocale()
{
-#if OS(DARWIN) && USE(CF)
- // Mac OS X doesn't set UNIX locale to match user-selected one, so ICU default doesn't work.
-#if !OS(IOS)
- RetainPtr<CFLocaleRef> currentLocale = adoptCF(CFLocaleCopyCurrent());
- CFStringRef collationOrder = (CFStringRef)CFLocaleGetValue(currentLocale.get(), kCFLocaleCollatorIdentifier);
+#if !PLATFORM(IOS)
+ return copyShortASCIIString(static_cast<CFStringRef>(CFLocaleGetValue(adoptCF(CFLocaleCopyCurrent()).get(), kCFLocaleCollatorIdentifier)));
#else
- RetainPtr<CFStringRef> collationOrderRetainer = adoptCF((CFStringRef)CFPreferencesCopyValue(CFSTR("AppleCollationOrder"), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost));
- CFStringRef collationOrder = collationOrderRetainer.get();
-#endif
- char buf[256];
- if (!collationOrder)
- return std::make_unique<Collator>("");
- CFStringGetCString(collationOrder, buf, sizeof(buf), kCFStringEncodingASCII);
- return std::make_unique<Collator>(buf);
-#else
- return std::make_unique<Collator>(static_cast<const char*>(0));
+ // FIXME: Documentation claims the code above would work on iOS 4.0 and later. After test that works, we should remove this and use that instead.
+ return copyShortASCIIString(adoptCF(static_cast<CFStringRef>(CFPreferencesCopyValue(CFSTR("AppleCollationOrder"), kCFPreferencesAnyApplication, kCFPreferencesCurrentUser, kCFPreferencesAnyHost))).get());
#endif
}
-Collator::~Collator()
+static inline const char* resolveDefaultLocale(const char* locale)
{
- releaseCollator();
- free(m_locale);
+ if (locale)
+ return locale;
+ // Since iOS and OS X don't set UNIX locale to match the user's selected locale, the ICU default locale is not the right one.
+ // So, instead of passing null to ICU, we pass the name of the user's selected locale.
+ static char* defaultLocale;
+ static std::once_flag initializeDefaultLocaleOnce;
+ std::call_once(initializeDefaultLocaleOnce, []{
+ defaultLocale = copyDefaultLocale();
+ });
+ return defaultLocale;
}
-void Collator::setOrderLowerFirst(bool lowerFirst)
-{
- m_lowerFirst = lowerFirst;
-}
+#endif
-Collator::Result Collator::collate(const UChar* lhs, size_t lhsLength, const UChar* rhs, size_t rhsLength) const
+static inline bool localesMatch(const char* a, const char* b)
{
- if (!m_collator)
- createCollator();
-
- return static_cast<Result>(ucol_strcoll(m_collator, lhs, lhsLength, rhs, rhsLength));
+ // Two null locales are equal; other locales are compared with strcmp.
+ return a == b || (a && b && !strcmp(a, b));
}
-void Collator::createCollator() const
+Collator::Collator(const char* locale, bool shouldSortLowercaseFirst)
{
- ASSERT(!m_collator);
UErrorCode status = U_ZERO_ERROR;
{
- std::lock_guard<std::mutex> lock(cachedCollatorMutex());
- if (cachedCollator) {
- const char* cachedCollatorLocale = ucol_getLocaleByType(cachedCollator, ULOC_REQUESTED_LOCALE, &status);
- ASSERT(U_SUCCESS(status));
- ASSERT(cachedCollatorLocale);
-
- UColAttributeValue cachedCollatorLowerFirst = ucol_getAttribute(cachedCollator, UCOL_CASE_FIRST, &status);
- ASSERT(U_SUCCESS(status));
-
- // FIXME: default locale is never matched, because ucol_getLocaleByType returns the actual one used, not 0.
- if (m_locale && 0 == strcmp(cachedCollatorLocale, m_locale)
- && ((UCOL_LOWER_FIRST == cachedCollatorLowerFirst && m_lowerFirst) || (UCOL_UPPER_FIRST == cachedCollatorLowerFirst && !m_lowerFirst))) {
- m_collator = cachedCollator;
- cachedCollator = nullptr;
- return;
- }
+ std::lock_guard<StaticLock> lock(cachedCollatorMutex);
+ if (cachedCollator && localesMatch(cachedCollatorLocale, locale) && cachedCollatorShouldSortLowercaseFirst == shouldSortLowercaseFirst) {
+ m_collator = cachedCollator;
+ m_locale = cachedCollatorLocale;
+ m_shouldSortLowercaseFirst = shouldSortLowercaseFirst;
+ cachedCollator = nullptr;
+ cachedCollatorLocale = nullptr;
+ return;
}
}
- m_collator = ucol_open(m_locale, &status);
+ m_collator = ucol_open(resolveDefaultLocale(locale), &status);
if (U_FAILURE(status)) {
status = U_ZERO_ERROR;
- m_collator = ucol_open("", &status); // Fallback to Unicode Collation Algorithm.
+ m_collator = ucol_open("", &status); // Fall back to Unicode Collation Algorithm.
}
ASSERT(U_SUCCESS(status));
- ucol_setAttribute(m_collator, UCOL_CASE_FIRST, m_lowerFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status);
+ ucol_setAttribute(m_collator, UCOL_CASE_FIRST, shouldSortLowercaseFirst ? UCOL_LOWER_FIRST : UCOL_UPPER_FIRST, &status);
ASSERT(U_SUCCESS(status));
ucol_setAttribute(m_collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
ASSERT(U_SUCCESS(status));
+
+ m_locale = locale ? fastStrDup(locale) : nullptr;
+ m_shouldSortLowercaseFirst = shouldSortLowercaseFirst;
}
-void Collator::releaseCollator()
+Collator::~Collator()
{
- {
- std::lock_guard<std::mutex> lock(cachedCollatorMutex());
- if (cachedCollator)
- ucol_close(cachedCollator);
- cachedCollator = m_collator;
- m_collator = nullptr;
+ std::lock_guard<StaticLock> lock(cachedCollatorMutex);
+ if (cachedCollator) {
+ ucol_close(cachedCollator);
+ fastFree(cachedCollatorLocale);
+ }
+ cachedCollator = m_collator;
+ cachedCollatorLocale = m_locale;
+ cachedCollatorShouldSortLowercaseFirst = m_shouldSortLowercaseFirst;
+}
+
+static int32_t getIndexLatin1(UCharIterator* iterator, UCharIteratorOrigin origin)
+{
+ switch (origin) {
+ case UITER_START:
+ return iterator->start;
+ case UITER_CURRENT:
+ return iterator->index;
+ case UITER_LIMIT:
+ return iterator->limit;
+ case UITER_ZERO:
+ return 0;
+ case UITER_LENGTH:
+ return iterator->length;
}
+ ASSERT_NOT_REACHED();
+ return U_SENTINEL;
+}
+
+static int32_t moveLatin1(UCharIterator* iterator, int32_t delta, UCharIteratorOrigin origin)
+{
+ return iterator->index = getIndexLatin1(iterator, origin) + delta;
+}
+
+static UBool hasNextLatin1(UCharIterator* iterator)
+{
+ return iterator->index < iterator->limit;
+}
+
+static UBool hasPreviousLatin1(UCharIterator* iterator)
+{
+ return iterator->index > iterator->start;
+}
+
+static UChar32 currentLatin1(UCharIterator* iterator)
+{
+ ASSERT(iterator->index >= iterator->start);
+ if (iterator->index >= iterator->limit)
+ return U_SENTINEL;
+ return static_cast<const LChar*>(iterator->context)[iterator->index];
+}
+
+static UChar32 nextLatin1(UCharIterator* iterator)
+{
+ ASSERT(iterator->index >= iterator->start);
+ if (iterator->index >= iterator->limit)
+ return U_SENTINEL;
+ return static_cast<const LChar*>(iterator->context)[iterator->index++];
+}
+
+static UChar32 previousLatin1(UCharIterator* iterator)
+{
+ if (iterator->index <= iterator->start)
+ return U_SENTINEL;
+ return static_cast<const LChar*>(iterator->context)[--iterator->index];
+}
+
+static uint32_t getStateLatin1(const UCharIterator* iterator)
+{
+ return iterator->index;
+}
+
+static void setStateLatin1(UCharIterator* iterator, uint32_t state, UErrorCode*)
+{
+ iterator->index = state;
+}
+
+static UCharIterator createLatin1Iterator(const LChar* characters, int length)
+{
+ UCharIterator iterator;
+ iterator.context = characters;
+ iterator.length = length;
+ iterator.start = 0;
+ iterator.index = 0;
+ iterator.limit = length;
+ iterator.reservedField = 0;
+ iterator.getIndex = getIndexLatin1;
+ iterator.move = moveLatin1;
+ iterator.hasNext = hasNextLatin1;
+ iterator.hasPrevious = hasPreviousLatin1;
+ iterator.current = currentLatin1;
+ iterator.next = nextLatin1;
+ iterator.previous = previousLatin1;
+ iterator.reservedFn = nullptr;
+ iterator.getState = getStateLatin1;
+ iterator.setState = setStateLatin1;
+ return iterator;
+}
+
+UCharIterator createIterator(StringView string)
+{
+ if (string.is8Bit())
+ return createLatin1Iterator(string.characters8(), string.length());
+ UCharIterator iterator;
+ uiter_setString(&iterator, string.characters16(), string.length());
+ return iterator;
+}
+
+int Collator::collate(StringView a, StringView b) const
+{
+ UCharIterator iteratorA = createIterator(a);
+ UCharIterator iteratorB = createIterator(b);
+ UErrorCode status = U_ZERO_ERROR;
+ int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status);
+ ASSERT(U_SUCCESS(status));
+ return result;
+}
+
+static UCharIterator createIteratorUTF8(const char* string)
+{
+ UCharIterator iterator;
+ uiter_setUTF8(&iterator, string, strlen(string));
+ return iterator;
+}
+
+int Collator::collateUTF8(const char* a, const char* b) const
+{
+ UCharIterator iteratorA = createIteratorUTF8(a);
+ UCharIterator iteratorB = createIteratorUTF8(b);
+ UErrorCode status = U_ZERO_ERROR;
+ int result = ucol_strcollIter(m_collator, &iteratorA, &iteratorB, &status);
+ ASSERT(U_SUCCESS(status));
+ return result;
}
} // namespace WTF
diff --git a/Source/WTF/wtf/unicode/icu/UnicodeIcu.h b/Source/WTF/wtf/unicode/icu/UnicodeIcu.h
deleted file mode 100644
index 0a386b59d..000000000
--- a/Source/WTF/wtf/unicode/icu/UnicodeIcu.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2006 George Staikos <staikos@kde.org>
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB. If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef WTF_UNICODE_ICU_H
-#define WTF_UNICODE_ICU_H
-
-#include <stdlib.h>
-#include <unicode/uchar.h>
-#include <unicode/uscript.h>
-#include <unicode/ustring.h>
-#include <unicode/utf16.h>
-
-#endif // WTF_UNICODE_ICU_H