summaryrefslogtreecommitdiff
path: root/Source/WebCore/icu
diff options
context:
space:
mode:
Diffstat (limited to 'Source/WebCore/icu')
-rw-r--r--Source/WebCore/icu/README4
-rw-r--r--Source/WebCore/icu/unicode/bytestream.h257
-rw-r--r--Source/WebCore/icu/unicode/chariter.h722
-rw-r--r--Source/WebCore/icu/unicode/localpointer.h304
-rw-r--r--Source/WebCore/icu/unicode/platform.h880
-rw-r--r--Source/WebCore/icu/unicode/ptypes.h126
-rw-r--r--Source/WebCore/icu/unicode/putil.h35
-rw-r--r--Source/WebCore/icu/unicode/rep.h261
-rw-r--r--Source/WebCore/icu/unicode/std_string.h34
-rw-r--r--Source/WebCore/icu/unicode/strenum.h276
-rw-r--r--Source/WebCore/icu/unicode/stringpiece.h224
-rw-r--r--Source/WebCore/icu/unicode/ubrk.h70
-rw-r--r--Source/WebCore/icu/unicode/uchar.h329
-rw-r--r--Source/WebCore/icu/unicode/ucnv.h103
-rw-r--r--Source/WebCore/icu/unicode/ucol.h358
-rw-r--r--Source/WebCore/icu/unicode/ucoleitr.h336
-rw-r--r--Source/WebCore/icu/unicode/uconfig.h191
-rw-r--r--Source/WebCore/icu/unicode/ucsdet.h413
-rw-r--r--Source/WebCore/icu/unicode/ucurr.h360
-rw-r--r--Source/WebCore/icu/unicode/uenum.h40
-rw-r--r--Source/WebCore/icu/unicode/uidna.h145
-rw-r--r--Source/WebCore/icu/unicode/uiter.h6
-rw-r--r--Source/WebCore/icu/unicode/uloc.h43
-rw-r--r--Source/WebCore/icu/unicode/umachine.h136
-rw-r--r--Source/WebCore/icu/unicode/unistr.h4470
-rw-r--r--Source/WebCore/icu/unicode/unorm2.h528
-rw-r--r--Source/WebCore/icu/unicode/uobject.h320
-rw-r--r--Source/WebCore/icu/unicode/urename.h686
-rw-r--r--Source/WebCore/icu/unicode/uscript.h343
-rw-r--r--Source/WebCore/icu/unicode/usearch.h836
-rw-r--r--Source/WebCore/icu/unicode/uset.h22
-rw-r--r--Source/WebCore/icu/unicode/ushape.h10
-rw-r--r--Source/WebCore/icu/unicode/ustring.h43
-rw-r--r--Source/WebCore/icu/unicode/utext.h1600
-rw-r--r--Source/WebCore/icu/unicode/utf.h55
-rw-r--r--Source/WebCore/icu/unicode/utf16.h39
-rw-r--r--Source/WebCore/icu/unicode/utf8.h271
-rw-r--r--Source/WebCore/icu/unicode/utf_old.h1170
-rw-r--r--Source/WebCore/icu/unicode/utypes.h228
-rw-r--r--Source/WebCore/icu/unicode/uvernum.h167
-rw-r--r--Source/WebCore/icu/unicode/uversion.h28
41 files changed, 14870 insertions, 1599 deletions
diff --git a/Source/WebCore/icu/README b/Source/WebCore/icu/README
new file mode 100644
index 000000000..389e2e801
--- /dev/null
+++ b/Source/WebCore/icu/README
@@ -0,0 +1,4 @@
+The headers in this directory are for compiling on Mac OS X 10.4.
+The Mac OS X 10.4 release includes the ICU binary, but not ICU headers.
+For other platforms, installed ICU headers should be used rather than these.
+They are specific to Mac OS X 10.4.
diff --git a/Source/WebCore/icu/unicode/bytestream.h b/Source/WebCore/icu/unicode/bytestream.h
new file mode 100644
index 000000000..174aa38af
--- /dev/null
+++ b/Source/WebCore/icu/unicode/bytestream.h
@@ -0,0 +1,257 @@
+// Copyright (C) 2009-2012, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2007 Google Inc. All Rights Reserved.
+// Author: sanjay@google.com (Sanjay Ghemawat)
+//
+// Abstract interface that consumes a sequence of bytes (ByteSink).
+//
+// Used so that we can write a single piece of code that can operate
+// on a variety of output string types.
+//
+// Various implementations of this interface are provided:
+// ByteSink:
+// CheckedArrayByteSink Write to a flat array, with bounds checking
+// StringByteSink Write to an STL string
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __BYTESTREAM_H__
+#define __BYTESTREAM_H__
+
+/**
+ * \file
+ * \brief C++ API: Interface for writing bytes, and implementation classes.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Abstract interface for an object that consumes a sequence of bytes.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API ByteSink : public UMemory {
+public:
+ /**
+ * Default constructor.
+ * @stable ICU 4.2
+ */
+ ByteSink() { }
+ /**
+ * Virtual destructor.
+ * @stable ICU 4.2
+ */
+ virtual ~ByteSink();
+
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* bytes, int32_t n) = 0;
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *result_capacity. Guarantees *result_capacity>=min_capacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratch_capacity>=min_capacity.
+ * The returned buffer is only valid until the next operation
+ * on this ByteSink.
+ *
+ * After writing at most *result_capacity bytes, call Append() with the
+ * pointer returned from this function and the number of bytes written.
+ * Many Append() implementations will avoid copying bytes if this function
+ * returned an internal buffer.
+ *
+ * Partial usage example:
+ * int32_t capacity;
+ * char* buffer = sink->GetAppendBuffer(..., &capacity);
+ * ... Write n bytes into buffer, with n <= capacity.
+ * sink->Append(buffer, n);
+ * In many implementations, that call to Append will avoid copying bytes.
+ *
+ * If the ByteSink allocates or reallocates an internal buffer, it should use
+ * the desired_capacity_hint if appropriate.
+ * If a caller cannot provide a reasonable guess at the desired capacity,
+ * it should pass desired_capacity_hint=0.
+ *
+ * If a non-scratch buffer is returned, the caller may only pass
+ * a prefix of it to Append().
+ * That is, it is not correct to pass an interior pointer to Append().
+ *
+ * The default implementation always returns the scratch buffer.
+ *
+ * @param min_capacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desired_capacity_hint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratch_capacity capacity of the scratch buffer
+ * @param result_capacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *result_capacity>=min_capacity
+ * @stable ICU 4.2
+ */
+ virtual char* GetAppendBuffer(int32_t min_capacity,
+ int32_t desired_capacity_hint,
+ char* scratch, int32_t scratch_capacity,
+ int32_t* result_capacity);
+
+ /**
+ * Flush internal buffers.
+ * Some byte sinks use internal buffers or provide buffering
+ * and require calling Flush() at the end of the stream.
+ * The ByteSink should be ready for further Append() calls after Flush().
+ * The default implementation of Flush() does nothing.
+ * @stable ICU 4.2
+ */
+ virtual void Flush();
+
+private:
+ ByteSink(const ByteSink &); // copy constructor not implemented
+ ByteSink &operator=(const ByteSink &); // assignment operator not implemented
+};
+
+// -------------------------------------------------------------
+// Some standard implementations
+
+/**
+ * Implementation of ByteSink that writes to a flat byte array,
+ * with bounds-checking:
+ * This sink will not write more than capacity bytes to outbuf.
+ * If more than capacity bytes are Append()ed, then excess bytes are ignored,
+ * and Overflowed() will return true.
+ * Overflow does not cause a runtime error.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API CheckedArrayByteSink : public ByteSink {
+public:
+ /**
+ * Constructs a ByteSink that will write to outbuf[0..capacity-1].
+ * @param outbuf buffer to write to
+ * @param capacity size of the buffer
+ * @stable ICU 4.2
+ */
+ CheckedArrayByteSink(char* outbuf, int32_t capacity);
+ /**
+ * Destructor.
+ * @stable ICU 4.2
+ */
+ virtual ~CheckedArrayByteSink();
+ /**
+ * Returns the sink to its original state, without modifying the buffer.
+ * Useful for reusing both the buffer and the sink for multiple streams.
+ * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
+ * and Overflowed()=FALSE.
+ * @return *this
+ * @stable ICU 4.6
+ */
+ virtual CheckedArrayByteSink& Reset();
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* bytes, int32_t n);
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *result_capacity. For details see the base class documentation.
+ * @param min_capacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desired_capacity_hint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratch_capacity capacity of the scratch buffer
+ * @param result_capacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *result_capacity>=min_capacity
+ * @stable ICU 4.2
+ */
+ virtual char* GetAppendBuffer(int32_t min_capacity,
+ int32_t desired_capacity_hint,
+ char* scratch, int32_t scratch_capacity,
+ int32_t* result_capacity);
+ /**
+ * Returns the number of bytes actually written to the sink.
+ * @return number of bytes written to the buffer
+ * @stable ICU 4.2
+ */
+ int32_t NumberOfBytesWritten() const { return size_; }
+ /**
+ * Returns true if any bytes were discarded, i.e., if there was an
+ * attempt to write more than 'capacity' bytes.
+ * @return TRUE if more than 'capacity' bytes were Append()ed
+ * @stable ICU 4.2
+ */
+ UBool Overflowed() const { return overflowed_; }
+ /**
+ * Returns the number of bytes appended to the sink.
+ * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
+ * else they return the same number.
+ * @return number of bytes appended to the sink, including discarded ones
+ * @stable ICU 4.6
+ */
+ int32_t NumberOfBytesAppended() const { return appended_; }
+private:
+ char* outbuf_;
+ const int32_t capacity_;
+ int32_t size_;
+ int32_t appended_;
+ UBool overflowed_;
+ CheckedArrayByteSink(); ///< default constructor not implemented
+ CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented
+ CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented
+};
+
+#if U_HAVE_STD_STRING
+
+/**
+ * Implementation of ByteSink that writes to a "string".
+ * The StringClass is usually instantiated with a std::string.
+ * @stable ICU 4.2
+ */
+template<typename StringClass>
+class StringByteSink : public ByteSink {
+ public:
+ /**
+ * Constructs a ByteSink that will append bytes to the dest string.
+ * @param dest pointer to string object to append to; only the pointer is stored
+ * @stable ICU 4.2
+ */
+ StringByteSink(StringClass* dest) : dest_(dest) { }
+ /**
+ * Append "data[0,n-1]" to the dest string by calling its append(data, n).
+ * @param data the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
+ private:
+ StringClass* dest_;
+ StringByteSink(); ///< default constructor not implemented
+ StringByteSink(const StringByteSink &); ///< copy constructor not implemented
+ StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented
+};
+
+#endif
+
+U_NAMESPACE_END
+
+#endif // __BYTESTREAM_H__
diff --git a/Source/WebCore/icu/unicode/chariter.h b/Source/WebCore/icu/unicode/chariter.h
new file mode 100644
index 000000000..e8d65090a
--- /dev/null
+++ b/Source/WebCore/icu/unicode/chariter.h
@@ -0,0 +1,722 @@
+/*
+********************************************************************
+*
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+********************************************************************
+*/
+
+#ifndef CHARITER_H
+#define CHARITER_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+/**
+ * \file
+ * \brief C++ API: Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+/**
+ * Abstract class that defines an API for forward-only iteration
+ * on text objects.
+ * This is a minimal interface for iteration without random access
+ * or backwards iteration. It is especially useful for wrapping
+ * streams with converters into an object for collation or
+ * normalization.
+ *
+ * <p>Characters can be accessed in two ways: as code units or as
+ * code points.
+ * Unicode code points are 21-bit integers and are the scalar values
+ * of Unicode characters. ICU uses the type UChar32 for them.
+ * Unicode code units are the storage units of a given
+ * Unicode/UCS Transformation Format (a character encoding scheme).
+ * With UTF-16, all code points can be represented with either one
+ * or two code units ("surrogates").
+ * String storage is typically based on code units, while properties
+ * of characters are typically determined using code point values.
+ * Some processes may be designed to work with sequences of code units,
+ * or it may be known that all characters that are important to an
+ * algorithm can be represented with single code units.
+ * Other processes will need to use the code point access functions.</p>
+ *
+ * <p>ForwardCharacterIterator provides nextPostInc() to access
+ * a code unit and advance an internal position into the text object,
+ * similar to a <code>return text[position++]</code>.<br>
+ * It provides next32PostInc() to access a code point and advance an internal
+ * position.</p>
+ *
+ * <p>next32PostInc() assumes that the current position is that of
+ * the beginning of a code point, i.e., of its first code unit.
+ * After next32PostInc(), this will be true again.
+ * In general, access to code units and code points in the same
+ * iteration loop should not be mixed. In UTF-16, if the current position
+ * is on a second code unit (Low Surrogate), then only that code unit
+ * is returned even by next32PostInc().</p>
+ *
+ * <p>For iteration with either function, there are two ways to
+ * check for the end of the iteration. When there are no more
+ * characters in the text object:
+ * <ul>
+ * <li>The hasNext() function returns FALSE.</li>
+ * <li>nextPostInc() and next32PostInc() return DONE
+ * when one attempts to read beyond the end of the text object.</li>
+ * </ul>
+ *
+ * Example:
+ * \code
+ * void function1(ForwardCharacterIterator &it) {
+ * UChar32 c;
+ * while(it.hasNext()) {
+ * c=it.next32PostInc();
+ * // use c
+ * }
+ * }
+ *
+ * void function2(ForwardCharacterIterator &it) {
+ * UChar c;
+ * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ * </p>
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API ForwardCharacterIterator : public UObject {
+public:
+ /**
+ * Value returned by most of ForwardCharacterIterator's functions
+ * when the iterator has reached the limits of its iteration.
+ * @stable ICU 2.0
+ */
+ enum { DONE = 0xffff };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~ForwardCharacterIterator();
+
+ /**
+ * Returns true when both iterators refer to the same
+ * character in the same character-storage object.
+ * @param that The ForwardCharacterIterator to be compared for equality
+ * @return true when both iterators refer to the same
+ * character in the same character-storage object
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const ForwardCharacterIterator& that) const = 0;
+
+ /**
+ * Returns true when the iterators refer to different
+ * text-storage objects, or to different characters in the
+ * same text-storage object.
+ * @param that The ForwardCharacterIterator to be compared for inequality
+ * @return true when the iterators refer to different
+ * text-storage objects, or to different characters in the
+ * same text-storage object
+ * @stable ICU 2.0
+ */
+ inline UBool operator!=(const ForwardCharacterIterator& that) const;
+
+ /**
+ * Generates a hash code for this iterator.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const = 0;
+
+ /**
+ * Returns a UClassID for this ForwardCharacterIterator ("poor man's
+ * RTTI").<P> Despite the fact that this function is public,
+ * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
+ * @return a UClassID for this ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const = 0;
+
+ /**
+ * Gets the current code unit for returning and advances to the next code unit
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code units to return, returns DONE.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar nextPostInc(void) = 0;
+
+ /**
+ * Gets the current code point for returning and advances to the next code point
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code points to return, returns DONE.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32PostInc(void) = 0;
+
+ /**
+ * Returns FALSE if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * This is used with nextPostInc() or next32PostInc() in forward
+ * iteration.
+ * @return FALSE if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasNext() = 0;
+
+protected:
+ /** Default constructor to be overridden in the implementing class. @stable ICU 2.0 */
+ ForwardCharacterIterator();
+
+ /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0 */
+ ForwardCharacterIterator(const ForwardCharacterIterator &other);
+
+ /**
+ * Assignment operator to be overridden in the implementing class; this base version only returns *this.
+ * @stable ICU 2.0
+ */
+ ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
+};
+
+/**
+ * Abstract class that defines an API for iteration
+ * on text objects.
+ * This is an interface for forward and backward iteration
+ * and random access into a text object.
+ *
+ * <p>The API provides backward compatibility to the Java and older ICU
+ * CharacterIterator classes but extends them significantly:
+ * <ol>
+ * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
+ * <li>While the old API functions provided forward iteration with
+ * "pre-increment" semantics, the new one also provides functions
+ * with "post-increment" semantics. They are more efficient and should
+ * be the preferred iterator functions for new implementations.
+ * The backward iteration always had "pre-decrement" semantics, which
+ * are efficient.</li>
+ * <li>Just like ForwardCharacterIterator, it provides access to
+ * both code units and code points. Code point access versions are available
+ * for the old and the new iteration semantics.</li>
+ * <li>There are new functions for setting and moving the current position
+ * without returning a character, for efficiency.</li>
+ * </ol>
+ *
+ * See ForwardCharacterIterator for examples for using the new forward iteration
+ * functions. For backward iteration, there is also a hasPrevious() function
+ * that can be used analogously to hasNext().
+ * The old functions work as before and are shown below.</p>
+ *
+ * <p>Examples for some of the new functions:</p>
+ *
+ * Forward iteration with hasNext():
+ * \code
+ * void forward1(CharacterIterator &it) {
+ * UChar32 c;
+ * for(it.setToStart(); it.hasNext();) {
+ * c=it.next32PostInc();
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Forward iteration more similar to loops with the old forward iteration,
+ * showing a way to convert simple for() loops:
+ * \code
+ * void forward2(CharacterIterator &it) {
+ * UChar c;
+ * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Backward iteration with setToEnd() and hasPrevious():
+ * \code
+ * void backward1(CharacterIterator &it) {
+ * UChar32 c;
+ * for(it.setToEnd(); it.hasPrevious();) {
+ * c=it.previous32();
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Backward iteration with a more traditional for() loop:
+ * \code
+ * void backward2(CharacterIterator &it) {
+ * UChar c;
+ * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ *
+ * Example for random access:
+ * \code
+ * void random(CharacterIterator &it) {
+ * // set to the third code point from the beginning
+ * it.move32(3, CharacterIterator::kStart);
+ * // get a code point from here without moving the position
+ * UChar32 c=it.current32();
+ * // get the position
+ * int32_t pos=it.getIndex();
+ * // get the previous code unit
+ * UChar u=it.previous();
+ * // move back one more code unit
+ * it.move(-1, CharacterIterator::kCurrent);
+ * // set the position back to where it was
+ * // and read the same code point c and move beyond it
+ * it.setIndex(pos);
+ * if(c!=it.next32PostInc()) {
+ * exit(1); // CharacterIterator inconsistent
+ * }
+ * }
+ * \endcode
+ *
+ * <p>Examples, especially for the old API:</p>
+ *
+ * Function processing characters, in this example simple output
+ * <pre>
+ * \code
+ * void processChar( UChar c )
+ * {
+ * cout << " " << c;
+ * }
+ * \endcode
+ * </pre>
+ * Traverse the text from start to finish
+ * <pre>
+ * \code
+ * void traverseForward(CharacterIterator& iter)
+ * {
+ * for(UChar c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Traverse the text backwards, from end to start
+ * <pre>
+ * \code
+ * void traverseBackward(CharacterIterator& iter)
+ * {
+ * for(UChar c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Traverse both forward and backward from a given position in the text.
+ * The isLetter()/isDigit() tests in this example represent some additional stopping criteria.
+ * <pre>
+ * \code
+ * void traverseOut(CharacterIterator& iter, int32_t pos)
+ * {
+ * UChar c;
+ * for (c = iter.setIndex(pos);
+ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ * c = iter.next()) {}
+ * int32_t end = iter.getIndex();
+ * for (c = iter.setIndex(pos);
+ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ * c = iter.previous()) {}
+ * int32_t start = iter.getIndex() + 1;
+ *
+ * cout << "start: " << start << " end: " << end << endl;
+ * for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Creating a StringCharacterIterator and calling the test functions
+ * <pre>
+ * \code
+ * void CharacterIterator_Example( void )
+ * {
+ * cout << endl << "===== CharacterIterator_Example: =====" << endl;
+ * UnicodeString text("Ein kleiner Satz.");
+ * StringCharacterIterator iterator(text);
+ * cout << "----- traverseForward: -----------" << endl;
+ * traverseForward( iterator );
+ * cout << endl << endl << "----- traverseBackward: ----------" << endl;
+ * traverseBackward( iterator );
+ * cout << endl << endl << "----- traverseOut: ---------------" << endl;
+ * traverseOut( iterator, 7 );
+ * cout << endl << endl << "-----" << endl;
+ * }
+ * \endcode
+ * </pre>
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
+public:
+ /**
+ * Origin enumeration for the move() and move32() functions.
+ * @stable ICU 2.0
+ */
+ enum EOrigin { kStart, kCurrent, kEnd };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~CharacterIterator();
+
+ /**
+ * Returns a pointer to a new CharacterIterator of the same
+ * concrete class as this one, and referring to the same
+ * character in the same text-storage object as this one. The
+ * caller is responsible for deleting the new clone.
+ * @return a pointer to a new CharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator* clone(void) const = 0;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with next().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar first(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, returns that code unit, and moves the position
+ * to the second code unit. This is an alternative to setToStart()
+ * for forward iteration with nextPostInc().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar firstPostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with next32().
+ * Note that an iteration with next32PostInc(), beginning with,
+ * e.g., setToStart() or firstPostInc(), is more efficient.
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, returns that code point, and moves the position
+ * to the second code point. This is an alternative to setToStart()
+ * for forward iteration with next32PostInc().
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32PostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code unit or code point in its
+ * iteration range. This can be used to begin a forward
+ * iteration with nextPostInc() or next32PostInc().
+ * @return the start position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToStart();
+
+ /**
+ * Sets the iterator to refer to the last code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous().
+ * @return the last code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar last(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the last code point in its
+ * iteration range, and returns that code point.
+ * This can be used to begin an iteration with previous32().
+ * @return the last code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 last32(void) = 0;
+
+ /**
+ * Sets the iterator to the end of its iteration range, just behind
+ * the last code unit or code point. This can be used to begin a backward
+ * iteration with previous() or previous32().
+ * @return the end position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToEnd();
+
+ /**
+ * Sets the iterator to refer to the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code unit.
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the "position"-th code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar setIndex(int32_t position) = 0;
+
+ /**
+ * Sets the iterator to refer to the beginning of the code point
+ * that contains the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code point.
+ * The current position is adjusted to the beginning of the code point
+ * (its first code unit).
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the code point that contains the "position"-th code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 setIndex32(int32_t position) = 0;
+
+ /**
+ * Returns the code unit the iterator currently refers to.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar current(void) const = 0;
+
+ /**
+ * Returns the code point the iterator currently refers to.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 current32(void) const = 0;
+
+ /**
+ * Advances to the next code unit in the iteration range
+ * (toward endIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the next code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar next(void) = 0;
+
+ /**
+ * Advances to the next code point in the iteration range
+ * (toward endIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * Note that iteration with "pre-increment" semantics is less
+ * efficient than iteration with "post-increment" semantics
+ * that is provided by next32PostInc().
+ * @return the next code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32(void) = 0;
+
+ /**
+ * Advances to the previous code unit in the iteration range
+ * (toward startIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the previous code unit.
+ * @stable ICU 2.0
+ */
+ virtual UChar previous(void) = 0;
+
+ /**
+ * Advances to the previous code point in the iteration range
+ * (toward startIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * @return the previous code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 previous32(void) = 0;
+
+ /**
+ * Returns FALSE if there are no more code units or code points
+ * before the current position in the iteration range.
+ * This is used with previous() or previous32() in backward
+ * iteration.
+ * @return FALSE if there are no more code units or code points
+ * before the current position in the iteration range, return TRUE otherwise.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasPrevious() = 0;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character returned by first(). Since it's
+ * possible to create an iterator that iterates across only
+ * part of a text-storage object, this number isn't
+ * necessarily 0.
+ * @return the numeric index in the underlying text-storage
+ * object of the character returned by first().
+ * @stable ICU 2.0
+ */
+ inline int32_t startIndex(void) const;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @return the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @stable ICU 2.0
+ */
+ inline int32_t endIndex(void) const;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character the iterator currently refers to
+ * (i.e., the character returned by current()).
+ * @return the numeric index in the text-storage object of
+ * the character the iterator currently refers to
+ * @stable ICU 2.0
+ */
+ inline int32_t getIndex(void) const;
+
+ /**
+ * Returns the length of the entire text in the underlying
+ * text-storage object.
+ * @return the length of the entire text in the text-storage object
+ * @stable ICU 2.0
+ */
+ inline int32_t getLength() const;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move(int32_t delta, EOrigin origin) = 0;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code points forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
+
+ /**
+ * Copies the text under iteration into the UnicodeString
+ * referred to by "result".
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result) = 0;
+
+protected:
+ /**
+ * Empty constructor.
+ * @stable ICU 2.0
+ */
+ CharacterIterator();
+
+ /**
+ * Constructor, just setting the length field in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length);
+
+ /**
+ * Constructor, just setting the length and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t position);
+
+ /**
+ * Constructor, just setting the length, start, end, and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
+
+ /**
+ * Copy constructor.
+ *
+ * @param that The CharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ CharacterIterator(const CharacterIterator &that);
+
+ /**
+ * Assignment operator. Sets this CharacterIterator to have the same behavior,
+ * as the one passed in.
+ * @param that The CharacterIterator passed in.
+ * @return the newly set CharacterIterator.
+ * @stable ICU 2.0
+ */
+ CharacterIterator &operator=(const CharacterIterator &that);
+
+ /**
+ * Base class text length field.
+ * Necessary for correct getText() and hashCode().
+ * @stable ICU 2.0
+ */
+ int32_t textLength;
+
+ /**
+ * Base class field for the current position.
+ * @stable ICU 2.0
+ */
+ int32_t pos;
+
+ /**
+ * Base class field for the start of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t begin;
+
+ /**
+ * Base class field for the end of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t end;
+};
+
+inline UBool
+ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
+ return !operator==(that); // inequality is the negation of the virtual operator==
+}
+
+inline int32_t
+CharacterIterator::setToStart() {
+ return move(0, kStart); // zero offset from the start of the iteration range
+}
+
+inline int32_t
+CharacterIterator::setToEnd() {
+ return move(0, kEnd); // zero offset from the end of the iteration range
+}
+
+inline int32_t
+CharacterIterator::startIndex(void) const {
+ return begin; // base-class field for the start of the iteration range
+}
+
+inline int32_t
+CharacterIterator::endIndex(void) const {
+ return end; // base-class field for the end of the iteration range
+}
+
+inline int32_t
+CharacterIterator::getIndex(void) const {
+ return pos; // base-class field for the current position
+}
+
+inline int32_t
+CharacterIterator::getLength(void) const {
+ return textLength; // base-class text length field
+}
+
+U_NAMESPACE_END
+#endif
diff --git a/Source/WebCore/icu/unicode/localpointer.h b/Source/WebCore/icu/unicode/localpointer.h
new file mode 100644
index 000000000..e3ccb2581
--- /dev/null
+++ b/Source/WebCore/icu/unicode/localpointer.h
@@ -0,0 +1,304 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: localpointer.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov13
+* created by: Markus W. Scherer
+*/
+
+#ifndef __LOCALPOINTER_H__
+#define __LOCALPOINTER_H__
+
+/**
+ * \file
+ * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
+ *
+ * These classes are inspired by
+ * - std::auto_ptr
+ * - boost::scoped_ptr & boost::scoped_array
+ * - Taligent Safe Pointers (TOnlyPointerTo)
+ *
+ * but none of those provide for all of the goals for ICU smart pointers:
+ * - Smart pointer owns the object and releases it when it goes out of scope.
+ * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
+ * - ICU-compatible: No exceptions.
+ * - Need to be able to orphan/release the pointer and its ownership.
+ * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
+ *
+ * For details see http://site.icu-project.org/design/cpp/scoped_ptr
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" base class; do not use directly: use LocalPointer etc.
+ *
+ * Base class for smart pointer classes that do not throw exceptions.
+ *
+ * Do not use this base class directly, since it does not delete its pointer.
+ * A subclass must implement methods that delete the pointer:
+ * Destructor and adoptInstead().
+ *
+ * There is no operator T *() provided because the programmer must decide
+ * whether to use getAlias() (without transfer of ownership) or orphan()
+ * (with transfer of ownership and NULLing of the pointer).
+ *
+ * @see LocalPointer
+ * @see LocalArray
+ * @see U_DEFINE_LOCAL_OPEN_POINTER
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointerBase {
+public:
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalPointerBase(T *p=NULL) : ptr(p) {}
+ /**
+ * Destructor deletes the object it owns.
+ * Subclass must override: Base class does nothing.
+ * Note: this destructor is not declared virtual, so only the destructor
+ * of the static type of the object runs.
+ * @stable ICU 4.4
+ */
+ ~LocalPointerBase() { /* delete ptr; */ }
+ /**
+ * NULL check.
+ * @return TRUE if ==NULL
+ * @stable ICU 4.4
+ */
+ UBool isNull() const { return ptr==NULL; }
+ /**
+ * NULL check.
+ * @return TRUE if !=NULL
+ * @stable ICU 4.4
+ */
+ UBool isValid() const { return ptr!=NULL; }
+ /**
+ * Comparison with a simple pointer, so that existing code
+ * with ==NULL need not be changed.
+ * @param other simple pointer for comparison
+ * @return true if this pointer value equals other
+ * @stable ICU 4.4
+ */
+ bool operator==(const T *other) const { return ptr==other; }
+ /**
+ * Comparison with a simple pointer, so that existing code
+ * with !=NULL need not be changed.
+ * @param other simple pointer for comparison
+ * @return true if this pointer value differs from other
+ * @stable ICU 4.4
+ */
+ bool operator!=(const T *other) const { return ptr!=other; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value
+ * @stable ICU 4.4
+ */
+ T *getAlias() const { return ptr; }
+ /**
+ * Access without ownership change.
+ * No NULL check is performed before the pointer is dereferenced;
+ * the caller must make sure that the pointer is valid (see isValid()).
+ * @return the pointer value as a reference
+ * @stable ICU 4.4
+ */
+ T &operator*() const { return *ptr; }
+ /**
+ * Access without ownership change.
+ * The stored pointer is returned as is, including when it is NULL.
+ * @return the pointer value
+ * @stable ICU 4.4
+ */
+ T *operator->() const { return ptr; }
+ /**
+ * Gives up ownership; the internal pointer becomes NULL.
+ * @return the pointer value;
+ * caller becomes responsible for deleting the object
+ * @stable ICU 4.4
+ */
+ T *orphan() {
+ T *p=ptr;
+ ptr=NULL;
+ return p;
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * Subclass must override: Base class does not delete the object.
+ * Note: this method is not declared virtual; the subclasses in this header
+ * redefine it under the same name, so calling it through a LocalPointerBase
+ * reference runs this base version, which only replaces the pointer.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ // delete ptr;
+ ptr=p;
+ }
+protected:
+ /**
+ * Actual pointer.
+ * @internal
+ */
+ T *ptr;
+private:
+ // No comparison operators with other LocalPointerBases (declared private, no definition in this header).
+ bool operator==(const LocalPointerBase &other);
+ bool operator!=(const LocalPointerBase &other);
+ // No ownership transfer: No copy constructor, no assignment operator (declared private, no definition in this header).
+ LocalPointerBase(const LocalPointerBase &other);
+ void operator=(const LocalPointerBase &other);
+ // No heap allocation. Use only on the stack. (The allocation operators are declared private for that purpose.)
+ static void * U_EXPORT2 operator new(size_t size);
+ static void * U_EXPORT2 operator new[](size_t size);
+#if U_HAVE_PLACEMENT_NEW
+ static void * U_EXPORT2 operator new(size_t, void *ptr);
+#endif
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the standard C++ delete operator.
+ * For most methods see the LocalPointerBase base class.
+ *
+ * Usage example:
+ * \code
+ * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
+ * int32_t length=s->length(); // 2
+ * UChar lead=s->charAt(0); // 0xd900
+ * if(some condition) { return; } // no need to explicitly delete the pointer
+ * s.adoptInstead(new UnicodeString((UChar)0xfffc));
+ * length=s->length(); // 1
+ * // no need to explicitly delete the pointer
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointer : public LocalPointerBase<T> {
+public:
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {}
+ /**
+ * Destructor deletes the object it owns.
+ * The C++ delete operator does nothing for a NULL pointer,
+ * so an empty or orphaned LocalPointer is destroyed safely.
+ * @stable ICU 4.4
+ */
+ ~LocalPointer() {
+ delete LocalPointerBase<T>::ptr;
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * There is no check for p being the currently owned pointer:
+ * in that case the object is deleted and the stale pointer is kept.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ delete LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ }
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the C++ array delete[] operator.
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * Usage example:
+ * \code
+ * LocalArray<UnicodeString> a(new UnicodeString[2]);
+ * a[0].append((UChar)0x61);
+ * if(some condition) { return; } // no need to explicitly delete the array
+ * a.adoptInstead(new UnicodeString[4]);
+ * a[3].append((UChar)0x62).append((UChar)0x63).reverse();
+ * // no need to explicitly delete the array
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalArray : public LocalPointerBase<T> {
+public:
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an array of T objects that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {}
+ /**
+ * Destructor deletes the array it owns.
+ * The C++ delete[] operator does nothing for a NULL pointer,
+ * so an empty or orphaned LocalArray is destroyed safely.
+ * @stable ICU 4.4
+ */
+ ~LocalArray() {
+ delete[] LocalPointerBase<T>::ptr;
+ }
+ /**
+ * Deletes the array it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * There is no check for p being the currently owned array:
+ * in that case the array is deleted and the stale pointer is kept.
+ * @param p simple pointer to an array of T objects that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ delete[] LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ }
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * The method itself is const, but the returned reference is non-const.
+ * @param i array index
+ * @return reference to the array item
+ * @stable ICU 4.4
+ */
+ T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
+};
+
+/**
+ * \def U_DEFINE_LOCAL_OPEN_POINTER
+ * "Smart pointer" definition macro, deletes objects via the closeFunction.
+ * Defines a subclass of LocalPointerBase which works just
+ * like LocalPointer<Type> except that this subclass will use the closeFunction
+ * rather than the C++ delete operator.
+ *
+ * Requirement: The closeFunction must tolerate a NULL pointer.
+ * (We could add a NULL check here but it is normally redundant.)
+ *
+ * The closeFunction is called with the owned pointer both by the destructor
+ * and by adoptInstead() (on the previously owned pointer, before p is adopted).
+ * The macro expansion ends with the closing brace of the class definition;
+ * it does not include a trailing semicolon.
+ *
+ * Usage example:
+ * \code
+ * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
+ * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
+ * utf8Out, (int32_t)sizeof(utf8Out),
+ * utf8In, utf8InLength, &errorCode);
+ * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
+ * \endcode
+ *
+ * @param LocalPointerClassName name of the smart pointer class to be defined
+ * @param Type type of the object that the defined class points to
+ * @param closeFunction function that is called with the owned Type pointer to release it
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
+ class LocalPointerClassName : public LocalPointerBase<Type> { \
+ public: \
+ explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
+ ~LocalPointerClassName() { closeFunction(ptr); } \
+ void adoptInstead(Type *p) { \
+ closeFunction(ptr); \
+ ptr=p; \
+ } \
+ }
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+#endif /* __LOCALPOINTER_H__ */
diff --git a/Source/WebCore/icu/unicode/platform.h b/Source/WebCore/icu/unicode/platform.h
index 3de40d256..1b2ab306e 100644
--- a/Source/WebCore/icu/unicode/platform.h
+++ b/Source/WebCore/icu/unicode/platform.h
@@ -1,15 +1,11 @@
/*
******************************************************************************
*
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
*
-* Note: autoconf creates platform.h from platform.h.in at configure time.
-*
-******************************************************************************
-*
* FILE NAME : platform.h
*
* Date Name Description
@@ -23,379 +19,737 @@
#ifndef _PLATFORM_H
#define _PLATFORM_H
+#include "unicode/uconfig.h"
+#include "unicode/uvernum.h"
+
/**
- * \file
- * \brief Basic types for the platform
+ * \file
+ * \brief Basic types for the platform.
+ *
+ * This file used to be generated by autoconf/configure.
+ * Starting with ICU 49, platform.h is a normal source file,
+ * to simplify cross-compiling and working with non-autoconf/make build systems.
+ *
+ * When a value in this file does not work on a platform, then please
+ * try to derive it from the U_PLATFORM value
+ * (for which we might need a new value constant in rare cases)
+ * and/or from other macros that are predefined by the compiler
+ * or defined in standard (POSIX or platform or compiler) headers.
+ *
+ * As a temporary workaround, you can add an explicit <code>#define</code> for some macros
+ * before it is first tested, or add an equivalent -D macro definition
+ * to the compiler's command line.
+ *
+ * Note: Some compilers provide ways to show the predefined macros.
+ * For example, with gcc you can compile an empty .c file and have the compiler
+ * print the predefined macros with
+ * \code
+ * gcc -E -dM -x c /dev/null | sort
+ * \endcode
+ * (You can provide an actual empty .c file rather than /dev/null.
+ * <code>-x c++</code> is for C++.)
*/
-/* This file should be included before uvernum.h. */
-#if defined(UVERNUM_H)
-# error Do not include unicode/uvernum.h before #including unicode/platform.h. Instead of unicode/uvernum.h, #include unicode/uversion.h
-#endif
-
/**
- * Determine wheter to enable auto cleanup of libraries.
+ * Define some things so that they can be documented.
* @internal
*/
-#ifndef UCLN_NO_AUTO_CLEANUP
-#define UCLN_NO_AUTO_CLEANUP 1
-#endif
-
-/* Need platform.h when using CYGWINMSVC to get definitions above. Ignore everything else. */
-#ifndef CYGWINMSVC
+#ifdef U_IN_DOXYGEN
+/*
+ * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
+ * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
+ */
-/** Define the platform we're on. */
-#ifndef U_DARWIN
-#define U_DARWIN
+/* None for now. */
#endif
/**
- * \def U_HAVE_DIRENT_H
- * Define whether dirent.h is available
+ * \def U_PLATFORM
+ * The U_PLATFORM macro defines the platform we're on.
+ *
+ * We used to define one different, value-less macro per platform.
+ * That made it hard to know the set of relevant platforms and macros,
+ * and hard to deal with variants of platforms.
+ *
+ * Starting with ICU 49, we define platforms as numeric macros,
+ * with ranges of values for related platforms and their variants.
+ * The U_PLATFORM macro is set to one of these values.
+ *
+ * Historical note from the Solaris Wikipedia article:
+ * AT&T and Sun collaborated on a project to merge the most popular Unix variants
+ * on the market at that time: BSD, System V, and Xenix.
+ * This became Unix System V Release 4 (SVR4).
+ *
* @internal
*/
-#ifndef U_HAVE_DIRENT_H
-#define U_HAVE_DIRENT_H 1
-#endif
-/** Define whether inttypes.h is available */
-#ifndef U_HAVE_INTTYPES_H
-#define U_HAVE_INTTYPES_H 1
+/** Unknown platform. @internal */
+#define U_PF_UNKNOWN 0
+/** Windows @internal */
+#define U_PF_WINDOWS 1000
+/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
+#define U_PF_MINGW 1800
+/**
+ * Cygwin. Windows, calls to cygwin1.dll for Posix functions,
+ * using MSVC or GNU gcc and binutils.
+ * @internal
+ */
+#define U_PF_CYGWIN 1900
+/* Reserve 2000 for U_PF_UNIX? */
+/** HP-UX is based on UNIX System V. @internal */
+#define U_PF_HPUX 2100
+/** Solaris is a Unix operating system based on SVR4. @internal */
+#define U_PF_SOLARIS 2600
+/** BSD is a UNIX operating system derivative. @internal */
+#define U_PF_BSD 3000
+/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
+#define U_PF_AIX 3100
+/** IRIX is based on UNIX System V with BSD extensions. @internal */
+#define U_PF_IRIX 3200
+/**
+ * Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
+ * as well as code derived from NeXTSTEP, BSD, and other projects,
+ * built around the Mach kernel.
+ * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
+ * (Original description modified from Wikipedia.)
+ * @internal
+ */
+#define U_PF_DARWIN 3500
+/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
+#define U_PF_IPHONE 3550
+/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
+#define U_PF_QNX 3700
+/** Linux is a Unix-like operating system. @internal */
+#define U_PF_LINUX 4000
+/** Android is based on Linux. @internal */
+#define U_PF_ANDROID 4050
+/** "Classic" Mac OS (1984-2001) @internal */
+#define U_PF_CLASSIC_MACOS 8000
+/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
+#define U_PF_OS390 9000
+/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
+#define U_PF_OS400 9400
+
+#ifdef U_PLATFORM
+ /* Use the predefined value. */
+#elif defined(__MINGW32__)
+# define U_PLATFORM U_PF_MINGW
+#elif defined(__CYGWIN__)
+# define U_PLATFORM U_PF_CYGWIN
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+# define U_PLATFORM U_PF_WINDOWS
+#elif defined(__ANDROID__)
+# define U_PLATFORM U_PF_ANDROID
+ /* Android wchar_t support depends on the API level. */
+# include <android/api-level.h>
+#elif defined(linux) || defined(__linux__) || defined(__linux)
+# define U_PLATFORM U_PF_LINUX
+#elif defined(__APPLE__) && defined(__MACH__)
+# include <TargetConditionals.h>
+# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */
+# define U_PLATFORM U_PF_IPHONE
+# else
+# define U_PLATFORM U_PF_DARWIN
+# endif
+#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
+# define U_PLATFORM U_PF_BSD
+#elif defined(sun) || defined(__sun)
+ /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
+# define U_PLATFORM U_PF_SOLARIS
+# if defined(__GNUC__)
+ /* Solaris/GCC needs this header file to get the proper endianness. Normally, this
+ * header file is included with stddef.h but on Solaris/GCC, the GCC version of stddef.h
+ * is included which does not include this header file.
+ */
+# include <sys/isa_defs.h>
+# endif
+#elif defined(_AIX) || defined(__TOS_AIX__)
+# define U_PLATFORM U_PF_AIX
+#elif defined(_hpux) || defined(hpux) || defined(__hpux)
+# define U_PLATFORM U_PF_HPUX
+#elif defined(sgi) || defined(__sgi)
+# define U_PLATFORM U_PF_IRIX
+#elif defined(macintosh)
+# define U_PLATFORM U_PF_CLASSIC_MACOS
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define U_PLATFORM U_PF_QNX
+#elif defined(__TOS_MVS__)
+# define U_PLATFORM U_PF_OS390
+#elif defined(__OS400__) || defined(__TOS_OS400__)
+# define U_PLATFORM U_PF_OS400
+#else
+# define U_PLATFORM U_PF_UNKNOWN
#endif
/**
- * Define what support for C++ streams is available.
- * If U_IOSTREAM_SOURCE is set to 199711, then &lt;iostream&gt; is available
- * (1997711 is the date the ISO/IEC C++ FDIS was published), and then
- * one should qualify streams using the std namespace in ICU header
- * files.
- * If U_IOSTREAM_SOURCE is set to 198506, then &lt;iostream.h&gt; is
- * available instead (198506 is the date when Stroustrup published
- * "An Extensible I/O Facility for C++" at the summer USENIX conference).
- * If U_IOSTREAM_SOURCE is 0, then C++ streams are not available and
- * support for them will be silently suppressed in ICU.
- *
+ * \def CYGWINMSVC
+ * Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
+ * Otherwise undefined.
+ * @internal
*/
-
-#ifndef U_IOSTREAM_SOURCE
-#define U_IOSTREAM_SOURCE 199711
+/* Commented out because this is already set in mh-cygwin-msvc
+#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
+# define CYGWINMSVC
#endif
+*/
/**
- * \def U_HAVE_STD_STRING
- * Define whether the standard C++ (STL) &lt;string&gt; header is available.
- * For platforms that do not use platform.h and do not define this constant
- * in their platform-specific headers, std_string.h defaults
- * U_HAVE_STD_STRING to 1.
+ * \def U_PLATFORM_USES_ONLY_WIN32_API
+ * Defines whether the platform uses only the Win32 API.
+ * Set to 1 for Windows/MSVC and MinGW but not Cygwin.
* @internal
*/
-#ifndef U_HAVE_STD_STRING
-#define U_HAVE_STD_STRING 1
+#ifdef U_PLATFORM_USES_ONLY_WIN32_API
+ /* Use the predefined value. */
+#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
+# define U_PLATFORM_USES_ONLY_WIN32_API 1
+#else
+ /* Cygwin implements POSIX. */
+# define U_PLATFORM_USES_ONLY_WIN32_API 0
#endif
-/** @{ Determines whether specific types are available */
-#ifndef U_HAVE_INT8_T
-#define U_HAVE_INT8_T 1
+/**
+ * \def U_PLATFORM_HAS_WIN32_API
+ * Defines whether the Win32 API is available on the platform.
+ * Set to 1 for Windows/MSVC, MinGW and Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_HAS_WIN32_API
+ /* Use the predefined value. */
+#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
+# define U_PLATFORM_HAS_WIN32_API 1
+#else
+# define U_PLATFORM_HAS_WIN32_API 0
#endif
-#ifndef U_HAVE_UINT8_T
-#define U_HAVE_UINT8_T 0
+/**
+ * \def U_PLATFORM_IMPLEMENTS_POSIX
+ * Defines whether the platform implements (most of) the POSIX API.
+ * Set to 1 for Cygwin and most other platforms.
+ * @internal
+ */
+#ifdef U_PLATFORM_IMPLEMENTS_POSIX
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS
+# define U_PLATFORM_IMPLEMENTS_POSIX 0
+#else
+# define U_PLATFORM_IMPLEMENTS_POSIX 1
#endif
-#ifndef U_HAVE_INT16_T
-#define U_HAVE_INT16_T 1
+/**
+ * \def U_PLATFORM_IS_LINUX_BASED
+ * Defines whether the platform is Linux or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_LINUX_BASED
+ /* Use the predefined value. */
+#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= U_PF_ANDROID
+# define U_PLATFORM_IS_LINUX_BASED 1
+#else
+# define U_PLATFORM_IS_LINUX_BASED 0
#endif
-#ifndef U_HAVE_UINT16_T
-#define U_HAVE_UINT16_T 0
+/**
+ * \def U_PLATFORM_IS_DARWIN_BASED
+ * Defines whether the platform is Darwin or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_DARWIN_BASED
+ /* Use the predefined value. */
+#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
+# define U_PLATFORM_IS_DARWIN_BASED 1
+#else
+# define U_PLATFORM_IS_DARWIN_BASED 0
#endif
-#ifndef U_HAVE_INT32_T
-#define U_HAVE_INT32_T 1
+/**
+ * \def U_HAVE_STDINT_H
+ * Defines whether stdint.h is available. It is a C99 standard header.
+ * We used to include inttypes.h which includes stdint.h but we usually do not need
+ * the additional definitions from inttypes.h.
+ * @internal
+ */
+#ifdef U_HAVE_STDINT_H
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
+ /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
+# define U_HAVE_STDINT_H 1
+# else
+# define U_HAVE_STDINT_H 0
+# endif
+#elif U_PLATFORM == U_PF_SOLARIS
+ /* Solaris has inttypes.h but not stdint.h. */
+# define U_HAVE_STDINT_H 0
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+ /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+# define U_HAVE_STDINT_H 0
+#else
+# define U_HAVE_STDINT_H 1
#endif
-#ifndef U_HAVE_UINT32_T
-#define U_HAVE_UINT32_T 0
+/**
+ * \def U_HAVE_INTTYPES_H
+ * Defines whether inttypes.h is available. It is a C99 standard header.
+ * We include inttypes.h where it is available but stdint.h is not.
+ * @internal
+ */
+#ifdef U_HAVE_INTTYPES_H
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_SOLARIS
+ /* Solaris has inttypes.h but not stdint.h. */
+# define U_HAVE_INTTYPES_H 1
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+ /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+# define U_HAVE_INTTYPES_H 1
+#else
+ /* Most platforms have both inttypes.h and stdint.h, or neither. */
+# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
#endif
-#ifndef U_HAVE_INT64_T
-#define U_HAVE_INT64_T 1
+/**
+ * \def U_IOSTREAM_SOURCE
+ * Defines what support for C++ streams is available.
+ *
+ * If U_IOSTREAM_SOURCE is set to 199711, then &lt;iostream&gt; is available
+ * (the ISO/IEC C++ FDIS was published in November 1997), and then
+ * one should qualify streams using the std namespace in ICU header
+ * files.
+ * Starting with ICU 49, this is the only supported version.
+ *
+ * If U_IOSTREAM_SOURCE is set to 198506, then &lt;iostream.h&gt; is
+ * available instead (in June 1985 Stroustrup published
+ * "An Extensible I/O Facility for C++" at the summer USENIX conference).
+ * Starting with ICU 49, this version is not supported any more.
+ *
+ * If U_IOSTREAM_SOURCE is 0 (or any value less than 199711),
+ * then C++ streams are not available and
+ * support for them will be silently suppressed in ICU.
+ *
+ * @internal
+ */
+#ifndef U_IOSTREAM_SOURCE
+#define U_IOSTREAM_SOURCE 199711
#endif
-#ifndef U_HAVE_UINT64_T
-#define U_HAVE_UINT64_T 0
+/**
+ * \def U_HAVE_STD_STRING
+ * Defines whether the standard C++ (STL) &lt;string&gt; header is available.
+ * @internal
+ */
+#ifdef U_HAVE_STD_STRING
+ /* Use the predefined value. */
+#else
+# define U_HAVE_STD_STRING 1
#endif
-/** @} */
-
/*===========================================================================*/
/** @{ Compiler and environment features */
/*===========================================================================*/
-/* Define whether namespace is supported */
-#ifndef U_HAVE_NAMESPACE
-#define U_HAVE_NAMESPACE 1
-#endif
-
-/* Determines the endianness of the platform
- It's done this way in case multiple architectures are being built at once.
- For example, Darwin supports fat binaries, which can be both PPC and x86 based. */
-#if defined(BYTE_ORDER) && defined(BIG_ENDIAN)
-#define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
+/**
+ * \def U_GCC_MAJOR_MINOR
+ * Indicates whether the compiler is gcc (test for != 0),
+ * and if so, contains its major (times 100) and minor version numbers.
+ * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
+ *
+ * For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
+ * use "#if U_GCC_MAJOR_MINOR >= 406".
+ * @internal
+ */
+#ifdef __GNUC__
+# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
#else
-#define U_IS_BIG_ENDIAN 1
+# define U_GCC_MAJOR_MINOR 0
#endif
-/* 1 or 0 to enable or disable threads. If undefined, default is: enable threads. */
-#ifndef ICU_USE_THREADS
-#define ICU_USE_THREADS 1
-#endif
-
-/* On strong memory model CPUs (e.g. x86 CPUs), we use a safe & quick double check lock. */
-#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
-#define UMTX_STRONG_MEMORY_MODEL 1
-#endif
-
-#ifndef U_DEBUG
-#define U_DEBUG 0
-#endif
-
-#ifndef U_RELEASE
-#define U_RELEASE 1
+/**
+ * \def U_IS_BIG_ENDIAN
+ * Determines the endianness of the platform.
+ * @internal
+ */
+#ifdef U_IS_BIG_ENDIAN
+ /* Use the predefined value. */
+#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
+# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+ /* gcc */
+# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
+# define U_IS_BIG_ENDIAN 1
+#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
+# define U_IS_BIG_ENDIAN 0
+#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__)
+ /* These platforms do not appear to predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0)
+ /* HPPA does not appear to predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#elif defined(sparc) || defined(__sparc) || defined(__sparc__)
+ /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#else
+# define U_IS_BIG_ENDIAN 0
#endif
-/* Determine whether to disable renaming or not. This overrides the
- setting in umachine.h which is for all platforms. */
-#ifndef U_DISABLE_RENAMING
-#define U_DISABLE_RENAMING 1
+/**
+ * \def U_HAVE_PLACEMENT_NEW
+ * Determines whether to override placement new and delete for STL.
+ * @stable ICU 2.6
+ */
+#ifdef U_HAVE_PLACEMENT_NEW
+ /* Use the predefined value. */
+#elif defined(__BORLANDC__)
+# define U_HAVE_PLACEMENT_NEW 0
+#else
+# define U_HAVE_PLACEMENT_NEW 1
#endif
-/* Determine whether to override new and delete. */
-#ifndef U_OVERRIDE_CXX_ALLOCATION
-#define U_OVERRIDE_CXX_ALLOCATION 1
-#endif
-/* Determine whether to override placement new and delete for STL. */
-#ifndef U_HAVE_PLACEMENT_NEW
-#define U_HAVE_PLACEMENT_NEW 1
+/**
+ * \def U_HAVE_DEBUG_LOCATION_NEW
+ * Define this to define the MFC debug version of the operator new.
+ *
+ * @stable ICU 3.4
+ */
+#ifdef U_HAVE_DEBUG_LOCATION_NEW
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+# define U_HAVE_DEBUG_LOCATION_NEW 1
+#else
+# define U_HAVE_DEBUG_LOCATION_NEW 0
#endif
-/* Determine whether to enable tracing. */
-#ifndef U_ENABLE_TRACING
-#define U_ENABLE_TRACING 1
+/* Compatibility with non clang compilers */
+#ifndef __has_attribute
+# define __has_attribute(x) 0
#endif
/**
- * Whether to enable Dynamic loading in ICU
+ * \def U_MALLOC_ATTR
+ * Attribute to mark functions as malloc-like
* @internal
*/
-#ifndef U_ENABLE_DYLOAD
-#define U_ENABLE_DYLOAD 1
+#if defined(__GNUC__) && __GNUC__>=3
+# define U_MALLOC_ATTR __attribute__ ((__malloc__))
+#else
+# define U_MALLOC_ATTR
#endif
/**
- * Whether to test Dynamic loading as an OS capabilty
+ * \def U_ALLOC_SIZE_ATTR
+ * Attribute to specify the size of the allocated buffer for malloc-like functions
* @internal
*/
-#ifndef U_CHECK_DYLOAD
-#define U_CHECK_DYLOAD 1
-#endif
-
-
-/** Do we allow ICU users to use the draft APIs by default? */
-#ifndef U_DEFAULT_SHOW_DRAFT
-#define U_DEFAULT_SHOW_DRAFT 1
+#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_attribute(alloc_size)
+# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
+# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
+#else
+# define U_ALLOC_SIZE_ATTR(X)
+# define U_ALLOC_SIZE_ATTR2(X,Y)
#endif
/** @} */
/*===========================================================================*/
-/** @{ Character data types */
+/** @{ Character data types */
/*===========================================================================*/
-#if ((defined(OS390) && (!defined(__CHARSET_LIB) || !__CHARSET_LIB))) || defined(OS400)
-# define U_CHARSET_FAMILY 1
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
+ * @stable ICU 2.0
+ */
+#define U_ASCII_FAMILY 0
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
+ * @stable ICU 2.0
+ */
+#define U_EBCDIC_FAMILY 1
+
+/**
+ * \def U_CHARSET_FAMILY
+ *
+ * <p>These definitions allow to specify the encoding of text
+ * in the char data type as defined by the platform and the compiler.
+ * It is enough to determine the code point values of "invariant characters",
+ * which are the ones shared by all encodings that are in use
+ * on a given platform.</p>
+ *
+ * <p>Those "invariant characters" should be all the uppercase and lowercase
+ * latin letters, the digits, the space, and "basic punctuation".
+ * Also, '\\n', '\\r', '\\t' should be available.</p>
+ *
+ * <p>The list of "invariant characters" is:<br>
+ * \code
+ * A-Z a-z 0-9 SPACE " % &amp; ' ( ) * + , - . / : ; < = > ? _
+ * \endcode
+ * <br>
+ * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
+ *
+ * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
+ *
+ * <p>In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:</p>
+ *
+ * \code
+ * '\' <backslash>
+ * '[' <left bracket>
+ * ']' <right bracket>
+ * '{' <left brace>
+ * '}' <right brace>
+ * '^' <circumflex>
+ * '~' <tilde>
+ * '!' <exclamation mark>
+ * '#' <number sign>
+ * '|' <vertical line>
+ * '$' <dollar sign>
+ * '@' <commercial at>
+ * '`' <grave accent>
+ * \endcode
+ * @stable ICU 2.0
+ */
+#ifdef U_CHARSET_FAMILY
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
+# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
+# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#else
+# define U_CHARSET_FAMILY U_ASCII_FAMILY
+#endif
+
+/**
+ * \def U_CHARSET_IS_UTF8
+ *
+ * Hardcode the default charset to UTF-8.
+ *
+ * If this is set to 1, then
+ * - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
+ * contain UTF-8 text, regardless of what the system API uses
+ * - some ICU code will use fast functions like u_strFromUTF8()
+ * rather than the more general and more heavy-weight conversion API (ucnv.h)
+ * - ucnv_getDefaultName() always returns "UTF-8"
+ * - ucnv_setDefaultName() is disabled and will not change the default charset
+ * - static builds of ICU are smaller
+ * - more functionality is available with the UCONFIG_NO_CONVERSION build-time
+ * configuration option (see unicode/uconfig.h)
+ * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
+ *
+ * @stable ICU 4.2
+ * @see UCONFIG_NO_CONVERSION
+ */
+#ifdef U_CHARSET_IS_UTF8
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED
+# define U_CHARSET_IS_UTF8 1
+#else
+# define U_CHARSET_IS_UTF8 0
#endif
/** @} */
/*===========================================================================*/
-/** @{ Information about wchar support */
+/** @{ Information about wchar support */
/*===========================================================================*/
-#ifndef U_HAVE_WCHAR_H
-#define U_HAVE_WCHAR_H 1
+/**
+ * \def U_HAVE_WCHAR_H
+ * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_HAVE_WCHAR_H
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
+ /*
+ * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
+ * The type and header existed, but the library functions did not work as expected.
+ * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
+ */
+# define U_HAVE_WCHAR_H 0
+#else
+# define U_HAVE_WCHAR_H 1
#endif
-#ifndef U_SIZEOF_WCHAR_T
-#define U_SIZEOF_WCHAR_T 4
+/**
+ * \def U_SIZEOF_WCHAR_T
+ * U_SIZEOF_WCHAR_T==sizeof(wchar_t)
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_SIZEOF_WCHAR_T
+ /* Use the predefined value. */
+#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9) || U_PLATFORM == U_PF_CLASSIC_MACOS
+ /*
+ * Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring.
+ * Newer Mac OS X has size 4.
+ */
+# define U_SIZEOF_WCHAR_T 1
+#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
+# define U_SIZEOF_WCHAR_T 2
+#elif U_PLATFORM == U_PF_AIX
+ /*
+ * AIX 6.1 information, section "Wide character data representation":
+ * "... the wchar_t datatype is 32-bit in the 64-bit environment and
+ * 16-bit in the 32-bit environment."
+ * and
+ * "All locales use Unicode for their wide character code values (process code),
+ * except the IBM-eucTW codeset."
+ */
+# ifdef __64BIT__
+# define U_SIZEOF_WCHAR_T 4
+# else
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#elif U_PLATFORM == U_PF_OS390
+ /*
+ * z/OS V1R11 information center, section "LP64 | ILP32":
+ * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
+ * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
+ */
+# ifdef _LP64
+# define U_SIZEOF_WCHAR_T 4
+# else
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#elif U_PLATFORM == U_PF_OS400
+# if defined(__UTF32__)
+ /*
+ * LOCALETYPE(*LOCALEUTF) is specified.
+ * Wide-character strings are in UTF-32,
+ * narrow-character strings are in UTF-8.
+ */
+# define U_SIZEOF_WCHAR_T 4
+# elif defined(__UCS2__)
+ /*
+ * LOCALETYPE(*LOCALEUCS2) is specified.
+ * Wide-character strings are in UCS-2,
+ * narrow-character strings are in EBCDIC.
+ */
+# define U_SIZEOF_WCHAR_T 2
+#else
+ /*
+ * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
+ * Wide-character strings are in 16-bit EBCDIC,
+ * narrow-character strings are in EBCDIC.
+ */
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#else
+# define U_SIZEOF_WCHAR_T 4
#endif
#ifndef U_HAVE_WCSCPY
-#define U_HAVE_WCSCPY 1
+#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
#endif
/** @} */
/**
+ * \def U_HAVE_CHAR16_T
+ * Defines whether the char16_t type is available for UTF-16
+ * and u"abc" UTF-16 string literals are supported.
+ * This is a new standard type and standard string literal syntax in C++0x
+ * but has been available in some compilers before.
+ * @internal
+ */
+#ifdef U_HAVE_CHAR16_T
+ /* Use the predefined value. */
+#else
+ /*
+ * Notes:
+ * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but
+ * does not support u"abc" string literals.
+ * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but
+ * does not support u"abc" string literals.
+ * C++11 and C11 require support for UTF-16 literals
+ */
+# if (defined(__cplusplus) && __cplusplus >= 201103L) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+# define U_HAVE_CHAR16_T 1
+# else
+# define U_HAVE_CHAR16_T 0
+# endif
+#endif
+
+/**
* @{
* \def U_DECLARE_UTF16
- * Do not use this macro. Use the UNICODE_STRING or U_STRING_DECL macros
- * instead.
- * @internal
- *
- * \def U_GNUC_UTF16_STRING
+ * Do not use this macro because it is not defined on all platforms.
+ * Use the UNICODE_STRING or U_STRING_DECL macros instead.
* @internal
*/
-#ifndef U_GNUC_UTF16_STRING
-#define U_GNUC_UTF16_STRING 0
-#endif
-#if 1 || defined(U_CHECK_UTF16_STRING)
-#if (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
+#ifdef U_DECLARE_UTF16
+ /* Use the predefined value. */
+#elif U_HAVE_CHAR16_T \
+ || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
|| (defined(__HP_aCC) && __HP_aCC >= 035000) \
- || (defined(__HP_cc) && __HP_cc >= 111106) \
- || U_GNUC_UTF16_STRING
-#define U_DECLARE_UTF16(string) u ## string
-#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550)
-/* || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x580) */
-/* Sun's C compiler has issues with this notation, and it's unreliable. */
-#define U_DECLARE_UTF16(string) U ## string
+ || (defined(__HP_cc) && __HP_cc >= 111106)
+# define U_DECLARE_UTF16(string) u ## string
#elif U_SIZEOF_WCHAR_T == 2 \
- && (U_CHARSET_FAMILY == 0 || ((defined(OS390) || defined(OS400)) && defined(__UCS2__)))
-#define U_DECLARE_UTF16(string) L ## string
-#endif
-#endif
-
-/** @} */
-
-/*===========================================================================*/
-/** @{ Information about POSIX support */
-/*===========================================================================*/
-
-#ifndef U_HAVE_NL_LANGINFO_CODESET
-#define U_HAVE_NL_LANGINFO_CODESET 1
-#endif
-
-#ifndef U_NL_LANGINFO_CODESET
-#define U_NL_LANGINFO_CODESET CODESET
-#endif
-
-#if 1
-#define U_TZSET tzset
-#endif
-#if 0
-#define U_TIMEZONE timezone
-#endif
-#if 1
-#define U_TZNAME tzname
+ && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
+# define U_DECLARE_UTF16(string) L ## string
+#else
+ /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
#endif
-#define U_HAVE_MMAP 1
-#define U_HAVE_POPEN 1
-
/** @} */
/*===========================================================================*/
-/** @{ Symbol import-export control */
+/** @{ Symbol import-export control */
/*===========================================================================*/
-#if 1
-#define U_EXPORT __attribute__((visibility("default")))
+#ifdef U_EXPORT
+ /* Use the predefined value. */
+#elif defined(U_STATIC_IMPLEMENTATION)
+# define U_EXPORT
+#elif defined(__GNUC__)
+# define U_EXPORT __attribute__((visibility("default")))
#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
|| (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
-#define U_EXPORT __global
+# define U_EXPORT __global
/*#elif defined(__HP_aCC) || defined(__HP_cc)
-#define U_EXPORT __declspec(dllexport)*/
+# define U_EXPORT __declspec(dllexport)*/
+#elif defined(_MSC_VER)
+# define U_EXPORT __declspec(dllexport)
#else
-#define U_EXPORT
+# define U_EXPORT
#endif
/* U_CALLCONV is releated to U_EXPORT2 */
-#define U_EXPORT2
-
-/* cygwin needs to export/import data */
-#if defined(U_CYGWIN) && !defined(__GNUC__)
-#define U_IMPORT __declspec(dllimport)
+#ifdef U_EXPORT2
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+# define U_EXPORT2 __cdecl
#else
-#define U_IMPORT
-#endif
-
-/* @} */
-
-/*===========================================================================*/
-/** @{ Code alignment and C function inlining */
-/*===========================================================================*/
-
-#ifndef U_INLINE
-# ifdef __cplusplus
-# define U_INLINE inline
-# else
-# define U_INLINE __inline__
-# endif
-#endif
-
-#ifndef U_ALIGN_CODE
-#define U_ALIGN_CODE(n)
+# define U_EXPORT2
#endif
-/** @} */
-
-/*===========================================================================*/
-/** @{ GCC built in functions for atomic memory operations */
-/*===========================================================================*/
-
-/**
- * \def U_HAVE_GCC_ATOMICS
- * @internal
- */
-#ifndef U_HAVE_GCC_ATOMICS
-#define U_HAVE_GCC_ATOMICS 1
+#ifdef U_IMPORT
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+ /* Windows needs to export/import data. */
+# define U_IMPORT __declspec(dllimport)
+#else
+# define U_IMPORT
#endif
-/** @} */
-
-/*===========================================================================*/
-/** @{ Programs used by ICU code */
-/*===========================================================================*/
-
/**
- * \def U_MAKE
- * What program to execute to run 'make'
+ * \def U_CALLCONV
+ * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
+ * in callback function typedefs to make sure that the calling convention
+ * is compatible.
+ *
+ * This is only used for non-ICU-API functions.
+ * When a function is a public ICU API,
+ * you must use the U_CAPI and U_EXPORT2 qualifiers.
+ * @stable ICU 2.0
*/
-#ifndef U_MAKE
-#define U_MAKE "/usr/bin/gnumake"
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+# define U_CALLCONV __cdecl
+#else
+# define U_CALLCONV U_EXPORT2
#endif
-/** @} */
-
-#endif /* CYGWINMSVC */
-
-/*===========================================================================*/
-/* Custom icu entry point renaming */
-/*===========================================================================*/
-
-/**
- * Define the library suffix with C syntax.
- * @internal
- */
-# define U_LIB_SUFFIX_C_NAME
-/**
- * Define the library suffix as a string with C syntax
- * @internal
- */
-# define U_LIB_SUFFIX_C_NAME_STRING ""
-/**
- * 1 if a custom library suffix is set
- * @internal
- */
-# define U_HAVE_LIB_SUFFIX 0
-
-#if U_HAVE_LIB_SUFFIX
-# ifndef U_ICU_ENTRY_POINT_RENAME
-/* Renaming pattern: u_strcpy_41_suffix */
-# define U_ICU_ENTRY_POINT_RENAME(x) x ## _ ## 46 ##
-# define U_DEF_ICUDATA_ENTRY_POINT(major, minor) icudt####major##minor##_dat
-
-# endif
-#endif
+/* @} */
#endif
diff --git a/Source/WebCore/icu/unicode/ptypes.h b/Source/WebCore/icu/unicode/ptypes.h
new file mode 100644
index 000000000..b7f711603
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ptypes.h
@@ -0,0 +1,126 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : ptypes.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C API: Definitions of integer types of various widths
+ */
+
+#ifndef _PTYPES_H
+#define _PTYPES_H
+
+/**
+ * \def __STDC_LIMIT_MACROS
+ * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
+ * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
+ * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
+ * that uses such limit macros.
+ * @internal
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+/* NULL, size_t, wchar_t */
+#include <stddef.h>
+
+/*
+ * If all compilers provided all of the C99 headers and types,
+ * we would just unconditionally #include <stdint.h> here
+ * and not need any of the stuff after including platform.h.
+ */
+
+/* Find out if we have stdint.h etc. */
+#include "unicode/platform.h"
+
+/*===========================================================================*/
+/* Generic data types */
+/*===========================================================================*/
+
+/* If your platform does not have the <stdint.h> header, you may
+ need to edit the typedefs in the #else section below.
+ Use #if...#else...#endif with predefined compiler macros if possible. */
+#if U_HAVE_STDINT_H
+
+/*
+ * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
+ * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
+ * which we almost never use, plus stuff like imaxabs() which we never use.
+ */
+#include <stdint.h>
+
+#if U_PLATFORM == U_PF_OS390
+/* The features header is needed to get (u)int64_t sometimes. */
+#include <features.h>
+/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
+#if !defined(__uint8_t)
+#define __uint8_t 1
+typedef unsigned char uint8_t;
+#endif
+#endif /* U_PLATFORM == U_PF_OS390 */
+
+#elif U_HAVE_INTTYPES_H
+
+# include <inttypes.h>
+
+#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
+
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+#ifdef _MSC_VER
+ typedef signed __int64 int64_t;
+#else
+ typedef signed long long int64_t;
+#endif
+#endif
+
+#if ! U_HAVE_UINT64_T
+#ifdef _MSC_VER
+ typedef unsigned __int64 uint64_t;
+#else
+ typedef unsigned long long uint64_t;
+#endif
+#endif
+
+#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
+
+#endif /* _PTYPES_H */
diff --git a/Source/WebCore/icu/unicode/putil.h b/Source/WebCore/icu/unicode/putil.h
index 71d5d2643..6fc7e9cd5 100644
--- a/Source/WebCore/icu/unicode/putil.h
+++ b/Source/WebCore/icu/unicode/putil.h
@@ -1,7 +1,7 @@
/*
******************************************************************************
*
-* Copyright (C) 1997-2009, International Business Machines
+* Copyright (C) 1997-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@@ -30,12 +30,6 @@
* \brief C API: Platform Utilities
*/
-/** Define this to 1 if your platform supports IEEE 754 floating point,
- to 0 if it does not. */
-#ifndef IEEE_754
-# define IEEE_754 1
-#endif
-
/*==========================================================================*/
/* Platform utilities */
/*==========================================================================*/
@@ -93,43 +87,20 @@ U_STABLE const char* U_EXPORT2 u_getDataDirectory(void);
*/
U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory);
-#if !U_CHARSET_IS_UTF8
-/**
- * Please use ucnv_getDefaultName() instead.
- * Return the default codepage for this platform and locale.
- * This function can call setlocale() on Unix platforms. Please read the
- * platform documentation on setlocale() before calling this function.
- * @return the default codepage for this platform
- * @internal
- */
-U_INTERNAL const char* U_EXPORT2 uprv_getDefaultCodepage(void);
-#endif
-
-/**
- * Please use uloc_getDefault() instead.
- * Return the default locale ID string by querying ths system, or
- * zero if one cannot be found.
- * This function can call setlocale() on Unix platforms. Please read the
- * platform documentation on setlocale() before calling this function.
- * @return the default locale ID string
- * @internal
- */
-U_INTERNAL const char* U_EXPORT2 uprv_getDefaultLocaleID(void);
-
/**
* @{
* Filesystem file and path separator characters.
* Example: '/' and ':' on Unix, '\\' and ';' on Windows.
* @stable ICU 2.0
*/
-#ifdef XP_MAC
+#if U_PLATFORM == U_PF_CLASSIC_MACOS
# define U_FILE_SEP_CHAR ':'
# define U_FILE_ALT_SEP_CHAR ':'
# define U_PATH_SEP_CHAR ';'
# define U_FILE_SEP_STRING ":"
# define U_FILE_ALT_SEP_STRING ":"
# define U_PATH_SEP_STRING ";"
-#elif defined(WIN32) || defined(OS2)
+#elif U_PLATFORM_USES_ONLY_WIN32_API
# define U_FILE_SEP_CHAR '\\'
# define U_FILE_ALT_SEP_CHAR '/'
# define U_PATH_SEP_CHAR ';'
diff --git a/Source/WebCore/icu/unicode/rep.h b/Source/WebCore/icu/unicode/rep.h
new file mode 100644
index 000000000..4c7eae140
--- /dev/null
+++ b/Source/WebCore/icu/unicode/rep.h
@@ -0,0 +1,261 @@
+/*
+**************************************************************************
+* Copyright (C) 1999-2012, International Business Machines Corporation and
+* others. All Rights Reserved.
+**************************************************************************
+* Date Name Description
+* 11/17/99 aliu Creation. Ported from java. Modified to
+* match current UnicodeString API. Forced
+* to use name "handleReplaceBetween" because
+* of existing methods in UnicodeString.
+**************************************************************************
+*/
+
+#ifndef REP_H
+#define REP_H
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Replaceable String
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * <code>Replaceable</code> is an abstract base class representing a
+ * string of characters that supports the replacement of a range of
+ * itself with a new string of characters. It is used by APIs that
+ * change a piece of text while retaining metadata. Metadata is data
+ * other than the Unicode characters returned by char32At(). One
+ * example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * <p>An implicit aspect of the <code>Replaceable</code> API is that
+ * during a replace operation, new characters take on the metadata of
+ * the old characters. For example, if the string "the <b>bold</b>
+ * font" has range (4, 8) replaced with "strong", then it becomes "the
+ * <b>strong</b> font".
+ *
+ * <p><code>Replaceable</code> specifies ranges using a start
+ * offset and a limit offset. The range of characters thus specified
+ * includes the characters at offset start..limit-1. That is, the
+ * start offset is inclusive, and the limit offset is exclusive.
+ *
+ * <p><code>Replaceable</code> also includes API to access characters
+ * in the string: <code>length()</code>, <code>charAt()</code>,
+ * <code>char32At()</code>, and <code>extractBetween()</code>.
+ *
+ * <p>For a subclass to support metadata, typical behavior of
+ * <code>replace()</code> is the following:
+ * <ul>
+ * <li>Set the metadata of the new text to the metadata of the first
+ * character replaced</li>
+ * <li>If no characters are replaced, use the metadata of the
+ * previous character</li>
+ * <li>If there is no previous character (i.e. start == 0), use the
+ * following character</li>
+ * <li>If there is no following character (i.e. the replaceable was
+ * empty), use default metadata.</li>
+ * <li>If the code point U+FFFF is seen, it should be interpreted as
+ * a special marker having no metadata
+ * </li>
+ * </ul>
+ * If this is not the behavior, the subclass should document any differences.
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Replaceable : public UObject {
+
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~Replaceable();
+
+ /**
+ * Returns the number of 16-bit code units in the text.
+ * @return number of 16-bit code units in text
+ * @stable ICU 1.8
+ */
+ inline int32_t length() const;
+
+ /**
+ * Returns the 16-bit code unit at the given offset into the text.
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return 16-bit code unit of text at given offset
+ * @stable ICU 1.8
+ */
+ inline UChar charAt(int32_t offset) const;
+
+ /**
+ * Returns the 32-bit code point at the given 16-bit offset into
+ * the text. This assumes the text is stored as 16-bit code units
+ * with surrogate pairs intermixed. If the offset of a leading or
+ * trailing code unit of a surrogate pair is given, return the
+ * code point of the surrogate pair.
+ *
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return 32-bit code point of text at given offset
+ * @stable ICU 1.8
+ */
+ inline UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the UnicodeString <tt>target</tt>.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to
+ * be copied
+ * @param target UnicodeString into which to copy characters.
+ * (No value is returned; the result is delivered through <TT>target</TT>.)
+ * @stable ICU 2.1
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const = 0;
+
+ /**
+ * Replaces a substring of this object with the given text. If the
+ * characters being replaced have metadata, the new characters
+ * that replace them should be given the same metadata.
+ *
+ * <p>Subclasses must ensure that if the text between start and
+ * limit is equal to the replacement text, that replace has no
+ * effect. That is, any metadata
+ * should be unaffected. In addition, subclasses are encouraged to
+ * check for initial and trailing identical characters, and make a
+ * smaller replacement if possible. This will preserve as much
+ * metadata as possible.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= length()</code>.
+ * @param text the text to replace characters <code>start</code>
+ * to <code>limit - 1</code>
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text) = 0;
+ // Note: All other methods in this class take the names of
+ // existing UnicodeString methods. This method is the exception.
+ // It is named differently because all replace methods of
+ // UnicodeString return a UnicodeString&. The 'between' is
+ // required in order to conform to the UnicodeString naming
+ // convention; API taking start/length are named <operation>, and
+ // those taking start/limit are named <operationBetween>. The
+ // 'handle' is added because 'replaceBetween' and
+ // 'doReplaceBetween' are already taken.
+
+ /**
+ * Copies a substring of this object, retaining metadata.
+ * This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit <=
+ * length()</code>.
+ * @param dest the destination index. The characters from
+ * <code>start..limit-1</code> will be copied to <code>dest</code>.
+ * Implementations of this method may assume that <code>dest <= start ||
+ * dest >= limit</code>.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
+
+ /**
+ * Returns true if this object contains metadata. If a
+ * Replaceable object has metadata, calls to the Replaceable API
+ * must be made so as to preserve metadata. If it does not, calls
+ * to the Replaceable API may be optimized to improve performance.
+ * The default implementation returns true.
+ * @return true if this object contains metadata
+ * @stable ICU 2.2
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a pointer to a Replaceable
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual Replaceable *clone() const;
+
+protected:
+
+ /**
+ * Default constructor.
+ * @stable ICU 2.4
+ */
+ inline Replaceable();
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ Replaceable &Replaceable::operator=(const Replaceable &);
+ */
+
+ /**
+ * Virtual version of length().
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const = 0;
+
+ /**
+ * Virtual version of charAt().
+ * @stable ICU 2.4
+ */
+ virtual UChar getCharAt(int32_t offset) const = 0;
+
+ /**
+ * Virtual version of char32At().
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const = 0;
+};
+
+inline Replaceable::Replaceable() {}
+
+inline int32_t
+Replaceable::length() const {
+ return getLength();
+}
+
+inline UChar
+Replaceable::charAt(int32_t offset) const {
+ return getCharAt(offset);
+}
+
+inline UChar32
+Replaceable::char32At(int32_t offset) const {
+ return getChar32At(offset);
+}
+
+// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
+
+U_NAMESPACE_END
+
+#endif
diff --git a/Source/WebCore/icu/unicode/std_string.h b/Source/WebCore/icu/unicode/std_string.h
new file mode 100644
index 000000000..67b1d6c5a
--- /dev/null
+++ b/Source/WebCore/icu/unicode/std_string.h
@@ -0,0 +1,34 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: std_string.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009feb19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __STD_STRING_H__
+#define __STD_STRING_H__
+
+/**
+ * \file
+ * \brief C++ API: Central ICU header for including the C++ standard &lt;string&gt;
+ * header and for related definitions.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_HAVE_STD_STRING
+
+#include <string>
+
+#endif // U_HAVE_STD_STRING
+
+#endif // __STD_STRING_H__
diff --git a/Source/WebCore/icu/unicode/strenum.h b/Source/WebCore/icu/unicode/strenum.h
new file mode 100644
index 000000000..3dbe21c6b
--- /dev/null
+++ b/Source/WebCore/icu/unicode/strenum.h
@@ -0,0 +1,276 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*/
+
+#ifndef STRENUM_H
+#define STRENUM_H
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: String Enumeration
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for 'pure' C++ implementations of uenum api. Adds a
+ * method that returns the next UnicodeString since in C++ this can
+ * be a common storage format for strings.
+ *
+ * <p>The model is that the enumeration is over strings maintained by
+ * a 'service.' At any point, the service might change, invalidating
+ * the enumerator (though this is expected to be rare). The iterator
+ * returns an error if this has occurred. Lack of the error is no
+ * guarantee that the service didn't change immediately after the
+ * call, so the returned string still might not be 'valid' on
+ * subsequent use.</p>
+ *
+ * <p>Strings may take the form of const char*, const UChar*, or const
+ * UnicodeString*. The type you get is determined by the variant of
+ * 'next' that you call. In general the StringEnumeration is
+ * optimized for one of these types, but all StringEnumerations can
+ * return all types. Returned strings are each terminated with a NUL.
+ * Depending on the service data, they might also include embedded NUL
+ * characters, so API is provided to optionally return the true
+ * length, counting the embedded NULs but not counting the terminating
+ * NUL.</p>
+ *
+ * <p>The pointers returned by next, unext, and snext become invalid
+ * upon any subsequent call to the enumeration's destructor, next,
+ * unext, snext, or reset.</p>
+ *
+ * ICU 2.8 adds some default implementations and helper functions
+ * for subclasses.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API StringEnumeration : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~StringEnumeration();
+
+ /**
+ * Clone this object, an instance of a subclass of StringEnumeration.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a base class pointer
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ virtual StringEnumeration *clone() const;
+
+ /**
+ * <p>Return the number of elements that the iterator traverses. If
+ * the iterator is out of sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
+ *
+ * <p>The return value will not change except possibly as a result of
+ * a subsequent call to reset, or if the iterator becomes out of sync.</p>
+ *
+ * <p>This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched
+ * (depending on the storage format of the data being
+ * traversed).</p>
+ *
+ * @param status the error code.
+ * @return number of elements in the iterator.
+ *
+ * @stable ICU 2.4 */
+ virtual int32_t count(UErrorCode& status) const = 0;
+
+ /**
+ * <p>Returns the next element as a NUL-terminated char*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * <p>If the native service string is a UChar* string, it is
+ * converted to char* with the invariant converter. If the
+ * conversion fails (because a character cannot be converted) then
+ * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+ * value is undefined (though not NULL).</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ * Either next() or snext() must be implemented differently by a subclass.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be NULL.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const char* next(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * <p>Returns the next element as a NUL-terminated UChar*. If there
+ * are no more elements, returns NULL. If the resultLength pointer
+ * is not NULL, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be NULL.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const UChar* unext(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * <p>Returns the next element as a UnicodeString*. If there are no
+ * more elements, returns NULL.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls next()
+ * and handles the conversion.
+ * Either next() or snext() must be implemented differently by a subclass.
+ *
+ * @param status the error code.
+ * @return a pointer to the string, or NULL.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const UnicodeString* snext(UErrorCode& status);
+
+ /**
+ * <p>Resets the iterator. This re-establishes sync with the
+ * service and rewinds the iterator to start at the first
+ * element.</p>
+ *
+ * <p>Previous pointers returned by next, unext, or snext become
+ * invalid, and the value returned by count might change.</p>
+ *
+ * @param status the error code.
+ *
+ * @stable ICU 2.4
+ */
+ virtual void reset(UErrorCode& status) = 0;
+
+ /**
+ * Compares this enumeration to other to check if both are equal
+ *
+ * @param that The other string enumeration to compare this object to
+ * @return TRUE if the enumerations are equal. FALSE if not.
+ * @stable ICU 3.6
+ */
+ virtual UBool operator==(const StringEnumeration& that)const;
+ /**
+ * Compares this enumeration to other to check if both are not equal
+ *
+ * @param that The other string enumeration to compare this object to
+ * @return TRUE if the enumerations are not equal. FALSE if they are equal.
+ * @stable ICU 3.6
+ */
+ virtual UBool operator!=(const StringEnumeration& that)const;
+
+protected:
+ /**
+ * UnicodeString field for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ UnicodeString unistr;
+ /**
+ * char * default buffer for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ char charsBuffer[32];
+ /**
+ * char * buffer for use with default implementations and subclasses.
+ * Allocated in constructor and in ensureCharsCapacity().
+ * @stable ICU 2.8
+ */
+ char *chars;
+ /**
+ * Capacity of chars, for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ int32_t charsCapacity;
+
+ /**
+ * Default constructor for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ StringEnumeration();
+
+ /**
+ * Ensures that chars is at least as large as the requested capacity.
+ * For use with default implementations and subclasses.
+ *
+ * @param capacity Requested capacity.
+ * @param status ICU in/out error code.
+ * @stable ICU 2.8
+ */
+ void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
+
+ /**
+ * Converts s to Unicode and sets unistr to the result.
+ * For use with default implementations and subclasses,
+ * especially for implementations of snext() in terms of next().
+ * This is provided with a helper function instead of a default implementation
+ * of snext() to avoid potential infinite loops between next() and snext().
+ *
+ * For example:
+ * \code
+ * const UnicodeString* snext(UErrorCode& status) {
+ * int32_t resultLength=0;
+ * const char *s=next(&resultLength, status);
+ * return setChars(s, resultLength, status);
+ * }
+ * \endcode
+ *
+ * @param s String to be converted to Unicode.
+ * @param length Length of the string.
+ * @param status ICU in/out error code.
+ * @return A pointer to unistr.
+ * @stable ICU 2.8
+ */
+ UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
+};
+
+U_NAMESPACE_END
+
+/* STRENUM_H */
+#endif
diff --git a/Source/WebCore/icu/unicode/stringpiece.h b/Source/WebCore/icu/unicode/stringpiece.h
new file mode 100644
index 000000000..b29571d4a
--- /dev/null
+++ b/Source/WebCore/icu/unicode/stringpiece.h
@@ -0,0 +1,224 @@
+// Copyright (C) 2009-2013, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2001 and onwards Google Inc.
+// Author: Sanjay Ghemawat
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __STRINGPIECE_H__
+#define __STRINGPIECE_H__
+
+/**
+ * \file
+ * \brief C++ API: StringPiece: Read-only byte string wrapper class.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+// Arghh! I wish C++ literals were "string".
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A string-like object that points to a sized piece of memory.
+ *
+ * We provide non-explicit singleton constructors so users can pass
+ * in a "const char*" or a "string" wherever a "StringPiece" is
+ * expected.
+ *
+ * Functions or methods may use const StringPiece& parameters to accept either
+ * a "const char*" or a "string" value that will be implicitly converted to
+ * a StringPiece.
+ *
+ * Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+ * conversions from "const char*" to "string" and back again.
+ *
+ * @stable ICU 4.2
+ */
+class U_COMMON_API StringPiece : public UMemory {
+ private:
+ const char* ptr_;
+ int32_t length_;
+
+ public:
+ /**
+ * Default constructor, creates an empty StringPiece.
+ * @stable ICU 4.2
+ */
+ StringPiece() : ptr_(NULL), length_(0) { }
+ /**
+ * Constructs from a NUL-terminated const char * pointer.
+ * @param str a NUL-terminated const char * pointer
+ * @stable ICU 4.2
+ */
+ StringPiece(const char* str);
+#if U_HAVE_STD_STRING
+ /**
+ * Constructs from a std::string.
+ * @stable ICU 4.2
+ */
+ StringPiece(const std::string& str)
+ : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
+#endif
+ /**
+ * Constructs from a const char * pointer and a specified length.
+ * @param offset a const char * pointer (need not be terminated)
+ * @param len the length of the string; must be non-negative
+ * @stable ICU 4.2
+ */
+ StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
+ /**
+ * Substring of another StringPiece.
+ * @param x the other StringPiece
+ * @param pos start position in x; must be non-negative and <= x.length().
+ * @stable ICU 4.2
+ */
+ StringPiece(const StringPiece& x, int32_t pos);
+ /**
+ * Substring of another StringPiece.
+ * @param x the other StringPiece
+ * @param pos start position in x; must be non-negative and <= x.length().
+ * @param len length of the substring;
+ * must be non-negative and will be pinned to at most x.length() - pos.
+ * @stable ICU 4.2
+ */
+ StringPiece(const StringPiece& x, int32_t pos, int32_t len);
+
+ /**
+ * Returns the string pointer. May be NULL if it is empty.
+ *
+ * data() may return a pointer to a buffer with embedded NULs, and the
+ * returned buffer may or may not be null terminated. Therefore it is
+ * typically a mistake to pass data() to a routine that expects a NUL
+ * terminated string.
+ * @return the string pointer
+ * @stable ICU 4.2
+ */
+ const char* data() const { return ptr_; }
+ /**
+ * Returns the string length. Same as length().
+ * @return the string length
+ * @stable ICU 4.2
+ */
+ int32_t size() const { return length_; }
+ /**
+ * Returns the string length. Same as size().
+ * @return the string length
+ * @stable ICU 4.2
+ */
+ int32_t length() const { return length_; }
+ /**
+ * Returns whether the string is empty.
+ * @return TRUE if the string is empty
+ * @stable ICU 4.2
+ */
+ UBool empty() const { return length_ == 0; }
+
+ /**
+ * Sets to an empty string.
+ * @stable ICU 4.2
+ */
+ void clear() { ptr_ = NULL; length_ = 0; }
+
+ /**
+ * Reset the stringpiece to refer to new data.
+ * @param xdata pointer to the new string data. Need not be NUL-terminated.
+ * @param len the length of the new data
+ * @stable ICU 4.8
+ */
+ void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
+
+ /**
+ * Reset the stringpiece to refer to new data.
+ * @param str a pointer to a NUL-terminated string.
+ * @stable ICU 4.8
+ */
+ void set(const char* str);
+
+ /**
+ * Removes the first n string units.
+ * @param n prefix length, must be non-negative and <=length()
+ * @stable ICU 4.2
+ */
+ void remove_prefix(int32_t n) {
+ if (n >= 0) {
+ if (n > length_) {
+ n = length_;
+ }
+ ptr_ += n;
+ length_ -= n;
+ }
+ }
+
+ /**
+ * Removes the last n string units.
+ * @param n suffix length, must be non-negative and <=length()
+ * @stable ICU 4.2
+ */
+ void remove_suffix(int32_t n) {
+ if (n >= 0) {
+ if (n <= length_) {
+ length_ -= n;
+ } else {
+ length_ = 0;
+ }
+ }
+ }
+
+ /**
+ * Maximum integer, used as a default value for substring methods.
+ * @stable ICU 4.2
+ */
+ static const int32_t npos; // = 0x7fffffff;
+
+ /**
+ * Returns a substring of this StringPiece.
+ * @param pos start position; must be non-negative and <= length().
+ * @param len length of the substring;
+ * must be non-negative and will be pinned to at most length() - pos.
+ * @return the substring StringPiece
+ * @stable ICU 4.2
+ */
+ StringPiece substr(int32_t pos, int32_t len = npos) const {
+ return StringPiece(*this, pos, len);
+ }
+};
+
+/**
+ * Global operator == for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return TRUE if the string data is equal
+ * @stable ICU 4.8
+ */
+U_EXPORT UBool U_EXPORT2
+operator==(const StringPiece& x, const StringPiece& y);
+
+/**
+ * Global operator != for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return TRUE if the string data is not equal
+ * @stable ICU 4.8
+ */
+inline UBool operator!=(const StringPiece& x, const StringPiece& y) {
+ return !(x == y);
+}
+
+U_NAMESPACE_END
+
+#endif // __STRINGPIECE_H__
diff --git a/Source/WebCore/icu/unicode/ubrk.h b/Source/WebCore/icu/unicode/ubrk.h
index f8304a662..e9c0ef627 100644
--- a/Source/WebCore/icu/unicode/ubrk.h
+++ b/Source/WebCore/icu/unicode/ubrk.h
@@ -1,6 +1,6 @@
/*
******************************************************************************
-* Copyright (C) 1996-2010, International Business Machines Corporation and others.
+* Copyright (C) 1996-2013, International Business Machines Corporation and others.
* All Rights Reserved.
******************************************************************************
*/
@@ -243,10 +243,12 @@ ubrk_openRules(const UChar *rules,
/**
* Thread safe cloning operation
* @param bi iterator to be cloned
- * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone. If NULL new memory will be allocated.
* If buffer is not large enough, new memory will be allocated.
- * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
- * @param pBufferSize pointer to size of allocated space.
+ * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
* If *pBufferSize == 0, a sufficient size for use in cloning will
* be returned ('pre-flighting')
* If *pBufferSize is not enough for a stack-based safe clone,
@@ -263,11 +265,15 @@ ubrk_safeClone(
int32_t *pBufferSize,
UErrorCode *status);
+#ifndef U_HIDE_DEPRECATED_API
+
/**
* A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone().
- * @stable ICU 2.0
+ * @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
*/
-#define U_BRK_SAFECLONE_BUFFERSIZE 512
+#define U_BRK_SAFECLONE_BUFFERSIZE 1
+
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Close a UBreakIterator.
@@ -313,7 +319,13 @@ ubrk_setText(UBreakIterator* bi,
/**
- * Sets an existing iterator to point to a new piece of text
+ * Sets an existing iterator to point to a new piece of text.
+ *
+ * All index positions returned by break iterator functions are
+ * native indices from the UText. For example, when breaking UTF-8
+ * encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
+ * will be UTF-8 string indices, not UTF-16 positions.
+ *
* @param bi The iterator to use
* @param text The text to be set.
* This function makes a shallow clone of the supplied UText. This means
@@ -342,7 +354,7 @@ U_STABLE int32_t U_EXPORT2
ubrk_current(const UBreakIterator *bi);
/**
- * Determine the text boundary following the current text boundary.
+ * Advance the iterator to the boundary following the current boundary.
*
* @param bi The break iterator to use.
* @return The character index of the next text boundary, or UBRK_DONE
@@ -354,7 +366,7 @@ U_STABLE int32_t U_EXPORT2
ubrk_next(UBreakIterator *bi);
/**
- * Determine the text boundary preceding the current text boundary.
+ * Set the iterator position to the boundary preceding the current boundary.
*
* @param bi The break iterator to use.
* @return The character index of the preceding text boundary, or UBRK_DONE
@@ -366,7 +378,7 @@ U_STABLE int32_t U_EXPORT2
ubrk_previous(UBreakIterator *bi);
/**
- * Determine the index of the first character in the text being scanned.
+ * Set the iterator position to the index of the first character in the text being scanned.
* This is not always the same as index 0 of the text.
* @param bi The break iterator to use.
* @return The character index of the first character in the text being scanned.
@@ -377,8 +389,7 @@ U_STABLE int32_t U_EXPORT2
ubrk_first(UBreakIterator *bi);
/**
- * Determine the index immediately <EM>beyond</EM> the last character in the text being
- * scanned.
+ * Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
* This is not the same as the last character.
* @param bi The break iterator to use.
* @return The character offset immediately <EM>beyond</EM> the last character in the
@@ -390,8 +401,8 @@ U_STABLE int32_t U_EXPORT2
ubrk_last(UBreakIterator *bi);
/**
- * Determine the text boundary preceding the specified offset.
- * The value returned is always smaller than offset, or UBRK_DONE.
+ * Set the iterator position to the first boundary preceding the specified offset.
+ * The new position is always smaller than offset, or UBRK_DONE.
* @param bi The break iterator to use.
* @param offset The offset to begin scanning.
* @return The text boundary preceding offset, or UBRK_DONE.
@@ -403,7 +414,7 @@ ubrk_preceding(UBreakIterator *bi,
int32_t offset);
/**
- * Determine the text boundary following the specified offset.
+ * Advance the iterator to the first boundary following the specified offset.
* The value returned is always greater than offset, or UBRK_DONE.
* @param bi The break iterator to use.
* @param offset The offset to begin scanning.
@@ -495,6 +506,35 @@ ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity,
U_STABLE const char* U_EXPORT2
ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+/**
+ * Set the subject text string upon which the break iterator is operating
+ * without changing any other aspect of the state.
+ * The new and previous text strings must have the same content.
+ *
+ * This function is intended for use in environments where ICU is operating on
+ * strings that may move around in memory. It provides a mechanism for notifying
+ * ICU that the string has been relocated, and providing a new UText to access the
+ * string in its new position.
+ *
+ * Note that the break iterator never copies the underlying text
+ * of a string being processed, but always operates directly on the original text
+ * provided by the user. Refreshing simply drops the references to the old text
+ * and replaces them with references to the new.
+ *
+ * Caution: this function is normally used only by very specialized
+ * system-level code. One example use case is with garbage collection
+ * that moves the text in memory.
+ *
+ * @param bi The break iterator.
+ * @param text The new (moved) text string.
+ * @param status Receives errors detected by this function.
+ *
+ * @stable ICU 49
+ */
+U_STABLE void U_EXPORT2
+ubrk_refreshUText(UBreakIterator *bi,
+ UText *text,
+ UErrorCode *status);
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
diff --git a/Source/WebCore/icu/unicode/uchar.h b/Source/WebCore/icu/unicode/uchar.h
index 93aa66320..1a5b71b46 100644
--- a/Source/WebCore/icu/unicode/uchar.h
+++ b/Source/WebCore/icu/unicode/uchar.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -39,7 +39,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
-#define U_UNICODE_VERSION "6.0"
+#define U_UNICODE_VERSION "6.3"
/**
* \file
@@ -139,19 +139,6 @@ U_CDECL_BEGIN
*/
#define U_MASK(x) ((uint32_t)1<<(x))
-/*
- * !! Note: Several comments in this file are machine-read by the
- * genpname tool. These comments describe the correspondence between
- * icu enum constants and UCD entities. Do not delete them. Update
- * these comments as needed.
- *
- * Any comment of the form "/ *[name]* /" (spaces added) is such
- * a comment.
- *
- * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
- * name, which must match PropertyValueAliases.txt.
- */
-
/**
* Selection constants for Unicode properties.
* These constants are used in functions like u_hasBinaryProperty to select
@@ -172,9 +159,11 @@ U_CDECL_BEGIN
* @stable ICU 2.1
*/
typedef enum UProperty {
- /* See note !!. Comments of the form "Binary property Dash",
- "Enumerated property Script", "Double property Numeric_Value",
- and "String property Age" are read by genpname. */
+ /*
+ * Note: UProperty constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UCHAR_<Unicode property name>=<integer>,
+ */
/* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
@@ -491,8 +480,13 @@ typedef enum UProperty {
(http://www.unicode.org/reports/tr29/)
Returns UWordBreakValues values. @stable ICU 3.4 */
UCHAR_WORD_BREAK=0x1014,
+ /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+ Used in UAX #9: Unicode Bidirectional Algorithm
+ (http://www.unicode.org/reports/tr9/)
+ Returns UBidiPairedBracketType values. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
/** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
- UCHAR_INT_LIMIT=0x1015,
+ UCHAR_INT_LIMIT=0x1016,
/** Bitmask property General_Category_Mask.
This is the General_Category property returned as a bit mask.
@@ -527,9 +521,11 @@ typedef enum UProperty {
/** String property Case_Folding.
Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
UCHAR_CASE_FOLDING=0x4002,
- /** String property ISO_Comment.
- Corresponds to u_getISOComment. @stable ICU 2.4 */
+#ifndef U_HIDE_DEPRECATED_API
+ /** Deprecated string property ISO_Comment.
+ Corresponds to u_getISOComment. @deprecated ICU 49 */
UCHAR_ISO_COMMENT=0x4003,
+#endif /* U_HIDE_DEPRECATED_API */
/** String property Lowercase_Mapping.
Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
UCHAR_LOWERCASE_MAPPING=0x4004,
@@ -551,29 +547,33 @@ typedef enum UProperty {
/** String property Titlecase_Mapping.
Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
UCHAR_TITLECASE_MAPPING=0x400A,
+#ifndef U_HIDE_DEPRECATED_API
/** String property Unicode_1_Name.
- Corresponds to u_charName. @stable ICU 2.4 */
+ This property is of little practical value.
+ Beginning with ICU 49, ICU APIs return an empty string for this property.
+ Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
UCHAR_UNICODE_1_NAME=0x400B,
+#endif /* U_HIDE_DEPRECATED_API */
/** String property Uppercase_Mapping.
Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
UCHAR_UPPERCASE_MAPPING=0x400C,
+ /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
+ Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET=0x400D,
/** One more than the last constant for string Unicode properties. @stable ICU 2.4 */
- UCHAR_STRING_LIMIT=0x400D,
+ UCHAR_STRING_LIMIT=0x400E,
- /** Provisional property Script_Extensions (new in Unicode 6.0).
- As a provisional property, it may be modified or removed
- in future versions of the Unicode Standard, and thus in ICU.
+ /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
Some characters are commonly used in multiple scripts.
For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
- @draft ICU 4.6 */
+ @stable ICU 4.6 */
UCHAR_SCRIPT_EXTENSIONS=0x7000,
- /** First constant for Unicode properties with unusual value types. @draft ICU 4.6 */
+ /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
/** One more than the last constant for Unicode properties with unusual value types.
- * @draft ICU 4.6 */
+ * @stable ICU 4.6 */
UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
-
/** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
UCHAR_INVALID_CODE = -1
} UProperty;
@@ -585,7 +585,12 @@ typedef enum UProperty {
*/
typedef enum UCharCategory
{
- /** See note !!. Comments of the form "Cn" are read by genpname. */
+ /*
+ * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 2-letter General_Category value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
/** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
U_UNASSIGNED = 0,
@@ -770,7 +775,12 @@ typedef enum UCharCategory
* @stable ICU 2.0
*/
typedef enum UCharDirection {
- /** See note !!. Comments of the form "EN" are read by genpname. */
+ /*
+ * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
/** L @stable ICU 2.0 */
U_LEFT_TO_RIGHT = 0,
@@ -810,21 +820,57 @@ typedef enum UCharDirection {
U_DIR_NON_SPACING_MARK = 17,
/** BN @stable ICU 2.0 */
U_BOUNDARY_NEUTRAL = 18,
+ /** FSI @stable ICU 52 */
+ U_FIRST_STRONG_ISOLATE = 19,
+ /** LRI @stable ICU 52 */
+ U_LEFT_TO_RIGHT_ISOLATE = 20,
+ /** RLI @stable ICU 52 */
+ U_RIGHT_TO_LEFT_ISOLATE = 21,
+ /** PDI @stable ICU 52 */
+ U_POP_DIRECTIONAL_ISOLATE = 22,
/** @stable ICU 2.0 */
U_CHAR_DIRECTION_COUNT
} UCharDirection;
/**
+ * Bidi Paired Bracket Type constants.
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @stable ICU 52
+ */
+typedef enum UBidiPairedBracketType {
+ /*
+ * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
+ */
+
+ /** Not a paired bracket. @stable ICU 52 */
+ U_BPT_NONE,
+ /** Open paired bracket. @stable ICU 52 */
+ U_BPT_OPEN,
+ /** Close paired bracket. @stable ICU 52 */
+ U_BPT_CLOSE,
+ /** @stable ICU 52 */
+ U_BPT_COUNT /* 3 */
+} UBidiPairedBracketType;
+
+/**
* Constants for Unicode blocks, see the Unicode Data file Blocks.txt
* @stable ICU 2.0
*/
enum UBlockCode {
+ /*
+ * Note: UBlockCode constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UBLOCK_<Unicode Block value name> = <integer>,
+ */
/** New No_Block value in Unicode 4. @stable ICU 2.6 */
UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
/** @stable ICU 2.0 */
- UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
+ UBLOCK_BASIC_LATIN = 1, /*[0000]*/
/** @stable ICU 2.0 */
UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
@@ -1061,7 +1107,7 @@ enum UBlockCode {
UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
/**
- * Same as UBLOCK_PRIVATE_USE_AREA.
+ * Same as UBLOCK_PRIVATE_USE.
* Until Unicode 3.1.1, the corresponding block name was "Private Use",
* and multiple code point ranges had this block.
* Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
@@ -1069,9 +1115,9 @@ enum UBlockCode {
*
* @stable ICU 2.0
*/
- UBLOCK_PRIVATE_USE = 78,
+ UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
/**
- * Same as UBLOCK_PRIVATE_USE.
+ * Same as UBLOCK_PRIVATE_USE_AREA.
* Until Unicode 3.1.1, the corresponding block name was "Private Use",
* and multiple code point ranges had this block.
* Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
@@ -1079,7 +1125,7 @@ enum UBlockCode {
*
* @stable ICU 2.0
*/
- UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/
+ UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
/** @stable ICU 2.0 */
UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
@@ -1111,33 +1157,33 @@ enum UBlockCode {
/* New blocks in Unicode 3.1 */
/** @stable ICU 2.0 */
- UBLOCK_OLD_ITALIC = 88 , /*[10300]*/
+ UBLOCK_OLD_ITALIC = 88, /*[10300]*/
/** @stable ICU 2.0 */
- UBLOCK_GOTHIC = 89 , /*[10330]*/
+ UBLOCK_GOTHIC = 89, /*[10330]*/
/** @stable ICU 2.0 */
- UBLOCK_DESERET = 90 , /*[10400]*/
+ UBLOCK_DESERET = 90, /*[10400]*/
/** @stable ICU 2.0 */
- UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/
+ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/
/** @stable ICU 2.0 */
- UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
+ UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
/** @stable ICU 2.0 */
- UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93 , /*[1D400]*/
+ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/
/** @stable ICU 2.0 */
- UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94 , /*[20000]*/
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/
/** @stable ICU 2.0 */
- UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/
/** @stable ICU 2.0 */
UBLOCK_TAGS = 96, /*[E0000]*/
/* New blocks in Unicode 3.2 */
+ /** @stable ICU 3.0 */
+ UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
/**
* Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
* @stable ICU 2.2
*/
- UBLOCK_CYRILLIC_SUPPLEMENTARY = 97,
- /** @stable ICU 3.0 */
- UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/
+ UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
/** @stable ICU 2.2 */
UBLOCK_TAGALOG = 98, /*[1700]*/
/** @stable ICU 2.2 */
@@ -1381,8 +1427,33 @@ enum UBlockCode {
/** @stable ICU 4.6 */
UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/
- /** @stable ICU 2.0 */
- UBLOCK_COUNT = 210,
+ /* New blocks in Unicode 6.1 */
+
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/
+ /** @stable ICU 49 */
+ UBLOCK_CHAKMA = 212, /*[11100]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
+ /** @stable ICU 49 */
+ UBLOCK_MIAO = 216, /*[16F00]*/
+ /** @stable ICU 49 */
+ UBLOCK_SHARADA = 217, /*[11180]*/
+ /** @stable ICU 49 */
+ UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
+ /** @stable ICU 49 */
+ UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/
+ /** @stable ICU 49 */
+ UBLOCK_TAKRI = 220, /*[11680]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COUNT = 221,
/** @stable ICU 2.0 */
UBLOCK_INVALID_CODE=-1
@@ -1399,7 +1470,13 @@ typedef enum UBlockCode UBlockCode;
* @stable ICU 2.2
*/
typedef enum UEastAsianWidth {
- U_EA_NEUTRAL, /*[N]*/ /*See note !!*/
+ /*
+ * Note: UEastAsianWidth constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_EA_<Unicode East_Asian_Width value name>
+ */
+
+ U_EA_NEUTRAL, /*[N]*/
U_EA_AMBIGUOUS, /*[A]*/
U_EA_HALFWIDTH, /*[H]*/
U_EA_FULLWIDTH, /*[F]*/
@@ -1407,10 +1484,6 @@ typedef enum UEastAsianWidth {
U_EA_WIDE, /*[W]*/
U_EA_COUNT
} UEastAsianWidth;
-/*
- * Implementation note:
- * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
- */
/**
* Selector constants for u_charName().
@@ -1424,10 +1497,21 @@ typedef enum UEastAsianWidth {
* @stable ICU 2.0
*/
typedef enum UCharNameChoice {
+ /** Unicode character name (Name property). @stable ICU 2.0 */
U_UNICODE_CHAR_NAME,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * The Unicode_1_Name property value which is of little practical value.
+ * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
+ * @deprecated ICU 49
+ */
U_UNICODE_10_CHAR_NAME,
- U_EXTENDED_CHAR_NAME,
- U_CHAR_NAME_ALIAS, /**< Corrected name from NameAliases.txt. @stable ICU 4.4 */
+#endif /* U_HIDE_DEPRECATED_API */
+ /** Standard or synthetic character name. @stable ICU 2.0 */
+ U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
+ /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
+ U_CHAR_NAME_ALIAS,
+ /** @stable ICU 2.0 */
U_CHAR_NAME_CHOICE_COUNT
} UCharNameChoice;
@@ -1457,7 +1541,13 @@ typedef enum UPropertyNameChoice {
* @stable ICU 2.2
*/
typedef enum UDecompositionType {
- U_DT_NONE, /*[none]*/ /*See note !!*/
+ /*
+ * Note: UDecompositionType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_DT_<Unicode Decomposition_Type value name>
+ */
+
+ U_DT_NONE, /*[none]*/
U_DT_CANONICAL, /*[can]*/
U_DT_COMPAT, /*[com]*/
U_DT_CIRCLE, /*[enc]*/
@@ -1485,7 +1575,13 @@ typedef enum UDecompositionType {
* @stable ICU 2.2
*/
typedef enum UJoiningType {
- U_JT_NON_JOINING, /*[U]*/ /*See note !!*/
+ /*
+ * Note: UJoiningType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JT_<Unicode Joining_Type value name>
+ */
+
+ U_JT_NON_JOINING, /*[U]*/
U_JT_JOIN_CAUSING, /*[C]*/
U_JT_DUAL_JOINING, /*[D]*/
U_JT_LEFT_JOINING, /*[L]*/
@@ -1501,6 +1597,12 @@ typedef enum UJoiningType {
* @stable ICU 2.2
*/
typedef enum UJoiningGroup {
+ /*
+ * Note: UJoiningGroup constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JG_<Unicode Joining_Group value name>
+ */
+
U_JG_NO_JOINING_GROUP,
U_JG_AIN,
U_JG_ALAPH,
@@ -1559,6 +1661,7 @@ typedef enum UJoiningGroup {
U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
U_JG_NYA, /**< @stable ICU 4.4 */
+ U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */
U_JG_COUNT
} UJoiningGroup;
@@ -1569,7 +1672,13 @@ typedef enum UJoiningGroup {
* @stable ICU 3.4
*/
typedef enum UGraphemeClusterBreak {
- U_GCB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ /*
+ * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_GCB_<Unicode Grapheme_Cluster_Break value name>
+ */
+
+ U_GCB_OTHER = 0, /*[XX]*/
U_GCB_CONTROL = 1, /*[CN]*/
U_GCB_CR = 2, /*[CR]*/
U_GCB_EXTEND = 3, /*[EX]*/
@@ -1581,7 +1690,8 @@ typedef enum UGraphemeClusterBreak {
U_GCB_V = 9, /*[V]*/
U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
U_GCB_PREPEND = 11, /*[PP]*/
- U_GCB_COUNT = 12
+ U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_GCB_COUNT = 13
} UGraphemeClusterBreak;
/**
@@ -1592,7 +1702,13 @@ typedef enum UGraphemeClusterBreak {
* @stable ICU 3.4
*/
typedef enum UWordBreakValues {
- U_WB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ /*
+ * Note: UWordBreakValues constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_WB_<Unicode Word_Break value name>
+ */
+
+ U_WB_OTHER = 0, /*[XX]*/
U_WB_ALETTER = 1, /*[LE]*/
U_WB_FORMAT = 2, /*[FO]*/
U_WB_KATAKANA = 3, /*[KA]*/
@@ -1605,7 +1721,11 @@ typedef enum UWordBreakValues {
U_WB_LF = 10, /*[LF]*/
U_WB_MIDNUMLET =11, /*[MB]*/
U_WB_NEWLINE =12, /*[NL]*/
- U_WB_COUNT = 13
+ U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
+ U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
+ U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
+ U_WB_COUNT = 17
} UWordBreakValues;
/**
@@ -1615,7 +1735,13 @@ typedef enum UWordBreakValues {
* @stable ICU 3.4
*/
typedef enum USentenceBreak {
- U_SB_OTHER = 0, /*[XX]*/ /*See note !!*/
+ /*
+ * Note: USentenceBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_SB_<Unicode Sentence_Break value name>
+ */
+
+ U_SB_OTHER = 0, /*[XX]*/
U_SB_ATERM = 1, /*[AT]*/
U_SB_CLOSE = 2, /*[CL]*/
U_SB_FORMAT = 3, /*[FO]*/
@@ -1640,7 +1766,13 @@ typedef enum USentenceBreak {
* @stable ICU 2.2
*/
typedef enum ULineBreak {
- U_LB_UNKNOWN = 0, /*[XX]*/ /*See note !!*/
+ /*
+ * Note: ULineBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_LB_<Unicode Line_Break value name>
+ */
+
+ U_LB_UNKNOWN = 0, /*[XX]*/
U_LB_AMBIGUOUS = 1, /*[AI]*/
U_LB_ALPHABETIC = 2, /*[AL]*/
U_LB_BREAK_BOTH = 3, /*[B2]*/
@@ -1655,9 +1787,9 @@ typedef enum ULineBreak {
U_LB_GLUE = 12, /*[GL]*/
U_LB_HYPHEN = 13, /*[HY]*/
U_LB_IDEOGRAPHIC = 14, /*[ID]*/
- U_LB_INSEPERABLE = 15,
/** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
- U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
+ U_LB_INSEPARABLE = 15, /*[IN]*/
+ U_LB_INSEPERABLE = U_LB_INSEPARABLE,
U_LB_INFIX_NUMERIC = 16, /*[IS]*/
U_LB_LINE_FEED = 17, /*[LF]*/
U_LB_NONSTARTER = 18, /*[NS]*/
@@ -1679,7 +1811,10 @@ typedef enum ULineBreak {
U_LB_JT = 34, /*[JT]*/
U_LB_JV = 35, /*[JV]*/
U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
- U_LB_COUNT = 37
+ U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
+ U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
+ U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ U_LB_COUNT = 40
} ULineBreak;
/**
@@ -1689,7 +1824,13 @@ typedef enum ULineBreak {
* @stable ICU 2.2
*/
typedef enum UNumericType {
- U_NT_NONE, /*[None]*/ /*See note !!*/
+ /*
+ * Note: UNumericType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_NT_<Unicode Numeric_Type value name>
+ */
+
+ U_NT_NONE, /*[None]*/
U_NT_DECIMAL, /*[de]*/
U_NT_DIGIT, /*[di]*/
U_NT_NUMERIC, /*[nu]*/
@@ -1703,7 +1844,13 @@ typedef enum UNumericType {
* @stable ICU 2.6
*/
typedef enum UHangulSyllableType {
- U_HST_NOT_APPLICABLE, /*[NA]*/ /*See note !!*/
+ /*
+ * Note: UHangulSyllableType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_HST_<Unicode Hangul_Syllable_Type value name>
+ */
+
+ U_HST_NOT_APPLICABLE, /*[NA]*/
U_HST_LEADING_JAMO, /*[L]*/
U_HST_VOWEL_JAMO, /*[V]*/
U_HST_TRAILING_JAMO, /*[T]*/
@@ -1906,6 +2053,8 @@ u_getIntPropertyMaxValue(UProperty which);
*
* For characters without any numeric values in the Unicode Character Database,
* this function will return U_NO_NUMERIC_VALUE.
+ * Note: This is different from the Unicode Standard which specifies NaN as the default value.
+ * (NaN is not available on all platforms.)
*
* Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
* also supports negative values, large values, and fractions,
@@ -2379,7 +2528,7 @@ u_isMirrored(UChar32 c);
* as the mirror-image of the default glyph of the specified
* character. This is useful for text conversion to and from
* codepages with visual order, and for displays without glyph
- * selecetion capabilities.
+ * selection capabilities.
*
* @param c the code point to be mapped
* @return another Unicode code point that may serve as a mirror-image
@@ -2394,6 +2543,25 @@ U_STABLE UChar32 U_EXPORT2
u_charMirror(UChar32 c);
/**
+ * Maps the specified character to its paired bracket character.
+ * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
+ * Otherwise c itself is returned.
+ * See http://www.unicode.org/reports/tr9/
+ *
+ * @param c the code point to be mapped
+ * @return the paired bracket code point,
+ * or c itself if there is no such mapping
+ * (Bidi_Paired_Bracket_Type=None)
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @see u_charMirror
+ * @stable ICU 52
+ */
+U_STABLE UChar32 U_EXPORT2
+u_getBidiPairedBracket(UChar32 c);
+
+/**
* Returns the general category value for the code point.
*
* Same as java.lang.Character.getType().
@@ -2553,13 +2721,11 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
char *buffer, int32_t bufferLength,
UErrorCode *pErrorCode);
+#ifndef U_HIDE_DEPRECATED_API
/**
- * Get the ISO 10646 comment for a character.
- * The ISO 10646 comment is an informative field in the Unicode Character
- * Database (UnicodeData.txt field 11) and is from the ISO 10646 names list.
- *
- * Note: Unicode 5.2 removes all ISO comment data, resulting in empty strings
- * returned for all characters.
+ * Returns an empty string.
+ * Used to return the ISO 10646 comment for a character.
+ * The Unicode ISO_Comment property is deprecated and has no values.
*
* @param c The character (code point) for which to get the ISO comment.
* It must be <code>0<=c<=0x10ffff</code>.
@@ -2570,18 +2736,15 @@ u_charName(UChar32 code, UCharNameChoice nameChoice,
* @param pErrorCode Pointer to a UErrorCode variable;
* check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
* returns.
- * @return The length of the comment, or 0 if there is no comment for this character.
- * If the destCapacity is less than or equal to the length, then the buffer
- * contains the truncated name and the returned length indicates the full
- * length of the name.
- * The length does not include the zero-termination.
+ * @return 0
*
- * @stable ICU 2.2
+ * @deprecated ICU 49
*/
U_STABLE int32_t U_EXPORT2
u_getISOComment(UChar32 c,
char *dest, int32_t destCapacity,
UErrorCode *pErrorCode);
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Find a Unicode character by its name and return its code point value.
diff --git a/Source/WebCore/icu/unicode/ucnv.h b/Source/WebCore/icu/unicode/ucnv.h
index 98da8ff69..c5fc2dc78 100644
--- a/Source/WebCore/icu/unicode/ucnv.h
+++ b/Source/WebCore/icu/unicode/ucnv.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 1999-2010, International Business Machines
+* Copyright (C) 1999-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* ucnv.h:
@@ -88,46 +88,82 @@ U_CDECL_BEGIN
* @stable ICU 2.0
*/
typedef enum {
+ /** @stable ICU 2.0 */
UCNV_UNSUPPORTED_CONVERTER = -1,
+ /** @stable ICU 2.0 */
UCNV_SBCS = 0,
+ /** @stable ICU 2.0 */
UCNV_DBCS = 1,
+ /** @stable ICU 2.0 */
UCNV_MBCS = 2,
+ /** @stable ICU 2.0 */
UCNV_LATIN_1 = 3,
+ /** @stable ICU 2.0 */
UCNV_UTF8 = 4,
+ /** @stable ICU 2.0 */
UCNV_UTF16_BigEndian = 5,
+ /** @stable ICU 2.0 */
UCNV_UTF16_LittleEndian = 6,
+ /** @stable ICU 2.0 */
UCNV_UTF32_BigEndian = 7,
+ /** @stable ICU 2.0 */
UCNV_UTF32_LittleEndian = 8,
+ /** @stable ICU 2.0 */
UCNV_EBCDIC_STATEFUL = 9,
+ /** @stable ICU 2.0 */
UCNV_ISO_2022 = 10,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_1 = 11,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_2,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_3,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_4,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_5,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_6,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_8,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_11,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_16,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_17,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_18,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_19,
+ /** @stable ICU 2.0 */
UCNV_LMBCS_LAST = UCNV_LMBCS_19,
+ /** @stable ICU 2.0 */
UCNV_HZ,
+ /** @stable ICU 2.0 */
UCNV_SCSU,
+ /** @stable ICU 2.0 */
UCNV_ISCII,
+ /** @stable ICU 2.0 */
UCNV_US_ASCII,
+ /** @stable ICU 2.0 */
UCNV_UTF7,
+ /** @stable ICU 2.2 */
UCNV_BOCU1,
+ /** @stable ICU 2.2 */
UCNV_UTF16,
+ /** @stable ICU 2.2 */
UCNV_UTF32,
+ /** @stable ICU 2.2 */
UCNV_CESU8,
+ /** @stable ICU 2.4 */
UCNV_IMAP_MAILBOX,
+ /** @stable ICU 4.8 */
+ UCNV_COMPOUND_TEXT,
/* Number of converter types for which we have conversion routines. */
UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
-
} UConverterType;
/**
@@ -305,6 +341,8 @@ ucnv_compareNames(const char *name1, const char *name2);
* other than its an alias starting with the letters "cp". Please do not
* associate any meaning to these aliases.</p>
*
+ * \snippet samples/ucnv/convsamp.cpp ucnv_open
+ *
* @param converterName Name of the coded character set table.
* This may have options appended to the string.
* IANA alias character set names, IBM CCSIDs starting with "ibm-",
@@ -483,10 +521,12 @@ ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode
* adjusted pointer and use an accordingly smaller buffer size.
*
* @param cnv converter to be cloned
- * @param stackBuffer user allocated space for the new clone. If NULL new memory will be allocated.
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone. If NULL new memory will be allocated.
* If buffer is not large enough, new memory will be allocated.
* Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
- * @param pBufferSize pointer to size of allocated space. pBufferSize must not be NULL.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
* @param status to indicate whether the operation went on smoothly or there were errors
* An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
* is used if any allocations were necessary.
@@ -502,14 +542,18 @@ ucnv_safeClone(const UConverter *cnv,
int32_t *pBufferSize,
UErrorCode *status);
+#ifndef U_HIDE_DEPRECATED_API
+
/**
* \def U_CNV_SAFECLONE_BUFFERSIZE
* Definition of a buffer size that is designed to be large enough for
* converters to be cloned with ucnv_safeClone().
- * @stable ICU 2.0
+ * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer.
*/
#define U_CNV_SAFECLONE_BUFFERSIZE 1024
+#endif /* U_HIDE_DEPRECATED_API */
+
/**
* Deletes the unicode converter and releases resources associated
* with just this instance.
@@ -1821,6 +1865,7 @@ ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErro
U_STABLE const char * U_EXPORT2
ucnv_getDefaultName(void);
+#ifndef U_HIDE_SYSTEM_API
/**
* This function is not thread safe. DO NOT call this function when ANY ICU
* function is being used from more than one thread! This function sets the
@@ -1839,6 +1884,7 @@ ucnv_getDefaultName(void);
*/
U_STABLE void U_EXPORT2
ucnv_setDefaultName(const char *name);
+#endif /* U_HIDE_SYSTEM_API */
/**
* Fixes the backslash character mismapping. For example, in SJIS, the backslash
@@ -1918,34 +1964,7 @@ ucnv_usesFallback(const UConverter *cnv);
* instead of the input signature bytes.
* <p>
* Usage:
- * @code
- * UErrorCode err = U_ZERO_ERROR;
- * char input[] = { '\xEF','\xBB', '\xBF','\x41','\x42','\x43' };
- * int32_t signatureLength = 0;
- * char *encoding = ucnv_detectUnicodeSignature(input,sizeof(input),&signatureLength,&err);
- * UConverter *conv = NULL;
- * UChar output[100];
- * UChar *target = output, *out;
- * char *source = input;
- * if(encoding!=NULL && U_SUCCESS(err)){
- * // should signature be discarded ?
- * conv = ucnv_open(encoding, &err);
- * // do the conversion
- * ucnv_toUnicode(conv,
- * target, output + sizeof(output)/U_SIZEOF_UCHAR,
- * source, input + sizeof(input),
- * NULL, TRUE, &err);
- * out = output;
- * if (discardSignature){
- * ++out; // ignore initial U+FEFF
- * }
- * while(out != target) {
- * printf("%04x ", *out++);
- * }
- * puts("");
- * }
- *
- * @endcode
+ * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature
*
* @param source The source string in which the signature should be detected.
* @param sourceLength Length of the input string, or -1 if terminated with a NUL byte.
@@ -1991,6 +2010,24 @@ ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
U_STABLE int32_t U_EXPORT2
ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
+/**
+ * Returns whether or not the charset of the converter has a fixed number of bytes
+ * per charset character.
+ * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS.
+ * Another example is UTF-32 which is always 4 bytes per character.
+ * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit
+ * but a UTF-32 converter encodes each code point with 4 bytes.
+ * Note: This method is not intended to be used to determine whether the charset has a
+ * fixed ratio of bytes to Unicode code <i>units</i> for any particular Unicode encoding form.
+ * FALSE is returned with the UErrorCode if error occurs or cnv is NULL.
+ * @param cnv The converter to be tested
+ * @param status ICU error code in/out parameter
+ * @return TRUE if the converter is fixed-width
+ * @stable ICU 4.8
+ */
+U_STABLE UBool U_EXPORT2
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status);
+
#endif
#endif
diff --git a/Source/WebCore/icu/unicode/ucol.h b/Source/WebCore/icu/unicode/ucol.h
index 4a4cd606e..5a459b52a 100644
--- a/Source/WebCore/icu/unicode/ucol.h
+++ b/Source/WebCore/icu/unicode/ucol.h
@@ -1,6 +1,6 @@
/*
*******************************************************************************
-* Copyright (c) 1996-2010, International Business Machines Corporation and others.
+* Copyright (c) 1996-2013, International Business Machines Corporation and others.
* All Rights Reserved.
*******************************************************************************
*/
@@ -17,6 +17,7 @@
#include "unicode/parseerr.h"
#include "unicode/uloc.h"
#include "unicode/uset.h"
+#include "unicode/uscript.h"
/**
* \file
@@ -63,12 +64,12 @@ typedef struct UCollator UCollator;
/**
* UCOL_LESS is returned if source string is compared to be less than target
- * string in the u_strcoll() method.
+ * string in the ucol_strcoll() method.
* UCOL_EQUAL is returned if source string is compared to be equal to target
- * string in the u_strcoll() method.
+ * string in the ucol_strcoll() method.
* UCOL_GREATER is returned if source string is compared to be greater than
- * target string in the u_strcoll() method.
- * @see u_strcoll()
+ * target string in the ucol_strcoll() method.
+ * @see ucol_strcoll()
* <p>
* Possible values for a comparison result
* @stable ICU 2.0
@@ -132,18 +133,76 @@ typedef enum {
} UColAttributeValue;
-/** Enum containing the codes for reordering segments of the collation table that are not script
- * codes. These reordering codes are to be used in conjunction with the script codes.
- * @internal
+/**
+ * Enum containing the codes for reordering segments of the collation table that are not script
+ * codes. These reordering codes are to be used in conjunction with the script codes.
+ * @see ucol_getReorderCodes
+ * @see ucol_setReorderCodes
+ * @see ucol_getEquivalentReorderCodes
+ * @see UScriptCode
+ * @stable ICU 4.8
*/
-typedef enum {
- UCOL_REORDER_CODE_SPACE = 0x1000,
- UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE,
- UCOL_REORDER_CODE_PUNCTUATION = 0x1001,
- UCOL_REORDER_CODE_SYMBOL = 0x1002,
- UCOL_REORDER_CODE_CURRENCY = 0x1003,
- UCOL_REORDER_CODE_DIGIT = 0x1004,
- UCOL_REORDER_CODE_LIMIT = 0x1005
+ typedef enum {
+ /**
+ * A special reordering code that is used to specify the default
+ * reordering codes for a locale.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_DEFAULT = -1,
+ /**
+ * A special reordering code that is used to specify no reordering codes.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN,
+ /**
+ * A special reordering code that is used to specify all other codes used for
+ * reordering except for the codes listed as UColReorderCode values and those
+ * listed explicitly in a reordering.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN,
+ /**
+ * Characters with the space property.
+ * This is equivalent to the rule value "space".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_SPACE = 0x1000,
+ /**
+ * The first entry in the enumeration of reordering groups. This is intended for use in
+ * range checking and enumeration of the reorder codes.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE,
+ /**
+ * Characters with the punctuation property.
+ * This is equivalent to the rule value "punct".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_PUNCTUATION = 0x1001,
+ /**
+ * Characters with the symbol property.
+ * This is equivalent to the rule value "symbol".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_SYMBOL = 0x1002,
+ /**
+ * Characters with the currency property.
+ * This is equivalent to the rule value "currency".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_CURRENCY = 0x1003,
+ /**
+ * Characters with the digit property.
+ * This is equivalent to the rule value "digit".
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_DIGIT = 0x1004,
+ /**
+ * The limit of the reorder codes. This is intended for use in range checking
+ * and enumeration of the reorder codes.
+ * @stable ICU 4.8
+ */
+ UCOL_REORDER_CODE_LIMIT = 0x1005
} UColReorderCode;
/**
@@ -179,10 +238,12 @@ typedef UColAttributeValue UCollationStrength;
* @stable ICU 2.0
*/
typedef enum {
- /** Attribute for direction of secondary weights - used in French.
+ /** Attribute for direction of secondary weights - used in Canadian French.
* Acceptable values are UCOL_ON, which results in secondary weights
* being considered backwards and UCOL_OFF which treats secondary
- * weights in the order they appear.*/
+ * weights in the order they appear.
+ * @stable ICU 2.0
+ */
UCOL_FRENCH_COLLATION,
/** Attribute for handling variable elements.
* Acceptable values are UCOL_NON_IGNORABLE (default)
@@ -191,14 +252,18 @@ typedef enum {
* and UCOL_SHIFTED which causes codepoints with primary
* weights that are equal or below the variable top value
* to be ignored on primary level and moved to the quaternary
- * level.*/
+ * level.
+ * @stable ICU 2.0
+ */
UCOL_ALTERNATE_HANDLING,
/** Controls the ordering of upper and lower case letters.
* Acceptable values are UCOL_OFF (default), which orders
* upper and lower case letters in accordance to their tertiary
* weights, UCOL_UPPER_FIRST which forces upper case letters to
* sort before lower case letters, and UCOL_LOWER_FIRST which does
- * the opposite. */
+ * the opposite.
+ * @stable ICU 2.0
+ */
UCOL_CASE_FIRST,
/** Controls whether an extra case level (positioned before the third
* level) is generated or not. Acceptable values are UCOL_OFF (default),
@@ -206,7 +271,9 @@ typedef enum {
* level to be generated. Contents of the case level are affected by
* the value of UCOL_CASE_FIRST attribute. A simple way to ignore
* accent differences in a string is to set the strength to UCOL_PRIMARY
- * and enable case level. */
+ * and enable case level.
+ * @stable ICU 2.0
+ */
UCOL_CASE_LEVEL,
/** Controls whether the normalization check and necessary normalizations
* are performed. When set to UCOL_OFF (default) no normalization check
@@ -214,9 +281,13 @@ typedef enum {
* input data is in so-called FCD form (see users manual for more info).
* When set to UCOL_ON, an incremental check is performed to see whether
* the input data is in the FCD form. If the data is not in the FCD form,
- * incremental NFD normalization is performed. */
+ * incremental NFD normalization is performed.
+ * @stable ICU 2.0
+ */
UCOL_NORMALIZATION_MODE,
- /** An alias for UCOL_NORMALIZATION_MODE attribute */
+ /** An alias for UCOL_NORMALIZATION_MODE attribute.
+ * @stable ICU 2.0
+ */
UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
/** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
* UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
@@ -227,20 +298,37 @@ typedef enum {
* UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level
* is affected only by the number of non ignorable code points in
* the string. Identical strength is rarely useful, as it amounts
- * to codepoints of the NFD form of the string. */
+ * to codepoints of the NFD form of the string.
+ * @stable ICU 2.0
+ */
UCOL_STRENGTH,
+#ifndef U_HIDE_DEPRECATED_API
/** When turned on, this attribute positions Hiragana before all
* non-ignorables on quaternary level This is a sneaky way to produce JIS
- * sort order */
- UCOL_HIRAGANA_QUATERNARY_MODE,
+ * sort order.
+ *
+ * This attribute is an implementation detail of the CLDR Japanese tailoring.
+ * The implementation might change to use a different mechanism
+ * to achieve the same Japanese sort order.
+ * Since ICU 50, this attribute is not settable any more via API functions.
+ * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation.
+ */
+ UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
+#endif /* U_HIDE_DEPRECATED_API */
/** When turned on, this attribute generates a collation key
* for the numeric value of substrings of digits.
* This is a way to get '100' to sort AFTER '2'. Note that the longest
* digit substring that can be treated as a single collation element is
* 254 digits (not counting leading zeros). If a digit substring is
* longer than that, the digits beyond the limit will be treated as a
- * separate digit substring associated with a separate collation element. */
- UCOL_NUMERIC_COLLATION,
+ * separate digit substring associated with a separate collation element.
+ * @stable ICU 2.8
+ */
+ UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,
+ /**
+ * The number of UColAttribute constants.
+ * @stable ICU 2.0
+ */
UCOL_ATTRIBUTE_COUNT
} UColAttribute;
@@ -248,9 +336,19 @@ typedef enum {
* @stable ICU 2.0
*/
typedef enum {
- /** Retrieve tailoring only */
+ /**
+ * Retrieves the tailoring rules only.
+ * Same as calling the version of getRules() without UColRuleOption.
+ * @stable ICU 2.0
+ */
UCOL_TAILORING_ONLY,
- /** Retrieve UCA rules and tailoring */
+ /**
+ * Retrieves the "UCA rules" concatenated with the tailoring rules.
+ * The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
+ * They are almost never used or useful at runtime and can be removed from the data.
+ * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
+ * @stable ICU 2.0
+ */
UCOL_FULL_RULES
} UColRuleOption ;
@@ -347,6 +445,7 @@ ucol_openFromShortString( const char *definition,
UParseError *parseError,
UErrorCode *status);
+#ifndef U_HIDE_DEPRECATED_API
/**
* Get a set containing the contractions defined by the collator. The set includes
* both the UCA contractions and the contractions defined by the collator. This set
@@ -364,6 +463,7 @@ U_DEPRECATED int32_t U_EXPORT2
ucol_getContractions( const UCollator *coll,
USet *conts,
UErrorCode *status);
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Get a set containing the expansions defined by the collator. The set includes
@@ -435,6 +535,33 @@ ucol_strcoll( const UCollator *coll,
const UChar *target,
int32_t targetLength);
+/**
+* Compare two strings in UTF-8.
+* The strings will be compared using the options already specified.
+* Note: When the input string contains a malformed UTF-8 byte sequence,
+* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
+* @param coll The UCollator containing the comparison rules.
+* @param source The source UTF-8 string.
+* @param sourceLength The length of source, or -1 if null-terminated.
+* @param target The target UTF-8 string.
+* @param targetLength The length of target, or -1 if null-terminated.
+* @param status A pointer to an UErrorCode to receive any errors
+* @return The result of comparing the strings; one of UCOL_EQUAL,
+* UCOL_GREATER, UCOL_LESS
+* @see ucol_greater
+* @see ucol_greaterOrEqual
+* @see ucol_equal
+* @stable ICU 50
+*/
+U_STABLE UCollationResult U_EXPORT2
+ucol_strcollUTF8(
+ const UCollator *coll,
+ const char *source,
+ int32_t sourceLength,
+ const char *target,
+ int32_t targetLength,
+ UErrorCode *status);
+
/**
* Determine if one string is greater than another.
* This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
@@ -536,37 +663,90 @@ ucol_setStrength(UCollator *coll,
UCollationStrength strength);
/**
- * Get the current reordering of scripts (if one has been set).
+ * Retrieves the reordering codes for this collator.
+ * These reordering codes are a combination of UScript codes and UColReorderCode entries.
* @param coll The UCollator to query.
* @param dest The array to fill with the script ordering.
- * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function will only return the length of the result without writing any of the result string (pre-flighting).
- * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
- * @return The length of the array of the script ordering.
+ * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
+ * will only return the length of the result without writing any of the result string (pre-flighting).
+ * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
+ * failure before the function call.
+ * @return The number of reordering codes written to the dest array.
* @see ucol_setReorderCodes
- * @internal
+ * @see ucol_getEquivalentReorderCodes
+ * @see UScriptCode
+ * @see UColReorderCode
+ * @stable ICU 4.8
*/
-U_INTERNAL int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
ucol_getReorderCodes(const UCollator* coll,
int32_t* dest,
int32_t destCapacity,
UErrorCode *pErrorCode);
-
-/**
- * Set the ordering of scripts for this collator.
+/**
+ * Sets the reordering codes for this collator.
+ * Collation reordering allows scripts and some other defined blocks of characters
+ * to be moved relative to each other as a block. This reordering is done on top of
+ * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
+ * at the start and/or the end of the collation order. These groups are specified using
+ * UScript codes and UColReorderCode entries.
+ * <p>By default, reordering codes specified for the start of the order are placed in the
+ * order given after a group of "special" non-script blocks. These special groups of characters
+ * are space, punctuation, symbol, currency, and digit. These special groups are represented with
+ * UColReorderCode entries. Script groups can be intermingled with
+ * these special non-script blocks if those special blocks are explicitly specified in the reordering.
+ * <p>The special code OTHERS stands for any script that is not explicitly
+ * mentioned in the list of reordering codes given. Anything that is after OTHERS
+ * will go at the very end of the reordering in the order given.
+ * <p>The special reorder code DEFAULT will reset the reordering for this collator
+ * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
+ * was specified when this collator was created from resource data or from rules. The
+ * DEFAULT code <b>must</b> be the sole code supplied when it is used. If not
+ * that will result in an U_ILLEGAL_ARGUMENT_ERROR being set.
+ * <p>The special reorder code NONE will remove any reordering for this collator.
+ * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
+ * NONE code <b>must</b> be the sole code supplied when it is used.
* @param coll The UCollator to set.
- * @param reorderCodes An array of script codes in the new order.
+ * @param reorderCodes An array of script codes in the new order. This can be NULL if the
+ * length is also set to 0. An empty array will clear any reordering codes on the collator.
* @param reorderCodesLength The length of reorderCodes.
- * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a failure before the function call.
+ * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
+ * failure before the function call.
* @see ucol_getReorderCodes
- * @internal
- */
-U_INTERNAL void U_EXPORT2
+ * @see ucol_getEquivalentReorderCodes
+ * @see UScriptCode
+ * @see UColReorderCode
+ * @stable ICU 4.8
+ */
+U_STABLE void U_EXPORT2
ucol_setReorderCodes(UCollator* coll,
const int32_t* reorderCodes,
int32_t reorderCodesLength,
UErrorCode *pErrorCode);
/**
+ * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
+ * codes will be grouped and must reorder together.
+ * @param reorderCode The reorder code to determine equivalence for.
+ * @param dest The array to fill with the script ordering.
+ * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
+ * will only return the length of the result without writing any of the result string (pre-flighting).
+ * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate
+ * a failure before the function call.
+ * @return The number of reordering codes written to the dest array.
+ * @see ucol_setReorderCodes
+ * @see ucol_getReorderCodes
+ * @see UScriptCode
+ * @see UColReorderCode
+ * @stable ICU 4.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getEquivalentReorderCodes(int32_t reorderCode,
+ int32_t* dest,
+ int32_t destCapacity,
+ UErrorCode *pErrorCode);
+
+/**
* Get the display name for a UCollator.
* The display name is suitable for presentation to a user.
* @param objLoc The locale of the collator in question.
@@ -705,11 +885,11 @@ ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
UBool* isAvailable, UErrorCode* status);
/**
- * Get the collation rules from a UCollator.
+ * Get the collation tailoring rules from a UCollator.
* The rules will follow the rule syntax.
* @param coll The UCollator to query.
* @param length
- * @return The collation rules.
+ * @return The collation tailoring rules.
* @stable ICU 2.0
*/
U_STABLE const UChar* U_EXPORT2
@@ -802,10 +982,10 @@ ucol_getSortKey(const UCollator *coll,
* to preserve state array between calls and to provide
* the same type of UCharIterator set with the same string.
* The destination buffer provided must be big enough to store
- * the number of requested bytes. Generated sortkey is not
- * compatible with sortkeys generated using ucol_getSortKey
- * API, since we don't do any compression. If uncompressed
- * sortkeys are required, this API can be used.
+ * the number of requested bytes.
+ *
+ * The generated sort key may or may not be compatible with
+ * sort keys generated using ucol_getSortKey().
* @param coll The UCollator containing the collation rules.
* @param iter UCharIterator containing the string we need
* the sort key to be calculated for.
@@ -909,26 +1089,40 @@ ucol_getVersion(const UCollator* coll, UVersionInfo info);
U_STABLE void U_EXPORT2
ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
-/**
- * Merge two sort keys. The levels are merged with their corresponding counterparts
+/**
+ * Merges two sort keys. The levels are merged with their corresponding counterparts
* (primaries with primaries, secondaries with secondaries etc.). Between the values
* from the same level a separator is inserted.
- * example (uncompressed):
- * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
+ *
+ * This is useful, for example, for combining sort keys from first and last names
+ * to sort such pairs.
+ * It is possible to merge multiple sort keys by consecutively merging
+ * another one with the intermediate result.
+ *
+ * The length of the merge result is the sum of the lengths of the input sort keys.
+ *
+ * Example (uncompressed):
+ * <pre>191B1D 01 050505 01 910505 00
+ * 1F2123 01 050505 01 910505 00</pre>
* will be merged as
- * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
- * This allows for concatenating of first and last names for sorting, among other things.
- * If the destination buffer is not big enough, the results are undefined.
- * If any of source lengths are zero or any of source pointers are NULL/undefined,
- * result is of size zero.
- * @param src1 pointer to the first sortkey
- * @param src1Length length of the first sortkey
- * @param src2 pointer to the second sortkey
- * @param src2Length length of the second sortkey
- * @param dest buffer to hold the result
- * @param destCapacity size of the buffer for the result
- * @return size of the result. If the buffer is big enough size is always
- * src1Length+src2Length-1
+ * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
+ *
+ * If the destination buffer is not big enough, then its contents are undefined.
+ * If any of source lengths are zero or any of the source pointers are NULL/undefined,
+ * the result is of size zero.
+ *
+ * @param src1 the first sort key
+ * @param src1Length the length of the first sort key, including the zero byte at the end;
+ * can be -1 if the function is to find the length
+ * @param src2 the second sort key
+ * @param src2Length the length of the second sort key, including the zero byte at the end;
+ * can be -1 if the function is to find the length
+ * @param dest the buffer where the merged sort key is written,
+ * can be NULL if destCapacity==0
+ * @param destCapacity the number of bytes in the dest buffer
+ * @return the length of the merged sort key, src1Length+src2Length;
+ * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
+ * in which cases the contents of dest is undefined
* @stable ICU 2.0
*/
U_STABLE int32_t U_EXPORT2
@@ -1018,12 +1212,13 @@ ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *stat
/**
* Thread safe cloning operation. The result is a clone of a given collator.
* @param coll collator to be cloned
- * @param stackBuffer user allocated space for the new clone.
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone.
* If NULL new memory will be allocated.
* If buffer is not large enough, new memory will be allocated.
- * Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
- * This will probably be enough to avoid memory allocations.
- * @param pBufferSize pointer to size of allocated space.
+ * Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
* If *pBufferSize == 0, a sufficient size for use in cloning will
* be returned ('pre-flighting')
* If *pBufferSize is not enough for a stack-based safe clone,
@@ -1043,25 +1238,34 @@ ucol_safeClone(const UCollator *coll,
int32_t *pBufferSize,
UErrorCode *status);
-/** default memory size for the new clone. It needs to be this large for os/400 large pointers
- * @stable ICU 2.0
+#ifndef U_HIDE_DEPRECATED_API
+
+/** default memory size for the new clone.
+ * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
*/
-#define U_COL_SAFECLONE_BUFFERSIZE 512
+#define U_COL_SAFECLONE_BUFFERSIZE 1
+
+#endif /* U_HIDE_DEPRECATED_API */
/**
* Returns current rules. Delta defines whether full rules are returned or just the tailoring.
* Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough
* to store rules, will store up to available space.
+ *
+ * ucol_getRules() should normally be used instead.
+ * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
* @param coll collator to get the rules from
* @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
* @param buffer buffer to store the result in. If NULL, you'll get no rules.
- * @param bufferLen lenght of buffer to store rules in. If less then needed you'll get only the part that fits in.
+ * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
* @return current rules
* @stable ICU 2.0
+ * @see UCOL_FULL_RULES
*/
U_STABLE int32_t U_EXPORT2
ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
+#ifndef U_HIDE_DEPRECATED_API
/**
* gets the locale name of the collator. If the collator
* is instantiated from the rules, then this function returns
@@ -1078,7 +1282,7 @@ ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int3
*/
U_DEPRECATED const char * U_EXPORT2
ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
-
+#endif /* U_HIDE_DEPRECATED_API */
/**
* gets the locale name of the collator. If the collator
@@ -1110,6 +1314,7 @@ ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode
U_STABLE USet * U_EXPORT2
ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
+#ifndef U_HIDE_INTERNAL_API
/**
* Universal attribute getter that returns UCOL_DEFAULT if the value is default
* @param coll collator which attributes are to be changed
@@ -1182,6 +1387,7 @@ ucol_prepareShortStringOpen( const char *definition,
UBool forceDefaults,
UParseError *parseError,
UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
/** Creates a binary image of a collator. This binary image can be stored and
* later used to instantiate a collator using ucol_openBinary.
diff --git a/Source/WebCore/icu/unicode/ucoleitr.h b/Source/WebCore/icu/unicode/ucoleitr.h
new file mode 100644
index 000000000..cf730f95b
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucoleitr.h
@@ -0,0 +1,336 @@
+/*
+*******************************************************************************
+* Copyright (C) 2001-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File ucoleitr.h
+*
+* Modification History:
+*
+* Date Name Description
+* 02/15/2001 synwee Modified all methods to process its own function
+* instead of calling the equivalent c++ api (coleitr.h)
+*******************************************************************************/
+
+#ifndef UCOLEITR_H
+#define UCOLEITR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+/**
+ * This indicates an error has occurred during processing or that there are no more CEs
+ * to be returned.
+ * @stable ICU 2.0
+ */
+#define UCOL_NULLORDER ((int32_t)0xFFFFFFFF)
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * This indicates an error has occurred during processing or there are no more CEs
+ * to be returned.
+ *
+ * @internal
+ */
+#define UCOL_PROCESSED_NULLORDER ((int64_t)U_INT64_MAX)
+#endif /* U_HIDE_INTERNAL_API */
+
+#include "unicode/ucol.h"
+
+/**
+ * The UCollationElements struct.
+ * For usage in C programs.
+ * @stable ICU 2.0
+ */
+typedef struct UCollationElements UCollationElements;
+
+/**
+ * \file
+ * \brief C API: UCollationElements
+ *
+ * The UCollationElements API is used as an iterator to walk through each
+ * character of an international string. Use the iterator to return the
+ * ordering priority of the positioned character. The ordering priority of a
+ * character, which we refer to as a key, defines how a character is collated
+ * in the given collation object.
+ * For example, consider the following in Spanish:
+ * <pre>
+ * . "ca" -> the first key is key('c') and second key is key('a').
+ * . "cha" -> the first key is key('ch') and second key is key('a').
+ * </pre>
+ * And in German,
+ * <pre>
+ * . "<ae ligature>b"-> the first key is key('a'), the second key is key('e'), and
+ * . the third key is key('b').
+ * </pre>
+ * <p>Example of the iterator usage: (without error checking)
+ * <pre>
+ * . void CollationElementIterator_Example()
+ * . {
+ * . UChar *s;
+ * . int32_t order, primaryOrder;
+ * . UCollationElements *c;
+ * . UCollator *coll;
+ * . UErrorCode success = U_ZERO_ERROR;
+ * . s=(UChar*)malloc(sizeof(UChar) * (strlen("This is a test")+1) );
+ * . u_uastrcpy(s, "This is a test");
+ * . coll = ucol_open(NULL, &success);
+ * . c = ucol_openElements(coll, s, u_strlen(s), &success);
+ * . order = ucol_next(c, &success);
+ * . ucol_reset(c);
+ * . order = ucol_previous(c, &success);
+ * . ucol_closeElements(c);
+ * . ucol_close(coll);
+ * . free(s);
+ * . }
+ * </pre>
+ * <p>
+ * ucol_next() returns the collation order of the next character.
+ * ucol_previous() returns the collation order of the previous character.
+ * The Collation Element Iterator moves only in one direction between calls to
+ * ucol_reset(). That is, ucol_next() and ucol_previous() cannot be interleaved.
+ * Whenever ucol_previous() is to be called after ucol_next() or vice versa,
+ * ucol_reset() has to be called first to reset the status, shifting pointers to
+ * either the end or the start of the string. Hence at the next call of
+ * ucol_previous() or ucol_next(), the first or last collation order will be returned.
+ * If a change of direction is done without a ucol_reset(), the result is
+ * undefined.
+ * The result of a forward iterate (ucol_next) and reversed result of the
+ * backward iterate (ucol_previous) on the same string are equivalent, if
+ * collation orders with the value UCOL_IGNORABLE are ignored.
+ * Character based on the comparison level of the collator. A collation order
+ * consists of primary order, secondary order and tertiary order. The data
+ * type of the collation order is <strong>int32_t</strong>.
+ *
+ * @see UCollator
+ */
+
+/**
+ * Open the collation elements for a string.
+ *
+ * @param coll The collator containing the desired collation rules.
+ * @param text The text to iterate over.
+ * @param textLength The number of characters in text, or -1 if null-terminated
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return a struct containing collation element information
+ * @stable ICU 2.0
+ */
+U_STABLE UCollationElements* U_EXPORT2
+ucol_openElements(const UCollator *coll,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+
+/**
+ * get a hash code for a key... Not very useful!
+ * @param key the given key.
+ * @param length the size of the key array.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_keyHashCode(const uint8_t* key, int32_t length);
+
+/**
+ * Close a UCollationElements.
+ * Once closed, a UCollationElements may no longer be used.
+ * @param elems The UCollationElements to close.
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_closeElements(UCollationElements *elems);
+
+/**
+ * Reset the collation elements to their initial state.
+ * This will move the 'cursor' to the beginning of the text.
+ * Property settings for collation will be reset to the current status.
+ * @param elems The UCollationElements to reset.
+ * @see ucol_next
+ * @see ucol_previous
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_reset(UCollationElements *elems);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Set the collation elements to use implicit ordering for Han
+ * even if they've been tailored. This will also force Hangul
+ * syllables to be ordered by decomposing them to their component
+ * Jamo.
+ *
+ * @param elems The UCollationElements containing the text.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ *
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2
+ucol_forceHanImplicit(UCollationElements *elems, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * Get the ordering priority of the next collation element in the text.
+ * A single character may contain more than one collation element.
+ * @param elems The UCollationElements containing the text.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The next collation elements ordering, otherwise returns UCOL_NULLORDER
+ * if an error has occurred or if the end of string has been reached
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_next(UCollationElements *elems, UErrorCode *status);
+
+/**
+ * Get the ordering priority of the previous collation element in the text.
+ * A single character may contain more than one collation element.
+ * Note that internally a stack is used to store buffered collation elements.
+ * It is very rare that the stack will overflow, however if such a case is
+ * encountered, the problem can be solved by increasing the size
+ * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
+ * @param elems The UCollationElements containing the text.
+ * @param status A pointer to an UErrorCode to receive any errors. Notably
+ * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
+ * buffer has been exhausted.
+ * @return The previous collation elements ordering, otherwise returns
+ * UCOL_NULLORDER if an error has occurred or if the start of string has
+ * been reached.
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_previous(UCollationElements *elems, UErrorCode *status);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Get the processed ordering priority of the next collation element in the text.
+ * A single character may contain more than one collation element.
+ *
+ * @param elems The UCollationElements containing the text.
+ * @param ixLow a pointer to an int32_t to receive the iterator index before fetching the CE.
+ * @param ixHigh a pointer to an int32_t to receive the iterator index after fetching the CE.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @return The next collation elements ordering, otherwise returns UCOL_PROCESSED_NULLORDER
+ * if an error has occurred or if the end of string has been reached
+ *
+ * @internal
+ */
+U_INTERNAL int64_t U_EXPORT2
+ucol_nextProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
+
+/**
+ * Get the processed ordering priority of the previous collation element in the text.
+ * A single character may contain more than one collation element.
+ * Note that internally a stack is used to store buffered collation elements.
+ * It is very rare that the stack will overflow, however if such a case is
+ * encountered, the problem can be solved by increasing the size
+ * UCOL_EXPAND_CE_BUFFER_SIZE in ucol_imp.h.
+ *
+ * @param elems The UCollationElements containing the text.
+ * @param ixLow A pointer to an int32_t to receive the iterator index after fetching the CE
+ * @param ixHigh A pointer to an int32_t to receive the iterator index before fetching the CE
+ * @param status A pointer to an UErrorCode to receive any errors. Notably
+ * a U_BUFFER_OVERFLOW_ERROR is returned if the internal stack
+ * buffer has been exhausted.
+ * @return The previous collation elements ordering, otherwise returns
+ * UCOL_PROCESSED_NULLORDER if an error has occurred or if the start of
+ * string has been reached.
+ *
+ * @internal
+ */
+U_INTERNAL int64_t U_EXPORT2
+ucol_previousProcessed(UCollationElements *elems, int32_t *ixLow, int32_t *ixHigh, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * Get the maximum length of any expansion sequences that end with the
+ * specified comparison order.
+ * This is useful for .... ?
+ * @param elems The UCollationElements containing the text.
+ * @param order A collation order returned by previous or next.
+ * @return maximum size of the expansion sequences ending with the collation
+ * element or 1 if collation element does not occur at the end of any
+ * expansion sequence
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getMaxExpansion(const UCollationElements *elems, int32_t order);
+
+/**
+ * Set the text containing the collation elements.
+ * Property settings for collation will remain the same.
+ * In order to reset the iterator to the current collation property settings,
+ * the API ucol_reset() has to be called.
+ * @param elems The UCollationElements to set.
+ * @param text The source text containing the collation elements.
+ * @param textLength The length of text, or -1 if null-terminated.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @see ucol_getText
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_setText( UCollationElements *elems,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+/**
+ * Get the offset of the current source character.
+ * This is an offset into the text of the character containing the current
+ * collation elements.
+ * @param elems The UCollationElements to query.
+ * @return The offset of the current source character.
+ * @see ucol_setOffset
+ * @stable ICU 2.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucol_getOffset(const UCollationElements *elems);
+
+/**
+ * Set the offset of the current source character.
+ * This is an offset into the text of the character to be processed.
+ * Property settings for collation will remain the same.
+ * In order to reset the iterator to the current collation property settings,
+ * the API ucol_reset() has to be called.
+ * @param elems The UCollationElements to set.
+ * @param offset The desired character offset.
+ * @param status A pointer to an UErrorCode to receive any errors.
+ * @see ucol_getOffset
+ * @stable ICU 2.0
+ */
+U_STABLE void U_EXPORT2
+ucol_setOffset(UCollationElements *elems,
+ int32_t offset,
+ UErrorCode *status);
+
+/**
+* Get the primary order of a collation order.
+* @param order the collation order
+* @return the primary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_primaryOrder (int32_t order);
+
+/**
+* Get the secondary order of a collation order.
+* @param order the collation order
+* @return the secondary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_secondaryOrder (int32_t order);
+
+/**
+* Get the tertiary order of a collation order.
+* @param order the collation order
+* @return the tertiary order of a collation order.
+* @stable ICU 2.6
+*/
+U_STABLE int32_t U_EXPORT2
+ucol_tertiaryOrder (int32_t order);
+
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uconfig.h b/Source/WebCore/icu/unicode/uconfig.h
index 7d2d26e95..bfa8e77b0 100644
--- a/Source/WebCore/icu/unicode/uconfig.h
+++ b/Source/WebCore/icu/unicode/uconfig.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 2002-2009, International Business Machines
+* Copyright (C) 2002-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: uconfig.h
@@ -18,9 +18,17 @@
/*!
* \file
- * \brief Switches for excluding parts of ICU library code modules.
+ * \brief User-configurable settings
*
- * Allows to build partial, smaller libraries for special purposes.
+ * Miscellaneous switches:
+ *
+ * A number of macros affect a variety of minor aspects of ICU.
+ * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
+ * and moved here to make them easier to find.
+ *
+ * Switches for excluding parts of ICU library code modules:
+ *
+ * Changing these macros allows building partial, smaller libraries for special purposes.
* By default, all modules are built.
* The switches are fairly coarse, controlling large modules.
* Basic services cannot be turned off.
@@ -38,15 +46,153 @@
/**
* If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
* prior to determining default settings for uconfig variables.
- *
+ *
* @internal ICU 4.0
- *
*/
#if defined(UCONFIG_USE_LOCAL)
#include "uconfig_local.h"
#endif
/**
+ * \def U_DEBUG
+ * Determines whether to include debugging code.
+ * Automatically set on Windows, but most compilers do not have
+ * related predefined macros.
+ * @internal
+ */
+#ifdef U_DEBUG
+ /* Use the predefined value. */
+#elif defined(_DEBUG)
+ /*
+ * _DEBUG is defined by Visual Studio debug compilation.
+ * Do *not* test for its NDEBUG macro: It is an orthogonal macro
+ * which disables assert().
+ */
+# define U_DEBUG 1
+# else
+# define U_DEBUG 0
+#endif
+
+/**
+ * Determines whether to enable auto cleanup of libraries.
+ * @internal
+ */
+#ifndef UCLN_NO_AUTO_CLEANUP
+#define UCLN_NO_AUTO_CLEANUP 1
+#endif
+
+/**
+ * \def U_DISABLE_RENAMING
+ * Determines whether to disable renaming or not.
+ * @internal
+ */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 1
+#endif
+
+/**
+ * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
+ * utypes.h includes those headers if this macro is defined to 0.
+ * Otherwise, each of those headers must be included explicitly when using one of their macros.
+ * Defaults to 0 for backward compatibility, except inside ICU.
+ * @stable ICU 49
+ */
+#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ /* Use the predefined value. */
+#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
+ defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
+ defined(U_TOOLUTIL_IMPLEMENTATION)
+# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
+#else
+# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
+#endif
+
+/**
+ * \def U_OVERRIDE_CXX_ALLOCATION
+ * Determines whether to override new and delete.
+ * ICU is normally built such that all of its C++ classes, via their UMemory base,
+ * override operators new and delete to use its internal, customizable,
+ * non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
+ *
+ * This is especially important when the application and its libraries use multiple heaps.
+ * For example, on Windows, this allows the ICU DLL to be used by
+ * applications that statically link the C Runtime library.
+ *
+ * @stable ICU 2.2
+ */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/**
+ * \def U_ENABLE_TRACING
+ * Determines whether to enable tracing.
+ * @internal
+ */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 0
+#endif
+
+/**
+ * \def U_ENABLE_DYLOAD
+ * Whether to enable Dynamic loading in ICU.
+ * @internal
+ */
+#ifndef U_ENABLE_DYLOAD
+#define U_ENABLE_DYLOAD 1
+#endif
+
+/**
+ * \def U_CHECK_DYLOAD
+ * Whether to test Dynamic loading as an OS capability.
+ * @internal
+ */
+#ifndef U_CHECK_DYLOAD
+#define U_CHECK_DYLOAD 1
+#endif
+
+
+/**
+ * \def U_DEFAULT_SHOW_DRAFT
+ * Do we allow ICU users to use the draft APIs by default?
+ * @internal
+ */
+#ifndef U_DEFAULT_SHOW_DRAFT
+#define U_DEFAULT_SHOW_DRAFT 1
+#endif
+
+/*===========================================================================*/
+/* Custom icu entry point renaming */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_LIB_SUFFIX
+ * 1 if a custom library suffix is set.
+ * @internal
+ */
+#ifdef U_HAVE_LIB_SUFFIX
+ /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME)
+# define U_HAVE_LIB_SUFFIX 1
+#endif
+
+/**
+ * \def U_LIB_SUFFIX_C_NAME_STRING
+ * Defines the library suffix as a string with C syntax.
+ * If not predefined, it is derived by stringifying U_LIB_SUFFIX_C_NAME when that
+ * macro is defined, and is the empty string otherwise.
+ * @internal
+ */
+#ifdef U_LIB_SUFFIX_C_NAME_STRING
+ /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME)
+ /*
+ * The # operator only stringifies a parameter of a function-like macro, so go
+ * through two helper macros: the outer one expands U_LIB_SUFFIX_C_NAME,
+ * the inner one turns the expanded tokens into a string literal.
+ */
+# define U_LIB_SUFFIX_STRINGIFY_(s) #s
+# define U_LIB_SUFFIX_EXPAND_AND_STRINGIFY_(s) U_LIB_SUFFIX_STRINGIFY_(s)
+# define U_LIB_SUFFIX_C_NAME_STRING U_LIB_SUFFIX_EXPAND_AND_STRINGIFY_(U_LIB_SUFFIX_C_NAME)
+#else
+# define U_LIB_SUFFIX_C_NAME_STRING ""
+#endif
+
+/* common/i18n library switches --------------------------------------------- */
+
+/**
* \def UCONFIG_ONLY_COLLATION
* This switch turns off modules that are not needed for collation.
*
@@ -146,6 +292,9 @@
# define UCONFIG_NO_NORMALIZATION 0
#elif UCONFIG_NO_NORMALIZATION
/* common library */
+ /* ICU 50 CJK dictionary BreakIterator uses normalization */
+# define UCONFIG_NO_BREAK_ITERATION 1
+ /* IDNA (UTS #46) is implemented via normalization */
# define UCONFIG_NO_IDNA 1
/* i18n library */
@@ -176,6 +325,17 @@
# define UCONFIG_NO_IDNA 0
#endif
+/**
+ * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+ * Determines the default UMessagePatternApostropheMode.
+ * See the documentation for that enum.
+ *
+ * @stable ICU 4.8
+ */
+#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
+#endif
+
/* i18n library switches ---------------------------------------------------- */
/**
@@ -228,4 +388,25 @@
# define UCONFIG_NO_SERVICE 1
#endif
+/**
+ * \def UCONFIG_HAVE_PARSEALLINPUT
+ * This switch turns on the "parse all input" attribute. Binary incompatible.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_HAVE_PARSEALLINPUT
+# define UCONFIG_HAVE_PARSEALLINPUT 1
+#endif
+
+
+/**
+ * \def UCONFIG_FORMAT_FASTPATHS_49
+ * This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols
+ *
+ * @internal
+ */
+#ifndef UCONFIG_FORMAT_FASTPATHS_49
+# define UCONFIG_FORMAT_FASTPATHS_49 1
+#endif
+
#endif
diff --git a/Source/WebCore/icu/unicode/ucsdet.h b/Source/WebCore/icu/unicode/ucsdet.h
new file mode 100644
index 000000000..d3a297be1
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucsdet.h
@@ -0,0 +1,413 @@
+/*
+ **********************************************************************
+ * Copyright (C) 2005-2013, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ * file name: ucsdet.h
+ * encoding: US-ASCII
+ * indentation:4
+ *
+ * created on: 2005Aug04
+ * created by: Andy Heninger
+ *
+ * ICU Character Set Detection, API for C
+ *
+ * Draft version 18 Oct 2005
+ *
+ */
+
+#ifndef __UCSDET_H
+#define __UCSDET_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/localpointer.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Charset Detection API
+ *
+ * This API provides a facility for detecting the
+ * charset or encoding of character data in an unknown text format.
+ * The input data can be from an array of bytes.
+ * <p>
+ * Character set detection is at best an imprecise operation. The detection
+ * process will attempt to identify the charset that best matches the characteristics
+ * of the byte data, but the process is partly statistical in nature, and
+ * the results can not be guaranteed to always be correct.
+ * <p>
+ * For best accuracy in charset detection, the input data should be primarily
+ * in a single language, and a minimum of a few hundred bytes worth of plain text
+ * in the language are needed. The detection process will attempt to
+ * ignore html or xml style markup that could otherwise obscure the content.
+ */
+
+
+struct UCharsetDetector;
+/**
+ * Structure representing a charset detector
+ * @stable ICU 3.6
+ */
+typedef struct UCharsetDetector UCharsetDetector;
+
+struct UCharsetMatch;
+/**
+ * Opaque structure representing a match that was identified
+ * from a charset detection operation.
+ * @stable ICU 3.6
+ */
+typedef struct UCharsetMatch UCharsetMatch;
+
+/**
+ * Open a charset detector.
+ *
+ * @param status Any error conditions occurring during the open
+ * operation are reported back in this variable.
+ * @return the newly opened charset detector.
+ * @stable ICU 3.6
+ */
+U_STABLE UCharsetDetector * U_EXPORT2
+ucsdet_open(UErrorCode *status);
+
+/**
+ * Close a charset detector. All storage and any other resources
+ * owned by this charset detector will be released. Failure to
+ * close a charset detector when finished with it can result in
+ * memory leaks in the application.
+ *
+ * @param ucsd The charset detector to be closed.
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucsdet_close(UCharsetDetector *ucsd);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCharsetDetectorPointer
+ * "Smart pointer" class, closes a UCharsetDetector via ucsdet_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCharsetDetectorPointer, UCharsetDetector, ucsdet_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Set the input byte data whose charset is to be detected.
+ *
+ * Ownership of the input text byte array remains with the caller.
+ * The input string must not be altered or deleted until the charset
+ * detector is either closed or reset to refer to different input text.
+ *
+ * @param ucsd the charset detector to be used.
+ * @param textIn the input text of unknown encoding.
+ * @param len the length of the input text, or -1 if the text
+ * is NUL terminated.
+ * @param status any error conditions are reported back in this variable.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucsdet_setText(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCode *status);
+
+
+/** Set the declared encoding for charset detection.
+ * The declared encoding of an input text is an encoding obtained
+ * by the user from an http header or xml declaration or similar source that
+ * can be provided as an additional hint to the charset detector.
+ *
+ * How and whether the declared encoding will be used during the
+ * detection process is TBD.
+ *
+ * @param ucsd the charset detector to be used.
+ * @param encoding an encoding for the current data obtained from
+ * a header or declaration or other source outside
+ * of the byte data itself.
+ * @param length the length of the encoding name, or -1 if the name string
+ * is NUL terminated.
+ * @param status any error conditions are reported back in this variable.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+ucsdet_setDeclaredEncoding(UCharsetDetector *ucsd, const char *encoding, int32_t length, UErrorCode *status);
+
+
+/**
+ * Return the charset that best matches the supplied input data.
+ *
+ * Note though, that because the detection
+ * only looks at the start of the input data,
+ * there is a possibility that the returned charset will fail to handle
+ * the full set of input data.
+ * <p>
+ * The returned UCharsetMatch object is owned by the UCharsetDetector.
+ * It will remain valid until the detector input is reset, or until
+ * the detector is closed.
+ * <p>
+ * The function will fail if
+ * <ul>
+ * <li>no charset appears to match the data.</li>
+ * <li>no input text has been provided</li>
+ * </ul>
+ *
+ * @param ucsd the charset detector to be used.
+ * @param status any error conditions are reported back in this variable.
+ * @return a UCharsetMatch representing the best matching charset,
+ * or NULL if no charset matches the byte data.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE const UCharsetMatch * U_EXPORT2
+ucsdet_detect(UCharsetDetector *ucsd, UErrorCode *status);
+
+
+/**
+ * Find all charset matches that appear to be consistent with the input,
+ * returning an array of results. The results are ordered with the
+ * best quality match first.
+ *
+ * Because the detection only looks at a limited amount of the
+ * input byte data, some of the returned charsets may fail to handle
+ * the all of input data.
+ * <p>
+ * The returned UCharsetMatch objects are owned by the UCharsetDetector.
+ * They will remain valid until the detector is closed or modified.
+ *
+ * <p>
+ * Return an error if
+ * <ul>
+ * <li>no charsets appear to match the input data.</li>
+ * <li>no input text has been provided</li>
+ * </ul>
+ *
+ * @param ucsd the charset detector to be used.
+ * @param matchesFound pointer to a variable that will be set to the
+ * number of charsets identified that are consistent with
+ * the input data. Output only.
+ * @param status any error conditions are reported back in this variable.
+ * @return A pointer to an array of pointers to UCharsetMatch objects.
+ * This array, and the UCharsetMatch instances to which it refers,
+ * are owned by the UCharsetDetector, and will remain valid until
+ * the detector is closed or modified.
+ * @stable ICU 3.6
+ */
+U_STABLE const UCharsetMatch ** U_EXPORT2
+ucsdet_detectAll(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCode *status);
+
+
+
+/**
+ * Get the name of the charset represented by a UCharsetMatch.
+ *
+ * The storage for the returned name string is owned by the
+ * UCharsetMatch, and will remain valid while the UCharsetMatch
+ * is valid.
+ *
+ * The name returned is suitable for use with the ICU conversion APIs.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The name of the matching charset.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ucsdet_getName(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ * Get a confidence number for the quality of the match of the byte
+ * data with the charset. Confidence numbers range from zero to 100,
+ * with 100 representing complete confidence and zero representing
+ * no confidence.
+ *
+ * The confidence values are somewhat arbitrary. They define an
+ * ordering within the results for any single detection operation
+ * but are not generally comparable between the results for different input.
+ *
+ * A confidence value of ten does have a general meaning - it is used
+ * for charsets that can represent the input data, but for which there
+ * is no other indication that suggests that the charset is the correct one.
+ * Pure 7 bit ASCII data, for example, is compatible with a
+ * great many charsets, most of which will appear as possible matches
+ * with a confidence of 10.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return A confidence number for the charset match.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucsdet_getConfidence(const UCharsetMatch *ucsm, UErrorCode *status);
+
+/**
+ * Get the RFC 3066 code for the language of the input data.
+ *
+ * The Charset Detection service is intended primarily for detecting
+ * charsets, not language. For some, but not all, charsets, a language is
+ * identified as a byproduct of the detection process, and that is what
+ * is returned by this function.
+ *
+ * CAUTION:
+ * 1. Language information is not available for input data encoded in
+ * all charsets. In particular, no language is identified
+ * for UTF-8 input data.
+ *
+ * 2. Closely related languages may sometimes be confused.
+ *
+ * If more accurate language detection is required, a linguistic
+ * analysis package should be used.
+ *
+ * The storage for the returned name string is owned by the
+ * UCharsetMatch, and will remain valid while the UCharsetMatch
+ * is valid.
+ *
+ * @param ucsm The charset match object.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The RFC 3066 code for the language of the input data, or
+ * an empty string if the language could not be determined.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE const char * U_EXPORT2
+ucsdet_getLanguage(const UCharsetMatch *ucsm, UErrorCode *status);
+
+
+/**
+ * Get the entire input text as a UChar string, placing it into
+ * a caller-supplied buffer. A terminating
+ * NUL character will be appended to the buffer if space is available.
+ *
+ * The number of UChars in the output string, not including the terminating
+ * NUL, is returned.
+ *
+ * If the supplied buffer is smaller than required to hold the output,
+ * the contents of the buffer are undefined. The full output string length
+ * (in UChars) is returned as always, and can be used to allocate a buffer
+ * of the correct size.
+ *
+ *
+ * @param ucsm The charset match object.
+ * @param buf A UChar buffer to be filled with the converted text data.
+ * @param cap The capacity of the buffer in UChars.
+ * @param status Any error conditions are reported back in this variable.
+ * @return The number of UChars in the output string.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE int32_t U_EXPORT2
+ucsdet_getUChars(const UCharsetMatch *ucsm,
+ UChar *buf, int32_t cap, UErrorCode *status);
+
+
+
+/**
+ * Get an iterator over the set of all detectable charsets -
+ * over the charsets that are known to the charset detection
+ * service.
+ *
+ * The returned UEnumeration provides access to the names of
+ * the charsets.
+ *
+ * <p>
+ * The state of the Charset detector that is passed in does not
+ * affect the result of this function, but requiring a valid, open
+ * charset detector as a parameter ensures that the charset detection
+ * service has been safely initialized and that the required detection
+ * data is available.
+ *
+ * <p>
+ * <b>Note:</b> Multiple different charset encodings in the same family may use
+ * a single shared name in this implementation. For example, this method returns
+ * an array including "ISO-8859-1" (ISO Latin 1), but not including "windows-1252"
+ * (Windows Latin 1). However, actual detection result could be "windows-1252"
+ * when the input data matches Latin 1 code points with any points only available
+ * in "windows-1252".
+ *
+ * @param ucsd a Charset detector.
+ * @param status Any error conditions are reported back in this variable.
+ * @return an iterator providing access to the detectable charset names.
+ * @stable ICU 3.6
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
+
+/**
+ * Test whether input filtering is enabled for this charset detector.
+ * Input filtering removes text that appears to be HTML or xml
+ * markup from the input before applying the code page detection
+ * heuristics.
+ *
+ * @param ucsd The charset detector to check.
+ * @return TRUE if filtering is enabled.
+ * @stable ICU 3.6
+ */
+
+U_STABLE UBool U_EXPORT2
+ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
+
+
+/**
+ * Enable filtering of input text. If filtering is enabled,
+ * text within angle brackets ("<" and ">") will be removed
+ * before detection, which will remove most HTML or xml markup.
+ *
+ * @param ucsd the charset detector to be modified.
+ * @param filter <code>TRUE</code> to enable input text filtering.
+ * @return The previous setting.
+ *
+ * @stable ICU 3.6
+ */
+U_STABLE UBool U_EXPORT2
+ucsdet_enableInputFilter(UCharsetDetector *ucsd, UBool filter);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Get an iterator over the set of detectable charsets -
+ * over the charsets that are enabled by the specified charset detector.
+ *
+ * The returned UEnumeration provides access to the names of
+ * the charsets.
+ *
+ * @param ucsd a Charset detector.
+ * @param status Any error conditions are reported back in this variable.
+ * @return an iterator providing access to the detectable charset names by
+ * the specified charset detector.
+ * @internal
+ */
+U_INTERNAL UEnumeration * U_EXPORT2
+ucsdet_getDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *status);
+
+/**
+ * Enable or disable an individual charset encoding.
+ * The name of the charset encoding must be one of the names returned by
+ * ucsdet_getAllDetectableCharsets().
+ *
+ * @param ucsd a Charset detector.
+ * @param encoding the name of the charset encoding.
+ * @param enabled <code>TRUE</code> to enable, or <code>FALSE</code> to disable the
+ * charset encoding.
+ * @param status receives the return status. When the name of charset encoding
+ * is not supported, U_ILLEGAL_ARGUMENT_ERROR is set.
+ * @internal
+ */
+U_INTERNAL void U_EXPORT2
+ucsdet_setDetectableCharset(UCharsetDetector *ucsd, const char *encoding, UBool enabled, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+#endif
+#endif /* __UCSDET_H */
+
+
diff --git a/Source/WebCore/icu/unicode/ucurr.h b/Source/WebCore/icu/unicode/ucurr.h
new file mode 100644
index 000000000..27698133e
--- /dev/null
+++ b/Source/WebCore/icu/unicode/ucurr.h
@@ -0,0 +1,360 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#ifndef _UCURR_H_
+#define _UCURR_H_
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Encapsulates information about a currency.
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * The ucurr API encapsulates information about a currency, as defined by
+ * ISO 4217. A currency is represented by a 3-character string
+ * containing its ISO 4217 code. This API can return various data
+ * necessary for the proper display of a currency:
+ *
+ * <ul><li>A display symbol, for a specific locale
+ * <li>The number of fraction digits to display
+ * <li>A rounding increment
+ * </ul>
+ *
+ * The <tt>DecimalFormat</tt> class uses these data to display
+ * currencies.
+ * @author Alan Liu
+ * @since ICU 2.2
+ */
+
+/**
+ * Finds a currency code for the given locale.
+ * @param locale the locale for which to retrieve a currency code.
+ * Currency can be specified by the "currency" keyword
+ * in which case it overrides the default currency code
+ * @param buff fill in buffer. Can be NULL for preflighting.
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for
+ * preflighting. If it is non-zero, the buff parameter
+ * must not be NULL.
+ * @param ec error code
+ * @return length of the currency string. It should always be 3. If 0,
+ * currency couldn't be found or the input values are
+ * invalid.
+ * @stable ICU 2.8
+ */
+U_STABLE int32_t U_EXPORT2
+ucurr_forLocale(const char* locale,
+ UChar* buff,
+ int32_t buffCapacity,
+ UErrorCode* ec);
+
+/**
+ * Selector constants for the nameStyle parameter of ucurr_getName().
+ *
+ * @see ucurr_getName
+ * @stable ICU 2.6
+ */
+typedef enum UCurrNameStyle {
+ /**
+ * Selector for ucurr_getName indicating a symbolic name for a
+ * currency, such as "$" for USD.
+ * @stable ICU 2.6
+ */
+ UCURR_SYMBOL_NAME,
+
+ /**
+ * Selector for ucurr_getName indicating the long name for a
+ * currency, such as "US Dollar" for USD.
+ * @stable ICU 2.6
+ */
+ UCURR_LONG_NAME
+} UCurrNameStyle;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * @stable ICU 2.6
+ */
+typedef const void* UCurrRegistryKey;
+
+/**
+ * Register an (existing) ISO 4217 currency code for the given locale.
+ * Only the country code and the two variants EURO and PRE_EURO are
+ * recognized.
+ * @param isoCode the three-letter ISO 4217 currency code
+ * @param locale the locale for which to register this currency code
+ * @param status the in/out status code
+ * @return a registry key that can be used to unregister this currency code, or NULL
+ * if there was an error.
+ * @stable ICU 2.6
+ */
+U_STABLE UCurrRegistryKey U_EXPORT2
+ucurr_register(const UChar* isoCode,
+ const char* locale,
+ UErrorCode* status);
+/**
+ * Unregister the previously-registered currency definitions using the
+ * UCurrRegistryKey returned from ucurr_register. Key becomes invalid after
+ * a successful call and should not be used again. Any currency
+ * that might have been hidden by the original ucurr_register call is
+ * restored.
+ * @param key the registry key returned by a previous call to ucurr_register
+ * @param status the in/out status code, no special meanings are assigned
+ * @return TRUE if the currency for this key was successfully unregistered
+ * @stable ICU 2.6
+ */
+U_STABLE UBool U_EXPORT2
+ucurr_unregister(UCurrRegistryKey key, UErrorCode* status);
+#endif /* UCONFIG_NO_SERVICE */
+
+/**
+ * Returns the display name for the given currency in the
+ * given locale. For example, the display name for the USD
+ * currency object in the en_US locale is "$".
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param locale locale in which to display currency
+ * @param nameStyle selector for which kind of name to return
+ * @param isChoiceFormat fill-in set to TRUE if the returned value
+ * is a ChoiceFormat pattern; otherwise it is a static string
+ * @param len fill-in parameter to receive length of result
+ * @param ec error code
+ * @return pointer to display string of 'len' UChars. If the resource
+ * data contains no entry for 'currency', then 'currency' itself is
+ * returned. If *isChoiceFormat is TRUE, then the result is a
+ * ChoiceFormat pattern. Otherwise it is a static string.
+ * @stable ICU 2.6
+ */
+U_STABLE const UChar* U_EXPORT2
+ucurr_getName(const UChar* currency,
+ const char* locale,
+ UCurrNameStyle nameStyle,
+ UBool* isChoiceFormat,
+ int32_t* len,
+ UErrorCode* ec);
+
+/**
+ * Returns the plural name for the given currency in the
+ * given locale. For example, the plural name for the USD
+ * currency object in the en_US locale is "US dollar" or "US dollars".
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param locale locale in which to display currency
+ * @param isChoiceFormat fill-in set to TRUE if the returned value
+ * is a ChoiceFormat pattern; otherwise it is a static string
+ * @param pluralCount plural count
+ * @param len fill-in parameter to receive length of result
+ * @param ec error code
+ * @return pointer to display string of 'len' UChars. If the resource
+ * data contains no entry for 'currency', then 'currency' itself is
+ * returned.
+ * @stable ICU 4.2
+ */
+U_STABLE const UChar* U_EXPORT2
+ucurr_getPluralName(const UChar* currency,
+ const char* locale,
+ UBool* isChoiceFormat,
+ const char* pluralCount,
+ int32_t* len,
+ UErrorCode* ec);
+
+/**
+ * Returns the number of fraction digits that should
+ * be displayed for the given currency.
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param ec input-output error code
+ * @return a non-negative number of fraction digits to be
+ * displayed, or 0 if there is an error
+ * @stable ICU 3.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucurr_getDefaultFractionDigits(const UChar* currency,
+ UErrorCode* ec);
+
+/**
+ * Returns the rounding increment for the given currency, or 0.0 if no
+ * rounding is done by the currency.
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param ec input-output error code
+ * @return the non-negative rounding increment, or 0.0 if none,
+ * or 0.0 if there is an error
+ * @stable ICU 3.0
+ */
+U_STABLE double U_EXPORT2
+ucurr_getRoundingIncrement(const UChar* currency,
+ UErrorCode* ec);
+
+/**
+ * Selector constants for ucurr_openISOCurrencies().
+ *
+ * @see ucurr_openISOCurrencies
+ * @stable ICU 3.2
+ */
+typedef enum UCurrCurrencyType {
+ /**
+ * Select all ISO-4217 currency codes.
+ * @stable ICU 3.2
+ */
+ UCURR_ALL = INT32_MAX,
+ /**
+ * Select only ISO-4217 commonly used currency codes.
+ * These currencies can be found in common use, and they usually have
+ * bank notes or coins associated with the currency code.
+ * This does not include fund codes, precious metals and other
+ * various ISO-4217 codes limited to special financial products.
+ * @stable ICU 3.2
+ */
+ UCURR_COMMON = 1,
+ /**
+ * Select ISO-4217 uncommon currency codes.
+ * These codes represent fund codes, precious metals and other
+ * various ISO-4217 codes limited to special financial products.
+ * A fund code is a monetary resource associated with a currency.
+ * @stable ICU 3.2
+ */
+ UCURR_UNCOMMON = 2,
+ /**
+ * Select only deprecated ISO-4217 codes.
+ * These codes are no longer in general public use.
+ * @stable ICU 3.2
+ */
+ UCURR_DEPRECATED = 4,
+ /**
+ * Select only non-deprecated ISO-4217 codes.
+ * These codes are in general public use.
+ * @stable ICU 3.2
+ */
+ UCURR_NON_DEPRECATED = 8
+} UCurrCurrencyType;
+
+/**
+ * Provides a UEnumeration object for listing ISO-4217 codes.
+ * @param currType You can use one of several UCurrCurrencyType values for this
+ * variable. You can also | (or) them together to get a specific list of
+ * currencies. Most people will want to use the (UCURR_COMMON|UCURR_NON_DEPRECATED) value to
+ * get a list of current currencies.
+ * @param pErrorCode Error code
+ * @stable ICU 3.2
+ */
+U_STABLE UEnumeration * U_EXPORT2
+ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode);
+
+/**
+ * Queries if the given ISO 4217 3-letter code is available on the specified date range.
+ *
+ * Note: For checking availability of a currency on a specific date, specify the date on both 'from' and 'to'
+ *
+ * When 'from' is U_DATE_MIN and 'to' is U_DATE_MAX, this method checks if the specified currency is available any time.
+ * If 'from' and 'to' are the same UDate value, this method checks if the specified currency is available on that date.
+ *
+ * @param isoCode
+ * The ISO 4217 3-letter code.
+ *
+ * @param from
+ * The lower bound of the date range, inclusive. When 'from' is U_DATE_MIN, check the availability
+ * of the currency any date before 'to'
+ *
+ * @param to
+ * The upper bound of the date range, inclusive. When 'to' is U_DATE_MAX, check the availability of
+ * the currency any date after 'from'
+ *
+ * @param errorCode
+ * ICU error code
+ *
+ * @return TRUE if the given ISO 4217 3-letter code is supported on the specified date range.
+ *
+ * @stable ICU 4.8
+ */
+U_STABLE UBool U_EXPORT2
+ucurr_isAvailable(const UChar* isoCode,
+ UDate from,
+ UDate to,
+ UErrorCode* errorCode);
+
+/**
+ * Finds the number of valid currency codes for the
+ * given locale and date.
+ * @param locale the locale for which to retrieve the
+ * currency count.
+ * @param date the date for which to retrieve the
+ * currency count for the given locale.
+ * @param ec error code
+ * @return the number of currency codes for the
+ * given locale and date. If 0, currency
+ * codes couldn't be found or the input
+ * values are invalid.
+ * @stable ICU 4.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucurr_countCurrencies(const char* locale,
+ UDate date,
+ UErrorCode* ec);
+
+/**
+ * Finds a currency code for the given locale and date
+ * @param locale the locale for which to retrieve a currency code.
+ * Currency can be specified by the "currency" keyword
+ * in which case it overrides the default currency code
+ * @param date the date for which to retrieve a currency code for
+ * the given locale.
+ * @param index the index within the available list of currency codes
+ * for the given locale on the given date.
+ * @param buff fill in buffer. Can be NULL for preflighting.
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for
+ * preflighting. If it is non-zero, the buff parameter
+ * must not be NULL.
+ * @param ec error code
+ * @return length of the currency string. It should always be 3.
+ * If 0, currency couldn't be found or the input values are
+ * invalid.
+ * @stable ICU 4.0
+ */
+U_STABLE int32_t U_EXPORT2
+ucurr_forLocaleAndDate(const char* locale,
+ UDate date,
+ int32_t index,
+ UChar* buff,
+ int32_t buffCapacity,
+ UErrorCode* ec);
+
+/**
+ * Given a key and a locale, returns an array of string values in a preferred
+ * order that would make a difference. These are all and only those values where
+ * the open (creation) of the service with the locale formed from the input locale
+ * plus input keyword and that value has different behavior than creation with the
+ * input locale alone.
+ * @param key one of the keys supported by this service. For now, only
+ * "currency" is supported.
+ * @param locale the locale
+ * @param commonlyUsed if set to true it will return only commonly used values
+ * with the given locale in preferred order. Otherwise,
+ * it will return all the available values for the locale.
+ * @param status error status
+ * @return a string enumeration over keyword values for the given key and the locale.
+ * @stable ICU 4.2
+ */
+U_STABLE UEnumeration* U_EXPORT2
+ucurr_getKeywordValuesForLocale(const char* key,
+ const char* locale,
+ UBool commonlyUsed,
+ UErrorCode* status);
+
+/**
+ * Returns the ISO 4217 numeric code for the currency.
+ * <p>Note: If the ISO 4217 numeric code is not assigned for the currency or
+ * the currency is unknown, this function returns 0.
+ *
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @return The ISO 4217 numeric code of the currency
+ * @stable ICU 49
+ */
+U_STABLE int32_t U_EXPORT2
+ucurr_getNumericCode(const UChar* currency);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uenum.h b/Source/WebCore/icu/unicode/uenum.h
index 0e7d90cc5..5408ec5a6 100644
--- a/Source/WebCore/icu/unicode/uenum.h
+++ b/Source/WebCore/icu/unicode/uenum.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2002-2010, International Business Machines
+* Copyright (C) 2002-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -164,11 +164,43 @@ uenum_reset(UEnumeration* en, UErrorCode* status);
* @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
* @param ec the error code.
* @return a UEnumeration wrapping the adopted StringEnumeration.
- * @draft ICU 4.2
+ * @stable ICU 4.2
*/
-U_CAPI UEnumeration* U_EXPORT2
-uenum_openFromStringEnumeration(U_NAMESPACE_QUALIFIER StringEnumeration* adopted, UErrorCode* ec);
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
#endif
+/**
+ * Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
+ * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
+ UErrorCode* ec);
+
+/* Note: next function is not hidden as draft, as it is used internally (it was formerly an internal function). */
+
+/**
+ * Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
+ * @param strings array of char* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_STABLE UEnumeration* U_EXPORT2
+uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
+ UErrorCode* ec);
+
#endif
diff --git a/Source/WebCore/icu/unicode/uidna.h b/Source/WebCore/icu/unicode/uidna.h
index 04b439d97..2efb130f9 100644
--- a/Source/WebCore/icu/unicode/uidna.h
+++ b/Source/WebCore/icu/unicode/uidna.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
- * Copyright (C) 2003-2010, International Business Machines
+ * Copyright (C) 2003-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -42,12 +42,14 @@
enum {
/**
* Default options value: None of the other options are set.
+ * For use in static worker and factory methods.
* @stable ICU 2.6
*/
UIDNA_DEFAULT=0,
/**
* Option to allow unassigned code points in domain names and labels.
- * This option is ignored by the UTS46 implementation.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the UTS46 implementation.
* (UTS #46 disallows unassigned code points.)
* @stable ICU 2.6
*/
@@ -56,47 +58,62 @@ enum {
* Option to check whether the input conforms to the STD3 ASCII rules,
* for example the restriction of labels to LDH characters
* (ASCII Letters, Digits and Hyphen-Minus).
+ * For use in static worker and factory methods.
* @stable ICU 2.6
*/
UIDNA_USE_STD3_RULES=2,
/**
* IDNA option to check for whether the input conforms to the BiDi rules.
- * This option is ignored by the IDNA2003 implementation.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
* (IDNA2003 always performs a BiDi check.)
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_CHECK_BIDI=4,
/**
* IDNA option to check for whether the input conforms to the CONTEXTJ rules.
- * This option is ignored by the IDNA2003 implementation.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
* (The CONTEXTJ check is new in IDNA2008.)
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_CHECK_CONTEXTJ=8,
/**
* IDNA option for nontransitional processing in ToASCII().
- * By default, ToASCII() uses transitional processing.
- * This option is ignored by the IDNA2003 implementation.
+ * For use in static worker and factory methods.
+ * <p>By default, ToASCII() uses transitional processing.
+ * <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
/**
* IDNA option for nontransitional processing in ToUnicode().
- * By default, ToUnicode() uses transitional processing.
- * This option is ignored by the IDNA2003 implementation.
+ * For use in static worker and factory methods.
+ * <p>By default, ToUnicode() uses transitional processing.
+ * <p>This option is ignored by the IDNA2003 implementation.
* (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
- UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20
+ UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
+ /**
+ * IDNA option to check for whether the input conforms to the CONTEXTO rules.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (The CONTEXTO check is new in IDNA2008.)
+ * <p>This is for use by registries for IDNA2008 conformance.
+ * UTS #46 does not require the CONTEXTO check.
+ * @stable ICU 49
+ */
+ UIDNA_CHECK_CONTEXTO=0x40
};
/**
* Opaque C service object type for the new IDNA API.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
struct UIDNA;
-typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @draft ICU 4.6 */
+typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
/**
* Returns a UIDNA instance which implements UTS #46.
@@ -113,17 +130,17 @@ typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @draft ICU 4.6 */
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return the UTS #46 UIDNA instance, if successful
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT UIDNA * U_EXPORT2
+U_STABLE UIDNA * U_EXPORT2
uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
/**
* Closes a UIDNA instance.
* @param idna UIDNA instance to be closed
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT void U_EXPORT2
+U_STABLE void U_EXPORT2
uidna_close(UIDNA *idna);
#if U_SHOW_CPLUSPLUS_API
@@ -137,7 +154,7 @@ U_NAMESPACE_BEGIN
*
* @see LocalPointerBase
* @see LocalPointer
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
@@ -153,32 +170,31 @@ U_NAMESPACE_END
* int32_t length = uidna_nameToASCII(..., &info, &errorCode);
* if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
* \endcode
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-struct UIDNAInfo {
- /** sizeof(UIDNAInfo) @draft ICU 4.6 */
+typedef struct UIDNAInfo {
+ /** sizeof(UIDNAInfo) @stable ICU 4.6 */
int16_t size;
/**
* Set to TRUE if transitional and nontransitional processing produce different results.
* For details see C++ IDNAInfo::isTransitionalDifferent().
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UBool isTransitionalDifferent;
UBool reservedB3; /**< Reserved field, do not use. @internal */
/**
* Bit set indicating IDNA processing errors. 0 if no errors.
* See UIDNA_ERROR_... constants.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
uint32_t errors;
int32_t reservedI2; /**< Reserved field, do not use. @internal */
int32_t reservedI3; /**< Reserved field, do not use. @internal */
-};
-typedef struct UIDNAInfo UIDNAInfo;
+} UIDNAInfo;
/**
* Static initializer for a UIDNAInfo struct.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
#define UIDNA_INFO_INITIALIZER { \
(int16_t)sizeof(UIDNAInfo), \
@@ -206,9 +222,9 @@ typedef struct UIDNAInfo UIDNAInfo;
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return destination string length
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uidna_labelToASCII(const UIDNA *idna,
const UChar *label, int32_t length,
UChar *dest, int32_t capacity,
@@ -233,9 +249,9 @@ uidna_labelToASCII(const UIDNA *idna,
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return destination string length
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uidna_labelToUnicode(const UIDNA *idna,
const UChar *label, int32_t length,
UChar *dest, int32_t capacity,
@@ -262,9 +278,9 @@ uidna_labelToUnicode(const UIDNA *idna,
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return destination string length
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uidna_nameToASCII(const UIDNA *idna,
const UChar *name, int32_t length,
UChar *dest, int32_t capacity,
@@ -289,9 +305,9 @@ uidna_nameToASCII(const UIDNA *idna,
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return destination string length
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uidna_nameToUnicode(const UIDNA *idna,
const UChar *name, int32_t length,
UChar *dest, int32_t capacity,
@@ -314,9 +330,9 @@ uidna_nameToUnicode(const UIDNA *idna,
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return destination string length
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uidna_labelToASCII_UTF8(const UIDNA *idna,
const char *label, int32_t length,
char *dest, int32_t capacity,
@@ -337,9 +353,9 @@ uidna_labelToASCII_UTF8(const UIDNA *idna,
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return destination string length
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uidna_labelToUnicodeUTF8(const UIDNA *idna,
const char *label, int32_t length,
char *dest, int32_t capacity,
@@ -360,9 +376,9 @@ uidna_labelToUnicodeUTF8(const UIDNA *idna,
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return destination string length
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uidna_nameToASCII_UTF8(const UIDNA *idna,
const char *name, int32_t length,
char *dest, int32_t capacity,
@@ -383,9 +399,9 @@ uidna_nameToASCII_UTF8(const UIDNA *idna,
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
* @return destination string length
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uidna_nameToUnicodeUTF8(const UIDNA *idna,
const char *name, int32_t length,
char *dest, int32_t capacity,
@@ -399,58 +415,58 @@ uidna_nameToUnicodeUTF8(const UIDNA *idna,
enum {
/**
* A non-final domain name label (or the whole domain name) is empty.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_EMPTY_LABEL=1,
/**
* A domain name label is longer than 63 bytes.
* (See STD13/RFC1034 3.1. Name space specifications and terminology.)
* This is only checked in ToASCII operations, and only if the output label is all-ASCII.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_LABEL_TOO_LONG=2,
/**
* A domain name is longer than 255 bytes in its storage form.
* (See STD13/RFC1034 3.1. Name space specifications and terminology.)
* This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
/**
* A label starts with a hyphen-minus ('-').
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_LEADING_HYPHEN=8,
/**
* A label ends with a hyphen-minus ('-').
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_TRAILING_HYPHEN=0x10,
/**
* A label contains hyphen-minus ('-') in the third and fourth positions.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_HYPHEN_3_4=0x20,
/**
* A label starts with a combining mark.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
/**
* A label or domain name contains disallowed characters.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_DISALLOWED=0x80,
/**
* A label starts with "xn--" but does not contain valid Punycode.
* That is, an xn-- label failed Punycode decoding.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_PUNYCODE=0x100,
/**
* A label contains a dot=full stop.
* This can occur in an input string for a single-label function.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_LABEL_HAS_DOT=0x200,
/**
@@ -459,19 +475,32 @@ enum {
* string had severe validation errors. For example,
* it might contain characters that are not allowed in ACE labels,
* or it might not be normalized.
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
/**
* A label does not meet the IDNA BiDi requirements (for right-to-left characters).
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
UIDNA_ERROR_BIDI=0x800,
/**
* A label does not meet the IDNA CONTEXTJ requirements.
- * @draft ICU 4.6
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_CONTEXTJ=0x1000,
+ /**
+ * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
+ * Some punctuation characters "Would otherwise have been DISALLOWED"
+ * but are allowed in certain contexts. (RFC 5892)
+ * @stable ICU 49
+ */
+ UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
+ /**
+ * A label does not meet the IDNA CONTEXTO requirements for digits.
+ * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
+ * @stable ICU 49
*/
- UIDNA_ERROR_CONTEXTJ=0x1000
+ UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
};
/* IDNA2003 API ------------------------------------------------------------- */
diff --git a/Source/WebCore/icu/unicode/uiter.h b/Source/WebCore/icu/unicode/uiter.h
index b469e24e6..0cdb8ffbe 100644
--- a/Source/WebCore/icu/unicode/uiter.h
+++ b/Source/WebCore/icu/unicode/uiter.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2002-2006,2009 International Business Machines
+* Copyright (C) 2002-2011 International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -673,7 +673,7 @@ uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
* @stable ICU 2.1
*/
U_STABLE void U_EXPORT2
-uiter_setCharacterIterator(UCharIterator *iter, U_NAMESPACE_QUALIFIER CharacterIterator *charIter);
+uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
/**
* Set up a UCharIterator to iterate over a C++ Replaceable.
@@ -698,7 +698,7 @@ uiter_setCharacterIterator(UCharIterator *iter, U_NAMESPACE_QUALIFIER CharacterI
* @stable ICU 2.1
*/
U_STABLE void U_EXPORT2
-uiter_setReplaceable(UCharIterator *iter, const U_NAMESPACE_QUALIFIER Replaceable *rep);
+uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
#endif
diff --git a/Source/WebCore/icu/unicode/uloc.h b/Source/WebCore/icu/unicode/uloc.h
index 95758c3a3..28ab902b5 100644
--- a/Source/WebCore/icu/unicode/uloc.h
+++ b/Source/WebCore/icu/unicode/uloc.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 1997-2010, International Business Machines
+* Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -289,7 +289,7 @@
/**
* Unicode code point for '@' separating keywords from the locale string.
* @see ULOC_KEYWORD_SEPARATOR
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40
@@ -302,7 +302,7 @@
/**
* Unicode code point for '=' for assigning value to a keyword.
* @see ULOC_KEYWORD_ASSIGN
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D
@@ -315,7 +315,7 @@
/**
* Unicode code point for ';' separating keywords
* @see ULOC_KEYWORD_ITEM_SEPARATOR
- * @draft ICU 4.6
+ * @stable ICU 4.6
*/
#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B
@@ -353,7 +353,7 @@ typedef enum {
ULOC_DATA_LOCALE_TYPE_LIMIT = 3
} ULocDataLocaleType ;
-
+#ifndef U_HIDE_SYSTEM_API
/**
* Gets ICU's default locale.
* The returned string is a snapshot in time, and will remain valid
@@ -388,6 +388,7 @@ uloc_getDefault(void);
U_STABLE void U_EXPORT2
uloc_setDefault(const char* localeID,
UErrorCode* status);
+#endif /* U_HIDE_SYSTEM_API */
/**
* Gets the language code for the specified locale.
@@ -752,7 +753,9 @@ U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void);
/**
*
- * Gets a list of all available language codes defined in ISO 639. This is a pointer
+ * Gets a list of all available 2-letter language codes defined in ISO 639,
+ * plus additional 3-letter codes determined to be useful for locale generation as
+ * defined by Unicode CLDR. This is a pointer
* to an array of pointers to arrays of char. All of these pointers are owned
* by ICU-- do not delete them, and do not write through them. The array is
* terminated with a null pointer.
@@ -797,12 +800,15 @@ uloc_getParent(const char* localeID,
/**
- * Gets the full name for the specified locale.
+ * Gets the full name for the specified locale, like uloc_getName(),
+ * but without keywords.
+ *
* Note: This has the effect of 'canonicalizing' the string to
* a certain extent. Upper and lower case are set as needed,
* and if the components were in 'POSIX' format they are changed to
* ICU format. It does NOT map aliased names in any way.
* See the top of this header file.
+ *
* This API strips off the keyword part, so "de_DE\@collation=phonebook"
* will become "de_DE".
* This API supports preflighting.
@@ -853,13 +859,16 @@ uloc_getKeywordValue(const char* localeID,
/**
- * Set the value of the specified keyword.
+ * Sets or removes the value of the specified keyword.
+ *
+ * For removing all keywords, use uloc_getBaseName().
+ *
* NOTE: Unlike almost every other ICU function which takes a
* buffer, this function will NOT truncate the output text. If a
* BUFFER_OVERFLOW_ERROR is received, it means that the original
* buffer is untouched. This is done to prevent incorrect or possibly
* even malformed locales from being generated and used.
- *
+ *
* @param keywordName name of the keyword to be set. Case insensitive.
* @param keywordValue value of the keyword to be set. If 0-length or
* NULL, will result in the keyword being removed. No error is given if
@@ -1065,7 +1074,7 @@ uloc_minimizeSubtags(const char* localeID,
int32_t minimizedLocaleIDCapacity,
UErrorCode* err);
-/**
+/**
* Returns a locale ID for the specified BCP47 language tag string.
* If the specified language tag contains any ill-formed subtags,
* the first such subtag and all following subtags are ignored.
@@ -1081,21 +1090,21 @@ uloc_minimizeSubtags(const char* localeID,
* @param localeID the output buffer receiving a locale ID for the
* specified BCP47 language tag.
* @param localeIDCapacity the size of the locale ID output buffer.
- * @param parsedLength if not NULL, succsessfully parsed length
+ * @param parsedLength if not NULL, successfully parsed length
* for the input language tag is set.
* @param err error information if receiving the locald ID
* failed.
* @return the length of the locale ID.
- * @draft ICU 4.2
+ * @stable ICU 4.2
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uloc_forLanguageTag(const char* langtag,
char* localeID,
int32_t localeIDCapacity,
int32_t* parsedLength,
UErrorCode* err);
-/**
+/**
* Returns a well-formed language tag for this locale ID.
* <p>
* <b>Note</b>: When <code>strict</code> is FALSE, any locale
@@ -1104,7 +1113,7 @@ uloc_forLanguageTag(const char* langtag,
* TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the
* <code>err</code> if any locale fields do not satisfy the
* BCP47 syntax requirement.
- * @param localeID the input lcoale ID
+ * @param localeID the input locale ID
* @param langtag the output buffer receiving BCP47 language
* tag for the locale ID.
* @param langtagCapacity the size of the BCP47 language tag
@@ -1114,9 +1123,9 @@ uloc_forLanguageTag(const char* langtag,
* @param err error information if receiving the language
* tag failed.
* @return The length of the BCP47 language tag.
- * @draft ICU 4.2
+ * @stable ICU 4.2
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uloc_toLanguageTag(const char* localeID,
char* langtag,
int32_t langtagCapacity,
diff --git a/Source/WebCore/icu/unicode/umachine.h b/Source/WebCore/icu/unicode/umachine.h
index abbdcb79f..d1102f493 100644
--- a/Source/WebCore/icu/unicode/umachine.h
+++ b/Source/WebCore/icu/unicode/umachine.h
@@ -1,7 +1,7 @@
/*
******************************************************************************
*
-* Copyright (C) 1999-2010, International Business Machines
+* Copyright (C) 1999-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@@ -41,16 +41,7 @@
/* which are contained in the platform-specific file platform.h */
/*==========================================================================*/
-#if defined(U_PALMOS)
-# include "unicode/ppalmos.h"
-#elif !defined(__MINGW32__) && (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64))
-#ifdef CYGWINMSVC
-# include "unicode/platform.h"
-#endif
-# include "unicode/pwin32.h"
-#else
-# include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
-#endif
+#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
/*
* ANSI C headers:
@@ -59,19 +50,6 @@
#include <stddef.h>
/*==========================================================================*/
-/* XP_CPLUSPLUS is a cross-platform symbol which should be defined when */
-/* using C++. It should not be defined when compiling under C. */
-/*==========================================================================*/
-
-#ifdef __cplusplus
-# ifndef XP_CPLUSPLUS
-# define XP_CPLUSPLUS
-# endif
-#else
-# undef XP_CPLUSPLUS
-#endif
-
-/*==========================================================================*/
/* For C wrappers, we use the symbol U_STABLE. */
/* This works properly if the includer is C or C++. */
/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
@@ -95,7 +73,7 @@
* @stable ICU 2.4
*/
-#ifdef XP_CPLUSPLUS
+#ifdef __cplusplus
# define U_CFUNC extern "C"
# define U_CDECL_BEGIN extern "C" {
# define U_CDECL_END }
@@ -105,23 +83,26 @@
# define U_CDECL_END
#endif
+#ifndef U_ATTRIBUTE_DEPRECATED
/**
* \def U_ATTRIBUTE_DEPRECATED
* This is used for GCC specific attributes
* @internal
*/
-#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2))
+#if U_GCC_MAJOR_MINOR >= 302
# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
/**
* \def U_ATTRIBUTE_DEPRECATED
* This is used for Visual C++ specific attributes
* @internal
*/
-#elif defined(U_WINDOWS) && defined(_MSC_VER) && (_MSC_VER >= 1400)
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
#else
# define U_ATTRIBUTE_DEPRECATED
#endif
+#endif
+
/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
#define U_CAPI U_CFUNC U_EXPORT
/** This is used to declare a function as a stable public ICU C API*/
@@ -234,27 +215,6 @@ typedef int8_t UBool;
/* wchar_t-related definitions -------------------------------------------- */
-/**
- * \def U_HAVE_WCHAR_H
- * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
- *
- * @stable ICU 2.0
- */
-#ifndef U_HAVE_WCHAR_H
-# define U_HAVE_WCHAR_H 1
-#endif
-
-/**
- * \def U_SIZEOF_WCHAR_T
- * U_SIZEOF_WCHAR_T==sizeof(wchar_t) (0 means it is not defined or autoconf could not set it)
- *
- * @stable ICU 2.0
- */
-#if U_SIZEOF_WCHAR_T==0
-# undef U_SIZEOF_WCHAR_T
-# define U_SIZEOF_WCHAR_T 4
-#endif
-
/*
* \def U_WCHAR_IS_UTF16
* Defined if wchar_t uses UTF-16.
@@ -275,14 +235,16 @@ typedef int8_t UBool;
# define U_WCHAR_IS_UTF32
# endif
# elif defined __UCS2__
-# if (__OS390__ || __OS400__) && (U_SIZEOF_WCHAR_T==2)
+# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# endif
-# elif defined __UCS4__
+# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
# if (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
-# elif defined(U_WINDOWS)
+# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
+# define U_WCHAR_IS_UTF32
+# elif U_PLATFORM_HAS_WIN32_API
# define U_WCHAR_IS_UTF16
# endif
#endif
@@ -294,24 +256,24 @@ typedef int8_t UBool;
/**
* \var UChar
- * Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
- * If wchar_t is not 16 bits wide, then define UChar to be uint16_t or char16_t because GCC >=4.4
- * can handle UTF16 string literals.
+ * Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t),
+ * or wchar_t if that is 16 bits wide; always assumed to be unsigned.
+ * If neither is available, then define UChar to be uint16_t.
+ *
* This makes the definition of UChar platform-dependent
* but allows direct string type compatibility with platforms with
* 16-bit wchar_t types.
*
- * @draft ICU 4.4
+ * @stable ICU 4.4
*/
-
-/* Define UChar to be compatible with wchar_t if possible. */
-#if U_SIZEOF_WCHAR_T==2
+#if defined(UCHAR_TYPE)
+ typedef UCHAR_TYPE UChar;
+/* Not #elif U_HAVE_CHAR16_T -- because that is type-incompatible with pre-C++11 callers
+ typedef char16_t UChar; */
+#elif U_SIZEOF_WCHAR_T==2
typedef wchar_t UChar;
-#elif U_GNUC_UTF16_STRING
-#if defined _GCC_
- typedef __CHAR16_TYPE__ char16_t;
-#endif
- typedef char16_t UChar;
+#elif defined(__CHAR16_TYPE__)
+ typedef __CHAR16_TYPE__ UChar;
#else
typedef uint16_t UChar;
#endif
@@ -335,39 +297,25 @@ typedef int8_t UBool;
*/
typedef int32_t UChar32;
-/*==========================================================================*/
-/* U_INLINE and U_ALIGN_CODE Set default values if these are not already */
-/* defined. Definitions normally are in */
-/* platform.h or the corresponding file for */
-/* the OS in use. */
-/*==========================================================================*/
-
-#ifndef U_HIDE_INTERNAL_API
-
/**
- * \def U_ALIGN_CODE
- * This is used to align code fragments to a specific byte boundary.
- * This is useful for getting consistent performance test results.
- * @internal
- */
-#ifndef U_ALIGN_CODE
-# define U_ALIGN_CODE(n)
-#endif
-
-#endif /* U_HIDE_INTERNAL_API */
-
-/**
- * \def U_INLINE
- * This is used to request inlining of a function, on platforms and languages which support it.
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ *
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
*/
-
-#ifndef U_INLINE
-# ifdef XP_CPLUSPLUS
-# define U_INLINE inline
-# else
-# define U_INLINE
-# endif
-#endif
+#define U_SENTINEL (-1)
#include "unicode/urename.h"
diff --git a/Source/WebCore/icu/unicode/unistr.h b/Source/WebCore/icu/unicode/unistr.h
new file mode 100644
index 000000000..c6e8b4466
--- /dev/null
+++ b/Source/WebCore/icu/unicode/unistr.h
@@ -0,0 +1,4470 @@
+/*
+**********************************************************************
+* Copyright (C) 1998-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File unistr.h
+*
+* Modification History:
+*
+* Date Name Description
+* 09/25/98 stephen Creation.
+* 11/11/98 stephen Changed per 11/9 code review.
+* 04/20/99 stephen Overhauled per 4/16 code review.
+* 11/18/99 aliu Made to inherit from Replaceable. Added method
+* handleReplaceBetween(); other methods unchanged.
+* 06/25/01 grhoten Remove dependency on iostream.
+******************************************************************************
+*/
+
+#ifndef UNISTR_H
+#define UNISTR_H
+
+/**
+ * \file
+ * \brief C++ API: Unicode String
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/rep.h"
+#include "unicode/std_string.h"
+#include "unicode/stringpiece.h"
+#include "unicode/bytestream.h"
+#include "unicode/ucasemap.h"
+
+struct UConverter; // unicode/ucnv.h
+class StringThreadTest;
+
+#ifndef U_COMPARE_CODE_POINT_ORDER
+/* see also ustring.h and unorm.h */
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+#endif
+
+#ifndef USTRING_H
+/**
+ * \ingroup ustring_ustrlen
+ */
+U_STABLE int32_t U_EXPORT2
+u_strlen(const UChar *s);
+#endif
+
+/**
+ * \def U_STRING_CASE_MAPPER_DEFINED
+ * @internal
+ */
+#ifndef U_STRING_CASE_MAPPER_DEFINED
+#define U_STRING_CASE_MAPPER_DEFINED
+
+/**
+ * Internal string case mapping function type.
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+U_NAMESPACE_BEGIN
+
+class BreakIterator; // unicode/brkiter.h
+class Locale; // unicode/locid.h
+class StringCharacterIterator;
+class UnicodeStringAppendable; // unicode/appendable.h
+
+/* The <iostream> include has been moved to unicode/ustream.h */
+
+/**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * @stable ICU 3.2
+ */
+#define US_INV icu::UnicodeString::kInvariant
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only Latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * <code>NUL</code>, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
+#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
+#else
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
+#endif
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for improved performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only Latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * @stable ICU 2.0
+ */
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
+
+/**
+ * \def UNISTR_FROM_CHAR_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Auto-"explicit" in ICU library code.
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+# else
+ // Empty by default for source code compatibility.
+# define UNISTR_FROM_CHAR_EXPLICIT
+# endif
+#endif
+
+/**
+ * \def UNISTR_FROM_STRING_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ *
+ * In particular, this helps prevent accidentally depending on ICU conversion code
+ * by passing a string literal into an API with a const UnicodeString & parameter.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Auto-"explicit" in ICU library code.
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+# else
+ // Empty by default for source code compatibility.
+# define UNISTR_FROM_STRING_EXPLICIT
+# endif
+#endif
+
+/**
+ * UnicodeString is a string class that stores Unicode characters directly and provides
+ * functionality similar to the Java String and StringBuffer classes.
+ * It is a concrete implementation of the abstract class Replaceable (for transliteration).
+ *
+ * The UnicodeString class is not suitable for subclassing.
+ *
+ * <p>For an overview of Unicode strings in C and C++ see the
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ *
+ * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is UChar.
+ * For single-character handling, a Unicode character code <em>point</em> is a value
+ * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
+ *
+ * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
+ * This is the same as with multi-byte char* strings in traditional string handling.
+ * Operations on partial strings typically do not test for code point boundaries.
+ * If necessary, the user needs to take care of such boundaries by testing for the code unit
+ * values or by using functions like
+ * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
+ * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
+ *
+ * UnicodeString methods are more lenient with regard to input parameter values
+ * than other ICU APIs. In particular:
+ * - If indexes are out of bounds for a UnicodeString object
+ * (<0 or >length()) then they are "pinned" to the nearest boundary.
+ * - If primitive string pointer values (e.g., const UChar * or char *)
+ * for input strings are NULL, then those input string parameters are treated
+ * as if they pointed to an empty string.
+ * However, this is <em>not</em> the case for char * parameters for charset names
+ * or other IDs.
+ * - Most UnicodeString methods do not take a UErrorCode parameter because
+ * there are usually very few opportunities for failure other than a shortage
+ * of memory, error codes in low-level C++ string methods would be inconvenient,
+ * and the error code as the last parameter (ICU convention) would prevent
+ * the use of default parameter values.
+ * Instead, such methods set the UnicodeString into a "bogus" state
+ * (see isBogus()) if an error occurs.
+ *
+ * In string comparisons, two UnicodeString objects that are both "bogus"
+ * compare equal (to be transitive and prevent endless loops in sorting),
+ * and a "bogus" string compares less than any non-"bogus" one.
+ *
+ * Const UnicodeString methods are thread-safe. Multiple threads can use
+ * const methods on the same UnicodeString object simultaneously,
+ * but non-const methods must not be called concurrently (in multiple threads)
+ * with any other (const or non-const) methods.
+ *
+ * Similarly, const UnicodeString & parameters are thread-safe.
+ * One object may be passed in as such a parameter concurrently in multiple threads.
+ * This includes the const UnicodeString & parameters for
+ * copy construction, assignment, and cloning.
+ *
+ * <p>UnicodeString uses several storage methods.
+ * String contents can be stored inside the UnicodeString object itself,
+ * in an allocated and shared buffer, or in an outside buffer that is "aliased".
+ * Most of this is done transparently, but careful aliasing in particular provides
+ * significant performance improvements.
+ * Also, the internal buffer is accessible via special functions.
+ * For details see the
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ *
+ * @see utf.h
+ * @see CharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeString : public Replaceable
+{
+public:
+
+ /**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * Use the macro US_INV instead of the full qualification for this value.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ enum EInvariant {
+ /**
+ * @see EInvariant
+ * @stable ICU 3.2
+ */
+ kInvariant
+ };
+
+ //========================================
+ // Read-only operations
+ //========================================
+
+ /* Comparison - bitwise only - for international comparison use collation */
+
+ /**
+ * Equality operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if <TT>text</TT> contains the same characters as this one,
+ * FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator== (const UnicodeString& text) const;
+
+ /**
+ * Inequality operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return FALSE if <TT>text</TT> contains the same characters as this one,
+ * TRUE otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool operator!= (const UnicodeString& text) const;
+
+ /**
+ * Greater than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * greater than the characters in <code>text</code>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator> (const UnicodeString& text) const;
+
+ /**
+ * Less than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * less than the characters in <code>text</code>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator< (const UnicodeString& text) const;
+
+ /**
+ * Greater than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * greater than or equal to the characters in <code>text</code>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator>= (const UnicodeString& text) const;
+
+ /**
+ * Less than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return TRUE if the characters in this are bitwise
+ * less than or equal to the characters in <code>text</code>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator<= (const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString to
+ * the characters in <code>text</code>.
+ * @param text The UnicodeString to compare to this one.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>text</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>text</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>text</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in the <b>entire string</b> <TT>text</TT>.
+ * (The parameters "start" and "length" are not applied to the other text "text".)
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters of text to compare.
+ * @param text the other text to be compared against this string.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>text</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>text</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>text</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare.
+ * @param srcText the text to be compared
+ * @param srcStart the offset into <TT>srcText</TT> to start comparison
+ * @param srcLength the number of characters in <TT>srcText</TT> to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcText</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcText</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString with the first
+ * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
+ * @param srcChars The characters to compare to this UnicodeString.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcChars</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcChars</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the first
+ * <TT>length</TT> characters in <TT>srcChars</TT>
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters to compare.
+ * @param srcChars the characters to be compared
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcChars</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcChars</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UChar *srcChars) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare
+ * @param srcChars the characters to be compared
+ * @param srcStart the offset into <TT>srcChars</TT> to start comparison
+ * @param srcLength the number of characters in <TT>srcChars</TT> to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcChars</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcChars</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [<TT>start</TT>, <TT>limit</TT>) with the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
+ * @param start the offset at which the compare operation begins
+ * @param limit the offset immediately following the compare operation
+ * @param srcText the text to be compared
+ * @param srcStart the offset into <TT>srcText</TT> to start comparison
+ * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as <code>srcText</code>, -1 if the characters in
+ * this are bitwise less than the characters in <code>srcText</code>, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in <code>srcText</code>.
+ * @stable ICU 2.0
+ */
+ inline int8_t compareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param text Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(const UnicodeString& text) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrderBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
+ *
+ * @param text Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(const UChar *srcChars,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit,
+ uint32_t options) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>text</TT>
+ * @param text The text to match.
+ * @return TRUE if this starts with the characters in <TT>text</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>srcText</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcText The text to match.
+ * @param srcStart the offset into <TT>srcText</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @return TRUE if this starts with the characters in <TT>srcText</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>srcChars</TT>
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in <TT>srcChars</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into <TT>srcChars</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>text</TT>
+ * @param text The text to match.
+ * @return TRUE if this ends with the characters in <TT>text</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcText</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcText The text to match.
+ * @param srcStart the offset into <TT>srcText</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @return TRUE if this ends with the characters in <TT>srcText</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcChars</TT>
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UChar *srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in <TT>srcChars</TT>
+ * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into <TT>srcChars</TT> to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
+ * FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+
+ /* Searching - bitwise only */
+
+ /**
+ * Locate in this the first occurrence of the characters in <TT>text</TT>,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in <TT>text</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>text</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into <TT>srcText</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>srcText</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in
+ * <TT>srcChars</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into <TT>srcChars</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t indexOf(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point <code>c</code>,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point <code>c</code>,
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point <code>c</code>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the code point <TT>c</TT>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>text</TT>,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>text</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>text</TT>, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>text</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into <TT>srcText</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcText</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>srcText</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT>, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) of the characters
+ * in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into <TT>srcChars</TT> at which
+ * to start matching
+ * @param srcLength the number of characters in <TT>srcChars</TT> to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of <TT>srcChars</TT>,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t lastIndexOf(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point <code>c</code>,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point <code>c</code>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>
+ * starting at offset <TT>start</TT>, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point <code>c</code>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the code point <TT>c</TT>
+ * in the range [<TT>start</TT>, <TT>start + length</TT>),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of <TT>c</TT>, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+
+ /* Character access */
+
+ /**
+ * Return the code unit at offset <tt>offset</tt>.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset <tt>offset</tt>
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ inline UChar charAt(int32_t offset) const;
+
+ /**
+ * Return the code unit at offset <tt>offset</tt>.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset <tt>offset</tt>
+ * @stable ICU 2.0
+ */
+ inline UChar operator[] (int32_t offset) const;
+
+ /**
+ * Return the code point that contains the code unit
+ * at offset <tt>offset</tt>.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * that indicates the text offset of any of the code units
+ * that will be assembled into a code point (21-bit value) and returned
+ * @return the code point of text at <tt>offset</tt>
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points to the beginning of a Unicode character.
+ * The offset that is passed in points to
+ * any code unit of a code point,
+ * while the returned offset will point to the first code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points to a second surrogate
+ * of a surrogate pair, then the returned offset will point
+ * to the first surrogate.
+ * @param offset a valid offset into one code point of the text
+ * @return offset of the first code unit of the same code point
+ * @see U16_SET_CP_START
+ * @stable ICU 2.0
+ */
+ int32_t getChar32Start(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points behind a Unicode character.
+ * The offset that is passed in points behind
+ * any code unit of a code point,
+ * while the returned offset will point behind the last code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points behind the first surrogate
+ * (i.e., to the second surrogate)
+ * of a surrogate pair, then the returned offset will point
+ * behind the second surrogate (i.e., to the first code unit after the pair).
+ * @param offset a valid offset after any code unit of a code point of the text
+ * @return offset of the first code unit after the same code point
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.0
+ */
+ int32_t getChar32Limit(int32_t offset) const;
+
+ /**
+ * Move the code unit index along the string by delta code points.
+ * Interpret the input index as a code unit-based offset into the string,
+ * move the index forward or backward by delta code points, and
+ * return the resulting index.
+ * The input index should point to the first code unit of a code point,
+ * if there is more than one.
+ *
+ * Both input and output indexes are code unit-based as for all
+ * string indexes/offsets in ICU (and other libraries, like MBCS char*).
+ * If delta<0 then the index is moved backward (toward the start of the string).
+ * If delta>0 then the index is moved forward (toward the end of the string).
+ *
+ * This behaves like CharacterIterator::move32(delta, kCurrent).
+ *
+ * Behavior for out-of-bounds indexes:
+ * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
+ * if the input index<0 then it is pinned to 0;
+ * if it is index>length() then it is pinned to length().
+ * Afterwards, the index is moved by <code>delta</code> code points
+ * forward or backward,
+ * but no further backward than to 0 and no further forward than to length().
+ * The resulting index return value will be in between 0 and length(), inclusively.
+ *
+ * Examples:
+ * <pre>
+ * // s has code points 'a' U+10000 'b' U+10ffff U+2029
+ * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
+ *
+ * // initial index: position of U+10000
+ * int32_t index=1;
+ *
+ * // the following examples will all result in index==4, position of U+10ffff
+ *
+ * // skip 2 code points from some position in the string
+ * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+ *
+ * // go to the 3rd code point from the start of s (0-based)
+ * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+ *
+ * // go to the next-to-last code point of s
+ * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
+ * </pre>
+ *
+ * @param index input code unit index
+ * @param delta (signed) code point count to move the index forward or backward
+ * in the string
+ * @return the resulting code unit index
+ * @stable ICU 2.0
+ */
+ int32_t moveIndex32(int32_t index, int32_t delta) const;
+
+ /* Substring extraction */
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
+ * beginning at <tt>dstStart</tt>.
+ * If the string aliases to <code>dst</code> itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param start offset of first character which will be copied into the array
+ * @param length the number of characters to extract
+ * @param dst array in which to copy characters. The length of <tt>dst</tt>
+ * must be at least (<tt>dstStart + length</tt>).
+ * @param dstStart the offset in <TT>dst</TT> where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UChar *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the contents of the string into dest.
+ * This is a convenience function that
+ * checks if there is enough space in dest,
+ * extracts the entire string if possible,
+ * and NUL-terminates dest if possible.
+ *
+ * If the string fits into dest but cannot be NUL-terminated
+ * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+ * If the string itself does not fit into dest
+ * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * If the string aliases to <code>dest</code> itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param dest Destination string buffer.
+ * @param destCapacity Number of UChars available at dest.
+ * @param errorCode ICU error code.
+ * @return length()
+ * @stable ICU 2.0
+ */
+ int32_t
+ extract(UChar *dest, int32_t destCapacity,
+ UErrorCode &errorCode) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
+ * <tt>target</tt>.
+ * @param start offset of first character which will be copied
+ * @param length the number of characters to extract
+ * @param target UnicodeString into which to copy characters.
+ * (No return value: the result is delivered through <TT>target</TT>.)
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
+ * @param start offset of first character which will be copied into the array
+ * @param limit offset immediately following the last character to be copied
+ * @param dst array in which to copy characters. The length of <tt>dst</tt>
+ * must be at least (<tt>dstStart + (limit - start)</tt>).
+ * @param dstStart the offset in <TT>dst</TT> where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extractBetween(int32_t start,
+ int32_t limit,
+ UChar *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the UnicodeString <tt>target</tt>. Replaceable API.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to be copied
+ * @param target UnicodeString into which to copy characters.
+ * (No return value: the result is delivered through <TT>target</TT>.)
+ * @stable ICU 2.0
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
+ * All characters must be invariant (see utypes.h).
+ * Use US_INV as the last, signature-distinguishing parameter.
+ *
+ * This function does not write any more than <code>targetCapacity</code>
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction, can be NULL
+ * if targetCapacity is 0
+ * @param targetCapacity the length of the target buffer
+ * @param inv Signature-distinguishing parameter, use US_INV.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 3.2
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ int32_t targetCapacity,
+ enum EInvariant inv) const;
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
+ * in the platform's default codepage.
+ * This function does not write any more than <code>targetLength</code>
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * If <TT>target</TT> is NULL, then the number of bytes required for
+ * <TT>target</TT> is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength) const;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
+ * in a specified codepage.
+ * The output string is NUL-terminated.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If <TT>target</TT> is NULL, then the number of bytes required for
+ * <TT>target</TT> is returned. It is assumed that the target is big enough
+ * to fit all of the characters.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ inline int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ const char *codepage = 0) const;
+
+ /**
+ * Copy the characters in the range
+ * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
+ * in a specified codepage.
+ * This function does not write any more than <code>targetLength</code>
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If <TT>target</TT> is NULL, then the number of bytes required for
+ * <TT>target</TT> is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength,
+ const char *codepage) const;
+
+ /**
+ * Convert the UnicodeString into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are extracted.
+ *
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
+ * or NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @stable ICU 2.0
+ */
+ int32_t extract(char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+#endif
+
+ /**
+ * Create a temporary substring for the specified range.
+ * Unlike the substring constructor and setTo() functions,
+ * the object returned here will be a read-only alias (using getBuffer())
+ * rather than copying the text.
+ * As a result, this substring operation is much faster but requires
+ * that the original string not be modified or deleted during the lifetime
+ * of the returned substring object.
+ * @param start offset of the first character visible in the substring
+ * @param length length of the substring
+ * @return a read-only alias UnicodeString object for the substring
+ * @stable ICU 4.4
+ */
+ UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Create a temporary substring for the specified range.
+ * Same as tempSubString(start, length) except that the substring range
+ * is specified as a (start, limit) pair (with an exclusive limit index)
+ * rather than a (start, length) pair.
+ * @param start offset of the first character visible in the substring
+ * @param limit offset immediately following the last character visible in the substring
+ * @return a read-only alias UnicodeString object for the substring
+ * @stable ICU 4.4
+ */
+ inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
+
+ /**
+ * Convert the UnicodeString to UTF-8 and write the result
+ * to a ByteSink. This is called by toUTF8String().
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls u_strToUTF8WithSub().
+ *
+ * @param sink A ByteSink to which the UTF-8 version of the string is written.
+ * sink.Flush() is called at the end.
+ * @stable ICU 4.2
+ * @see toUTF8String
+ */
+ void toUTF8(ByteSink &sink) const;
+
+#if U_HAVE_STD_STRING
+
+ /**
+ * Convert the UnicodeString to UTF-8 and append the result
+ * to a standard string.
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls toUTF8().
+ *
+ * @param result A standard string (or a compatible object)
+ * to which the UTF-8 version of the string is appended.
+ * @return The string object.
+ * @stable ICU 4.2
+ * @see toUTF8
+ */
+ template<typename StringClass>
+ StringClass &toUTF8String(StringClass &result) const {
+ // Wrap the caller's string in a ByteSink adapter so that toUTF8()
+ // can write its output through it; the same string is handed back
+ // to allow call chaining.
+ StringByteSink<StringClass> utf8Sink(&result);
+ toUTF8(utf8Sink);
+ return result;
+ }
+
+#endif
+
+ /**
+ * Convert the UnicodeString to UTF-32.
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls u_strToUTF32WithSub().
+ *
+ * @param utf32 destination string buffer, can be NULL if capacity==0
+ * @param capacity the number of UChar32s available at utf32
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The length of the UTF-32 string.
+ * @see fromUTF32
+ * @stable ICU 4.2
+ */
+ int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
+
+ /* Length operations */
+
+ /**
+ * Return the length of the UnicodeString object.
+ * The length is the number of UChar code units in the UnicodeString.
+ * If you want the number of code points, please use countChar32().
+ * @return the length of the UnicodeString object
+ * @see countChar32
+ * @stable ICU 2.0
+ */
+ inline int32_t length(void) const;
+
+ /**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This function is basically the inverse of moveIndex32().
+ *
+ * @param start the index of the first code unit to check
+ * @param length the number of UChar code units to check
+ * @return the number of code points in the specified code units
+ * @see length
+ * @stable ICU 2.0
+ */
+ int32_t
+ countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Check if the length UChar code units of the string
+ * contain more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in this part of the string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length
+ * falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (countChar32(start, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param start the index of the first code unit to check (0 for the entire string)
+ * @param length the number of UChar code units to check
+ * (use INT32_MAX for the entire string; remember that start/length
+ * values are pinned)
+ * @param number The number of code points in the (sub)string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @see countChar32
+ * @see u_strHasMoreChar32Than
+ * @stable ICU 2.4
+ */
+ UBool
+ hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
+
+ /**
+ * Determine if this string is empty.
+ * @return TRUE if this string contains 0 characters, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool isEmpty(void) const;
+
+ /**
+ * Return the capacity of the internal buffer of the UnicodeString object.
+ * This is useful together with the getBuffer functions.
+ * See there for details.
+ *
+ * @return the number of UChars available in the internal buffer
+ * @see getBuffer
+ * @stable ICU 2.0
+ */
+ inline int32_t getCapacity(void) const;
+
+ /* Other operations */
+
+ /**
+ * Generate a hash code for this object.
+ * @return The hash code of this UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline int32_t hashCode(void) const;
+
+ /**
+ * Determine if this object contains a valid string.
+ * A bogus string has no value. It is different from an empty string,
+ * although in both cases isEmpty() returns TRUE and length() returns 0.
+ * setToBogus() and isBogus() can be used to indicate that no string value is available.
+ * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
+ * length() returns 0.
+ *
+ * @return TRUE if the string is bogus/invalid, FALSE otherwise
+ * @see setToBogus()
+ * @stable ICU 2.0
+ */
+ inline UBool isBogus(void) const;
+
+
+ //========================================
+ // Write operations
+ //========================================
+
+ /* Assignment operations */
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the characters from <TT>srcText</TT>.
+ * @param srcText The text containing the characters to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &operator=(const UnicodeString &srcText);
+
+ /**
+ * Almost the same as the assignment operator.
+ * Replace the characters in this UnicodeString
+ * with the characters from <code>srcText</code>.
+ *
+ * This function works the same as the assignment operator
+ * for all strings except for ones that are readonly aliases.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * This function implements the old, more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * The fastCopyFrom function must be used only if it is known that the lifetime of
+ * this UnicodeString does not exceed the lifetime of the aliased buffer
+ * including its contents, for example for strings from resource bundles
+ * or aliases to string constants.
+ *
+ * @param src The text containing the characters to replace.
+ * @return a reference to this
+ * @stable ICU 2.4
+ */
+ UnicodeString &fastCopyFrom(const UnicodeString &src);
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code unit <TT>ch</TT>.
+ * @param ch the code unit to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (UChar ch);
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code point <TT>ch</TT>.
+ * @param ch the code point to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (UChar32 ch);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @return a reference to this
+ * @stable ICU 2.2
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcText</TT> in the
+ * replace string.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Set the text in the UnicodeString object to the characters in
+ * <TT>srcText</TT>.
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText);
+
+ /**
+ * Set the characters in the UnicodeString object to the characters
+ * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Set the characters in the UnicodeString object to the code unit
+ * <TT>srcChar</TT>.
+ * @param srcChar the code unit which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setTo(UChar srcChar);
+
+ /**
+ * Set the characters in the UnicodeString object to the code point
+ * <TT>srcChar</TT>.
+ * @param srcChar the code point which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setTo(UChar32 srcChar);
+
+ /**
+ * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
+ * This must be true if <code>textLength==-1</code>.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in <code>text</code> to alias.
+ * If -1, then this function will determine the length
+ * by calling <code>u_strlen()</code>.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(UBool isTerminated,
+ const UChar *text,
+ int32_t textLength);
+
+ /**
+ * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(UChar *dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
+ * @param buffCapacity The size of <code>buffer</code> in UChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(UChar *buffer,
+ int32_t buffLength,
+ int32_t buffCapacity);
+
+ /**
+ * Make this UnicodeString object invalid.
+ * The string will test TRUE with isBogus().
+ *
+ * A bogus string has no value. It is different from an empty string.
+ * It can be used to indicate that no string value is available.
+ * getBuffer() and getTerminatedBuffer() return NULL, and
+ * length() returns 0.
+ *
+ * This utility function is used throughout the UnicodeString
+ * implementation to indicate that a UnicodeString operation failed,
+ * and may be used in other functions,
+ * especially but not exclusively when such functions do not
+ * take a UErrorCode for simplicity.
+ *
+ * The following methods, and no others, will clear a string object's bogus flag:
+ * - remove()
+ * - remove(0, INT32_MAX)
+ * - truncate(0)
+ * - operator=() (assignment operator)
+ * - setTo(...)
+ *
+ * The simplest way to turn a bogus string into an empty one
+ * is to use the remove() function.
+ * Examples for other functions that are equivalent to "set to empty string":
+ * \code
+ * if(s.isBogus()) {
+ * s.remove(); // set to an empty string (remove all), or
+ * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
+ * s.truncate(0); // set to an empty string (complete truncation), or
+ * s=UnicodeString(); // assign an empty string, or
+ * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
+ * static const UChar nul=0;
+ * s.setTo(&nul, 0); // set to an empty C Unicode string
+ * }
+ * \endcode
+ *
+ * @see isBogus()
+ * @stable ICU 2.0
+ */
+ void setToBogus();
+
+ /**
+ * Set the character at the specified offset to the specified character.
+ * @param offset A valid offset into the text of the character to set
+ * @param ch The new character
+ * @return A reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setCharAt(int32_t offset,
+ UChar ch);
+
+
+ /* Append operations */
+
+ /**
+ * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
+ * object.
+ * @param ch the code unit to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (UChar ch);
+
+ /**
+ * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
+ * object.
+ * @param ch the code point to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (UChar32 ch);
+
+ /**
+ * Append operator. Append the characters in <TT>srcText</TT> to the
+ * UnicodeString object. <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (const UnicodeString& srcText);
+
+ /**
+ * Append the characters
+ * in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
+ * end of the UnicodeString object. <TT>srcText</TT>
+ * is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcText</TT> in
+ * the append string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in <TT>srcText</TT> to the UnicodeString object.
+ * <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText);
+
+ /**
+ * Append the characters in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the
+ * UnicodeString object.
+ * <TT>srcChars</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into <TT>srcChars</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcChars</TT> in
+ * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in <TT>srcChars</TT> to the end of the
+ * UnicodeString object. <TT>srcChars</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
+ * can be -1 if <TT>srcChars</TT> is NUL-terminated
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
+ * @param srcChar the code unit to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(UChar srcChar);
+
+ /**
+ * Append the code point <TT>srcChar</TT> to the UnicodeString object.
+ * @param srcChar the code point to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& append(UChar32 srcChar);
+
+
+ /* Insert operations */
+
+ /**
+ * Insert the characters in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
+ * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcText</TT> in
+ * the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in <TT>srcText</TT> into the UnicodeString object
+ * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText);
+
+ /**
+ * Insert the characters in <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
+ * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+ * @param start the offset at which the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into <TT>srcChars</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * in the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
+ * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
+ * offset <TT>start</TT>.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code unit to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ UChar srcChar);
+
+ /**
+ * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
+ * offset <TT>start</TT>.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code point to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ UChar32 srcChar);
+
+
+ /* Replace operations */
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+ * <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
+ * <TT>srcText</TT> is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcText</TT> in
+ * the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>)
+ * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
+ * not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+ * <TT>srcChars</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
+ * is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into <TT>srcChars</TT> where new characters
+ * will be obtained
+ * @param srcLength the number of characters in <TT>srcChars</TT>
+ * in the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
+ * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
+ * <TT>srcChar</TT>.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcChar the new code unit
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ UChar srcChar);
+
+ /**
+ * Replace the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) with the code point
+ * <TT>srcChar</TT>.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * <TT>start + length</TT> is not modified.
+ * @param srcChar the new code point
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
+
+ /**
+ * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+ * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
+ * with the characters in <TT>srcText</TT> in the range
+ * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into <TT>srcText</TT> where new characters
+ * will be obtained
+ * @param srcLimit the offset immediately following the range to copy
+ * in <TT>srcText</TT>
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit);
+
+ /**
+ * Replace a substring of this object with the given text.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= length()</code>.
+ * @param text the text to replace characters <code>start</code>
+ * to <code>limit - 1</code>
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text);
+
+ /**
+ * Replaceable API
+ * @return TRUE if it has MetaData
+ * @stable ICU 2.4
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Copy a substring of this object, retaining attribute (out-of-band)
+ * information. This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit <=
+ * length()</code>.
+ * @param dest the destination index. The characters from
+ * <code>start..limit-1</code> will be copied to <code>dest</code>.
+ * Implementations of this method may assume that <code>dest <= start ||
+ * dest >= limit</code>.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest);
+
+ /* Search and replace operations */
+
+ /**
+ * Replace all occurrences of characters in oldText with the characters
+ * in newText
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText with characters
+ * in newText
+ * in the range [<TT>start</TT>, <TT>start + length</TT>).
+ * @param start the start of the range in which replace will be performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText in the range
+ * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
+ * in newText in the range
+ * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
+ * in the range [<TT>start</TT>, <TT>start + length</TT>).
+ * @param start the start of the range in which replace will be performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param oldStart the start of the search range in <TT>oldText</TT>
+ * @param oldLength the length of the search range in <TT>oldText</TT>
+ * @param newText the text containing the replacement text
+ * @param newStart the start of the replacement range in <TT>newText</TT>
+ * @param newLength the length of the replacement range in <TT>newText</TT>
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ int32_t oldStart,
+ int32_t oldLength,
+ const UnicodeString& newText,
+ int32_t newStart,
+ int32_t newLength);
+
+
+ /* Remove operations */
+
+ /**
+ * Remove all characters from the UnicodeString object.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove(void);
+
+ /**
+ * Remove the characters in the range
+ * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param length the number of characters to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove(int32_t start,
+ int32_t length = (int32_t)INT32_MAX);
+
+ /**
+ * Remove the characters in the range
+ * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param limit the offset immediately following the range to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& removeBetween(int32_t start,
+ int32_t limit = (int32_t)INT32_MAX);
+
+ /**
+ * Retain only the characters in the range
+ * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
+ * Removes characters before <code>start</code> and at and after <code>limit</code>.
+ * @param start the offset of the first character to retain
+ * @param limit the offset immediately following the range to retain
+ * @return a reference to this
+ * @stable ICU 4.4
+ */
+ inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
+
+ /* Length operations */
+
+ /**
+ * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
+ * If the length of this UnicodeString is less than targetLength,
+ * targetLength - length() copies of padChar will be added to the
+ * beginning of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return TRUE if the text was padded, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padLeading(int32_t targetLength,
+ UChar padChar = 0x0020);
+
+ /**
+ * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
+ * If the length of this UnicodeString is less than targetLength,
+ * targetLength - length() copies of padChar will be added to the
+ * end of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return TRUE if the text was padded, FALSE otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padTrailing(int32_t targetLength,
+ UChar padChar = 0x0020);
+
+ /**
+ * Truncate this UnicodeString to the <TT>targetLength</TT>.
+ * @param targetLength the desired length of this UnicodeString.
+ * @return TRUE if the text was truncated, FALSE otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool truncate(int32_t targetLength);
+
+ /**
+ * Trims leading and trailing whitespace from this UnicodeString.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& trim(void);
+
+
+ /* Miscellaneous operations */
+
+ /**
+ * Reverse this UnicodeString in place.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(void);
+
+ /**
+ * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
+ * this UnicodeString.
+ * @param start the start of the range to reverse
+ * @param length the number of characters to reverse
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(int32_t start,
+ int32_t length);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(void);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(const Locale& locale);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(void);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(const Locale& locale);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecase this string, convenience function using the default locale.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter);
+
+ /**
+ * Titlecase this string.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+
+ /**
+ * Titlecase this string, with options.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options.)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @param options Options bit set, see ucasemap_open().
+ * @return A reference to this.
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @see ucasemap_open
+ * @stable ICU 3.8
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
+#endif
+
+ /**
+ * Case-folds the characters in this string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
+
+ //========================================
+ // Access to the internal buffer
+ //========================================
+
+ /**
+ * Get a read/write pointer to the internal buffer.
+ * The buffer is guaranteed to be large enough for at least minCapacity UChars,
+ * writable, and is still owned by the UnicodeString object.
+ * Calls to getBuffer(minCapacity) must not be nested, and
+ * must be matched with calls to releaseBuffer(newLength).
+ * If the string buffer was read-only or shared,
+ * then it will be reallocated and copied.
+ *
+ * An attempted nested call will return 0, and will not further modify the
+ * state of the UnicodeString object.
+ * It also returns 0 if the string is bogus.
+ *
+ * The actual capacity of the string buffer may be larger than minCapacity.
+ * getCapacity() returns the actual capacity.
+ * For many operations, the full capacity should be used to avoid reallocations.
+ *
+ * While the buffer is "open" between getBuffer(minCapacity)
+ * and releaseBuffer(newLength), the following applies:
+ * - The string length is set to 0.
+ * - Any read API call on the UnicodeString object will behave like on a 0-length string.
+ * - Any write API call on the UnicodeString object is disallowed and will have no effect.
+ * - You can read from and write to the returned buffer.
+ * - The previous string contents will still be in the buffer;
+ * if you want to use it, then you need to call length() before getBuffer(minCapacity).
+ * If the length() was greater than minCapacity, then any contents after minCapacity
+ * may be lost.
+ * The buffer contents is not NUL-terminated by getBuffer().
+ * If length()<getCapacity() then you can terminate it by writing a NUL
+ * at index length().
+ * - You must call releaseBuffer(newLength) in order to
+ * return to normal UnicodeString operation.
+ *
+ * @param minCapacity the minimum number of UChars that are to be available
+ * in the buffer, starting at the returned pointer;
+ * default to the current string capacity if minCapacity==-1
+ * @return a writable pointer to the internal string buffer,
+ * or 0 if an error occurs (nested calls, out of memory)
+ *
+ * @see releaseBuffer
+ * @see getTerminatedBuffer()
+ * @stable ICU 2.0
+ */
+ UChar *getBuffer(int32_t minCapacity);
+
+ /**
+ * Release a read/write buffer on a UnicodeString object with an
+ * "open" getBuffer(minCapacity).
+ * This function must be called in a matched pair with getBuffer(minCapacity).
+ * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
+ *
+ * It will set the string length to newLength, at most to the current capacity.
+ * If newLength==-1 then it will set the length according to the
+ * first NUL in the buffer, or to the capacity if there is no NUL.
+ *
+ * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
+ *
+ * @param newLength the new length of the UnicodeString object;
+ * defaults to the current capacity if newLength is greater than that;
+ * if newLength==-1, it defaults to u_strlen(buffer) but not more than
+ * the current capacity of the string
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @stable ICU 2.0
+ */
+ void releaseBuffer(int32_t newLength=-1);
+
+ /**
+ * Get a read-only pointer to the internal buffer.
+ * This can be called at any time on a valid UnicodeString.
+ *
+ * It returns 0 if the string is bogus, or
+ * during an "open" getBuffer(minCapacity).
+ *
+ * It can be called as many times as desired.
+ * The pointer that it returns will remain valid until the UnicodeString object is modified,
+ * at which time the pointer is semantically invalidated and must not be used any more.
+ *
+ * The capacity of the buffer can be determined with getCapacity().
+ * The part after length() may or may not be initialized and valid,
+ * depending on the history of the UnicodeString object.
+ *
+ * The buffer contents is (probably) not NUL-terminated.
+ * You can check if it is with
+ * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
+ * (See getTerminatedBuffer().)
+ *
+ * The buffer may reside in read-only memory. Its contents must not
+ * be modified.
+ *
+ * @return a read-only pointer to the internal string buffer,
+ * or 0 if the string is empty or bogus
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @see getTerminatedBuffer()
+ * @stable ICU 2.0
+ */
+ inline const UChar *getBuffer() const;
+
+ /**
+ * Get a read-only pointer to the internal buffer,
+ * making sure that it is NUL-terminated.
+ * This can be called at any time on a valid UnicodeString.
+ *
+ * It returns 0 if the string is bogus, or
+ * during an "open" getBuffer(minCapacity), or if the buffer cannot
+ * be NUL-terminated (because memory allocation failed).
+ *
+ * It can be called as many times as desired.
+ * The pointer that it returns will remain valid until the UnicodeString object is modified,
+ * at which time the pointer is semantically invalidated and must not be used any more.
+ *
+ * The capacity of the buffer can be determined with getCapacity().
+ * The part after length()+1 may or may not be initialized and valid,
+ * depending on the history of the UnicodeString object.
+ *
+ * The buffer contents is guaranteed to be NUL-terminated.
+ * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
+ * is written.
+ * For this reason, this function is not const, unlike getBuffer().
+ * Note that a UnicodeString may also contain NUL characters as part of its contents.
+ *
+ * The buffer may reside in read-only memory. Its contents must not
+ * be modified.
+ *
+ * @return a read-only pointer to the internal string buffer,
+ * or 0 if the string is empty or bogus
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @see getBuffer()
+ * @stable ICU 2.2
+ */
+ const UChar *getTerminatedBuffer();
+
+ //========================================
+ // Constructors
+ //========================================
+
+ /** Construct an empty UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline UnicodeString();
+
+ /**
+ * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
+ * @param capacity the number of UChars this UnicodeString should hold
+ * before a resize is necessary; if count is greater than 0 and count
+ * code points c take up more space than capacity, then capacity is adjusted
+ * accordingly.
+ * @param c is used to initially fill the string
+ * @param count specifies how many code points c are to be written in the
+ * string
+ * @stable ICU 2.0
+ */
+ UnicodeString(int32_t capacity, UChar32 c, int32_t count);
+
+ /**
+ * Single UChar (code unit) constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
+ * on the compiler command line or similar.
+ * @param ch the character to place in the UnicodeString
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
+
+ /**
+ * Single UChar32 (code point) constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
+ * on the compiler command line or similar.
+ * @param ch the character to place in the UnicodeString
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
+
+ /**
+ * UChar* constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
+ * on the compiler command line or similar.
+ * @param text The characters to place in the UnicodeString. <TT>text</TT>
+ * must be NUL (U+0000) terminated.
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
+
+ /**
+ * UChar* constructor.
+ * @param text The characters to place in the UnicodeString.
+ * @param textLength The number of Unicode characters in <TT>text</TT>
+ * to copy.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const UChar *text,
+ int32_t textLength);
+
+ /**
+ * Readonly-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
+ * This must be true if <code>textLength==-1</code>.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in <code>text</code> to alias.
+ * If -1, then this constructor will determine the length
+ * by calling <code>u_strlen()</code>.
+ * @stable ICU 2.0
+ */
+ UnicodeString(UBool isTerminated,
+ const UChar *text,
+ int32_t textLength);
+
+ /**
+ * Writable-aliasing UChar* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(UChar *dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
+ * @param buffCapacity The size of <code>buffer</code> in UChars.
+ * @stable ICU 2.0
+ */
+ UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * Uses the default converter (and thus depends on the ICU conversion code)
+ * unless U_CHARSET_IS_UTF8 is set to 1.
+ *
+ * For ASCII (really "invariant character") strings it is more efficient to use
+ * the constructor that takes a US_INV (for its enum EInvariant).
+ * For ASCII (invariant-character) string literals, see UNICODE_STRING and
+ * UNICODE_STRING_SIMPLE.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
+ * on the compiler command line or similar.
+ * @param codepageData an array of bytes, null-terminated,
+ * in the platform's default codepage.
+ * @stable ICU 2.0
+ * @see UNICODE_STRING
+ * @see UNICODE_STRING_SIMPLE
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
+
+ /**
+ * char* constructor.
+ * Uses the default converter (and thus depends on the ICU conversion code)
+ * unless U_CHARSET_IS_UTF8 is set to 1.
+ * @param codepageData an array of bytes in the platform's default codepage.
+ * @param dataLength The number of bytes in <TT>codepageData</TT>.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, int32_t dataLength);
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes, null-terminated
+ * @param codepage the encoding of <TT>codepageData</TT>. The special
+ * value 0 for <TT>codepage</TT> indicates that the text is in the
+ * platform's default codepage.
+ *
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, const char *codepage);
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes.
+ * @param dataLength The number of bytes in <TT>codepageData</TT>.
+ * @param codepage the encoding of <TT>codepageData</TT>. The special
+ * value 0 for <TT>codepage</TT> indicates that the text is in the
+ * platform's default codepage.
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
+
+ /**
+ * char * / UConverter constructor.
+ * This constructor uses an existing UConverter object to
+ * convert the codepage string to Unicode and construct a UnicodeString
+ * from that.
+ *
+ * The converter is reset at first.
+ * If the error code indicates a failure before this constructor is called,
+ * or if an error occurs during conversion or construction,
+ * then the string will be bogus.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are constructed.
+ *
+ * @param src input codepage string
+ * @param srcLength length of the input string, can be -1 for NUL-terminated strings
+ * @param cnv converter object (ucnv_resetToUnicode() will be called),
+ * can be NULL for the default converter
+ * @param errorCode normal ICU error code
+ * @stable ICU 2.0
+ */
+ UnicodeString(
+ const char *src, int32_t srcLength,
+ UConverter *cnv,
+ UErrorCode &errorCode);
+
+#endif
+
+ /**
+ * Constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * Use the macro US_INV as the third, signature-distinguishing parameter.
+ *
+ * For example:
+ * \code
+ * void fn(const char *s) {
+ * UnicodeString ustr(s, -1, US_INV);
+ * // use ustr ...
+ * }
+ * \endcode
+ *
+ * @param src String using only invariant characters.
+ * @param length Length of src, or -1 if NUL-terminated.
+ * @param inv Signature-distinguishing parameter, use US_INV.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ UnicodeString(const char *src, int32_t length, enum EInvariant inv);
+
+
+ /**
+ * Copy constructor.
+ * @param that The UnicodeString object to copy.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const UnicodeString& that);
+
+ /**
+ * 'Substring' constructor from tail of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into <tt>src</tt> at which to start copying.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart);
+
+ /**
+ * 'Substring' constructor from subrange of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into <tt>src</tt> at which to start copying.
+ * @param srcLength The number of characters from <tt>src</tt> to copy.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then NULL is returned.
+ * The clone functions in all subclasses return a pointer to a Replaceable
+ * because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see Replaceable::clone
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual Replaceable *clone() const;
+
+ /** Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeString();
+
+ /**
+ * Create a UnicodeString from a UTF-8 string.
+ * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+ * Calls u_strFromUTF8WithSub().
+ *
+ * @param utf8 UTF-8 input string.
+ * Note that a StringPiece can be implicitly constructed
+ * from a std::string or a NUL-terminated const char * string.
+ * @return A UnicodeString with equivalent UTF-16 contents.
+ * @see toUTF8
+ * @see toUTF8String
+ * @stable ICU 4.2
+ */
+ static UnicodeString fromUTF8(const StringPiece &utf8);
+
+ /**
+ * Create a UnicodeString from a UTF-32 string.
+ * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+ * Calls u_strFromUTF32WithSub().
+ *
+ * @param utf32 UTF-32 input string. Must not be NULL.
+ * @param length Length of the input string, or -1 if NUL-terminated.
+ * @return A UnicodeString with equivalent UTF-16 contents.
+ * @see toUTF32
+ * @stable ICU 4.2
+ */
+ static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
+
+ /* Miscellaneous operations */
+
+ /**
+ * Unescape a string of characters and return a string containing
+ * the result. The following escape sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * This function is similar to u_unescape() but not identical to it.
+ * The latter takes a source char*, so it does escape recognition
+ * and also invariant conversion.
+ *
+ * @return a string with backslash escapes interpreted, or an
+ * empty string on error.
+ * @see UnicodeString#unescapeAt()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UnicodeString unescape() const;
+
+ /**
+ * Unescape a single escape sequence and return the represented
+ * character. See unescape() for a listing of the recognized escape
+ * sequences. The character at offset-1 is assumed (without
+ * checking) to be a backslash. If the escape sequence is
+ * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
+ * returned.
+ *
+ * @param offset an input output parameter. On input, it is the
+ * offset into this string where the escape sequence is located,
+ * after the initial backslash. On output, it is advanced after the
+ * last character parsed. On error, it is not advanced at all.
+ * @return the character represented by the escape sequence at
+ * offset, or U_SENTINEL=-1 on error.
+ * @see UnicodeString#unescape()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UChar32 unescapeAt(int32_t &offset) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+ //========================================
+ // Implementation methods
+ //========================================
+
+protected:
+ /**
+ * Implement Replaceable::getLength() (see jitterbug 1027).
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const;
+
+ /**
+ * The change in Replaceable to use virtual getCharAt() allows
+ * UnicodeString::charAt() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual UChar getCharAt(int32_t offset) const;
+
+ /**
+ * The change in Replaceable to use virtual getChar32At() allows
+ * UnicodeString::char32At() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const;
+
+private:
+ // For char* constructors. Could be made public.
+ UnicodeString &setToUTF8(const StringPiece &utf8);
+ // For extract(char*).
+ // We could make a toUTF8(target, capacity, errorCode) public but not
+ // this version: New API will be cleaner if we make callers create substrings
+ // rather than having start+length on every method,
+ // and it should take a UErrorCode&.
+ int32_t
+ toUTF8(int32_t start, int32_t len,
+ char *target, int32_t capacity) const;
+
+ /**
+ * Internal string contents comparison, called by operator==.
+ * Requires: this & text not bogus and have same lengths.
+ */
+ UBool doEquals(const UnicodeString &text, int32_t len) const;
+
+ inline int8_t
+ doCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int32_t doIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(UChar c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ void doExtract(int32_t start,
+ int32_t length,
+ UChar *dst,
+ int32_t dstStart) const;
+
+ inline void doExtract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ inline UChar doCharAt(int32_t offset) const;
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const UChar *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doReverse(int32_t start,
+ int32_t length);
+
+ // calculate hash code
+ int32_t doHashCode(void) const;
+
+ // get pointer to start of array
+ // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
+ inline UChar* getArrayStart(void);
+ inline const UChar* getArrayStart(void) const;
+
+ // A UnicodeString object (not necessarily its current buffer)
+ // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+ inline UBool isWritable() const;
+
+ // Is the current buffer writable?
+ inline UBool isBufferWritable() const;
+
+ // None of the following does releaseArray().
+ inline void setLength(int32_t len); // sets only fShortLength and fLength
+ inline void setToEmpty(); // sets fFlags=kShortString
+ inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
+
+ // allocate the array; result may be fStackBuffer
+ // sets refCount to 1 if appropriate
+ // sets fArray, fCapacity, and fFlags
+ // returns boolean for success or failure
+ UBool allocate(int32_t capacity);
+
+ // release the array if owned
+ void releaseArray(void);
+
+ // turn a bogus string into an empty one
+ void unBogus();
+
+ // implements assignment operator, copy constructor, and fastCopyFrom()
+ UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
+
+ // Pin start and limit to acceptable values.
+ inline void pinIndex(int32_t& start) const;
+ inline void pinIndices(int32_t& start,
+ int32_t& length) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+ /* Internal extract() using UConverter. */
+ int32_t doExtract(int32_t start, int32_t length,
+ char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+ /*
+ * Real constructor for converting from codepage data.
+ * It assumes that it is called with !fRefCounted.
+ *
+ * If <code>codepage==0</code>, then the default converter
+ * is used for the platform encoding.
+ * If <code>codepage</code> is an empty string (<code>""</code>),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ */
+ void doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ const char *codepage);
+
+ /*
+ * Worker function for creating a UnicodeString from
+ * a codepage string using a UConverter.
+ */
+ void
+ doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ UConverter *converter,
+ UErrorCode &status);
+
+#endif
+
+ /*
+ * This function is called when write access to the array
+ * is necessary.
+ *
+ * We need to make a copy of the array if
+ * the buffer is read-only, or
+ * the buffer is refCounted (shared), and refCount>1, or
+ * the buffer is too small.
+ *
+ * Return FALSE if memory could not be allocated.
+ */
+ UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
+ int32_t growCapacity = -1,
+ UBool doCopyArray = TRUE,
+ int32_t **pBufferToDelete = 0,
+ UBool forceClone = FALSE);
+
+ /**
+ * Common function for UnicodeString case mappings.
+ * The stringCaseMapper has the same type UStringCaseMapper
+ * as in ustr_imp.h for ustrcase_map().
+ */
+ UnicodeString &
+ caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
+
+ // ref counting
+ void addRef(void);
+ int32_t removeRef(void);
+ int32_t refCount(void) const;
+
+ // constants
+ enum {
+ // Set the stack buffer size so that sizeof(UnicodeString) is,
+ // naturally (without padding), a multiple of sizeof(pointer).
+ US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
+ kInvalidUChar=0xffff, // invalid UChar index
+ kGrowSize=128, // grow size for this buffer
+ kInvalidHashCode=0, // invalid hash code
+ kEmptyHashCode=1, // hash code for empty string
+
+ // bit flag values for fFlags
+ kIsBogus=1, // this string is bogus, i.e., not valid or NULL
+ kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
+ kRefCounted=4, // there is a refCount field before the characters in fArray
+ kBufferIsReadonly=8,// do not write to this buffer
+ kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
+ // and releaseBuffer(newLength) must be called
+
+ // combined values for convenience
+ kShortString=kUsingStackBuffer,
+ kLongString=kRefCounted,
+ kReadonlyAlias=kBufferIsReadonly,
+ kWritableAlias=0
+ };
+
+ friend class StringThreadTest;
+ friend class UnicodeStringAppendable;
+
+ union StackBufferOrFields; // forward declaration necessary before friend declaration
+ friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
+ /*
+ * The following are all the class fields that are stored
+ * in each UnicodeString object.
+ * Note that UnicodeString has virtual functions,
+ * therefore there is an implicit vtable pointer
+ * as the first real field.
+ * The fields should be aligned such that no padding is necessary.
+ * On 32-bit machines, the size should be 32 bytes,
+ * on 64-bit machines (8-byte pointers), it should be 40 bytes.
+ *
+ * We use a hack to achieve this.
+ *
+ * With at least some compilers, each of the following is forced to
+ * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
+ * rounded up with additional padding if the fields do not already fit that requirement:
+ * - sizeof(class UnicodeString)
+ * - offsetof(UnicodeString, fUnion)
+ * - sizeof(fUnion)
+ * - sizeof(fFields)
+ *
+ * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
+ * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
+ * (Padding at the end of fFields is ok:
+ * As long as there is no padding after fStackBuffer, it is not wasted space.)
+ *
+ * We further assume that the compiler does not reorder the fields,
+ * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
+ * with at most some padding (but no other field) in between.
+ * (Padding there would be wasted space, but functionally harmless.)
+ *
+ * We use a few more sizeof(pointer)'s chunks of space with
+ * fRestOfStackBuffer, fShortLength and fFlags,
+ * to get up exactly to the intended sizeof(UnicodeString).
+ */
+ // (implicit) *vtable;
+ union StackBufferOrFields {
+ // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
+ // else fFields is used
+ UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
+ struct {
+ UChar *fArray; // the Unicode data
+ int32_t fCapacity; // capacity of fArray (in UChars)
+ int32_t fLength; // number of characters in fArray if >127; else undefined
+ } fFields;
+ } fUnion;
+ UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
+ int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
+ uint8_t fFlags; // bit flags: see constants above
+};
+
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @stable ICU 2.8
+ */
+U_COMMON_API UnicodeString U_EXPORT2
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
+//========================================
+// Inline members
+//========================================
+
+//========================================
+// Privates
+//========================================
+
+// Clamps start into the range [0, length()].
+inline void
+UnicodeString::pinIndex(int32_t& start) const
+{
+  if(start < 0) {
+    start = 0;
+    return;
+  }
+  const int32_t len = length();
+  if(start > len) {
+    start = len;
+  }
+}
+
+// Clamps start into [0, length()] and then _length into [0, length() - start].
+inline void
+UnicodeString::pinIndices(int32_t& start, int32_t& _length) const
+{
+  const int32_t len = length();
+
+  // pin the start offset first; the room left for _length depends on it
+  if(start < 0) {
+    start = 0;
+  } else if(start > len) {
+    start = len;
+  }
+
+  const int32_t remaining = len - start;
+  if(_length < 0) {
+    _length = 0;
+  } else if(_length > remaining) {
+    _length = remaining;
+  }
+}
+
+// Returns the start of the character storage: fUnion.fStackBuffer when
+// kUsingStackBuffer is set in fFlags, otherwise fUnion.fFields.fArray.
+// No other flag (bogus, open getBuffer) is checked here.
+inline UChar*
+UnicodeString::getArrayStart()
+{
+  if(fFlags&kUsingStackBuffer) {
+    return fUnion.fStackBuffer;
+  }
+  return fUnion.fFields.fArray;
+}
+
+// Const variant: returns fUnion.fStackBuffer when kUsingStackBuffer is set
+// in fFlags, otherwise fUnion.fFields.fArray. No other flag is checked here.
+inline const UChar*
+UnicodeString::getArrayStart() const
+{
+  if(fFlags&kUsingStackBuffer) {
+    return fUnion.fStackBuffer;
+  }
+  return fUnion.fFields.fArray;
+}
+
+//========================================
+// Default constructor
+//========================================
+
+// Constructs an empty string: short length 0, flags set to kShortString.
+inline
+UnicodeString::UnicodeString() : fShortLength(0), fFlags(kShortString) {
+}
+
+//========================================
+// Read-only implementation methods
+//========================================
+// Returns fShortLength when it is non-negative; otherwise the length is
+// read from fUnion.fFields.fLength.
+inline int32_t
+UnicodeString::length() const
+{
+  if(fShortLength>=0) {
+    return fShortLength;
+  }
+  return fUnion.fFields.fLength;
+}
+
+// Returns US_STACKBUF_SIZE while the stack buffer is in use (kUsingStackBuffer),
+// otherwise the capacity recorded in fUnion.fFields.fCapacity.
+inline int32_t
+UnicodeString::getCapacity() const
+{
+  if(fFlags&kUsingStackBuffer) {
+    return US_STACKBUF_SIZE;
+  }
+  return fUnion.fFields.fCapacity;
+}
+
+// Public hash accessor; simply returns the result of doHashCode().
+inline int32_t
+UnicodeString::hashCode() const
+{
+  const int32_t code = doHashCode();
+  return code;
+}
+
+// Returns the kIsBogus bit of fFlags (kIsBogus is 1, so the result is 0 or 1).
+inline UBool
+UnicodeString::isBogus() const
+{
+  return (UBool)(fFlags & kIsBogus);
+}
+
+// True when neither kOpenGetBuffer nor kIsBogus is set in fFlags.
+inline UBool
+UnicodeString::isWritable() const
+{
+  return (UBool)((fFlags&(kOpenGetBuffer|kIsBogus)) == 0);
+}
+
+// True when the current buffer may be written in place:
+// none of kOpenGetBuffer, kIsBogus, kBufferIsReadonly is set, and the buffer
+// is either not ref-counted or its refCount() is exactly 1.
+inline UBool
+UnicodeString::isBufferWritable() const
+{
+  if(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) {
+    return FALSE;
+  }
+  if(!(fFlags&kRefCounted)) {
+    return TRUE;
+  }
+  // refCount() is only consulted for ref-counted buffers
+  return (UBool)(refCount()==1);
+}
+
+// Read-only buffer access. Returns 0 when kIsBogus or kOpenGetBuffer is set;
+// otherwise the stack buffer or fFields.fArray, depending on kUsingStackBuffer.
+inline const UChar *
+UnicodeString::getBuffer() const
+{
+  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+    return 0;
+  }
+  return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray;
+}
+
+//========================================
+// Read-only alias methods
+//========================================
+// UnicodeString-source overload of doCompare().
+// For a non-bogus srcText, pins srcStart/srcLength against srcText and
+// forwards to the UChar* overload with srcText's array start.
+inline int8_t
+UnicodeString::doCompare(int32_t start,
+                         int32_t thisLength,
+                         const UnicodeString& srcText,
+                         int32_t srcStart,
+                         int32_t srcLength) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+  }
+  // srcText is bogus: 0 if this string is bogus too, 1 otherwise
+  return (int8_t)!isBogus();
+}
+
+// Equality: a bogus string equals only another bogus string; two non-bogus
+// strings are equal when their lengths match and doEquals() reports equality.
+inline UBool
+UnicodeString::operator== (const UnicodeString& text) const
+{
+  if(isBogus()) {
+    return text.isBogus();
+  }
+  if(text.isBogus()) {
+    return FALSE;
+  }
+  const int32_t len = length();
+  return len == text.length() && doEquals(text, len);
+}
+
+// Inequality: the logical negation of operator==.
+inline UBool
+UnicodeString::operator!= (const UnicodeString& text) const
+{
+  const UBool same = operator==(text);
+  return !same;
+}
+
+// True when doCompare() over both complete strings returns 1.
+inline UBool
+UnicodeString::operator> (const UnicodeString& text) const
+{
+  const int8_t order = doCompare(0, length(), text, 0, text.length());
+  return order == 1;
+}
+
+// True when doCompare() over both complete strings returns -1.
+inline UBool
+UnicodeString::operator< (const UnicodeString& text) const
+{
+  const int8_t order = doCompare(0, length(), text, 0, text.length());
+  return order == -1;
+}
+
+// True unless doCompare() over both complete strings returns -1.
+inline UBool
+UnicodeString::operator>= (const UnicodeString& text) const
+{
+  const int8_t order = doCompare(0, length(), text, 0, text.length());
+  return order != -1;
+}
+
+// True unless doCompare() over both complete strings returns 1.
+inline UBool
+UnicodeString::operator<= (const UnicodeString& text) const
+{
+  const int8_t order = doCompare(0, length(), text, 0, text.length());
+  return order != 1;
+}
+
+// Compares the whole of this string with the whole of text via doCompare().
+inline int8_t
+UnicodeString::compare(const UnicodeString& text) const
+{
+  const int32_t thisLength = length();
+  const int32_t textLength = text.length();
+  return doCompare(0, thisLength, text, 0, textLength);
+}
+
+// Compares the range (start, _length) of this string with the whole of srcText.
+inline int8_t
+UnicodeString::compare(int32_t start, int32_t _length, const UnicodeString& srcText) const
+{
+  const int32_t srcLength = srcText.length();
+  return doCompare(start, _length, srcText, 0, srcLength);
+}
+
+// Compares the whole of this string with srcLength units of srcChars,
+// starting at offset 0 of srcChars.
+inline int8_t
+UnicodeString::compare(const UChar *srcChars, int32_t srcLength) const
+{
+  const int32_t thisLength = length();
+  return doCompare(0, thisLength, srcChars, 0, srcLength);
+}
+
+// Range-vs-range comparison against another UnicodeString;
+// all arguments are passed straight through to doCompare().
+inline int8_t
+UnicodeString::compare(int32_t start, int32_t _length,
+                       const UnicodeString& srcText,
+                       int32_t srcStart, int32_t srcLength) const
+{
+  return doCompare(start, _length, srcText, srcStart, srcLength);
+}
+
+// Compares the range (start, _length) of this string with srcChars.
+// The same _length is passed as the source length, with source offset 0.
+inline int8_t
+UnicodeString::compare(int32_t start, int32_t _length, const UChar *srcChars) const
+{
+  return doCompare(start, _length, srcChars, 0, _length);
+}
+
+// Range-vs-range comparison against a UChar array;
+// all arguments are passed straight through to doCompare().
+inline int8_t
+UnicodeString::compare(int32_t start, int32_t _length,
+                       const UChar *srcChars,
+                       int32_t srcStart, int32_t srcLength) const
+{
+  return doCompare(start, _length, srcChars, srcStart, srcLength);
+}
+
+// Limit-based variant of compare(): converts the [start, limit) and
+// [srcStart, srcLimit) pairs into lengths and forwards to doCompare().
+inline int8_t
+UnicodeString::compareBetween(int32_t start, int32_t limit,
+                              const UnicodeString& srcText,
+                              int32_t srcStart, int32_t srcLimit) const
+{
+  const int32_t thisLength = limit - start;
+  const int32_t srcLength = srcLimit - srcStart;
+  return doCompare(start, thisLength, srcText, srcStart, srcLength);
+}
+
+// UnicodeString-source overload of doCompareCodePointOrder().
+// For a non-bogus srcText, pins srcStart/srcLength against srcText and
+// forwards to the UChar* overload with srcText's array start.
+inline int8_t
+UnicodeString::doCompareCodePointOrder(int32_t start,
+                                       int32_t thisLength,
+                                       const UnicodeString& srcText,
+                                       int32_t srcStart,
+                                       int32_t srcLength) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+  }
+  // srcText is bogus: 0 if this string is bogus too, 1 otherwise
+  return (int8_t)!isBogus();
+}
+
+// Compares the whole of this string with the whole of text
+// via doCompareCodePointOrder().
+inline int8_t
+UnicodeString::compareCodePointOrder(const UnicodeString& text) const
+{
+  const int32_t thisLength = length();
+  const int32_t textLength = text.length();
+  return doCompareCodePointOrder(0, thisLength, text, 0, textLength);
+}
+
+// Compares the range (start, _length) of this string with the whole of srcText.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
+                                     const UnicodeString& srcText) const
+{
+  const int32_t srcLength = srcText.length();
+  return doCompareCodePointOrder(start, _length, srcText, 0, srcLength);
+}
+
+// Compares the whole of this string with srcLength units of srcChars,
+// starting at offset 0 of srcChars.
+inline int8_t
+UnicodeString::compareCodePointOrder(const UChar *srcChars, int32_t srcLength) const
+{
+  const int32_t thisLength = length();
+  return doCompareCodePointOrder(0, thisLength, srcChars, 0, srcLength);
+}
+
+// Range-vs-range comparison against another UnicodeString;
+// all arguments are passed straight through to doCompareCodePointOrder().
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
+                                     const UnicodeString& srcText,
+                                     int32_t srcStart, int32_t srcLength) const
+{
+  return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength);
+}
+
+// Compares the range (start, _length) of this string with srcChars.
+// The same _length is passed as the source length, with source offset 0.
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
+                                     const UChar *srcChars) const
+{
+  return doCompareCodePointOrder(start, _length, srcChars, 0, _length);
+}
+
+// Range-vs-range comparison against a UChar array;
+// all arguments are passed straight through to doCompareCodePointOrder().
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start, int32_t _length,
+                                     const UChar *srcChars,
+                                     int32_t srcStart, int32_t srcLength) const
+{
+  return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength);
+}
+
+// Limit-based variant of compareCodePointOrder(): converts the [start, limit)
+// and [srcStart, srcLimit) pairs into lengths before forwarding.
+inline int8_t
+UnicodeString::compareCodePointOrderBetween(int32_t start, int32_t limit,
+                                            const UnicodeString& srcText,
+                                            int32_t srcStart, int32_t srcLimit) const
+{
+  const int32_t thisLength = limit - start;
+  const int32_t srcLength = srcLimit - srcStart;
+  return doCompareCodePointOrder(start, thisLength, srcText, srcStart, srcLength);
+}
+
+// UnicodeString-source overload of doCaseCompare().
+// For a non-bogus srcText, pins srcStart/srcLength against srcText and
+// forwards to the UChar* overload with srcText's array start; options is
+// passed through unchanged.
+inline int8_t
+UnicodeString::doCaseCompare(int32_t start,
+                             int32_t thisLength,
+                             const UnicodeString &srcText,
+                             int32_t srcStart,
+                             int32_t srcLength,
+                             uint32_t options) const
+{
+  if(!srcText.isBogus()) {
+    srcText.pinIndices(srcStart, srcLength);
+    return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
+  }
+  // srcText is bogus: 0 if this string is bogus too, 1 otherwise
+  return (int8_t)!isBogus();
+}
+
+// Compares the whole of this string with the whole of text via doCaseCompare();
+// options is forwarded unchanged.
+inline int8_t
+UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const
+{
+  const int32_t thisLength = length();
+  const int32_t textLength = text.length();
+  return doCaseCompare(0, thisLength, text, 0, textLength, options);
+}
+
+// Compares the range (start, _length) of this string with the whole of srcText;
+// options is forwarded unchanged.
+inline int8_t
+UnicodeString::caseCompare(int32_t start, int32_t _length,
+                           const UnicodeString &srcText,
+                           uint32_t options) const
+{
+  const int32_t srcLength = srcText.length();
+  return doCaseCompare(start, _length, srcText, 0, srcLength, options);
+}
+
+// Compares the whole of this string with srcLength units of srcChars,
+// starting at offset 0 of srcChars; options is forwarded unchanged.
+inline int8_t
+UnicodeString::caseCompare(const UChar *srcChars, int32_t srcLength,
+                           uint32_t options) const
+{
+  const int32_t thisLength = length();
+  return doCaseCompare(0, thisLength, srcChars, 0, srcLength, options);
+}
+
+// Range-vs-range comparison against another UnicodeString;
+// all arguments are passed straight through to doCaseCompare().
+inline int8_t
+UnicodeString::caseCompare(int32_t start, int32_t _length,
+                           const UnicodeString &srcText,
+                           int32_t srcStart, int32_t srcLength,
+                           uint32_t options) const
+{
+  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
+}
+
+// Compares the range (start, _length) of this string with srcChars.
+// The same _length is passed as the source length, with source offset 0.
+inline int8_t
+UnicodeString::caseCompare(int32_t start, int32_t _length,
+                           const UChar *srcChars,
+                           uint32_t options) const
+{
+  return doCaseCompare(start, _length, srcChars, 0, _length, options);
+}
+
+// Range-vs-range comparison against a UChar array;
+// all arguments are passed straight through to doCaseCompare().
+inline int8_t
+UnicodeString::caseCompare(int32_t start, int32_t _length,
+                           const UChar *srcChars,
+                           int32_t srcStart, int32_t srcLength,
+                           uint32_t options) const
+{
+  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
+}
+
+// Limit-based variant of caseCompare(): converts the [start, limit) and
+// [srcStart, srcLimit) pairs into lengths and forwards to doCaseCompare().
+inline int8_t
+UnicodeString::caseCompareBetween(int32_t start, int32_t limit,
+                                  const UnicodeString &srcText,
+                                  int32_t srcStart, int32_t srcLimit,
+                                  uint32_t options) const
+{
+  const int32_t thisLength = limit - start;
+  const int32_t srcLength = srcLimit - srcStart;
+  return doCaseCompare(start, thisLength, srcText, srcStart, srcLength, options);
+}
+
+// Searches the (start, _length) range of this string for the (srcStart, srcLength)
+// range of srcText. Returns -1 when srcText is bogus or its pinned range is empty.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& srcText,
+                       int32_t srcStart, int32_t srcLength,
+                       int32_t start, int32_t _length) const
+{
+  if(srcText.isBogus()) {
+    return -1;
+  }
+  srcText.pinIndices(srcStart, srcLength);
+  if(srcLength <= 0) {
+    return -1; // nothing to search for
+  }
+  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+}
+
+// Searches all of this string for all of text.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text) const
+{
+  const int32_t textLength = text.length();
+  const int32_t thisLength = length();
+  return indexOf(text, 0, textLength, 0, thisLength);
+}
+
+// Searches this string for all of text, from start to the end of this string.
+// start is passed through pinIndex() before the remaining length is computed.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text, int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return indexOf(text, 0, text.length(), start, remaining);
+}
+
+// Searches the (start, _length) range of this string for all of text.
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text, int32_t start, int32_t _length) const
+{
+  const int32_t textLength = text.length();
+  return indexOf(text, 0, textLength, start, _length);
+}
+
+// Searches this string for srcChars (srcLength UChars), from start to the end of
+// this string. start is passed through pinIndex() first.
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength, int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return indexOf(srcChars, 0, srcLength, start, remaining);
+}
+
+// Searches the (start, _length) range of this string for srcChars, using a source
+// offset of 0.
+inline int32_t
+UnicodeString::indexOf(const UChar *srcChars, int32_t srcLength,
+                       int32_t start, int32_t _length) const
+{
+  return indexOf(srcChars, 0, srcLength, start, _length);
+}
+
+// Searches the (start, _length) range of this string for the code unit c.
+inline int32_t
+UnicodeString::indexOf(UChar c, int32_t start, int32_t _length) const
+{
+  return doIndexOf(c, start, _length);
+}
+
+// Searches the (start, _length) range of this string for the code point c.
+inline int32_t
+UnicodeString::indexOf(UChar32 c, int32_t start, int32_t _length) const
+{
+  return doIndexOf(c, start, _length);
+}
+
+// Searches all of this string for the code unit c.
+inline int32_t
+UnicodeString::indexOf(UChar c) const
+{
+  const int32_t thisLength = length();
+  return doIndexOf(c, 0, thisLength);
+}
+
+// Searches all of this string for the code point c.
+inline int32_t
+UnicodeString::indexOf(UChar32 c) const
+{
+  const int32_t thisLength = length();
+  return indexOf(c, 0, thisLength);
+}
+
+// Searches this string for the code unit c, from start to the end of this string.
+// start is passed through pinIndex() first.
+inline int32_t
+UnicodeString::indexOf(UChar c, int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return doIndexOf(c, start, remaining);
+}
+
+// Searches this string for the code point c, from start to the end of this string.
+// start is passed through pinIndex() first.
+inline int32_t
+UnicodeString::indexOf(UChar32 c, int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return indexOf(c, start, remaining);
+}
+
+// Backward search of the (start, _length) range of this string for srcChars,
+// using a source offset of 0.
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength,
+                           int32_t start, int32_t _length) const
+{
+  return lastIndexOf(srcChars, 0, srcLength, start, _length);
+}
+
+// Backward search for srcChars in the part of this string from start to its end.
+// start is passed through pinIndex() first.
+inline int32_t
+UnicodeString::lastIndexOf(const UChar *srcChars, int32_t srcLength, int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return lastIndexOf(srcChars, 0, srcLength, start, remaining);
+}
+
+// Backward search of the (start, _length) range of this string for the
+// (srcStart, srcLength) range of srcText. Returns -1 when srcText is bogus or its
+// pinned range is empty.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& srcText,
+                           int32_t srcStart, int32_t srcLength,
+                           int32_t start, int32_t _length) const
+{
+  if(srcText.isBogus()) {
+    return -1;
+  }
+  srcText.pinIndices(srcStart, srcLength);
+  if(srcLength <= 0) {
+    return -1; // nothing to search for
+  }
+  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+}
+
+// Backward search of the (start, _length) range of this string for all of text.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text, int32_t start, int32_t _length) const
+{
+  const int32_t textLength = text.length();
+  return lastIndexOf(text, 0, textLength, start, _length);
+}
+
+// Backward search for all of text in the part of this string from start to its end.
+// start is passed through pinIndex() first.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text, int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return lastIndexOf(text, 0, text.length(), start, remaining);
+}
+
+// Backward search of all of this string for all of text.
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text) const
+{
+  const int32_t textLength = text.length();
+  const int32_t thisLength = length();
+  return lastIndexOf(text, 0, textLength, 0, thisLength);
+}
+
+// Backward search of the (start, _length) range of this string for the code unit c.
+inline int32_t
+UnicodeString::lastIndexOf(UChar c, int32_t start, int32_t _length) const
+{
+  return doLastIndexOf(c, start, _length);
+}
+
+// Backward search of the (start, _length) range of this string for the code point c.
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c, int32_t start, int32_t _length) const
+{ return doLastIndexOf(c, start, _length); }
+
+// Backward search of all of this string for the code unit c.
+inline int32_t
+UnicodeString::lastIndexOf(UChar c) const
+{
+  const int32_t thisLength = length();
+  return doLastIndexOf(c, 0, thisLength);
+}
+
+// Backward search of all of this string for the code point c.
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c) const
+{
+  const int32_t thisLength = length();
+  return lastIndexOf(c, 0, thisLength);
+}
+
+// Backward search for the code unit c in the part of this string from start to
+// its end. start is passed through pinIndex() first.
+inline int32_t
+UnicodeString::lastIndexOf(UChar c, int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return doLastIndexOf(c, start, remaining);
+}
+
+// Backward search for the code point c in the part of this string from start to
+// its end. start is passed through pinIndex() first.
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c, int32_t start) const
+{
+  pinIndex(start);
+  const int32_t remaining = length() - start;
+  return lastIndexOf(c, start, remaining);
+}
+
+// TRUE if the first text.length() code units of this string compare equal to text.
+inline UBool
+UnicodeString::startsWith(const UnicodeString& text) const
+{
+  const int32_t textLength = text.length();
+  return compare(0, textLength, text, 0, textLength) == 0;
+}
+
+// TRUE if this string starts with the (srcStart, srcLength) range of srcText.
+//
+// srcLength is used twice below: as the length of the source range and as the
+// length of the prefix of this string. The source range is therefore pinned
+// against srcText first, as the corresponding endsWith() overload does.
+// Without this, an over-long srcLength (such as INT32_MAX) would compare a longer
+// prefix of this string against the shorter source range and report FALSE for a
+// genuine prefix.
+inline UBool
+UnicodeString::startsWith(const UnicodeString& srcText,
+                          int32_t srcStart,
+                          int32_t srcLength) const
+{
+  srcText.pinIndices(srcStart, srcLength);
+  return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0;
+}
+
+// TRUE if this string starts with srcChars. A negative srcLength means that
+// srcChars is NUL-terminated and its length is measured with u_strlen().
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const
+{
+  const int32_t prefixLength = (srcLength < 0) ? u_strlen(srcChars) : srcLength;
+  return doCompare(0, prefixLength, srcChars, 0, prefixLength) == 0;
+}
+
+// TRUE if this string starts with the srcChars text that begins at offset srcStart.
+// A negative srcLength means that the text is NUL-terminated.
+//
+// The NUL-terminated length must be measured from srcChars + srcStart, exactly as
+// the corresponding endsWith() overload does. Measuring from srcChars would count
+// the srcStart code units in front of the compared text as well, so the comparison
+// would run past the terminating NUL.
+inline UBool
+UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
+  if(srcLength < 0) {
+    srcLength = u_strlen(srcChars + srcStart);
+  }
+  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
+}
+
+// TRUE if the last text.length() code units of this string compare equal to text.
+inline UBool
+UnicodeString::endsWith(const UnicodeString& text) const
+{
+  const int32_t textLength = text.length();
+  const int32_t tailStart = length() - textLength;
+  return doCompare(tailStart, textLength, text, 0, textLength) == 0;
+}
+
+// TRUE if this string ends with the (srcStart, srcLength) range of srcText.
+// The source range is pinned first because srcLength also determines where the
+// compared tail of this string begins.
+inline UBool
+UnicodeString::endsWith(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) const
+{
+  srcText.pinIndices(srcStart, srcLength);
+  const int32_t tailStart = length() - srcLength;
+  return doCompare(tailStart, srcLength, srcText, srcStart, srcLength) == 0;
+}
+
+// TRUE if this string ends with srcChars. A negative srcLength means that
+// srcChars is NUL-terminated and its length is measured with u_strlen().
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars, int32_t srcLength) const
+{
+  const int32_t suffixLength = (srcLength < 0) ? u_strlen(srcChars) : srcLength;
+  const int32_t tailStart = length() - suffixLength;
+  return doCompare(tailStart, suffixLength, srcChars, 0, suffixLength) == 0;
+}
+
+// TRUE if this string ends with the srcChars text that begins at offset srcStart.
+// A negative srcLength means NUL-terminated; the length is then measured from
+// srcChars + srcStart.
+inline UBool
+UnicodeString::endsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const
+{
+  const int32_t suffixLength =
+      (srcLength < 0) ? u_strlen(srcChars + srcStart) : srcLength;
+  const int32_t tailStart = length() - suffixLength;
+  return doCompare(tailStart, suffixLength, srcChars, srcStart, suffixLength) == 0;
+}
+
+//========================================
+// replace
+//========================================
+// Replaces the (start, _length) range of this string with all of srcText.
+inline UnicodeString&
+UnicodeString::replace(int32_t start, int32_t _length, const UnicodeString& srcText)
+{
+  const int32_t srcLength = srcText.length();
+  return doReplace(start, _length, srcText, 0, srcLength);
+}
+
+// Replaces the (start, _length) range of this string with the
+// (srcStart, srcLength) range of srcText; a plain forward to doReplace().
+inline UnicodeString&
+UnicodeString::replace(int32_t start, int32_t _length,
+                       const UnicodeString& srcText,
+                       int32_t srcStart, int32_t srcLength)
+{
+  return doReplace(start, _length, srcText, srcStart, srcLength);
+}
+
+// Replaces the (start, _length) range of this string with srcLength UChars taken
+// from the beginning of srcChars.
+inline UnicodeString&
+UnicodeString::replace(int32_t start, int32_t _length,
+                       const UChar *srcChars, int32_t srcLength)
+{
+  return doReplace(start, _length, srcChars, 0, srcLength);
+}
+
+// Replaces the (start, _length) range of this string with the
+// (srcStart, srcLength) range of srcChars; a plain forward to doReplace().
+inline UnicodeString&
+UnicodeString::replace(int32_t start, int32_t _length,
+                       const UChar *srcChars,
+                       int32_t srcStart, int32_t srcLength)
+{
+  return doReplace(start, _length, srcChars, srcStart, srcLength);
+}
+
+// Replaces the (start, _length) range of this string with the single code unit
+// srcChar, passed to doReplace() as a one-element array.
+inline UnicodeString&
+UnicodeString::replace(int32_t start, int32_t _length, UChar srcChar)
+{
+  return doReplace(start, _length, &srcChar, 0, 1);
+}
+
+// Replaces the range between start and limit of this string with all of srcText.
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start, int32_t limit, const UnicodeString& srcText)
+{
+  const int32_t thisLength = limit - start;
+  const int32_t srcLength = srcText.length();
+  return doReplace(start, thisLength, srcText, 0, srcLength);
+}
+
+// Replaces the range between start and limit of this string with the range
+// between srcStart and srcLimit of srcText. Both start/limit pairs are converted
+// to start/length pairs here.
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start, int32_t limit,
+                              const UnicodeString& srcText,
+                              int32_t srcStart, int32_t srcLimit)
+{
+  const int32_t thisLength = limit - start;
+  const int32_t srcLength = srcLimit - srcStart;
+  return doReplace(start, thisLength, srcText, srcStart, srcLength);
+}
+
+// Runs the full findAndReplace() overload over all of this string, using all of
+// oldText and all of newText.
+inline UnicodeString&
+UnicodeString::findAndReplace(const UnicodeString& oldText, const UnicodeString& newText)
+{
+  const int32_t thisLength = length();
+  const int32_t oldLength = oldText.length();
+  const int32_t newLength = newText.length();
+  return findAndReplace(0, thisLength, oldText, 0, oldLength, newText, 0, newLength);
+}
+
+// Runs the full findAndReplace() overload over the (start, _length) range of this
+// string, using all of oldText and all of newText.
+inline UnicodeString&
+UnicodeString::findAndReplace(int32_t start, int32_t _length,
+                              const UnicodeString& oldText,
+                              const UnicodeString& newText)
+{
+  const int32_t oldLength = oldText.length();
+  const int32_t newLength = newText.length();
+  return findAndReplace(start, _length, oldText, 0, oldLength, newText, 0, newLength);
+}
+
+// ============================
+// extract
+// ============================
+// Copies the (start, _length) range of this string into target by replacing the
+// whole current contents of target.
+inline void
+UnicodeString::doExtract(int32_t start, int32_t _length, UnicodeString& target) const
+{
+  const int32_t targetLength = target.length();
+  target.replace(0, targetLength, *this, start, _length);
+}
+
+// Extracts the (start, _length) range of this string into the UChar buffer target
+// at offset targetStart; a plain forward to doExtract().
+inline void
+UnicodeString::extract(int32_t start, int32_t _length,
+                       UChar *target, int32_t targetStart) const
+{
+  doExtract(start, _length, target, targetStart);
+}
+
+// Extracts the (start, _length) range of this string into target; a plain forward
+// to doExtract().
+inline void
+UnicodeString::extract(int32_t start, int32_t _length, UnicodeString& target) const
+{
+  doExtract(start, _length, target);
+}
+
+#if !UCONFIG_NO_CONVERSION
+
+// Extracts the (start, _length) range into the char buffer dst using codepage,
+// for callers that give no destination size. The size-taking overload is called
+// with 0 for a null dst and with 0xffffffff otherwise.
+inline int32_t
+UnicodeString::extract(int32_t start, int32_t _length,
+                       char *dst, const char *codepage) const
+{
+  // This dstSize value will be checked explicitly
+  return extract(start, _length, dst, dst==0 ? 0 : 0xffffffff, codepage);
+}
+
+#endif
+
+// Extracts the range between start and limit into the UChar buffer dst at offset
+// dstStart. Both indices are passed through pinIndex() before the length is
+// computed from them.
+inline void
+UnicodeString::extractBetween(int32_t start, int32_t limit,
+                              UChar *dst, int32_t dstStart) const
+{
+  pinIndex(start);
+  pinIndex(limit);
+  const int32_t count = limit - start;
+  doExtract(start, count, dst, dstStart);
+}
+
+// Start/limit flavour of tempSubString(): converts the pair into start/length.
+inline UnicodeString
+UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const
+{
+  const int32_t subLength = limit - start;
+  return tempSubString(start, subLength);
+}
+
+// Returns the code unit at offset, or kInvalidUChar when offset is out of range.
+inline UChar
+UnicodeString::doCharAt(int32_t offset) const
+{
+  // A single unsigned comparison rejects negative offsets as well as offsets at or
+  // beyond length().
+  if((uint32_t)offset >= (uint32_t)length()) {
+    return kInvalidUChar;
+  }
+  return getArrayStart()[offset];
+}
+
+// Returns the code unit at offset; see doCharAt() for out-of-range handling.
+inline UChar
+UnicodeString::charAt(int32_t offset) const
+{
+  return doCharAt(offset);
+}
+
+// Read-only subscript; same result as doCharAt(offset).
+inline UChar
+UnicodeString::operator[] (int32_t offset) const
+{
+  return doCharAt(offset);
+}
+
+// TRUE exactly when the short-length field is 0.
+inline UBool
+UnicodeString::isEmpty() const
+{ return 0 == fShortLength; }
+
+//========================================
+// Write implementation methods
+//========================================
+// Stores the string length. Values up to 127 are kept directly in fShortLength;
+// larger values set fShortLength to -1 and go into fUnion.fFields.fLength.
+inline void
+UnicodeString::setLength(int32_t len)
+{
+  if(len > 127) {
+    // too large for the int8_t field: mark it and use the full-width field
+    fShortLength = (int8_t)-1;
+    fUnion.fFields.fLength = len;
+    return;
+  }
+  fShortLength = (int8_t)len;
+}
+
+// Resets this object to an empty string: fFlags becomes kShortString and the
+// short length becomes 0.
+inline void
+UnicodeString::setToEmpty()
+{
+  fFlags = kShortString;
+  fShortLength = 0;
+}
+
+// Records a new length, then stores the array pointer and its capacity in
+// fUnion.fFields.
+inline void
+UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity)
+{
+  setLength(len);
+  fUnion.fFields.fCapacity = capacity;
+  fUnion.fFields.fArray = array;
+}
+
+// Assigns a single code unit by replacing the entire current contents with ch.
+inline UnicodeString&
+UnicodeString::operator= (UChar ch)
+{
+  const int32_t oldLength = length();
+  return doReplace(0, oldLength, &ch, 0, 1);
+}
+
+// Assigns a single code point by replacing the entire current contents with ch.
+inline UnicodeString&
+UnicodeString::operator= (UChar32 ch)
+{
+  const int32_t oldLength = length();
+  return replace(0, oldLength, ch);
+}
+
+// Sets this string to the (srcStart, srcLength) range of srcText.
+// unBogus() is called before the current contents are replaced.
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength)
+{
+  unBogus();
+  const int32_t oldLength = length();
+  return doReplace(0, oldLength, srcText, srcStart, srcLength);
+}
+
+// Sets this string to the part of srcText from srcStart to its end.
+// unBogus() is called first, and srcStart is passed through srcText.pinIndex()
+// before the remaining source length is computed.
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText, int32_t srcStart)
+{
+  unBogus();
+  srcText.pinIndex(srcStart);
+  const int32_t srcRemaining = srcText.length() - srcStart;
+  return doReplace(0, length(), srcText, srcStart, srcRemaining);
+}
+
+// Sets this string to srcText; implemented entirely by copyFrom().
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText)
+{ return copyFrom(srcText); }
+
+// Sets this string to srcLength UChars taken from the beginning of srcChars.
+// unBogus() is called before the current contents are replaced.
+inline UnicodeString&
+UnicodeString::setTo(const UChar *srcChars, int32_t srcLength)
+{
+  unBogus();
+  const int32_t oldLength = length();
+  return doReplace(0, oldLength, srcChars, 0, srcLength);
+}
+
+// Sets this string to the single code unit srcChar.
+// unBogus() is called before the current contents are replaced.
+inline UnicodeString&
+UnicodeString::setTo(UChar srcChar)
+{
+  unBogus();
+  const int32_t oldLength = length();
+  return doReplace(0, oldLength, &srcChar, 0, 1);
+}
+
+// Sets this string to the single code point srcChar.
+// unBogus() is called before the current contents are replaced.
+inline UnicodeString&
+UnicodeString::setTo(UChar32 srcChar)
+{
+  unBogus();
+  const int32_t oldLength = length();
+  return replace(0, oldLength, srcChar);
+}
+
+// Appends the (srcStart, srcLength) range of srcText: a zero-length replace at
+// the end of this string.
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength)
+{
+  const int32_t end = length();
+  return doReplace(end, 0, srcText, srcStart, srcLength);
+}
+
+// Appends all of srcText: a zero-length replace at the end of this string.
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText)
+{
+  const int32_t end = length();
+  const int32_t srcLength = srcText.length();
+  return doReplace(end, 0, srcText, 0, srcLength);
+}
+
+// Appends the (srcStart, srcLength) range of srcChars: a zero-length replace at
+// the end of this string.
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars, int32_t srcStart, int32_t srcLength)
+{
+  const int32_t end = length();
+  return doReplace(end, 0, srcChars, srcStart, srcLength);
+}
+
+// Appends srcLength UChars taken from the beginning of srcChars.
+inline UnicodeString&
+UnicodeString::append(const UChar *srcChars, int32_t srcLength)
+{
+  const int32_t end = length();
+  return doReplace(end, 0, srcChars, 0, srcLength);
+}
+
+// Appends the single code unit srcChar.
+inline UnicodeString&
+UnicodeString::append(UChar srcChar)
+{
+  const int32_t end = length();
+  return doReplace(end, 0, &srcChar, 0, 1);
+}
+
+// Appends the single code unit ch.
+inline UnicodeString&
+UnicodeString::operator+= (UChar ch)
+{
+  const int32_t end = length();
+  return doReplace(end, 0, &ch, 0, 1);
+}
+
+// Appends the single code point ch by forwarding to append().
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch)
+{ return append(ch); }
+
+// Appends all of srcText: a zero-length replace at the end of this string.
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText)
+{
+  const int32_t end = length();
+  const int32_t srcLength = srcText.length();
+  return doReplace(end, 0, srcText, 0, srcLength);
+}
+
+// Inserts the (srcStart, srcLength) range of srcText at start: a replace of
+// zero existing code units.
+inline UnicodeString&
+UnicodeString::insert(int32_t start, const UnicodeString& srcText,
+                      int32_t srcStart, int32_t srcLength)
+{
+  return doReplace(start, 0, srcText, srcStart, srcLength);
+}
+
+// Inserts all of srcText at start: a replace of zero existing code units.
+inline UnicodeString&
+UnicodeString::insert(int32_t start, const UnicodeString& srcText)
+{
+  const int32_t srcLength = srcText.length();
+  return doReplace(start, 0, srcText, 0, srcLength);
+}
+
+// Inserts the (srcStart, srcLength) range of srcChars at start: a replace of
+// zero existing code units.
+inline UnicodeString&
+UnicodeString::insert(int32_t start, const UChar *srcChars,
+                      int32_t srcStart, int32_t srcLength)
+{
+  return doReplace(start, 0, srcChars, srcStart, srcLength);
+}
+
+// Inserts srcLength UChars from the beginning of srcChars at start.
+inline UnicodeString&
+UnicodeString::insert(int32_t start, const UChar *srcChars, int32_t srcLength)
+{
+  return doReplace(start, 0, srcChars, 0, srcLength);
+}
+
+// Inserts the single code unit srcChar at start.
+inline UnicodeString&
+UnicodeString::insert(int32_t start, UChar srcChar)
+{
+  return doReplace(start, 0, &srcChar, 0, 1);
+}
+
+// Inserts the single code point srcChar at start, via replace() with a length of 0.
+inline UnicodeString&
+UnicodeString::insert(int32_t start, UChar32 srcChar)
+{
+  return replace(start, 0, srcChar);
+}
+
+
+// Empties this string and returns it.
+inline UnicodeString&
+UnicodeString::remove()
+{
+  if(!isBogus()) {
+    // ordinary string: only the length needs to be reset
+    fShortLength = 0;
+    return *this;
+  }
+  // remove() of a bogus string makes the string empty and non-bogus
+  setToEmpty();
+  return *this;
+}
+
+// Removes the (start, _length) range. A request with start <= 0 and
+// _length == INT32_MAX is routed to the no-argument remove().
+inline UnicodeString&
+UnicodeString::remove(int32_t start, int32_t _length)
+{
+  const UBool removesEverything = (start <= 0 && _length == INT32_MAX);
+  if(!removesEverything) {
+    return doReplace(start, _length, NULL, 0, 0);
+  }
+  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
+  return remove();
+}
+
+// Removes the range between start and limit by replacing it with nothing.
+inline UnicodeString&
+UnicodeString::removeBetween(int32_t start, int32_t limit)
+{
+  const int32_t count = limit - start;
+  return doReplace(start, count, NULL, 0, 0);
+}
+
+// Keeps only the range between start and limit: first truncates at limit, then
+// replaces the leading start code units with nothing.
+inline UnicodeString &
+UnicodeString::retainBetween(int32_t start, int32_t limit)
+{
+  truncate(limit);                        // drop the tail first
+  return doReplace(0, start, NULL, 0, 0); // then drop the head
+}
+
+// Shortens this string to targetLength code units.
+// Returns TRUE only if the length was actually reduced.
+inline UBool
+UnicodeString::truncate(int32_t targetLength)
+{
+  if(isBogus() && targetLength == 0) {
+    // truncate(0) of a bogus string makes the string empty and non-bogus
+    unBogus();
+    return FALSE;
+  }
+  // The unsigned comparison treats a negative targetLength like a too-large one.
+  if((uint32_t)targetLength >= (uint32_t)length()) {
+    return FALSE;
+  }
+  setLength(targetLength);
+  return TRUE;
+}
+
+// Reverses all of this string by forwarding to doReverse().
+inline UnicodeString&
+UnicodeString::reverse()
+{
+  const int32_t thisLength = length();
+  return doReverse(0, thisLength);
+}
+
+// Reverses the (start, _length) range of this string by forwarding to doReverse().
+inline UnicodeString&
+UnicodeString::reverse(int32_t start, int32_t _length)
+{
+  return doReverse(start, _length);
+}
+
+U_NAMESPACE_END
+
+#endif
diff --git a/Source/WebCore/icu/unicode/unorm2.h b/Source/WebCore/icu/unicode/unorm2.h
new file mode 100644
index 000000000..7152fc109
--- /dev/null
+++ b/Source/WebCore/icu/unicode/unorm2.h
@@ -0,0 +1,528 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unorm2.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009dec15
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UNORM2_H__
+#define __UNORM2_H__
+
+/**
+ * \file
+ * \brief C API: New API for Unicode Normalization.
+ *
+ * Unicode normalization functionality for standard Unicode normalization or
+ * for using custom mapping tables.
+ * All instances of UNormalizer2 are unmodifiable/immutable.
+ * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
+ * For more details see the Normalizer2 C++ class.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/uset.h"
+
+/**
+ * Constants for normalization modes.
+ * For details about standard Unicode normalization forms
+ * and about the algorithms which are also used with custom mapping tables
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+typedef enum {
+ /**
+ * Decomposition followed by composition.
+ * Same as standard NFC when using an "nfc" instance.
+ * Same as standard NFKC when using an "nfkc" instance.
+ * For details about standard Unicode normalization forms
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+ UNORM2_COMPOSE,
+ /**
+ * Map, and reorder canonically.
+ * Same as standard NFD when using an "nfc" instance.
+ * Same as standard NFKD when using an "nfkc" instance.
+ * For details about standard Unicode normalization forms
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+ UNORM2_DECOMPOSE,
+ /**
+ * "Fast C or D" form.
+ * If a string is in this form, then further decomposition <i>without reordering</i>
+ * would yield the same form as DECOMPOSE.
+ * Text in "Fast C or D" form can be processed efficiently with data tables
+ * that are "canonically closed", that is, that provide equivalent data for
+ * equivalent text, without having to be fully normalized.
+ * Not a standard Unicode normalization form.
+ * Not a unique form: Different FCD strings can be canonically equivalent.
+ * For details see http://www.unicode.org/notes/tn5/#FCD
+ * @stable ICU 4.4
+ */
+ UNORM2_FCD,
+ /**
+ * Compose only contiguously.
+ * Also known as "FCC" or "Fast C Contiguous".
+ * The result will often but not always be in NFC.
+ * The result will conform to FCD which is useful for processing.
+ * Not a standard Unicode normalization form.
+ * For details see http://www.unicode.org/notes/tn5/#FCC
+ * @stable ICU 4.4
+ */
+ UNORM2_COMPOSE_CONTIGUOUS
+} UNormalization2Mode;
+
+/**
+ * Result values for normalization quick check functions.
+ * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+ /**
+ * The input string is not in the normalization form.
+ * @stable ICU 2.0
+ */
+ UNORM_NO,
+ /**
+ * The input string is in the normalization form.
+ * @stable ICU 2.0
+ */
+ UNORM_YES,
+ /**
+ * The input string may or may not be in the normalization form.
+ * This value is only returned for composition forms like NFC and FCC,
+ * when a backward-combining character is found for which the surrounding text
+ * would have to be analyzed further.
+ * @stable ICU 2.0
+ */
+ UNORM_MAYBE
+} UNormalizationCheckResult;
+
+/**
+ * Opaque C service object type for the new normalization API.
+ * @stable ICU 4.4
+ */
+struct UNormalizer2;
+typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFC normalization.
+ * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_STABLE const UNormalizer2 * U_EXPORT2
+unorm2_getNFCInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFD normalization.
+ * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_STABLE const UNormalizer2 * U_EXPORT2
+unorm2_getNFDInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKC normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_STABLE const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKD normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_STABLE const UNormalizer2 * U_EXPORT2
+unorm2_getNFKDInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_STABLE const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance which uses the specified data file
+ * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
+ * and which composes or decomposes text according to the specified mode.
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ *
+ * Use packageName=NULL for data files that are part of ICU's own data.
+ * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
+ * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
+ * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
+ *
+ * @param packageName NULL for ICU built-in data, otherwise application data package name
+ * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+ * @param mode normalization mode (compose or decompose etc.)
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @stable ICU 4.4
+ */
+U_STABLE const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Constructs a filtered normalizer wrapping any UNormalizer2 instance
+ * and a filter set.
+ * Both are aliased and must not be modified or deleted while this object
+ * is used.
+ * The filter set should be frozen; otherwise the performance will suffer greatly.
+ * @param norm2 wrapped UNormalizer2 instance
+ * @param filterSet USet which determines the characters to be normalized
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @stable ICU 4.4
+ */
+U_STABLE UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
+
+/**
+ * Closes a UNormalizer2 instance from unorm2_openFiltered().
+ * Do not close instances from unorm2_getInstance()!
+ * @param norm2 UNormalizer2 instance to be closed
+ * @stable ICU 4.4
+ */
+U_STABLE void U_EXPORT2
+unorm2_close(UNormalizer2 *norm2);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUNormalizer2Pointer
+ * "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the length of the destination string.
+ * The source and destination strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param src source string
+ * @param length length of the source string, or -1 if NUL-terminated
+ * @param dest destination string; its contents are replaced with the normalized src
+ * @param capacity number of UChars that can be written to dest
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the length of the destination string
+ * @stable ICU 4.4
+ */
+U_STABLE int32_t U_EXPORT2
+unorm2_normalize(const UNormalizer2 *norm2,
+ const UChar *src, int32_t length,
+ UChar *dest, int32_t capacity,
+ UErrorCode *pErrorCode);
+/**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, will be normalized
+ * @param secondLength length of the second string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the length of the first string
+ * @stable ICU 4.4
+ */
+U_STABLE int32_t U_EXPORT2
+unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode);
+/**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, should be normalized
+ * @param secondLength length of the second string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the length of the first string
+ * @stable ICU 4.4
+ */
+U_STABLE int32_t U_EXPORT2
+unorm2_append(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the decomposition mapping of c.
+ * Roughly equivalent to normalizing the String form of c
+ * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
+ * returns a negative value and does not write a string
+ * if c does not have a decomposition mapping in this instance's data.
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @param decomposition String buffer which will be set to c's
+ * decomposition mapping, if there is one.
+ * @param capacity number of UChars that can be written to decomposition
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
+ * @stable ICU 4.6
+ */
+U_STABLE int32_t U_EXPORT2
+unorm2_getDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the raw decomposition mapping of c.
+ *
+ * This is similar to the unorm2_getDecomposition() function but returns the
+ * raw decomposition mapping as specified in UnicodeData.txt or
+ * (for custom data) in the mapping files processed by the gennorm2 tool.
+ * By contrast, unorm2_getDecomposition() returns the processed,
+ * recursively-decomposed version of this mapping.
+ *
+ * When used on a standard NFKC Normalizer2 instance,
+ * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
+ *
+ * When used on a standard NFC Normalizer2 instance,
+ * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
+ * in this case, the result contains either one or two code points (=1..4 UChars).
+ *
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @param decomposition String buffer which will be set to c's
+ * raw decomposition mapping, if there is one.
+ * @param capacity number of UChars that can be written to decomposition
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
+ * @stable ICU 49
+ */
+U_STABLE int32_t U_EXPORT2
+unorm2_getRawDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Performs pairwise composition of a & b and returns the composite if there is one.
+ *
+ * Returns a composite code point c only if c has a two-way mapping to a+b.
+ * In standard Unicode normalization, this means that
+ * c has a canonical decomposition to a+b
+ * and c does not have the Full_Composition_Exclusion property.
+ *
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param a A (normalization starter) code point.
+ * @param b Another code point.
+ * @return The non-negative composite code point if there is one; otherwise a negative value.
+ * @stable ICU 49
+ */
+U_STABLE UChar32 U_EXPORT2
+unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
+
+/**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @return c's combining class
+ * @stable ICU 49
+ */
+U_STABLE uint8_t U_EXPORT2
+unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return TRUE if s is normalized
+ * @stable ICU 4.4
+ */
+U_STABLE UBool U_EXPORT2
+unorm2_isNormalized(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Tests if the string is normalized.
+ * For the two COMPOSE modes, the result could be "maybe" in cases that
+ * would take a little more work to resolve definitively.
+ * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
+ * combination of quick check + normalization, to avoid
+ * re-checking the "yes" prefix.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @stable ICU 4.4
+ */
+U_STABLE UNormalizationCheckResult U_EXPORT2
+unorm2_quickCheck(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the end of the normalized substring of the input string.
+ * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
+ * the substring <code>UnicodeString(s, 0, end)</code>
+ * will pass the quick check with a "yes" result.
+ *
+ * The returned end index is usually one or more characters before the
+ * "no" or "maybe" character: The end index is at a normalization boundary.
+ * (See the class documentation for more about normalization boundaries.)
+ *
+ * When the goal is a normalized string and most input strings are expected
+ * to be normalized already, then call this method,
+ * and if it returns a prefix shorter than the input string,
+ * copy that prefix and use normalizeSecondAndAppend() for the remainder.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return "yes" span end index
+ * @stable ICU 4.4
+ */
+U_STABLE int32_t U_EXPORT2
+unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Tests if the character always has a normalization boundary before it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return TRUE if c has a normalization boundary before it
+ * @stable ICU 4.4
+ */
+U_STABLE UBool U_EXPORT2
+unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character always has a normalization boundary after it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return TRUE if c has a normalization boundary after it
+ * @stable ICU 4.4
+ */
+U_STABLE UBool U_EXPORT2
+unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character is normalization-inert.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return TRUE if c is normalization-inert
+ * @stable ICU 4.4
+ */
+U_STABLE UBool U_EXPORT2
+unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
+
+#endif /* !UCONFIG_NO_NORMALIZATION */
+#endif /* __UNORM2_H__ */
diff --git a/Source/WebCore/icu/unicode/uobject.h b/Source/WebCore/icu/unicode/uobject.h
new file mode 100644
index 000000000..54ceace62
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uobject.h
@@ -0,0 +1,320 @@
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uobject.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jun26
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UOBJECT_H__
+#define __UOBJECT_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Common ICU base class UObject.
+ */
+
+/**
+ * @{
+ * \def U_NO_THROW
+ * Define this to define the throw() specification so
+ * certain functions do not throw any exceptions
+ *
+ * UMemory operator new methods should have the throw() specification
+ * appended to them, so that the compiler adds the additional NULL check
+ * before calling constructors. Without it, if <code>operator new</code> returns NULL the
+ * constructor is still called, and if the constructor references member
+ * data (which it typically does), the result is a segmentation violation.
+ *
+ * @stable ICU 4.2
+ */
+#ifndef U_NO_THROW
+#define U_NO_THROW throw()
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/* UClassID-based RTTI */
+/*===========================================================================*/
+
+/**
+ * UClassID is used to identify classes without using the compiler's RTTI.
+ * This was used before C++ compilers consistently supported RTTI.
+ * ICU 4.6 requires compiler RTTI to be turned on.
+ *
+ * Each class hierarchy which needs
+ * to implement polymorphic clone() or operator==() defines two methods,
+ * described in detail below. UClassID values can be compared using
+ * operator==(). Nothing else should be done with them.
+ *
+ * \par
+ * In class hierarchies that implement "poor man's RTTI",
+ * each concrete subclass implements getDynamicClassID() in the same way:
+ *
+ * \code
+ * class Derived {
+ * public:
+ * virtual UClassID getDynamicClassID() const
+ * { return Derived::getStaticClassID(); }
+ * };
+ * \endcode
+ *
+ * Each concrete class implements getStaticClassID() as well, which allows
+ * clients to test for a specific type.
+ *
+ * \code
+ * class Derived {
+ * public:
+ * static UClassID U_EXPORT2 getStaticClassID();
+ * private:
+ * static char fgClassID;
+ * };
+ *
+ * // In Derived.cpp:
+ * UClassID Derived::getStaticClassID()
+ * { return (UClassID)&Derived::fgClassID; }
+ * char Derived::fgClassID = 0; // Value is irrelevant
+ * \endcode
+ * @stable ICU 2.0
+ */
+typedef void* UClassID;
+
+U_NAMESPACE_BEGIN
+
+/**
+ * UMemory is the common ICU base class.
+ * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
+ *
+ * This is primarily to make it possible and simple to override the
+ * C++ memory management by adding new/delete operators to this base class.
+ *
+ * To override ALL ICU memory management, including that from plain C code,
+ * replace the allocation functions declared in cmemory.h
+ *
+ * UMemory does not contain any virtual functions.
+ * Common "boilerplate" functions are defined in UObject.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UMemory {
+public:
+
+/* test versions for debugging shaper heap memory problems */
+#ifdef SHAPER_MEMORY_DEBUG
+ static void * NewArray(int size, int count);
+ static void * GrowArray(void * array, int newSize );
+ static void FreeArray(void * array );
+#endif
+
+#if U_OVERRIDE_CXX_ALLOCATION
+ /**
+ * Override for ICU4C C++ memory management.
+ * Simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * these macros, or something else, could be used here to implement C++ new/delete
+ * for ICU4C C++ classes.
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new(size_t size) U_NO_THROW;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * See new().
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new[](size_t size) U_NO_THROW;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * Simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * these macros, or something else, could be used here to implement C++ new/delete
+ * for ICU4C C++ classes.
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete(void *p) U_NO_THROW;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * See delete().
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete[](void *p) U_NO_THROW;
+
+#if U_HAVE_PLACEMENT_NEW
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * See new().
+ * @stable ICU 2.6
+ */
+ static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NO_THROW { return ptr; }
+
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * See delete().
+ * @stable ICU 2.6
+ */
+ static inline void U_EXPORT2 operator delete(void *, void *) U_NO_THROW {}
+#endif /* U_HAVE_PLACEMENT_NEW */
+#if U_HAVE_DEBUG_LOCATION_NEW
+ /**
+ * This method overrides the MFC debug version of the operator new
+ *
+ * @param size The requested memory size
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NO_THROW;
+ /**
+ * This method provides a matching delete for the MFC debug new
+ *
+ * @param p The pointer to the allocated memory
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NO_THROW;
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+#endif /* U_OVERRIDE_CXX_ALLOCATION */
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UMemory &UMemory::operator=(const UMemory &);
+ */
+};
+
+/**
+ * UObject is the common ICU "boilerplate" class.
+ * UObject inherits UMemory (starting with ICU 2.4),
+ * and all other public ICU C++ classes
+ * are derived from UObject (starting with ICU 2.2).
+ *
+ * UObject contains common virtual functions, in particular a virtual destructor.
+ *
+ * The clone() function is not available in UObject because it is not
+ * implemented by all ICU classes.
+ * Many ICU services provide a clone() function for their class trees,
+ * defined on the service's C++ base class, and all subclasses within that
+ * service class tree return a pointer to the service base class
+ * (which itself is a subclass of UObject).
+ * This is because some compilers do not support covariant (same-as-this)
+ * return types; cast to the appropriate subclass if necessary.
+ *
+ * @stable ICU 2.2
+ */
+class U_COMMON_API UObject : public UMemory {
+public:
+ /**
+ * Destructor.
+ *
+ * @stable ICU 2.2
+ */
+ virtual ~UObject();
+
+ /**
+ * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+ * The base class implementation returns a dummy value.
+ *
+ * Use compiler RTTI rather than ICU's "poor man's RTTI".
+ * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+protected:
+ // the following functions are protected to prevent instantiation and
+ // direct use of UObject itself
+
+ // default constructor
+ // inline UObject() {}
+
+ // copy constructor
+ // inline UObject(const UObject &other) {}
+
+#if 0
+ // TODO Sometime in the future. Implement operator==().
+ // (This comment inserted in 2.2)
+ // some or all of the following "boilerplate" functions may be made public
+ // in a future ICU4C release when all subclasses implement them
+
+ // assignment operator
+ // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
+ // commented out because the implementation is the same as a compiler's default
+ // UObject &operator=(const UObject &other) { return *this; }
+
+ // comparison operators
+ virtual inline UBool operator==(const UObject &other) const { return this==&other; }
+ inline UBool operator!=(const UObject &other) const { return !operator==(other); }
+
+ // clone() commented out from the base class:
+ // some compilers do not support covariant return types
+ // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
+ // see also UObject class documentation.
+ // virtual UObject *clone() const;
+#endif
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UObject &UObject::operator=(const UObject &);
+ */
+};
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ } \
+ UClassID myClass::getDynamicClassID() const \
+ { return myClass::getStaticClassID(); }
+
+
+/**
+ * This macro adds ICU RTTI to an ICU abstract class implementation.
+ * This macro should be invoked in *.cpp files. The corresponding
+ * header should declare getStaticClassID.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ }
+
+#endif /* U_HIDE_INTERNAL_API */
+
+U_NAMESPACE_END
+
+#endif
diff --git a/Source/WebCore/icu/unicode/urename.h b/Source/WebCore/icu/unicode/urename.h
index 468bdbd0f..6b1f49098 100644
--- a/Source/WebCore/icu/unicode/urename.h
+++ b/Source/WebCore/icu/unicode/urename.h
@@ -1,6 +1,6 @@
/*
*******************************************************************************
-* Copyright (C) 2002-2010, International Business Machines
+* Copyright (C) 2002-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
@@ -9,7 +9,7 @@
* tab size: 8 (not used)
* indentation:4
*
-* Created by: Perl script written by Vladimir Weinstein
+* Created by: Perl script tools/genren.pl written by Vladimir Weinstein
*
* Contains data for renaming ICU exports.
* Gets included by umachine.h
@@ -21,9 +21,13 @@
#ifndef URENAME_H
#define URENAME_H
-/* Uncomment the following line to disable renaming on platforms
- that do not use Autoconf. */
-/* #define U_DISABLE_RENAMING 1 */
+/* U_DISABLE_RENAMING can be defined in the following ways:
+ * - when running configure, e.g.
+ * runConfigureICU Linux --disable-renaming
+ * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h
+ */
+
+#include "unicode/uconfig.h"
#if !U_DISABLE_RENAMING
@@ -31,6 +35,7 @@
the platform a chance to define it first.
Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined.
*/
+
#ifndef U_ICU_ENTRY_POINT_RENAME
#include "unicode/umachine.h"
#endif
@@ -48,14 +53,9 @@
/* C exports renaming data */
-#define DECPOWERS U_ICU_ENTRY_POINT_RENAME(DECPOWERS)
-#define DECSTICKYTAB U_ICU_ENTRY_POINT_RENAME(DECSTICKYTAB)
-#define LNnn U_ICU_ENTRY_POINT_RENAME(LNnn)
#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString)
#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString)
-#define T_CString_stricmp U_ICU_ENTRY_POINT_RENAME(T_CString_stricmp)
#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger)
-#define T_CString_strnicmp U_ICU_ENTRY_POINT_RENAME(T_CString_strnicmp)
#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase)
#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase)
#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE)
@@ -75,6 +75,7 @@
#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
+#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
@@ -102,21 +103,8 @@
#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
-#define bms_close U_ICU_ENTRY_POINT_RENAME(bms_close)
-#define bms_empty U_ICU_ENTRY_POINT_RENAME(bms_empty)
-#define bms_getData U_ICU_ENTRY_POINT_RENAME(bms_getData)
-#define bms_open U_ICU_ENTRY_POINT_RENAME(bms_open)
-#define bms_search U_ICU_ENTRY_POINT_RENAME(bms_search)
-#define bms_setTargetString U_ICU_ENTRY_POINT_RENAME(bms_setTargetString)
-#define buildWSConfusableData U_ICU_ENTRY_POINT_RENAME(buildWSConfusableData)
#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
#define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse)
-#define d2utable U_ICU_ENTRY_POINT_RENAME(d2utable)
-#define deleteCEList U_ICU_ENTRY_POINT_RENAME(deleteCEList)
-#define deleteChars U_ICU_ENTRY_POINT_RENAME(deleteChars)
-#define deleteCollDataCacheEntry U_ICU_ENTRY_POINT_RENAME(deleteCollDataCacheEntry)
-#define deleteStringList U_ICU_ENTRY_POINT_RENAME(deleteStringList)
-#define deleteUnicodeStringKey U_ICU_ENTRY_POINT_RENAME(deleteUnicodeStringKey)
#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone)
#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close)
#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals)
@@ -212,8 +200,6 @@
#define res_load U_ICU_ENTRY_POINT_RENAME(res_load)
#define res_read U_ICU_ENTRY_POINT_RENAME(res_read)
#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload)
-#define tmutfmtHashTableValueComparator U_ICU_ENTRY_POINT_RENAME(tmutfmtHashTableValueComparator)
-#define triedict_swap U_ICU_ENTRY_POINT_RENAME(triedict_swap)
#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars)
#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy)
#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy)
@@ -239,6 +225,7 @@
#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof)
#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush)
#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter)
+#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat)
#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc)
#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage)
#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx)
@@ -267,6 +254,7 @@
#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator)
#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen)
#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc)
+#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket)
#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass)
#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory)
#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion)
@@ -276,6 +264,7 @@
#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue)
#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue)
#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue)
+#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties)
#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue)
#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum)
#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName)
@@ -284,9 +273,9 @@
#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties)
#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion)
#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion)
+#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout)
#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty)
#define u_init U_ICU_ENTRY_POINT_RENAME(u_init)
-#define u_isDataOlder U_ICU_ENTRY_POINT_RENAME(u_isDataOlder)
#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable)
#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart)
#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart)
@@ -318,7 +307,6 @@
#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle)
#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper)
#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit)
-#define u_lengthOfIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_lengthOfIdenticalLevelRun)
#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close)
#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat)
#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init)
@@ -334,7 +322,9 @@
#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset)
#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage)
#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError)
+#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf)
#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse)
+#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u)
#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter)
#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse)
#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions)
@@ -446,7 +436,10 @@
#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue)
#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory)
#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror)
+#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket)
+#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType)
#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel)
+#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex)
#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph)
#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex)
#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength)
@@ -471,6 +464,7 @@
#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical)
#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual)
#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback)
+#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext)
#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse)
#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine)
#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara)
@@ -495,6 +489,7 @@
#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules)
#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding)
#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous)
+#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText)
#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone)
#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText)
#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText)
@@ -513,6 +508,7 @@
#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings)
#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType)
#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone)
+#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference)
#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange)
#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale)
#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit)
@@ -521,13 +517,18 @@
#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow)
#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion)
#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
+#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
+#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
+#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
+#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID)
#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime)
#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet)
#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend)
#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open)
#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones)
+#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration)
#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones)
#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll)
#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set)
@@ -560,6 +561,8 @@
#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator)
#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale)
#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions)
+#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle)
+#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8)
#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open)
#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator)
#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale)
@@ -569,11 +572,6 @@
#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower)
#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle)
#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper)
-#define ucd_close U_ICU_ENTRY_POINT_RENAME(ucd_close)
-#define ucd_flushCache U_ICU_ENTRY_POINT_RENAME(ucd_flushCache)
-#define ucd_freeCache U_ICU_ENTRY_POINT_RENAME(ucd_freeCache)
-#define ucd_getCollator U_ICU_ENTRY_POINT_RENAME(ucd_getCollator)
-#define ucd_open U_ICU_ENTRY_POINT_RENAME(ucd_open)
#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts)
#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames)
#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne)
@@ -656,6 +654,7 @@
#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare)
#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare)
#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous)
+#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth)
#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load)
#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData)
#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open)
@@ -697,7 +696,6 @@
#define ucol_calcSortKey U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKey)
#define ucol_calcSortKeySimpleTertiary U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKeySimpleTertiary)
#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
-#define ucol_cloneRuleData U_ICU_ENTRY_POINT_RENAME(ucol_cloneRuleData)
#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close)
#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements)
#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable)
@@ -714,9 +712,11 @@
#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable)
#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound)
#define ucol_getCEStrengthDifference U_ICU_ENTRY_POINT_RENAME(ucol_getCEStrengthDifference)
+#define ucol_getCollationKey U_ICU_ENTRY_POINT_RENAME(ucol_getCollationKey)
#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions)
#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions)
#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName)
+#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes)
#define ucol_getFirstCE U_ICU_ENTRY_POINT_RENAME(ucol_getFirstCE)
#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent)
#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues)
@@ -735,8 +735,6 @@
#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx)
#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString)
#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey)
-#define ucol_getSortKeySize U_ICU_ENTRY_POINT_RENAME(ucol_getSortKeySize)
-#define ucol_getSortKeyWithAllocation U_ICU_ENTRY_POINT_RENAME(ucol_getSortKeyWithAllocation)
#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength)
#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet)
#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion)
@@ -788,6 +786,7 @@
#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop)
#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll)
#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter)
+#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8)
#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap)
#define ucol_swapBinary U_ICU_ENTRY_POINT_RENAME(ucol_swapBinary)
#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA)
@@ -805,12 +804,14 @@
#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter)
#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets)
#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence)
+#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets)
#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage)
#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName)
#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars)
#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled)
#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open)
#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding)
+#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset)
#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText)
#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies)
#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale)
@@ -818,8 +819,10 @@
#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits)
#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale)
#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName)
+#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode)
#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName)
#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement)
+#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable)
#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies)
#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register)
#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister)
@@ -833,6 +836,7 @@
#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart)
#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable)
#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar)
+#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext)
#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType)
#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat)
#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols)
@@ -840,8 +844,10 @@
#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open)
#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse)
#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar)
+#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener)
#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart)
#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar)
+#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext)
#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient)
#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat)
#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols)
@@ -849,6 +855,7 @@
#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern)
#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate)
#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime)
+#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener)
#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData)
#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close)
#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper)
@@ -892,12 +899,17 @@
#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName)
#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat)
#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal)
+#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap)
+#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close)
+#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format)
+#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open)
#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close)
#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count)
#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next)
#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault)
#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration)
#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration)
+#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration)
#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset)
#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext)
#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault)
@@ -908,34 +920,49 @@
#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch)
#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32)
#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou)
+#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close)
#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode)
#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue)
+#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex)
+#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength)
+#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate)
+#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars)
+#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble)
+#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64)
+#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong)
+#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject)
+#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType)
+#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars)
+#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric)
#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit)
+#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open)
#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou)
#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64)
#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop)
+#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance)
+#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender)
#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close)
#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString)
#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars)
#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars)
#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong)
+#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
-#define uhash_deleteUObject U_ICU_ENTRY_POINT_RENAME(uhash_deleteUObject)
-#define uhash_deleteUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_deleteUnicodeString)
+#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals)
+#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet)
#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
-#define uhash_freeBlock U_ICU_ENTRY_POINT_RENAME(uhash_freeBlock)
#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong)
+#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
-#define uhash_hashUCharsN U_ICU_ENTRY_POINT_RENAME(uhash_hashUCharsN)
#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
@@ -985,6 +1012,7 @@
#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE)
#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8)
#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close)
+#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext)
#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling)
#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale)
#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName)
@@ -992,6 +1020,7 @@
#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName)
#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName)
#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open)
+#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext)
#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName)
#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName)
#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName)
@@ -1028,6 +1057,7 @@
#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage)
#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName)
#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript)
+#define uloc_getDisplayScriptInContext U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScriptInContext)
#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant)
#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country)
#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language)
@@ -1075,18 +1105,21 @@
#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern)
#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat)
#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse)
-#define umtx_atomic_dec U_ICU_ENTRY_POINT_RENAME(umtx_atomic_dec)
-#define umtx_atomic_inc U_ICU_ENTRY_POINT_RENAME(umtx_atomic_inc)
-#define umtx_cleanup U_ICU_ENTRY_POINT_RENAME(umtx_cleanup)
-#define umtx_destroy U_ICU_ENTRY_POINT_RENAME(umtx_destroy)
-#define umtx_init U_ICU_ENTRY_POINT_RENAME(umtx_init)
#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock)
#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock)
#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance)
#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append)
#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close)
+#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair)
+#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass)
#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition)
#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance)
+#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance)
+#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance)
+#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance)
+#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance)
+#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance)
+#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition)
#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter)
#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore)
#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert)
@@ -1100,7 +1133,7 @@
#define unorm_closeIter U_ICU_ENTRY_POINT_RENAME(unorm_closeIter)
#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare)
#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate)
-#define unorm_getFCDTrieIndex U_ICU_ENTRY_POINT_RENAME(unorm_getFCDTrieIndex)
+#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16)
#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck)
#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized)
#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions)
@@ -1120,6 +1153,7 @@
#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble)
#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency)
#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64)
+#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable)
#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute)
#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable)
#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute)
@@ -1132,11 +1166,24 @@
#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble)
#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency)
#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64)
+#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable)
#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute)
#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute)
#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol)
#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute)
#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern)
+#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close)
+#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription)
+#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName)
+#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix)
+#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic)
+#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open)
+#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames)
+#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName)
+#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close)
+#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open)
+#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType)
+#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select)
#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary)
#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary)
#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration)
@@ -1160,12 +1207,12 @@
#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel)
#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName)
#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload)
-#define upname_swap U_ICU_ENTRY_POINT_RENAME(upname_swap)
#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic)
#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower)
+#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc)
#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil)
#define uprv_cnttab_addContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_addContraction)
#define uprv_cnttab_changeContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_changeContraction)
@@ -1202,7 +1249,6 @@
#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet)
#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet)
#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString)
-#define uprv_decContextTestEndian U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestEndian)
#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus)
#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus)
#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus)
@@ -1268,12 +1314,14 @@
#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion)
#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor)
#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero)
+#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject)
#define uprv_delete_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_delete_collIterate)
#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close)
#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open)
-#define uprv_dl_sym U_ICU_ENTRY_POINT_RENAME(uprv_dl_sym)
+#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func)
#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy)
#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii)
+#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii)
#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower)
#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs)
#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor)
@@ -1295,13 +1343,13 @@
#define uprv_init_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_init_collIterate)
#define uprv_init_pce U_ICU_ENTRY_POINT_RENAME(uprv_init_pce)
#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator)
+#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter)
#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite)
#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString)
#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString)
#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN)
#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity)
#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity)
-#define uprv_isRuleWhiteSpace U_ICU_ENTRY_POINT_RENAME(uprv_isRuleWhiteSpace)
#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou)
#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log)
#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc)
@@ -1312,7 +1360,6 @@
#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min)
#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf)
#define uprv_new_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_new_collIterate)
-#define uprv_openRuleWhiteSpaceSet U_ICU_ENTRY_POINT_RENAME(uprv_openRuleWhiteSpaceSet)
#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency)
#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute)
#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow)
@@ -1320,9 +1367,12 @@
#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc)
#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round)
#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray)
+#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch)
#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare)
#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup)
+#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp)
#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup)
+#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp)
#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError)
#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone)
#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper)
@@ -1386,6 +1436,7 @@
#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText)
#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern)
#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText)
+#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText)
#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd)
#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64)
#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart)
@@ -1414,6 +1465,19 @@
#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds)
#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds)
#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt)
+#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual)
+#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains)
+#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable)
+#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions)
+#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType)
+#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion)
+#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType)
+#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode)
+#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues)
+#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode)
+#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode)
+#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode)
+#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType)
#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close)
#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb)
#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems)
@@ -1457,13 +1521,19 @@
#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU)
#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator)
#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap)
+#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters)
#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun)
#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode)
#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName)
+#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString)
+#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString)
#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript)
#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions)
#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName)
+#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage)
#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript)
+#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased)
+#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft)
#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun)
#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun)
#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun)
@@ -1567,6 +1637,11 @@
#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks)
+#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet)
+#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet)
+#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet)
+#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet)
+#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel)
#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton)
#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8)
#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString)
@@ -1578,23 +1653,25 @@
#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales)
#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet)
#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks)
+#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel)
#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap)
#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close)
#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open)
#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType)
#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare)
#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap)
-#define ustr_foldCase U_ICU_ENTRY_POINT_RENAME(ustr_foldCase)
-#define ustr_toLower U_ICU_ENTRY_POINT_RENAME(ustr_toLower)
-#define ustr_toTitle U_ICU_ENTRY_POINT_RENAME(ustr_toTitle)
-#define ustr_toUpper U_ICU_ENTRY_POINT_RENAME(ustr_toUpper)
-#define utext_caseCompare U_ICU_ENTRY_POINT_RENAME(utext_caseCompare)
-#define utext_caseCompareNativeLimit U_ICU_ENTRY_POINT_RENAME(utext_caseCompareNativeLimit)
+#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN)
+#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN)
+#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN)
+#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold)
+#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower)
+#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle)
+#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper)
+#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map)
+#define ustrcase_setTempCaseMapLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_setTempCaseMapLocale)
#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At)
#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone)
#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close)
-#define utext_compare U_ICU_ENTRY_POINT_RENAME(utext_compare)
-#define utext_compareNativeLimit U_ICU_ENTRY_POINT_RENAME(utext_compareNativeLimit)
#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy)
#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32)
#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals)
@@ -1743,499 +1820,6 @@
#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime)
#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo)
-
-/* C++ class names renaming defines */
-
-#ifdef XP_CPLUSPLUS
-#if !U_HAVE_NAMESPACE
-
-#define AbsoluteValueSubstitution U_ICU_ENTRY_POINT_RENAME(AbsoluteValueSubstitution)
-#define AlternateSubstitutionSubtable U_ICU_ENTRY_POINT_RENAME(AlternateSubstitutionSubtable)
-#define AnchorTable U_ICU_ENTRY_POINT_RENAME(AnchorTable)
-#define AndConstraint U_ICU_ENTRY_POINT_RENAME(AndConstraint)
-#define AnnualTimeZoneRule U_ICU_ENTRY_POINT_RENAME(AnnualTimeZoneRule)
-#define AnyTransliterator U_ICU_ENTRY_POINT_RENAME(AnyTransliterator)
-#define ArabicOpenTypeLayoutEngine U_ICU_ENTRY_POINT_RENAME(ArabicOpenTypeLayoutEngine)
-#define ArabicShaping U_ICU_ENTRY_POINT_RENAME(ArabicShaping)
-#define ArgExtractor U_ICU_ENTRY_POINT_RENAME(ArgExtractor)
-#define BMPSet U_ICU_ENTRY_POINT_RENAME(BMPSet)
-#define BackwardUTrie2StringIterator U_ICU_ENTRY_POINT_RENAME(BackwardUTrie2StringIterator)
-#define BadCharacterTable U_ICU_ENTRY_POINT_RENAME(BadCharacterTable)
-#define BasicCalendarFactory U_ICU_ENTRY_POINT_RENAME(BasicCalendarFactory)
-#define BasicTimeZone U_ICU_ENTRY_POINT_RENAME(BasicTimeZone)
-#define BinarySearchLookupTable U_ICU_ENTRY_POINT_RENAME(BinarySearchLookupTable)
-#define BoyerMooreSearch U_ICU_ENTRY_POINT_RENAME(BoyerMooreSearch)
-#define BreakIterator U_ICU_ENTRY_POINT_RENAME(BreakIterator)
-#define BreakTransliterator U_ICU_ENTRY_POINT_RENAME(BreakTransliterator)
-#define BuddhistCalendar U_ICU_ENTRY_POINT_RENAME(BuddhistCalendar)
-#define BuildCompactTrieHorizontalNode U_ICU_ENTRY_POINT_RENAME(BuildCompactTrieHorizontalNode)
-#define BuildCompactTrieNode U_ICU_ENTRY_POINT_RENAME(BuildCompactTrieNode)
-#define BuildCompactTrieVerticalNode U_ICU_ENTRY_POINT_RENAME(BuildCompactTrieVerticalNode)
-#define BuilderScriptSet U_ICU_ENTRY_POINT_RENAME(BuilderScriptSet)
-#define ByteSink U_ICU_ENTRY_POINT_RENAME(ByteSink)
-#define CEBuffer U_ICU_ENTRY_POINT_RENAME(CEBuffer)
-#define CECalendar U_ICU_ENTRY_POINT_RENAME(CECalendar)
-#define CEList U_ICU_ENTRY_POINT_RENAME(CEList)
-#define CEToStringsMap U_ICU_ENTRY_POINT_RENAME(CEToStringsMap)
-#define CFactory U_ICU_ENTRY_POINT_RENAME(CFactory)
-#define Calendar U_ICU_ENTRY_POINT_RENAME(Calendar)
-#define CalendarAstronomer U_ICU_ENTRY_POINT_RENAME(CalendarAstronomer)
-#define CalendarCache U_ICU_ENTRY_POINT_RENAME(CalendarCache)
-#define CalendarData U_ICU_ENTRY_POINT_RENAME(CalendarData)
-#define CalendarService U_ICU_ENTRY_POINT_RENAME(CalendarService)
-#define CanonIterData U_ICU_ENTRY_POINT_RENAME(CanonIterData)
-#define CanonIterDataSingleton U_ICU_ENTRY_POINT_RENAME(CanonIterDataSingleton)
-#define CanonMarkFilter U_ICU_ENTRY_POINT_RENAME(CanonMarkFilter)
-#define CanonShaping U_ICU_ENTRY_POINT_RENAME(CanonShaping)
-#define CanonicalIterator U_ICU_ENTRY_POINT_RENAME(CanonicalIterator)
-#define CaseMapTransliterator U_ICU_ENTRY_POINT_RENAME(CaseMapTransliterator)
-#define ChainingContextualSubstitutionFormat1Subtable U_ICU_ENTRY_POINT_RENAME(ChainingContextualSubstitutionFormat1Subtable)
-#define ChainingContextualSubstitutionFormat2Subtable U_ICU_ENTRY_POINT_RENAME(ChainingContextualSubstitutionFormat2Subtable)
-#define ChainingContextualSubstitutionFormat3Subtable U_ICU_ENTRY_POINT_RENAME(ChainingContextualSubstitutionFormat3Subtable)
-#define ChainingContextualSubstitutionSubtable U_ICU_ENTRY_POINT_RENAME(ChainingContextualSubstitutionSubtable)
-#define CharString U_ICU_ENTRY_POINT_RENAME(CharString)
-#define CharSubstitutionFilter U_ICU_ENTRY_POINT_RENAME(CharSubstitutionFilter)
-#define CharacterIterator U_ICU_ENTRY_POINT_RENAME(CharacterIterator)
-#define CharacterNode U_ICU_ENTRY_POINT_RENAME(CharacterNode)
-#define CharsetDetector U_ICU_ENTRY_POINT_RENAME(CharsetDetector)
-#define CharsetMatch U_ICU_ENTRY_POINT_RENAME(CharsetMatch)
-#define CharsetRecog_2022 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_2022)
-#define CharsetRecog_2022CN U_ICU_ENTRY_POINT_RENAME(CharsetRecog_2022CN)
-#define CharsetRecog_2022JP U_ICU_ENTRY_POINT_RENAME(CharsetRecog_2022JP)
-#define CharsetRecog_2022KR U_ICU_ENTRY_POINT_RENAME(CharsetRecog_2022KR)
-#define CharsetRecog_8859_1 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1)
-#define CharsetRecog_8859_1_da U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_da)
-#define CharsetRecog_8859_1_de U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_de)
-#define CharsetRecog_8859_1_en U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_en)
-#define CharsetRecog_8859_1_es U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_es)
-#define CharsetRecog_8859_1_fr U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_fr)
-#define CharsetRecog_8859_1_it U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_it)
-#define CharsetRecog_8859_1_nl U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_nl)
-#define CharsetRecog_8859_1_no U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_no)
-#define CharsetRecog_8859_1_pt U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_pt)
-#define CharsetRecog_8859_1_sv U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_1_sv)
-#define CharsetRecog_8859_2 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_2)
-#define CharsetRecog_8859_2_cs U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_2_cs)
-#define CharsetRecog_8859_2_hu U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_2_hu)
-#define CharsetRecog_8859_2_pl U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_2_pl)
-#define CharsetRecog_8859_2_ro U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_2_ro)
-#define CharsetRecog_8859_5 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_5)
-#define CharsetRecog_8859_5_ru U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_5_ru)
-#define CharsetRecog_8859_6 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_6)
-#define CharsetRecog_8859_6_ar U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_6_ar)
-#define CharsetRecog_8859_7 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_7)
-#define CharsetRecog_8859_7_el U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_7_el)
-#define CharsetRecog_8859_8 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_8)
-#define CharsetRecog_8859_8_I_he U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_8_I_he)
-#define CharsetRecog_8859_8_he U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_8_he)
-#define CharsetRecog_8859_9 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_9)
-#define CharsetRecog_8859_9_tr U_ICU_ENTRY_POINT_RENAME(CharsetRecog_8859_9_tr)
-#define CharsetRecog_IBM420_ar U_ICU_ENTRY_POINT_RENAME(CharsetRecog_IBM420_ar)
-#define CharsetRecog_IBM420_ar_ltr U_ICU_ENTRY_POINT_RENAME(CharsetRecog_IBM420_ar_ltr)
-#define CharsetRecog_IBM420_ar_rtl U_ICU_ENTRY_POINT_RENAME(CharsetRecog_IBM420_ar_rtl)
-#define CharsetRecog_IBM424_he U_ICU_ENTRY_POINT_RENAME(CharsetRecog_IBM424_he)
-#define CharsetRecog_IBM424_he_ltr U_ICU_ENTRY_POINT_RENAME(CharsetRecog_IBM424_he_ltr)
-#define CharsetRecog_IBM424_he_rtl U_ICU_ENTRY_POINT_RENAME(CharsetRecog_IBM424_he_rtl)
-#define CharsetRecog_KOI8_R U_ICU_ENTRY_POINT_RENAME(CharsetRecog_KOI8_R)
-#define CharsetRecog_UTF8 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_UTF8)
-#define CharsetRecog_UTF_16_BE U_ICU_ENTRY_POINT_RENAME(CharsetRecog_UTF_16_BE)
-#define CharsetRecog_UTF_16_LE U_ICU_ENTRY_POINT_RENAME(CharsetRecog_UTF_16_LE)
-#define CharsetRecog_UTF_32 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_UTF_32)
-#define CharsetRecog_UTF_32_BE U_ICU_ENTRY_POINT_RENAME(CharsetRecog_UTF_32_BE)
-#define CharsetRecog_UTF_32_LE U_ICU_ENTRY_POINT_RENAME(CharsetRecog_UTF_32_LE)
-#define CharsetRecog_Unicode U_ICU_ENTRY_POINT_RENAME(CharsetRecog_Unicode)
-#define CharsetRecog_big5 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_big5)
-#define CharsetRecog_euc U_ICU_ENTRY_POINT_RENAME(CharsetRecog_euc)
-#define CharsetRecog_euc_jp U_ICU_ENTRY_POINT_RENAME(CharsetRecog_euc_jp)
-#define CharsetRecog_euc_kr U_ICU_ENTRY_POINT_RENAME(CharsetRecog_euc_kr)
-#define CharsetRecog_gb_18030 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_gb_18030)
-#define CharsetRecog_mbcs U_ICU_ENTRY_POINT_RENAME(CharsetRecog_mbcs)
-#define CharsetRecog_sbcs U_ICU_ENTRY_POINT_RENAME(CharsetRecog_sbcs)
-#define CharsetRecog_sjis U_ICU_ENTRY_POINT_RENAME(CharsetRecog_sjis)
-#define CharsetRecog_windows_1251 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_windows_1251)
-#define CharsetRecog_windows_1256 U_ICU_ENTRY_POINT_RENAME(CharsetRecog_windows_1256)
-#define CharsetRecognizer U_ICU_ENTRY_POINT_RENAME(CharsetRecognizer)
-#define CheckedArrayByteSink U_ICU_ENTRY_POINT_RENAME(CheckedArrayByteSink)
-#define ChineseCalendar U_ICU_ENTRY_POINT_RENAME(ChineseCalendar)
-#define ChoiceFormat U_ICU_ENTRY_POINT_RENAME(ChoiceFormat)
-#define ClassDefFormat1Table U_ICU_ENTRY_POINT_RENAME(ClassDefFormat1Table)
-#define ClassDefFormat2Table U_ICU_ENTRY_POINT_RENAME(ClassDefFormat2Table)
-#define ClassDefinitionTable U_ICU_ENTRY_POINT_RENAME(ClassDefinitionTable)
-#define ClockMath U_ICU_ENTRY_POINT_RENAME(ClockMath)
-#define CollData U_ICU_ENTRY_POINT_RENAME(CollData)
-#define CollDataCache U_ICU_ENTRY_POINT_RENAME(CollDataCache)
-#define CollDataCacheEntry U_ICU_ENTRY_POINT_RENAME(CollDataCacheEntry)
-#define CollationElementIterator U_ICU_ENTRY_POINT_RENAME(CollationElementIterator)
-#define CollationKey U_ICU_ENTRY_POINT_RENAME(CollationKey)
-#define CollationLocaleListEnumeration U_ICU_ENTRY_POINT_RENAME(CollationLocaleListEnumeration)
-#define Collator U_ICU_ENTRY_POINT_RENAME(Collator)
-#define CollatorFactory U_ICU_ENTRY_POINT_RENAME(CollatorFactory)
-#define CompactTrieDictionary U_ICU_ENTRY_POINT_RENAME(CompactTrieDictionary)
-#define CompactTrieEnumeration U_ICU_ENTRY_POINT_RENAME(CompactTrieEnumeration)
-#define ComposeNormalizer2 U_ICU_ENTRY_POINT_RENAME(ComposeNormalizer2)
-#define CompoundTransliterator U_ICU_ENTRY_POINT_RENAME(CompoundTransliterator)
-#define ConfusabledataBuilder U_ICU_ENTRY_POINT_RENAME(ConfusabledataBuilder)
-#define ContextualGlyphSubstitutionProcessor U_ICU_ENTRY_POINT_RENAME(ContextualGlyphSubstitutionProcessor)
-#define ContextualSubstitutionBase U_ICU_ENTRY_POINT_RENAME(ContextualSubstitutionBase)
-#define ContextualSubstitutionFormat1Subtable U_ICU_ENTRY_POINT_RENAME(ContextualSubstitutionFormat1Subtable)
-#define ContextualSubstitutionFormat2Subtable U_ICU_ENTRY_POINT_RENAME(ContextualSubstitutionFormat2Subtable)
-#define ContextualSubstitutionFormat3Subtable U_ICU_ENTRY_POINT_RENAME(ContextualSubstitutionFormat3Subtable)
-#define ContextualSubstitutionSubtable U_ICU_ENTRY_POINT_RENAME(ContextualSubstitutionSubtable)
-#define CopticCalendar U_ICU_ENTRY_POINT_RENAME(CopticCalendar)
-#define CoverageFormat1Table U_ICU_ENTRY_POINT_RENAME(CoverageFormat1Table)
-#define CoverageFormat2Table U_ICU_ENTRY_POINT_RENAME(CoverageFormat2Table)
-#define CoverageTable U_ICU_ENTRY_POINT_RENAME(CoverageTable)
-#define CurrencyAmount U_ICU_ENTRY_POINT_RENAME(CurrencyAmount)
-#define CurrencyFormat U_ICU_ENTRY_POINT_RENAME(CurrencyFormat)
-#define CurrencyPluralInfo U_ICU_ENTRY_POINT_RENAME(CurrencyPluralInfo)
-#define CurrencyUnit U_ICU_ENTRY_POINT_RENAME(CurrencyUnit)
-#define CursiveAttachmentSubtable U_ICU_ENTRY_POINT_RENAME(CursiveAttachmentSubtable)
-#define DTRedundantEnumeration U_ICU_ENTRY_POINT_RENAME(DTRedundantEnumeration)
-#define DTSkeletonEnumeration U_ICU_ENTRY_POINT_RENAME(DTSkeletonEnumeration)
-#define DateFormat U_ICU_ENTRY_POINT_RENAME(DateFormat)
-#define DateFormatSymbols U_ICU_ENTRY_POINT_RENAME(DateFormatSymbols)
-#define DateInterval U_ICU_ENTRY_POINT_RENAME(DateInterval)
-#define DateIntervalFormat U_ICU_ENTRY_POINT_RENAME(DateIntervalFormat)
-#define DateIntervalInfo U_ICU_ENTRY_POINT_RENAME(DateIntervalInfo)
-#define DateTimeMatcher U_ICU_ENTRY_POINT_RENAME(DateTimeMatcher)
-#define DateTimePatternGenerator U_ICU_ENTRY_POINT_RENAME(DateTimePatternGenerator)
-#define DateTimeRule U_ICU_ENTRY_POINT_RENAME(DateTimeRule)
-#define DecimalFormat U_ICU_ENTRY_POINT_RENAME(DecimalFormat)
-#define DecimalFormatSymbols U_ICU_ENTRY_POINT_RENAME(DecimalFormatSymbols)
-#define DecomposeNormalizer2 U_ICU_ENTRY_POINT_RENAME(DecomposeNormalizer2)
-#define DefaultCalendarFactory U_ICU_ENTRY_POINT_RENAME(DefaultCalendarFactory)
-#define DefaultCharMapper U_ICU_ENTRY_POINT_RENAME(DefaultCharMapper)
-#define DeviceTable U_ICU_ENTRY_POINT_RENAME(DeviceTable)
-#define DictionaryBreakEngine U_ICU_ENTRY_POINT_RENAME(DictionaryBreakEngine)
-#define DigitList U_ICU_ENTRY_POINT_RENAME(DigitList)
-#define DistanceInfo U_ICU_ENTRY_POINT_RENAME(DistanceInfo)
-#define EnumToOffset U_ICU_ENTRY_POINT_RENAME(EnumToOffset)
-#define ErrorCode U_ICU_ENTRY_POINT_RENAME(ErrorCode)
-#define EscapeTransliterator U_ICU_ENTRY_POINT_RENAME(EscapeTransliterator)
-#define EthiopicCalendar U_ICU_ENTRY_POINT_RENAME(EthiopicCalendar)
-#define EventListener U_ICU_ENTRY_POINT_RENAME(EventListener)
-#define ExtensionSubtable U_ICU_ENTRY_POINT_RENAME(ExtensionSubtable)
-#define FCDNormalizer2 U_ICU_ENTRY_POINT_RENAME(FCDNormalizer2)
-#define FCDTrieSingleton U_ICU_ENTRY_POINT_RENAME(FCDTrieSingleton)
-#define FeatureListTable U_ICU_ENTRY_POINT_RENAME(FeatureListTable)
-#define FieldPosition U_ICU_ENTRY_POINT_RENAME(FieldPosition)
-#define FieldPositionHandler U_ICU_ENTRY_POINT_RENAME(FieldPositionHandler)
-#define FieldPositionIterator U_ICU_ENTRY_POINT_RENAME(FieldPositionIterator)
-#define FieldPositionIteratorHandler U_ICU_ENTRY_POINT_RENAME(FieldPositionIteratorHandler)
-#define FieldPositionOnlyHandler U_ICU_ENTRY_POINT_RENAME(FieldPositionOnlyHandler)
-#define FilteredNormalizer2 U_ICU_ENTRY_POINT_RENAME(FilteredNormalizer2)
-#define FontRuns U_ICU_ENTRY_POINT_RENAME(FontRuns)
-#define Format U_ICU_ENTRY_POINT_RENAME(Format)
-#define Format1AnchorTable U_ICU_ENTRY_POINT_RENAME(Format1AnchorTable)
-#define Format2AnchorTable U_ICU_ENTRY_POINT_RENAME(Format2AnchorTable)
-#define Format3AnchorTable U_ICU_ENTRY_POINT_RENAME(Format3AnchorTable)
-#define FormatNameEnumeration U_ICU_ENTRY_POINT_RENAME(FormatNameEnumeration)
-#define FormatParser U_ICU_ENTRY_POINT_RENAME(FormatParser)
-#define Formattable U_ICU_ENTRY_POINT_RENAME(Formattable)
-#define ForwardCharacterIterator U_ICU_ENTRY_POINT_RENAME(ForwardCharacterIterator)
-#define ForwardUTrie2StringIterator U_ICU_ENTRY_POINT_RENAME(ForwardUTrie2StringIterator)
-#define FractionalPartSubstitution U_ICU_ENTRY_POINT_RENAME(FractionalPartSubstitution)
-#define FunctionReplacer U_ICU_ENTRY_POINT_RENAME(FunctionReplacer)
-#define GDEFMarkFilter U_ICU_ENTRY_POINT_RENAME(GDEFMarkFilter)
-#define GXLayoutEngine U_ICU_ENTRY_POINT_RENAME(GXLayoutEngine)
-#define GlyphDefinitionTableHeader U_ICU_ENTRY_POINT_RENAME(GlyphDefinitionTableHeader)
-#define GlyphIterator U_ICU_ENTRY_POINT_RENAME(GlyphIterator)
-#define GlyphLookupTableHeader U_ICU_ENTRY_POINT_RENAME(GlyphLookupTableHeader)
-#define GlyphPositionAdjustments U_ICU_ENTRY_POINT_RENAME(GlyphPositionAdjustments)
-#define GlyphPositioningLookupProcessor U_ICU_ENTRY_POINT_RENAME(GlyphPositioningLookupProcessor)
-#define GlyphPositioningTableHeader U_ICU_ENTRY_POINT_RENAME(GlyphPositioningTableHeader)
-#define GlyphSubstitutionLookupProcessor U_ICU_ENTRY_POINT_RENAME(GlyphSubstitutionLookupProcessor)
-#define GlyphSubstitutionTableHeader U_ICU_ENTRY_POINT_RENAME(GlyphSubstitutionTableHeader)
-#define GoodSuffixTable U_ICU_ENTRY_POINT_RENAME(GoodSuffixTable)
-#define Grego U_ICU_ENTRY_POINT_RENAME(Grego)
-#define GregorianCalendar U_ICU_ENTRY_POINT_RENAME(GregorianCalendar)
-#define HanOpenTypeLayoutEngine U_ICU_ENTRY_POINT_RENAME(HanOpenTypeLayoutEngine)
-#define HangulOpenTypeLayoutEngine U_ICU_ENTRY_POINT_RENAME(HangulOpenTypeLayoutEngine)
-#define HebrewCalendar U_ICU_ENTRY_POINT_RENAME(HebrewCalendar)
-#define ICUBreakIteratorFactory U_ICU_ENTRY_POINT_RENAME(ICUBreakIteratorFactory)
-#define ICUBreakIteratorService U_ICU_ENTRY_POINT_RENAME(ICUBreakIteratorService)
-#define ICUCollatorFactory U_ICU_ENTRY_POINT_RENAME(ICUCollatorFactory)
-#define ICUCollatorService U_ICU_ENTRY_POINT_RENAME(ICUCollatorService)
-#define ICUDataTable U_ICU_ENTRY_POINT_RENAME(ICUDataTable)
-#define ICULanguageBreakFactory U_ICU_ENTRY_POINT_RENAME(ICULanguageBreakFactory)
-#define ICULocaleService U_ICU_ENTRY_POINT_RENAME(ICULocaleService)
-#define ICUNotifier U_ICU_ENTRY_POINT_RENAME(ICUNotifier)
-#define ICUNumberFormatFactory U_ICU_ENTRY_POINT_RENAME(ICUNumberFormatFactory)
-#define ICUNumberFormatService U_ICU_ENTRY_POINT_RENAME(ICUNumberFormatService)
-#define ICUResourceBundleFactory U_ICU_ENTRY_POINT_RENAME(ICUResourceBundleFactory)
-#define ICUService U_ICU_ENTRY_POINT_RENAME(ICUService)
-#define ICUServiceFactory U_ICU_ENTRY_POINT_RENAME(ICUServiceFactory)
-#define ICUServiceKey U_ICU_ENTRY_POINT_RENAME(ICUServiceKey)
-#define ICU_Utility U_ICU_ENTRY_POINT_RENAME(ICU_Utility)
-#define IDNA U_ICU_ENTRY_POINT_RENAME(IDNA)
-#define IndianCalendar U_ICU_ENTRY_POINT_RENAME(IndianCalendar)
-#define IndicClassTable U_ICU_ENTRY_POINT_RENAME(IndicClassTable)
-#define IndicOpenTypeLayoutEngine U_ICU_ENTRY_POINT_RENAME(IndicOpenTypeLayoutEngine)
-#define IndicRearrangementProcessor U_ICU_ENTRY_POINT_RENAME(IndicRearrangementProcessor)
-#define IndicReordering U_ICU_ENTRY_POINT_RENAME(IndicReordering)
-#define InitialTimeZoneRule U_ICU_ENTRY_POINT_RENAME(InitialTimeZoneRule)
-#define InputText U_ICU_ENTRY_POINT_RENAME(InputText)
-#define IntegralPartSubstitution U_ICU_ENTRY_POINT_RENAME(IntegralPartSubstitution)
-#define IslamicCalendar U_ICU_ENTRY_POINT_RENAME(IslamicCalendar)
-#define IteratedChar U_ICU_ENTRY_POINT_RENAME(IteratedChar)
-#define JapaneseCalendar U_ICU_ENTRY_POINT_RENAME(JapaneseCalendar)
-#define KernTable U_ICU_ENTRY_POINT_RENAME(KernTable)
-#define KeywordEnumeration U_ICU_ENTRY_POINT_RENAME(KeywordEnumeration)
-#define KhmerClassTable U_ICU_ENTRY_POINT_RENAME(KhmerClassTable)
-#define KhmerOpenTypeLayoutEngine U_ICU_ENTRY_POINT_RENAME(KhmerOpenTypeLayoutEngine)
-#define KhmerReordering U_ICU_ENTRY_POINT_RENAME(KhmerReordering)
-#define LECharMapper U_ICU_ENTRY_POINT_RENAME(LECharMapper)
-#define LEFontInstance U_ICU_ENTRY_POINT_RENAME(LEFontInstance)
-#define LEGlyphFilter U_ICU_ENTRY_POINT_RENAME(LEGlyphFilter)
-#define LEGlyphStorage U_ICU_ENTRY_POINT_RENAME(LEGlyphStorage)
-#define LEInsertionCallback U_ICU_ENTRY_POINT_RENAME(LEInsertionCallback)
-#define LEInsertionList U_ICU_ENTRY_POINT_RENAME(LEInsertionList)
-#define LXUtilities U_ICU_ENTRY_POINT_RENAME(LXUtilities)
-#define LanguageBreakEngine U_ICU_ENTRY_POINT_RENAME(LanguageBreakEngine)
-#define LanguageBreakFactory U_ICU_ENTRY_POINT_RENAME(LanguageBreakFactory)
-#define LayoutEngine U_ICU_ENTRY_POINT_RENAME(LayoutEngine)
-#define LigatureSubstitutionProcessor U_ICU_ENTRY_POINT_RENAME(LigatureSubstitutionProcessor)
-#define LigatureSubstitutionSubtable U_ICU_ENTRY_POINT_RENAME(LigatureSubstitutionSubtable)
-#define LocDataParser U_ICU_ENTRY_POINT_RENAME(LocDataParser)
-#define Locale U_ICU_ENTRY_POINT_RENAME(Locale)
-#define LocaleBased U_ICU_ENTRY_POINT_RENAME(LocaleBased)
-#define LocaleDisplayNames U_ICU_ENTRY_POINT_RENAME(LocaleDisplayNames)
-#define LocaleDisplayNamesImpl U_ICU_ENTRY_POINT_RENAME(LocaleDisplayNamesImpl)
-#define LocaleKey U_ICU_ENTRY_POINT_RENAME(LocaleKey)
-#define LocaleKeyFactory U_ICU_ENTRY_POINT_RENAME(LocaleKeyFactory)
-#define LocaleRuns U_ICU_ENTRY_POINT_RENAME(LocaleRuns)
-#define LocaleUtility U_ICU_ENTRY_POINT_RENAME(LocaleUtility)
-#define LocalizationInfo U_ICU_ENTRY_POINT_RENAME(LocalizationInfo)
-#define LookupListTable U_ICU_ENTRY_POINT_RENAME(LookupListTable)
-#define LookupProcessor U_ICU_ENTRY_POINT_RENAME(LookupProcessor)
-#define LookupSubtable U_ICU_ENTRY_POINT_RENAME(LookupSubtable)
-#define LookupTable U_ICU_ENTRY_POINT_RENAME(LookupTable)
-#define LowercaseTransliterator U_ICU_ENTRY_POINT_RENAME(LowercaseTransliterator)
-#define MPreFixups U_ICU_ENTRY_POINT_RENAME(MPreFixups)
-#define MarkArray U_ICU_ENTRY_POINT_RENAME(MarkArray)
-#define MarkToBasePositioningSubtable U_ICU_ENTRY_POINT_RENAME(MarkToBasePositioningSubtable)
-#define MarkToLigaturePositioningSubtable U_ICU_ENTRY_POINT_RENAME(MarkToLigaturePositioningSubtable)
-#define MarkToMarkPositioningSubtable U_ICU_ENTRY_POINT_RENAME(MarkToMarkPositioningSubtable)
-#define Measure U_ICU_ENTRY_POINT_RENAME(Measure)
-#define MeasureFormat U_ICU_ENTRY_POINT_RENAME(MeasureFormat)
-#define MeasureUnit U_ICU_ENTRY_POINT_RENAME(MeasureUnit)
-#define MessageFormat U_ICU_ENTRY_POINT_RENAME(MessageFormat)
-#define MessageFormatAdapter U_ICU_ENTRY_POINT_RENAME(MessageFormatAdapter)
-#define ModulusSubstitution U_ICU_ENTRY_POINT_RENAME(ModulusSubstitution)
-#define MoonRiseSetCoordFunc U_ICU_ENTRY_POINT_RENAME(MoonRiseSetCoordFunc)
-#define MoonTimeAngleFunc U_ICU_ENTRY_POINT_RENAME(MoonTimeAngleFunc)
-#define MorphSubtableHeader U_ICU_ENTRY_POINT_RENAME(MorphSubtableHeader)
-#define MorphTableHeader U_ICU_ENTRY_POINT_RENAME(MorphTableHeader)
-#define MultipleSubstitutionSubtable U_ICU_ENTRY_POINT_RENAME(MultipleSubstitutionSubtable)
-#define MultiplierSubstitution U_ICU_ENTRY_POINT_RENAME(MultiplierSubstitution)
-#define MutableTrieDictionary U_ICU_ENTRY_POINT_RENAME(MutableTrieDictionary)
-#define MutableTrieEnumeration U_ICU_ENTRY_POINT_RENAME(MutableTrieEnumeration)
-#define NFFactory U_ICU_ENTRY_POINT_RENAME(NFFactory)
-#define NFKDBuffer U_ICU_ENTRY_POINT_RENAME(NFKDBuffer)
-#define NFRule U_ICU_ENTRY_POINT_RENAME(NFRule)
-#define NFRuleSet U_ICU_ENTRY_POINT_RENAME(NFRuleSet)
-#define NFSubstitution U_ICU_ENTRY_POINT_RENAME(NFSubstitution)
-#define NGramParser U_ICU_ENTRY_POINT_RENAME(NGramParser)
-#define NameToEnum U_ICU_ENTRY_POINT_RENAME(NameToEnum)
-#define NameUnicodeTransliterator U_ICU_ENTRY_POINT_RENAME(NameUnicodeTransliterator)
-#define NonContextualGlyphSubstitutionProcessor U_ICU_ENTRY_POINT_RENAME(NonContextualGlyphSubstitutionProcessor)
-#define NonContiguousEnumToOffset U_ICU_ENTRY_POINT_RENAME(NonContiguousEnumToOffset)
-#define NoopNormalizer2 U_ICU_ENTRY_POINT_RENAME(NoopNormalizer2)
-#define Norm2AllModes U_ICU_ENTRY_POINT_RENAME(Norm2AllModes)
-#define NormalizationTransliterator U_ICU_ENTRY_POINT_RENAME(NormalizationTransliterator)
-#define Normalizer U_ICU_ENTRY_POINT_RENAME(Normalizer)
-#define Normalizer2 U_ICU_ENTRY_POINT_RENAME(Normalizer2)
-#define Normalizer2Factory U_ICU_ENTRY_POINT_RENAME(Normalizer2Factory)
-#define Normalizer2Impl U_ICU_ENTRY_POINT_RENAME(Normalizer2Impl)
-#define Normalizer2WithImpl U_ICU_ENTRY_POINT_RENAME(Normalizer2WithImpl)
-#define NullSubstitution U_ICU_ENTRY_POINT_RENAME(NullSubstitution)
-#define NullTransliterator U_ICU_ENTRY_POINT_RENAME(NullTransliterator)
-#define NumberFormat U_ICU_ENTRY_POINT_RENAME(NumberFormat)
-#define NumberFormatFactory U_ICU_ENTRY_POINT_RENAME(NumberFormatFactory)
-#define NumberingSystem U_ICU_ENTRY_POINT_RENAME(NumberingSystem)
-#define NumeratorSubstitution U_ICU_ENTRY_POINT_RENAME(NumeratorSubstitution)
-#define OlsonTimeZone U_ICU_ENTRY_POINT_RENAME(OlsonTimeZone)
-#define OpenTypeLayoutEngine U_ICU_ENTRY_POINT_RENAME(OpenTypeLayoutEngine)
-#define OpenTypeUtilities U_ICU_ENTRY_POINT_RENAME(OpenTypeUtilities)
-#define OrConstraint U_ICU_ENTRY_POINT_RENAME(OrConstraint)
-#define PCEBuffer U_ICU_ENTRY_POINT_RENAME(PCEBuffer)
-#define PairPositioningFormat1Subtable U_ICU_ENTRY_POINT_RENAME(PairPositioningFormat1Subtable)
-#define PairPositioningFormat2Subtable U_ICU_ENTRY_POINT_RENAME(PairPositioningFormat2Subtable)
-#define PairPositioningSubtable U_ICU_ENTRY_POINT_RENAME(PairPositioningSubtable)
-#define ParagraphLayout U_ICU_ENTRY_POINT_RENAME(ParagraphLayout)
-#define ParseData U_ICU_ENTRY_POINT_RENAME(ParseData)
-#define ParsePosition U_ICU_ENTRY_POINT_RENAME(ParsePosition)
-#define PatternMap U_ICU_ENTRY_POINT_RENAME(PatternMap)
-#define PatternMapIterator U_ICU_ENTRY_POINT_RENAME(PatternMapIterator)
-#define PersianCalendar U_ICU_ENTRY_POINT_RENAME(PersianCalendar)
-#define PluralFormat U_ICU_ENTRY_POINT_RENAME(PluralFormat)
-#define PluralKeywordEnumeration U_ICU_ENTRY_POINT_RENAME(PluralKeywordEnumeration)
-#define PluralRules U_ICU_ENTRY_POINT_RENAME(PluralRules)
-#define PropertyAliases U_ICU_ENTRY_POINT_RENAME(PropertyAliases)
-#define PtnElem U_ICU_ENTRY_POINT_RENAME(PtnElem)
-#define PtnSkeleton U_ICU_ENTRY_POINT_RENAME(PtnSkeleton)
-#define Quantifier U_ICU_ENTRY_POINT_RENAME(Quantifier)
-#define RBBIDataWrapper U_ICU_ENTRY_POINT_RENAME(RBBIDataWrapper)
-#define RBBINode U_ICU_ENTRY_POINT_RENAME(RBBINode)
-#define RBBIRuleBuilder U_ICU_ENTRY_POINT_RENAME(RBBIRuleBuilder)
-#define RBBIRuleScanner U_ICU_ENTRY_POINT_RENAME(RBBIRuleScanner)
-#define RBBISetBuilder U_ICU_ENTRY_POINT_RENAME(RBBISetBuilder)
-#define RBBIStateDescriptor U_ICU_ENTRY_POINT_RENAME(RBBIStateDescriptor)
-#define RBBISymbolTable U_ICU_ENTRY_POINT_RENAME(RBBISymbolTable)
-#define RBBISymbolTableEntry U_ICU_ENTRY_POINT_RENAME(RBBISymbolTableEntry)
-#define RBBITableBuilder U_ICU_ENTRY_POINT_RENAME(RBBITableBuilder)
-#define RCEBuffer U_ICU_ENTRY_POINT_RENAME(RCEBuffer)
-#define RangeDescriptor U_ICU_ENTRY_POINT_RENAME(RangeDescriptor)
-#define RegexCompile U_ICU_ENTRY_POINT_RENAME(RegexCompile)
-#define RegexMatcher U_ICU_ENTRY_POINT_RENAME(RegexMatcher)
-#define RegexPattern U_ICU_ENTRY_POINT_RENAME(RegexPattern)
-#define RegexStaticSets U_ICU_ENTRY_POINT_RENAME(RegexStaticSets)
-#define RegularExpression U_ICU_ENTRY_POINT_RENAME(RegularExpression)
-#define RelativeDateFormat U_ICU_ENTRY_POINT_RENAME(RelativeDateFormat)
-#define RemoveTransliterator U_ICU_ENTRY_POINT_RENAME(RemoveTransliterator)
-#define ReorderingBuffer U_ICU_ENTRY_POINT_RENAME(ReorderingBuffer)
-#define Replaceable U_ICU_ENTRY_POINT_RENAME(Replaceable)
-#define ReplaceableGlue U_ICU_ENTRY_POINT_RENAME(ReplaceableGlue)
-#define ResourceBundle U_ICU_ENTRY_POINT_RENAME(ResourceBundle)
-#define RiseSetCoordFunc U_ICU_ENTRY_POINT_RENAME(RiseSetCoordFunc)
-#define RuleBasedBreakIterator U_ICU_ENTRY_POINT_RENAME(RuleBasedBreakIterator)
-#define RuleBasedCollator U_ICU_ENTRY_POINT_RENAME(RuleBasedCollator)
-#define RuleBasedNumberFormat U_ICU_ENTRY_POINT_RENAME(RuleBasedNumberFormat)
-#define RuleBasedTimeZone U_ICU_ENTRY_POINT_RENAME(RuleBasedTimeZone)
-#define RuleBasedTransliterator U_ICU_ENTRY_POINT_RENAME(RuleBasedTransliterator)
-#define RuleChain U_ICU_ENTRY_POINT_RENAME(RuleChain)
-#define RuleCharacterIterator U_ICU_ENTRY_POINT_RENAME(RuleCharacterIterator)
-#define RuleHalf U_ICU_ENTRY_POINT_RENAME(RuleHalf)
-#define RuleParser U_ICU_ENTRY_POINT_RENAME(RuleParser)
-#define RunArray U_ICU_ENTRY_POINT_RENAME(RunArray)
-#define SPUString U_ICU_ENTRY_POINT_RENAME(SPUString)
-#define SPUStringPool U_ICU_ENTRY_POINT_RENAME(SPUStringPool)
-#define SafeZoneStringFormatPtr U_ICU_ENTRY_POINT_RENAME(SafeZoneStringFormatPtr)
-#define SameValueSubstitution U_ICU_ENTRY_POINT_RENAME(SameValueSubstitution)
-#define ScriptListTable U_ICU_ENTRY_POINT_RENAME(ScriptListTable)
-#define ScriptRunIterator U_ICU_ENTRY_POINT_RENAME(ScriptRunIterator)
-#define ScriptSet U_ICU_ENTRY_POINT_RENAME(ScriptSet)
-#define ScriptTable U_ICU_ENTRY_POINT_RENAME(ScriptTable)
-#define SearchIterator U_ICU_ENTRY_POINT_RENAME(SearchIterator)
-#define SegmentArrayProcessor U_ICU_ENTRY_POINT_RENAME(SegmentArrayProcessor)
-#define SegmentSingleProcessor U_ICU_ENTRY_POINT_RENAME(SegmentSingleProcessor)
-#define SelectFormat U_ICU_ENTRY_POINT_RENAME(SelectFormat)
-#define ServiceEnumeration U_ICU_ENTRY_POINT_RENAME(ServiceEnumeration)
-#define ServiceListener U_ICU_ENTRY_POINT_RENAME(ServiceListener)
-#define SimpleArrayProcessor U_ICU_ENTRY_POINT_RENAME(SimpleArrayProcessor)
-#define SimpleDateFormat U_ICU_ENTRY_POINT_RENAME(SimpleDateFormat)
-#define SimpleFactory U_ICU_ENTRY_POINT_RENAME(SimpleFactory)
-#define SimpleLocaleKeyFactory U_ICU_ENTRY_POINT_RENAME(SimpleLocaleKeyFactory)
-#define SimpleNumberFormatFactory U_ICU_ENTRY_POINT_RENAME(SimpleNumberFormatFactory)
-#define SimpleSingleton U_ICU_ENTRY_POINT_RENAME(SimpleSingleton)
-#define SimpleTimeZone U_ICU_ENTRY_POINT_RENAME(SimpleTimeZone)
-#define SinglePositioningFormat1Subtable U_ICU_ENTRY_POINT_RENAME(SinglePositioningFormat1Subtable)
-#define SinglePositioningFormat2Subtable U_ICU_ENTRY_POINT_RENAME(SinglePositioningFormat2Subtable)
-#define SinglePositioningSubtable U_ICU_ENTRY_POINT_RENAME(SinglePositioningSubtable)
-#define SingleSubstitutionFormat1Subtable U_ICU_ENTRY_POINT_RENAME(SingleSubstitutionFormat1Subtable)
-#define SingleSubstitutionFormat2Subtable U_ICU_ENTRY_POINT_RENAME(SingleSubstitutionFormat2Subtable)
-#define SingleSubstitutionSubtable U_ICU_ENTRY_POINT_RENAME(SingleSubstitutionSubtable)
-#define SingleTableProcessor U_ICU_ENTRY_POINT_RENAME(SingleTableProcessor)
-#define SpoofData U_ICU_ENTRY_POINT_RENAME(SpoofData)
-#define SpoofImpl U_ICU_ENTRY_POINT_RENAME(SpoofImpl)
-#define StateTableProcessor U_ICU_ENTRY_POINT_RENAME(StateTableProcessor)
-#define StringCharacterIterator U_ICU_ENTRY_POINT_RENAME(StringCharacterIterator)
-#define StringEnumeration U_ICU_ENTRY_POINT_RENAME(StringEnumeration)
-#define StringList U_ICU_ENTRY_POINT_RENAME(StringList)
-#define StringLocalizationInfo U_ICU_ENTRY_POINT_RENAME(StringLocalizationInfo)
-#define StringMatcher U_ICU_ENTRY_POINT_RENAME(StringMatcher)
-#define StringPair U_ICU_ENTRY_POINT_RENAME(StringPair)
-#define StringPiece U_ICU_ENTRY_POINT_RENAME(StringPiece)
-#define StringReplacer U_ICU_ENTRY_POINT_RENAME(StringReplacer)
-#define StringSearch U_ICU_ENTRY_POINT_RENAME(StringSearch)
-#define StringToCEsMap U_ICU_ENTRY_POINT_RENAME(StringToCEsMap)
-#define StyleRuns U_ICU_ENTRY_POINT_RENAME(StyleRuns)
-#define SubstitutionLookup U_ICU_ENTRY_POINT_RENAME(SubstitutionLookup)
-#define SubtableProcessor U_ICU_ENTRY_POINT_RENAME(SubtableProcessor)
-#define SunTimeAngleFunc U_ICU_ENTRY_POINT_RENAME(SunTimeAngleFunc)
-#define SymbolTable U_ICU_ENTRY_POINT_RENAME(SymbolTable)
-#define TZEnumeration U_ICU_ENTRY_POINT_RENAME(TZEnumeration)
-#define TaiwanCalendar U_ICU_ENTRY_POINT_RENAME(TaiwanCalendar)
-#define Target U_ICU_ENTRY_POINT_RENAME(Target)
-#define TernaryNode U_ICU_ENTRY_POINT_RENAME(TernaryNode)
-#define TextTrieMap U_ICU_ENTRY_POINT_RENAME(TextTrieMap)
-#define TextTrieMapSearchResultHandler U_ICU_ENTRY_POINT_RENAME(TextTrieMapSearchResultHandler)
-#define ThaiBreakEngine U_ICU_ENTRY_POINT_RENAME(ThaiBreakEngine)
-#define ThaiLayoutEngine U_ICU_ENTRY_POINT_RENAME(ThaiLayoutEngine)
-#define ThaiShaping U_ICU_ENTRY_POINT_RENAME(ThaiShaping)
-#define TibetanClassTable U_ICU_ENTRY_POINT_RENAME(TibetanClassTable)
-#define TibetanOpenTypeLayoutEngine U_ICU_ENTRY_POINT_RENAME(TibetanOpenTypeLayoutEngine)
-#define TibetanReordering U_ICU_ENTRY_POINT_RENAME(TibetanReordering)
-#define TimeArrayTimeZoneRule U_ICU_ENTRY_POINT_RENAME(TimeArrayTimeZoneRule)
-#define TimeUnit U_ICU_ENTRY_POINT_RENAME(TimeUnit)
-#define TimeUnitAmount U_ICU_ENTRY_POINT_RENAME(TimeUnitAmount)
-#define TimeUnitFormat U_ICU_ENTRY_POINT_RENAME(TimeUnitFormat)
-#define TimeZone U_ICU_ENTRY_POINT_RENAME(TimeZone)
-#define TimeZoneRule U_ICU_ENTRY_POINT_RENAME(TimeZoneRule)
-#define TimeZoneTransition U_ICU_ENTRY_POINT_RENAME(TimeZoneTransition)
-#define TitlecaseTransliterator U_ICU_ENTRY_POINT_RENAME(TitlecaseTransliterator)
-#define TransliterationRule U_ICU_ENTRY_POINT_RENAME(TransliterationRule)
-#define TransliterationRuleData U_ICU_ENTRY_POINT_RENAME(TransliterationRuleData)
-#define TransliterationRuleSet U_ICU_ENTRY_POINT_RENAME(TransliterationRuleSet)
-#define Transliterator U_ICU_ENTRY_POINT_RENAME(Transliterator)
-#define TransliteratorAlias U_ICU_ENTRY_POINT_RENAME(TransliteratorAlias)
-#define TransliteratorEntry U_ICU_ENTRY_POINT_RENAME(TransliteratorEntry)
-#define TransliteratorIDParser U_ICU_ENTRY_POINT_RENAME(TransliteratorIDParser)
-#define TransliteratorParser U_ICU_ENTRY_POINT_RENAME(TransliteratorParser)
-#define TransliteratorRegistry U_ICU_ENTRY_POINT_RENAME(TransliteratorRegistry)
-#define TransliteratorSpec U_ICU_ENTRY_POINT_RENAME(TransliteratorSpec)
-#define TriStateSingleton U_ICU_ENTRY_POINT_RENAME(TriStateSingleton)
-#define TrieWordDictionary U_ICU_ENTRY_POINT_RENAME(TrieWordDictionary)
-#define TrimmedArrayProcessor U_ICU_ENTRY_POINT_RENAME(TrimmedArrayProcessor)
-#define UCharCharacterIterator U_ICU_ENTRY_POINT_RENAME(UCharCharacterIterator)
-#define UCollationPCE U_ICU_ENTRY_POINT_RENAME(UCollationPCE)
-#define UDataPathIterator U_ICU_ENTRY_POINT_RENAME(UDataPathIterator)
-#define ULocRuns U_ICU_ENTRY_POINT_RENAME(ULocRuns)
-#define UMemory U_ICU_ENTRY_POINT_RENAME(UMemory)
-#define UObject U_ICU_ENTRY_POINT_RENAME(UObject)
-#define UStack U_ICU_ENTRY_POINT_RENAME(UStack)
-#define UStringEnumeration U_ICU_ENTRY_POINT_RENAME(UStringEnumeration)
-#define UTS46 U_ICU_ENTRY_POINT_RENAME(UTS46)
-#define UTrie2Singleton U_ICU_ENTRY_POINT_RENAME(UTrie2Singleton)
-#define UVector U_ICU_ENTRY_POINT_RENAME(UVector)
-#define UVector32 U_ICU_ENTRY_POINT_RENAME(UVector32)
-#define UVector64 U_ICU_ENTRY_POINT_RENAME(UVector64)
-#define UnescapeTransliterator U_ICU_ENTRY_POINT_RENAME(UnescapeTransliterator)
-#define UnhandledEngine U_ICU_ENTRY_POINT_RENAME(UnhandledEngine)
-#define UnicodeArabicOpenTypeLayoutEngine U_ICU_ENTRY_POINT_RENAME(UnicodeArabicOpenTypeLayoutEngine)
-#define UnicodeFilter U_ICU_ENTRY_POINT_RENAME(UnicodeFilter)
-#define UnicodeFunctor U_ICU_ENTRY_POINT_RENAME(UnicodeFunctor)
-#define UnicodeMatcher U_ICU_ENTRY_POINT_RENAME(UnicodeMatcher)
-#define UnicodeNameTransliterator U_ICU_ENTRY_POINT_RENAME(UnicodeNameTransliterator)
-#define UnicodeReplacer U_ICU_ENTRY_POINT_RENAME(UnicodeReplacer)
-#define UnicodeSet U_ICU_ENTRY_POINT_RENAME(UnicodeSet)
-#define UnicodeSetIterator U_ICU_ENTRY_POINT_RENAME(UnicodeSetIterator)
-#define UnicodeSetStringSpan U_ICU_ENTRY_POINT_RENAME(UnicodeSetStringSpan)
-#define UnicodeString U_ICU_ENTRY_POINT_RENAME(UnicodeString)
-#define UppercaseTransliterator U_ICU_ENTRY_POINT_RENAME(UppercaseTransliterator)
-#define VTZReader U_ICU_ENTRY_POINT_RENAME(VTZReader)
-#define VTZWriter U_ICU_ENTRY_POINT_RENAME(VTZWriter)
-#define VTimeZone U_ICU_ENTRY_POINT_RENAME(VTimeZone)
-#define ValueRecord U_ICU_ENTRY_POINT_RENAME(ValueRecord)
-#define ValueRuns U_ICU_ENTRY_POINT_RENAME(ValueRuns)
-#define ZSFCache U_ICU_ENTRY_POINT_RENAME(ZSFCache)
-#define ZSFCacheEntry U_ICU_ENTRY_POINT_RENAME(ZSFCacheEntry)
-#define ZSFStringPool U_ICU_ENTRY_POINT_RENAME(ZSFStringPool)
-#define ZSFStringPoolChunk U_ICU_ENTRY_POINT_RENAME(ZSFStringPoolChunk)
-#define ZoneMeta U_ICU_ENTRY_POINT_RENAME(ZoneMeta)
-#define ZoneStringFormat U_ICU_ENTRY_POINT_RENAME(ZoneStringFormat)
-#define ZoneStringInfo U_ICU_ENTRY_POINT_RENAME(ZoneStringInfo)
-#define ZoneStringSearchResultHandler U_ICU_ENTRY_POINT_RENAME(ZoneStringSearchResultHandler)
-#define ZoneStrings U_ICU_ENTRY_POINT_RENAME(ZoneStrings)
-#define collIterate U_ICU_ENTRY_POINT_RENAME(collIterate)
-#define locale_set_default_internal U_ICU_ENTRY_POINT_RENAME(locale_set_default_internal)
-#define util64_fromDouble U_ICU_ENTRY_POINT_RENAME(util64_fromDouble)
-#define util64_pow U_ICU_ENTRY_POINT_RENAME(util64_pow)
-#define util64_tou U_ICU_ENTRY_POINT_RENAME(util64_tou)
-
-#endif
-#endif
-
#endif
#endif
diff --git a/Source/WebCore/icu/unicode/uscript.h b/Source/WebCore/icu/unicode/uscript.h
index ee21c740d..57255c4f9 100644
--- a/Source/WebCore/icu/unicode/uscript.h
+++ b/Source/WebCore/icu/unicode/uscript.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
- * Copyright (C) 1997-2010, International Business Machines
+ * Copyright (C) 1997-2013, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -44,182 +44,369 @@
* @stable ICU 2.2
*/
typedef enum UScriptCode {
+ /*
+ * Note: UScriptCode constants and their ISO script code comments
+ * are parsed by preparseucd.py.
+ * It matches lines like
+ * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * /
+ */
+
+ /** @stable ICU 2.2 */
USCRIPT_INVALID_CODE = -1,
+ /** @stable ICU 2.2 */
USCRIPT_COMMON = 0, /* Zyyy */
+ /** @stable ICU 2.2 */
USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
+ /** @stable ICU 2.2 */
USCRIPT_ARABIC = 2, /* Arab */
+ /** @stable ICU 2.2 */
USCRIPT_ARMENIAN = 3, /* Armn */
+ /** @stable ICU 2.2 */
USCRIPT_BENGALI = 4, /* Beng */
+ /** @stable ICU 2.2 */
USCRIPT_BOPOMOFO = 5, /* Bopo */
+ /** @stable ICU 2.2 */
USCRIPT_CHEROKEE = 6, /* Cher */
+ /** @stable ICU 2.2 */
USCRIPT_COPTIC = 7, /* Copt */
+ /** @stable ICU 2.2 */
USCRIPT_CYRILLIC = 8, /* Cyrl */
+ /** @stable ICU 2.2 */
USCRIPT_DESERET = 9, /* Dsrt */
+ /** @stable ICU 2.2 */
USCRIPT_DEVANAGARI = 10, /* Deva */
+ /** @stable ICU 2.2 */
USCRIPT_ETHIOPIC = 11, /* Ethi */
+ /** @stable ICU 2.2 */
USCRIPT_GEORGIAN = 12, /* Geor */
+ /** @stable ICU 2.2 */
USCRIPT_GOTHIC = 13, /* Goth */
+ /** @stable ICU 2.2 */
USCRIPT_GREEK = 14, /* Grek */
+ /** @stable ICU 2.2 */
USCRIPT_GUJARATI = 15, /* Gujr */
+ /** @stable ICU 2.2 */
USCRIPT_GURMUKHI = 16, /* Guru */
+ /** @stable ICU 2.2 */
USCRIPT_HAN = 17, /* Hani */
+ /** @stable ICU 2.2 */
USCRIPT_HANGUL = 18, /* Hang */
+ /** @stable ICU 2.2 */
USCRIPT_HEBREW = 19, /* Hebr */
+ /** @stable ICU 2.2 */
USCRIPT_HIRAGANA = 20, /* Hira */
+ /** @stable ICU 2.2 */
USCRIPT_KANNADA = 21, /* Knda */
+ /** @stable ICU 2.2 */
USCRIPT_KATAKANA = 22, /* Kana */
+ /** @stable ICU 2.2 */
USCRIPT_KHMER = 23, /* Khmr */
+ /** @stable ICU 2.2 */
USCRIPT_LAO = 24, /* Laoo */
+ /** @stable ICU 2.2 */
USCRIPT_LATIN = 25, /* Latn */
+ /** @stable ICU 2.2 */
USCRIPT_MALAYALAM = 26, /* Mlym */
+ /** @stable ICU 2.2 */
USCRIPT_MONGOLIAN = 27, /* Mong */
+ /** @stable ICU 2.2 */
USCRIPT_MYANMAR = 28, /* Mymr */
+ /** @stable ICU 2.2 */
USCRIPT_OGHAM = 29, /* Ogam */
+ /** @stable ICU 2.2 */
USCRIPT_OLD_ITALIC = 30, /* Ital */
+ /** @stable ICU 2.2 */
USCRIPT_ORIYA = 31, /* Orya */
+ /** @stable ICU 2.2 */
USCRIPT_RUNIC = 32, /* Runr */
+ /** @stable ICU 2.2 */
USCRIPT_SINHALA = 33, /* Sinh */
+ /** @stable ICU 2.2 */
USCRIPT_SYRIAC = 34, /* Syrc */
+ /** @stable ICU 2.2 */
USCRIPT_TAMIL = 35, /* Taml */
+ /** @stable ICU 2.2 */
USCRIPT_TELUGU = 36, /* Telu */
+ /** @stable ICU 2.2 */
USCRIPT_THAANA = 37, /* Thaa */
+ /** @stable ICU 2.2 */
USCRIPT_THAI = 38, /* Thai */
+ /** @stable ICU 2.2 */
USCRIPT_TIBETAN = 39, /* Tibt */
/** Canadian_Aboriginal script. @stable ICU 2.6 */
USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
/** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
+ /** @stable ICU 2.2 */
USCRIPT_YI = 41, /* Yiii */
+ /* New scripts in Unicode 3.2 */
+ /** @stable ICU 2.2 */
USCRIPT_TAGALOG = 42, /* Tglg */
+ /** @stable ICU 2.2 */
USCRIPT_HANUNOO = 43, /* Hano */
+ /** @stable ICU 2.2 */
USCRIPT_BUHID = 44, /* Buhd */
+ /** @stable ICU 2.2 */
USCRIPT_TAGBANWA = 45, /* Tagb */
- /* New scripts in Unicode 4 @stable ICU 2.6 */
+ /* New scripts in Unicode 4 */
+ /** @stable ICU 2.6 */
USCRIPT_BRAILLE = 46, /* Brai */
+ /** @stable ICU 2.6 */
USCRIPT_CYPRIOT = 47, /* Cprt */
+ /** @stable ICU 2.6 */
USCRIPT_LIMBU = 48, /* Limb */
+ /** @stable ICU 2.6 */
USCRIPT_LINEAR_B = 49, /* Linb */
+ /** @stable ICU 2.6 */
USCRIPT_OSMANYA = 50, /* Osma */
+ /** @stable ICU 2.6 */
USCRIPT_SHAVIAN = 51, /* Shaw */
+ /** @stable ICU 2.6 */
USCRIPT_TAI_LE = 52, /* Tale */
+ /** @stable ICU 2.6 */
USCRIPT_UGARITIC = 53, /* Ugar */
/** New script code in Unicode 4.0.1 @stable ICU 3.0 */
USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
- /* New scripts in Unicode 4.1 @stable ICU 3.4 */
+ /* New scripts in Unicode 4.1 */
+ /** @stable ICU 3.4 */
USCRIPT_BUGINESE = 55, /* Bugi */
+ /** @stable ICU 3.4 */
USCRIPT_GLAGOLITIC = 56, /* Glag */
+ /** @stable ICU 3.4 */
USCRIPT_KHAROSHTHI = 57, /* Khar */
+ /** @stable ICU 3.4 */
USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
+ /** @stable ICU 3.4 */
USCRIPT_NEW_TAI_LUE = 59, /* Talu */
+ /** @stable ICU 3.4 */
USCRIPT_TIFINAGH = 60, /* Tfng */
+ /** @stable ICU 3.4 */
USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
- /* New script codes from ISO 15924 @stable ICU 3.6 */
+ /* New script codes from ISO 15924 */
+ /** @stable ICU 3.6 */
USCRIPT_BALINESE = 62, /* Bali */
+ /** @stable ICU 3.6 */
USCRIPT_BATAK = 63, /* Batk */
+ /** @stable ICU 3.6 */
USCRIPT_BLISSYMBOLS = 64, /* Blis */
+ /** @stable ICU 3.6 */
USCRIPT_BRAHMI = 65, /* Brah */
+ /** @stable ICU 3.6 */
USCRIPT_CHAM = 66, /* Cham */
+ /** @stable ICU 3.6 */
USCRIPT_CIRTH = 67, /* Cirt */
+ /** @stable ICU 3.6 */
USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
+ /** @stable ICU 3.6 */
USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
+ /** @stable ICU 3.6 */
USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
+ /** @stable ICU 3.6 */
USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
+ /** @stable ICU 3.6 */
USCRIPT_KHUTSURI = 72, /* Geok */
+ /** @stable ICU 3.6 */
USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
+ /** @stable ICU 3.6 */
USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
+ /** @stable ICU 3.6 */
USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
+ /** @stable ICU 3.6 */
USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
+ /** @stable ICU 3.6 */
USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
+ /** @stable ICU 3.6 */
USCRIPT_JAVANESE = 78, /* Java */
+ /** @stable ICU 3.6 */
USCRIPT_KAYAH_LI = 79, /* Kali */
+ /** @stable ICU 3.6 */
USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
+ /** @stable ICU 3.6 */
USCRIPT_LATIN_GAELIC = 81, /* Latg */
+ /** @stable ICU 3.6 */
USCRIPT_LEPCHA = 82, /* Lepc */
+ /** @stable ICU 3.6 */
USCRIPT_LINEAR_A = 83, /* Lina */
/** @stable ICU 4.6 */
USCRIPT_MANDAIC = 84, /* Mand */
/** @stable ICU 3.6 */
USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
+ /** @stable ICU 3.6 */
USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
/** @stable ICU 4.6 */
USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
/** @stable ICU 3.6 */
USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
+ /** @stable ICU 3.6 */
USCRIPT_NKO = 87, /* Nkoo */
+ /** @stable ICU 3.6 */
USCRIPT_ORKHON = 88, /* Orkh */
+ /** @stable ICU 3.6 */
USCRIPT_OLD_PERMIC = 89, /* Perm */
+ /** @stable ICU 3.6 */
USCRIPT_PHAGS_PA = 90, /* Phag */
+ /** @stable ICU 3.6 */
USCRIPT_PHOENICIAN = 91, /* Phnx */
- USCRIPT_PHONETIC_POLLARD = 92, /* Plrd */
+ /** @stable ICU 52 */
+ USCRIPT_MIAO = 92, /* Plrd */
+ /** @stable ICU 3.6 */
+ USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
+ /** @stable ICU 3.6 */
USCRIPT_RONGORONGO = 93, /* Roro */
+ /** @stable ICU 3.6 */
USCRIPT_SARATI = 94, /* Sara */
+ /** @stable ICU 3.6 */
USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
+ /** @stable ICU 3.6 */
USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
+ /** @stable ICU 3.6 */
USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
+ /** @stable ICU 3.6 */
USCRIPT_TENGWAR = 98, /* Teng */
+ /** @stable ICU 3.6 */
USCRIPT_VAI = 99, /* Vaii */
+ /** @stable ICU 3.6 */
USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
+ /** @stable ICU 3.6 */
USCRIPT_CUNEIFORM = 101,/* Xsux */
+ /** @stable ICU 3.6 */
USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
+ /** @stable ICU 3.6 */
USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
- /* New script codes from ISO 15924 @stable ICU 3.8 */
+ /* New script codes from ISO 15924 */
+ /** @stable ICU 3.8 */
USCRIPT_CARIAN = 104,/* Cari */
+ /** @stable ICU 3.8 */
USCRIPT_JAPANESE = 105,/* Jpan */
+ /** @stable ICU 3.8 */
USCRIPT_LANNA = 106,/* Lana */
+ /** @stable ICU 3.8 */
USCRIPT_LYCIAN = 107,/* Lyci */
+ /** @stable ICU 3.8 */
USCRIPT_LYDIAN = 108,/* Lydi */
+ /** @stable ICU 3.8 */
USCRIPT_OL_CHIKI = 109,/* Olck */
+ /** @stable ICU 3.8 */
USCRIPT_REJANG = 110,/* Rjng */
+ /** @stable ICU 3.8 */
USCRIPT_SAURASHTRA = 111,/* Saur */
+ /** @stable ICU 3.8 */
USCRIPT_SIGN_WRITING = 112,/* Sgnw */
+ /** @stable ICU 3.8 */
USCRIPT_SUNDANESE = 113,/* Sund */
+ /** @stable ICU 3.8 */
USCRIPT_MOON = 114,/* Moon */
+ /** @stable ICU 3.8 */
USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
- /* New script codes from ISO 15924 @stable ICU 4.0 */
+ /* New script codes from ISO 15924 */
+ /** @stable ICU 4.0 */
USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
+ /** @stable ICU 4.0 */
USCRIPT_AVESTAN = 117,/* Avst */
+ /** @stable ICU 4.0 */
USCRIPT_CHAKMA = 118,/* Cakm */
+ /** @stable ICU 4.0 */
USCRIPT_KOREAN = 119,/* Kore */
+ /** @stable ICU 4.0 */
USCRIPT_KAITHI = 120,/* Kthi */
+ /** @stable ICU 4.0 */
USCRIPT_MANICHAEAN = 121,/* Mani */
+ /** @stable ICU 4.0 */
USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
+ /** @stable ICU 4.0 */
USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
+ /** @stable ICU 4.0 */
USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
+ /** @stable ICU 4.0 */
USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
+ /** @stable ICU 4.0 */
USCRIPT_SAMARITAN = 126,/* Samr */
+ /** @stable ICU 4.0 */
USCRIPT_TAI_VIET = 127,/* Tavt */
+ /** @stable ICU 4.0 */
USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
+ /** @stable ICU 4.0 */
USCRIPT_SYMBOLS = 129,/* Zsym */
- /* New script codes from ISO 15924 @stable ICU 4.4 */
+ /* New script codes from ISO 15924 */
+ /** @stable ICU 4.4 */
USCRIPT_BAMUM = 130,/* Bamu */
+ /** @stable ICU 4.4 */
USCRIPT_LISU = 131,/* Lisu */
+ /** @stable ICU 4.4 */
USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
+ /** @stable ICU 4.4 */
USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
- /* New script codes from ISO 15924 @stable ICU 4.6 */
+ /* New script codes from ISO 15924 */
+ /** @stable ICU 4.6 */
USCRIPT_BASSA_VAH = 134,/* Bass */
+ /** @stable ICU 4.6 */
USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl */
+ /** @stable ICU 4.6 */
USCRIPT_ELBASAN = 136,/* Elba */
+ /** @stable ICU 4.6 */
USCRIPT_GRANTHA = 137,/* Gran */
+ /** @stable ICU 4.6 */
USCRIPT_KPELLE = 138,/* Kpel */
+ /** @stable ICU 4.6 */
USCRIPT_LOMA = 139,/* Loma */
+ /** @stable ICU 4.6 */
USCRIPT_MENDE = 140,/* Mend */
+ /** @stable ICU 4.6 */
USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
+ /** @stable ICU 4.6 */
USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
+ /** @stable ICU 4.6 */
USCRIPT_NABATAEAN = 143,/* Nbat */
+ /** @stable ICU 4.6 */
USCRIPT_PALMYRENE = 144,/* Palm */
+ /** @stable ICU 4.6 */
USCRIPT_SINDHI = 145,/* Sind */
+ /** @stable ICU 4.6 */
USCRIPT_WARANG_CITI = 146,/* Wara */
+ /** @stable ICU 4.8 */
+ USCRIPT_AFAKA = 147,/* Afak */
+ /** @stable ICU 4.8 */
+ USCRIPT_JURCHEN = 148,/* Jurc */
+ /** @stable ICU 4.8 */
+ USCRIPT_MRO = 149,/* Mroo */
+ /** @stable ICU 4.8 */
+ USCRIPT_NUSHU = 150,/* Nshu */
+ /** @stable ICU 4.8 */
+ USCRIPT_SHARADA = 151,/* Shrd */
+ /** @stable ICU 4.8 */
+ USCRIPT_SORA_SOMPENG = 152,/* Sora */
+ /** @stable ICU 4.8 */
+ USCRIPT_TAKRI = 153,/* Takr */
+ /** @stable ICU 4.8 */
+ USCRIPT_TANGUT = 154,/* Tang */
+ /** @stable ICU 4.8 */
+ USCRIPT_WOLEAI = 155,/* Wole */
+
+ /** @stable ICU 49 */
+ USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */
+ /** @stable ICU 49 */
+ USCRIPT_KHOJKI = 157,/* Khoj */
+ /** @stable ICU 49 */
+ USCRIPT_TIRHUTA = 158,/* Tirh */
+
+ /** @stable ICU 52 */
+ USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */
+ /** @stable ICU 52 */
+ USCRIPT_MAHAJANI = 160,/* Mahj */
+
/* Private use codes from Qaaa - Qabx are not supported */
- USCRIPT_CODE_LIMIT = 147
+
+ /** @stable ICU 2.2 */
+ USCRIPT_CODE_LIMIT = 161
} UScriptCode;
/**
@@ -277,9 +464,9 @@ U_STABLE UScriptCode U_EXPORT2
uscript_getScript(UChar32 codepoint, UErrorCode *err);
/**
- * Is code point c used in script sc?
- * That is, does code point c have the Script property value sc,
- * or do code point c's Script_Extensions include script code sc?
+ * Do the Script_Extensions of code point c contain script sc?
+ * If c does not have explicit Script_Extensions, then this tests whether
+ * c has the Script property value sc.
*
* Some characters are commonly used in multiple scripts.
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
@@ -288,15 +475,22 @@ uscript_getScript(UChar32 codepoint, UErrorCode *err);
* in future versions of the Unicode Standard, and thus in ICU.
* @param c code point
* @param sc script code
- * @return TRUE if Script(c)==sc or sc is in Script_Extensions(c)
- * @draft ICU 4.6
+ * @return TRUE if sc is in Script_Extensions(c)
+ * @stable ICU 49
*/
-U_DRAFT UBool U_EXPORT2
+U_STABLE UBool U_EXPORT2
uscript_hasScript(UChar32 c, UScriptCode sc);
/**
* Writes code point c's Script_Extensions as a list of UScriptCode values
- * to the output scripts array.
+ * to the output scripts array and returns the number of script codes.
+ * - If c does have Script_Extensions, then the Script property value
+ * (normally Common or Inherited) is not included.
+ * - If c does not have Script_Extensions, then the one Script code is written to the output array.
+ * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
+ * In other words, if the return value is 1,
+ * then the output array contains exactly c's single Script code.
+ * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
*
* Some characters are commonly used in multiple scripts.
* For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
@@ -314,13 +508,120 @@ uscript_hasScript(UChar32 c, UScriptCode sc);
* pass the U_SUCCESS() test, or else the function returns
* immediately. Check for U_FAILURE() on output or use with
* function chaining. (See User Guide for details.)
- * @return number of script codes in c's Script_Extensions,
+ * @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
* written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
- * @draft ICU 4.6
+ * @stable ICU 49
*/
-U_DRAFT int32_t U_EXPORT2
+U_STABLE int32_t U_EXPORT2
uscript_getScriptExtensions(UChar32 c,
UScriptCode *scripts, int32_t capacity,
- UErrorCode *pErrorCode);
+ UErrorCode *errorCode);
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Script usage constants.
+ * See UAX #31 Unicode Identifier and Pattern Syntax.
+ * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
+ *
+ * @draft ICU 51
+ */
+typedef enum UScriptUsage {
+ /** Not encoded in Unicode. @draft ICU 51 */
+ USCRIPT_USAGE_NOT_ENCODED,
+ /** Unknown script usage. @draft ICU 51 */
+ USCRIPT_USAGE_UNKNOWN,
+ /** Candidate for Exclusion from Identifiers. @draft ICU 51 */
+ USCRIPT_USAGE_EXCLUDED,
+ /** Limited Use script. @draft ICU 51 */
+ USCRIPT_USAGE_LIMITED_USE,
+ /** Aspirational Use script. @draft ICU 51 */
+ USCRIPT_USAGE_ASPIRATIONAL,
+ /** Recommended script. @draft ICU 51 */
+ USCRIPT_USAGE_RECOMMENDED
+} UScriptUsage;
+
+/**
+ * Writes the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @param dest output string array
+ * @param capacity number of UChars in the dest array
+ * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
+ * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
+ * @draft ICU 51
+ */
+U_DRAFT int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+class UnicodeString;
+U_NAMESPACE_END
+
+/**
+ * Returns the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @return the sample character string
+ * @draft ICU 51
+ */
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script);
+
+#endif
+
+/**
+ * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
+ * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
+ *
+ * @param script script code
+ * @return script usage
+ * @see UScriptUsage
+ * @draft ICU 51
+ */
+U_DRAFT UScriptUsage U_EXPORT2
+uscript_getUsage(UScriptCode script);
+
+/**
+ * Returns TRUE if the script is written right-to-left.
+ * For example, Arab and Hebr.
+ *
+ * @param script script code
+ * @return TRUE if the script is right-to-left
+ * @draft ICU 51
+ */
+U_DRAFT UBool U_EXPORT2
+uscript_isRightToLeft(UScriptCode script);
+
+/**
+ * Returns TRUE if the script allows line breaks between letters (excluding hyphenation).
+ * Such a script typically requires dictionary-based line breaking.
+ * For example, Hani and Thai.
+ *
+ * @param script script code
+ * @return TRUE if the script allows line breaks between letters
+ * @draft ICU 51
+ */
+U_DRAFT UBool U_EXPORT2
+uscript_breaksBetweenLetters(UScriptCode script);
+
+/**
+ * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary.
+ * For example, Latn and Cyrl.
+ *
+ * @param script script code
+ * @return TRUE if the script is cased
+ * @draft ICU 51
+ */
+U_DRAFT UBool U_EXPORT2
+uscript_isCased(UScriptCode script);
+
+#endif /* U_HIDE_DRAFT_API */
#endif
diff --git a/Source/WebCore/icu/unicode/usearch.h b/Source/WebCore/icu/unicode/usearch.h
new file mode 100644
index 000000000..9f305eb87
--- /dev/null
+++ b/Source/WebCore/icu/unicode/usearch.h
@@ -0,0 +1,836 @@
+/*
+**********************************************************************
+* Copyright (C) 2001-2011 IBM and others. All rights reserved.
+**********************************************************************
+* Date Name Description
+* 06/28/2001 synwee Creation.
+**********************************************************************
+*/
+#ifndef USEARCH_H
+#define USEARCH_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/localpointer.h"
+#include "unicode/ucol.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ubrk.h"
+
+/**
+ * \file
+ * \brief C API: StringSearch
+ *
+ * C APIs for an engine that provides language-sensitive text searching based
+ * on the comparison rules defined in a <tt>UCollator</tt> data struct,
+ * see <tt>ucol.h</tt>. This ensures that language eccentricity can be
+ * handled, e.g. for the German collator, characters &szlig; and SS will be matched
+ * if case is chosen to be ignored.
+ * See the <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm">
+ * "ICU Collation Design Document"</a> for more information.
+ * <p>
+ * The algorithm implemented is a modified form of the Boyer-Moore search.
+ * For more information on the algorithm, see
+ * <a href="http://icu-project.org/docs/papers/efficient_text_searching_in_java.html">
+ * "Efficient Text Searching in Java"</a>, published in <i>Java Report</i>
+ * in February, 1999.
+ * <p>
+ * There are 2 match options for selection:<br>
+ * Let S' be the sub-string of a text string S between the offsets start and
+ * end <start, end>.
+ * <br>
+ * A pattern string P matches a text string S at the offsets <start, end>
+ * if
+ * <pre>
+ * option 1. Some canonical equivalent of P matches some canonical equivalent
+ * of S'
+ * option 2. P matches S' and if P starts or ends with a combining mark,
+ * there exists no non-ignorable combining mark before or after S'
+ * in S respectively.
+ * </pre>
+ * Option 2. will be the default.
+ * <p>
+ * This search has APIs similar to that of other text iteration mechanisms
+ * such as the break iterators in <tt>ubrk.h</tt>. Using these
+ * APIs, it is easy to scan through text looking for all occurrences of
+ * a given pattern. This search iterator allows changing of direction by
+ * calling a <tt>reset</tt> followed by a <tt>next</tt> or <tt>previous</tt>.
+ * Though a direction change can occur without calling <tt>reset</tt> first,
+ * this operation comes with some speed penalty.
+ * Generally, match results in the forward direction will match the result
+ * matches in the backwards direction in the reverse order.
+ * <p>
+ * <tt>usearch.h</tt> provides APIs to specify the starting position
+ * within the text string to be searched, e.g. <tt>usearch_setOffset</tt>,
+ * <tt>usearch_preceding</tt> and <tt>usearch_following</tt>. Since the
+ * starting position will be set as it is specified, please take note that
+ * there are some dangerous positions which the search may render incorrect
+ * results:
+ * <ul>
+ * <li> The midst of a substring that requires normalization.
+ * <li> If the following match is to be found, the position should not be the
+ * second character which requires to be swapped with the preceding
+ * character. Vice versa, if the preceding match is to be found,
+ * position to search from should not be the first character which
+ * requires to be swapped with the next character. E.g certain Thai and
+ * Lao characters require swapping.
+ * <li> If a following pattern match is to be found, any position within a
+ * contracting sequence except the first will fail. Vice versa if a
+ * preceding pattern match is to be found, an invalid starting point
+ * would be any character within a contracting sequence except the last.
+ * </ul>
+ * <p>
+ * A breakiterator can be used if only matches at logical breaks are desired.
+ * Using a breakiterator will only give you results that exactly match the
+ * boundaries given by the breakiterator. For instance the pattern "e" will
+ * not be found in the string "\u00e9" if a character break iterator is used.
+ * <p>
+ * Options are provided to handle overlapping matches.
+ * E.g. In English, overlapping matches produce the results 0 and 2
+ * for the pattern "abab" in the text "ababab", whereas mutually
+ * exclusive matches only produce the result 0.
+ * <p>
+ * Though collator attributes will be taken into consideration while
+ * performing matches, there are no APIs here for setting and getting the
+ * attributes. These attributes can be set by getting the collator
+ * from <tt>usearch_getCollator</tt> and using the APIs in <tt>ucol.h</tt>.
+ * Lastly to update String Search to the new collator attributes,
+ * usearch_reset() has to be called.
+ * <p>
+ * Restriction: <br>
+ * Currently there are no composite characters that consist of a
+ * character with combining class > 0 before a character with combining
+ * class == 0. However, if such a character exists in the future, the
+ * search mechanism does not guarantee the results for option 1.
+ *
+ * <p>
+ * Example of use:<br>
+ * <pre><code>
+ * char *tgtstr = "The quick brown fox jumped over the lazy fox";
+ * char *patstr = "fox";
+ * UChar target[64];
+ * UChar pattern[16];
+ * UErrorCode status = U_ZERO_ERROR;
+ * u_uastrcpy(target, tgtstr);
+ * u_uastrcpy(pattern, patstr);
+ *
+ * UStringSearch *search = usearch_open(pattern, -1, target, -1, "en_US",
+ * NULL, &status);
+ * if (U_SUCCESS(status)) {
+ * for (int pos = usearch_first(search, &status);
+ * pos != USEARCH_DONE;
+ * pos = usearch_next(search, &status))
+ * {
+ * printf("Found match at %d pos, length is %d\n", pos,
+ * usearch_getMatchedLength(search));
+ * }
+ * }
+ *
+ * usearch_close(search);
+ * </code></pre>
+ * @stable ICU 2.4
+ */
+
+/**
+* DONE is returned by previous() and next() after all valid matches have
+* been returned, and by first() and last() if there are no matches at all.
+* @stable ICU 2.4
+*/
+#define USEARCH_DONE -1
+
+/**
+* Data structure for searching
+* @stable ICU 2.4
+*/
+struct UStringSearch;
+/**
+* Data structure for searching
+* @stable ICU 2.4
+*/
+typedef struct UStringSearch UStringSearch;
+
+/**
+* @stable ICU 2.4
+*/
+typedef enum {
+ /** Option for overlapping matches */
+ USEARCH_OVERLAP,
+ /**
+ * Option for canonical matches. option 1 in header documentation.
+ * The default value will be USEARCH_OFF
+ */
+ USEARCH_CANONICAL_MATCH,
+ /**
+ * Option to control how collation elements are compared.
+ * The default value will be USEARCH_STANDARD_ELEMENT_COMPARISON.
+ * @stable ICU 4.4
+ */
+ USEARCH_ELEMENT_COMPARISON,
+
+ USEARCH_ATTRIBUTE_COUNT
+} USearchAttribute;
+
+/**
+* @stable ICU 2.4
+*/
+typedef enum {
+ /** Default value for any USearchAttribute */
+ USEARCH_DEFAULT = -1,
+ /** Value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
+ USEARCH_OFF,
+ /** Value for USEARCH_OVERLAP and USEARCH_CANONICAL_MATCH */
+ USEARCH_ON,
+ /**
+ * Value (default) for USEARCH_ELEMENT_COMPARISON;
+ * standard collation element comparison at the specified collator
+ * strength.
+ * @stable ICU 4.4
+ */
+ USEARCH_STANDARD_ELEMENT_COMPARISON,
+ /**
+ * Value for USEARCH_ELEMENT_COMPARISON;
+ * collation element comparison is modified to effectively provide
+ * behavior between the specified strength and strength - 1. Collation
+ * elements in the pattern that have the base weight for the specified
+ * strength are treated as "wildcards" that match an element with any
+ * other weight at that collation level in the searched text. For
+ * example, with a secondary-strength English collator, a plain 'e' in
+ * the pattern will match a plain e or an e with any diacritic in the
+ * searched text, but an e with diacritic in the pattern will only
+ * match an e with the same diacritic in the searched text.
+ * @stable ICU 4.4
+ */
+ USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD,
+ /**
+ * Value for USEARCH_ELEMENT_COMPARISON.
+ * collation element comparison is modified to effectively provide
+ * behavior between the specified strength and strength - 1. Collation
+ * elements in either the pattern or the searched text that have the
+ * base weight for the specified strength are treated as "wildcards"
+ * that match an element with any other weight at that collation level.
+ * For example, with a secondary-strength English collator, a plain 'e'
+ * in the pattern will match a plain e or an e with any diacritic in the
+ * searched text, but an e with diacritic in the pattern will only
+ * match an e with the same diacritic or a plain e in the searched text.
+ * @stable ICU 4.4
+ */
+ USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD,
+
+ USEARCH_ATTRIBUTE_VALUE_COUNT
+} USearchAttributeValue;
+
+/* open and close ------------------------------------------------------ */
+
+/**
+* Creating a search iterator data struct using the argument locale language
+* rule set. A collator will be created in the process, which will be owned by
+* this search and will be deleted in <tt>usearch_close</tt>.
+* @param pattern for matching
+* @param patternlength length of the pattern, -1 for null-termination
+* @param text text string
+* @param textlength length of the text string, -1 for null-termination
+* @param locale name of locale for the rules to be used
+* @param breakiter A BreakIterator that will be used to restrict the points
+* at which matches are detected. If a match is found, but
+* the match's start or end index is not a boundary as
+* determined by the <tt>BreakIterator</tt>, the match will
+* be rejected and another will be searched for.
+* If this parameter is <tt>NULL</tt>, no break detection is
+* attempted.
+* @param status for errors if it occurs. If pattern or text is NULL, or if
+* patternlength or textlength is 0 then an
+* U_ILLEGAL_ARGUMENT_ERROR is returned.
+* @return search iterator data structure, or NULL if there is an error.
+* @stable ICU 2.4
+*/
+U_STABLE UStringSearch * U_EXPORT2 usearch_open(const UChar *pattern,
+ int32_t patternlength,
+ const UChar *text,
+ int32_t textlength,
+ const char *locale,
+ UBreakIterator *breakiter,
+ UErrorCode *status);
+
+/**
+* Creating a search iterator data struct using the argument collator language
+* rule set. Note, user retains the ownership of this collator, thus the
+* responsibility of deletion lies with the user.
+* NOTE: string search cannot be instantiated from a collator that has
+* collate digits as numbers (CODAN) turned on.
+* @param pattern for matching
+* @param patternlength length of the pattern, -1 for null-termination
+* @param text text string
+* @param textlength length of the text string, -1 for null-termination
+* @param collator used for the language rules
+* @param breakiter A BreakIterator that will be used to restrict the points
+* at which matches are detected. If a match is found, but
+* the match's start or end index is not a boundary as
+* determined by the <tt>BreakIterator</tt>, the match will
+* be rejected and another will be searched for.
+* If this parameter is <tt>NULL</tt>, no break detection is
+* attempted.
+* @param status for errors if it occurs. If collator, pattern or text is NULL,
+* or if patternlength or textlength is 0 then an
+* U_ILLEGAL_ARGUMENT_ERROR is returned.
+* @return search iterator data structure, or NULL if there is an error.
+* @stable ICU 2.4
+*/
+U_STABLE UStringSearch * U_EXPORT2 usearch_openFromCollator(
+ const UChar *pattern,
+ int32_t patternlength,
+ const UChar *text,
+ int32_t textlength,
+ const UCollator *collator,
+ UBreakIterator *breakiter,
+ UErrorCode *status);
+
+/**
+* Destroying and cleaning up the search iterator data struct.
+* If a collator is created in <tt>usearch_open</tt>, it will be destroyed here.
+* @param searchiter data struct to clean up
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_close(UStringSearch *searchiter);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUStringSearchPointer
+ * "Smart pointer" class, closes a UStringSearch via usearch_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringSearchPointer, UStringSearch, usearch_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/* get and set methods -------------------------------------------------- */
+
+/**
+* Sets the current position in the text string which the next search will
+* start from. Clears previous states.
+* This method takes the argument index and sets the position in the text
+* string accordingly without checking if the index is pointing to a
+* valid starting point to begin searching.
+* Search positions that may render incorrect results are highlighted in the
+* header comments
+* @param strsrch search iterator data struct
+* @param position position to start next search from. If position is
+* outside the text range for searching,
+* a U_INDEX_OUTOFBOUNDS_ERROR will be returned
+* @param status error status if any.
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setOffset(UStringSearch *strsrch,
+ int32_t position,
+ UErrorCode *status);
+
+/**
+* Return the current index in the string text being searched.
+* If the iteration has gone past the end of the text (or past the beginning
+* for a backwards search), <tt>USEARCH_DONE</tt> is returned.
+* @param strsrch search iterator data struct
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getOffset(const UStringSearch *strsrch);
+
+/**
+* Sets the text searching attributes located in the enum USearchAttribute
+* with values from the enum USearchAttributeValue.
+* <tt>USEARCH_DEFAULT</tt> can be used for all attributes for resetting.
+* @param strsrch search iterator data struct
+* @param attribute text attribute to be set
+* @param value text attribute value
+* @param status for errors if it occurs
+* @see #usearch_getAttribute
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setAttribute(UStringSearch *strsrch,
+ USearchAttribute attribute,
+ USearchAttributeValue value,
+ UErrorCode *status);
+
+/**
+* Gets the text searching attributes.
+* @param strsrch search iterator data struct
+* @param attribute text attribute to be retrieved
+* @return text attribute value
+* @see #usearch_setAttribute
+* @stable ICU 2.4
+*/
+U_STABLE USearchAttributeValue U_EXPORT2 usearch_getAttribute(
+ const UStringSearch *strsrch,
+ USearchAttribute attribute);
+
+/**
+* Returns the index to the match in the text string that was searched.
+* This call returns a valid result only after a successful call to
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
+* or <tt>usearch_last</tt>.
+* Just after construction, or after a searching method returns
+* <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
+* <p>
+* Use <tt>usearch_getMatchedLength</tt> to get the matched string length.
+* @param strsrch search iterator data struct
+* @return index to a substring within the text string that is being
+* searched.
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedStart(
+ const UStringSearch *strsrch);
+
+/**
+* Returns the length of text in the string which matches the search pattern.
+* This call returns a valid result only after a successful call to
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
+* or <tt>usearch_last</tt>.
+* Just after construction, or after a searching method returns
+* <tt>USEARCH_DONE</tt>, this method will return 0.
+* @param strsrch search iterator data struct
+* @return The length of the match in the string text, or 0 if there is no
+* match currently.
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedLength(
+ const UStringSearch *strsrch);
+
+/**
+* Returns the text that was matched by the most recent call to
+* <tt>usearch_first</tt>, <tt>usearch_next</tt>, <tt>usearch_previous</tt>,
+* or <tt>usearch_last</tt>.
+* If the iterator is not pointing at a valid match (e.g. just after
+* construction or after <tt>USEARCH_DONE</tt> has been returned), returns
+* an empty string. If result is not large enough to store the matched text,
+* result will be filled with the partial text and an U_BUFFER_OVERFLOW_ERROR
+* will be returned in status. result will be null-terminated whenever
+* possible. If the buffer fits the matched text exactly, a null-termination
+* is not possible, then U_STRING_NOT_TERMINATED_WARNING is set in status.
+* Pre-flighting can be either done with length = 0 or the API
+* <tt>usearch_getMatchedLength</tt>.
+* @param strsrch search iterator data struct
+* @param result UChar buffer to store the matched string
+* @param resultCapacity length of the result buffer
+* @param status error returned if result is not large enough
+* @return exact length of the matched text, not counting the null-termination
+* @see #usearch_first
+* @see #usearch_next
+* @see #usearch_previous
+* @see #usearch_last
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_getMatchedText(const UStringSearch *strsrch,
+ UChar *result,
+ int32_t resultCapacity,
+ UErrorCode *status);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+* Set the BreakIterator that will be used to restrict the points at which
+* matches are detected.
+* @param strsrch search iterator data struct
+* @param breakiter A BreakIterator that will be used to restrict the points
+* at which matches are detected. If a match is found, but
+* the match's start or end index is not a boundary as
+* determined by the <tt>BreakIterator</tt>, the match will
+* be rejected and another will be searched for.
+* If this parameter is <tt>NULL</tt>, no break detection is
+* attempted.
+* @param status for errors if it occurs
+* @see #usearch_getBreakIterator
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setBreakIterator(UStringSearch *strsrch,
+ UBreakIterator *breakiter,
+ UErrorCode *status);
+
+/**
+* Returns the BreakIterator that is used to restrict the points at which
+* matches are detected. This will be the same object that was passed to the
+* constructor or to <tt>usearch_setBreakIterator</tt>. Note that
+* <tt>NULL</tt>
+* is a legal value; it means that break detection should not be attempted.
+* @param strsrch search iterator data struct
+* @return break iterator used
+* @see #usearch_setBreakIterator
+* @stable ICU 2.4
+*/
+U_STABLE const UBreakIterator * U_EXPORT2 usearch_getBreakIterator(
+ const UStringSearch *strsrch);
+
+#endif
+
+/**
+* Set the string text to be searched. Text iteration will hence begin at the
+* start of the text string. This method is useful if you want to re-use an
+* iterator to search for the same pattern within a different body of text.
+* @param strsrch search iterator data struct
+* @param text new string to look for match
+* @param textlength length of the new string, -1 for null-termination
+* @param status for errors if it occurs. If text is NULL, or textlength is 0
+* then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
+* done to strsrch.
+* @see #usearch_getText
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setText( UStringSearch *strsrch,
+ const UChar *text,
+ int32_t textlength,
+ UErrorCode *status);
+
+/**
+* Return the string text to be searched.
+* @param strsrch search iterator data struct
+* @param length returned string text length
+* @return string text
+* @see #usearch_setText
+* @stable ICU 2.4
+*/
+U_STABLE const UChar * U_EXPORT2 usearch_getText(const UStringSearch *strsrch,
+ int32_t *length);
+
+/**
+* Gets the collator used for the language rules.
+* <p>
+* Deleting the returned <tt>UCollator</tt> before calling
+* <tt>usearch_close</tt> would cause the string search to fail.
+* <tt>usearch_close</tt> will delete the collator if this search owns it.
+* @param strsrch search iterator data struct
+* @return collator
+* @stable ICU 2.4
+*/
+U_STABLE UCollator * U_EXPORT2 usearch_getCollator(
+ const UStringSearch *strsrch);
+
+/**
+* Sets the collator used for the language rules. User retains the ownership
+* of this collator, thus the responsibility of deletion lies with the user.
+* This method causes internal data such as Boyer-Moore shift tables to
+* be recalculated, but the iterator's position is unchanged.
+* @param strsrch search iterator data struct
+* @param collator to be used
+* @param status for errors if it occurs
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setCollator( UStringSearch *strsrch,
+ const UCollator *collator,
+ UErrorCode *status);
+
+/**
+* Sets the pattern used for matching.
+* Internal data like the Boyer Moore table will be recalculated, but the
+* iterator's position is unchanged.
+* @param strsrch search iterator data struct
+* @param pattern string
+* @param patternlength pattern length, -1 for null-terminated string
+* @param status for errors if it occurs. If pattern is NULL, or patternlength is 0
+* then an U_ILLEGAL_ARGUMENT_ERROR is returned with no change
+* done to strsrch.
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_setPattern( UStringSearch *strsrch,
+ const UChar *pattern,
+ int32_t patternlength,
+ UErrorCode *status);
+
+/**
+* Gets the search pattern
+* @param strsrch search iterator data struct
+* @param length return length of the pattern, -1 indicates that the pattern
+* is null-terminated
+* @return pattern string
+* @stable ICU 2.4
+*/
+U_STABLE const UChar * U_EXPORT2 usearch_getPattern(
+ const UStringSearch *strsrch,
+ int32_t *length);
+
+/* methods ------------------------------------------------------------- */
+
+/**
+* Returns the first index at which the string text matches the search
+* pattern.
+* The iterator is adjusted so that its current index (as returned by
+* <tt>usearch_getOffset</tt>) is the match position if one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The character index of the first match, or
+* <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_first(UStringSearch *strsrch,
+ UErrorCode *status);
+
+/**
+* Returns the first index equal to or greater than <tt>position</tt> at which
+* the string text
+* matches the search pattern. The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* <p>
+* Search positions that may render incorrect results are highlighted in the
+* header comments. If position is less than or greater than the text range
+* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned
+* @param strsrch search iterator data struct
+* @param position to start the search at
+* @param status for errors if it occurs
+* @return The character index of the first match following <tt>position</tt>,
+* or <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_following(UStringSearch *strsrch,
+ int32_t position,
+ UErrorCode *status);
+
+/**
+* Returns the last index in the target text at which it matches the search
+* pattern. The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>.
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the last match, or <tt>USEARCH_DONE</tt> if there
+* are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_last(UStringSearch *strsrch,
+ UErrorCode *status);
+
+/**
+* Returns the first index less than <tt>position</tt> at which the string text
+* matches the search pattern. The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* <p>
+* Search positions that may render incorrect results are highlighted in the
+* header comments. If position is less than or greater than the text range
+* for searching, an U_INDEX_OUTOFBOUNDS_ERROR will be returned.
+* <p>
+* When <tt>USEARCH_OVERLAP</tt> option is off, the last index of the
+* result match is always less than <tt>position</tt>.
+* When <tt>USEARCH_OVERLAP</tt> is on, the result match may span across
+* <tt>position</tt>.
+* @param strsrch search iterator data struct
+* @param position index position the search is to begin at
+* @param status for errors if it occurs
+* @return The character index of the first match preceding <tt>position</tt>,
+* or <tt>USEARCH_DONE</tt> if there are no matches.
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_preceding(UStringSearch *strsrch,
+ int32_t position,
+ UErrorCode *status);
+
+/**
+* Returns the index of the next point at which the string text matches the
+* search pattern, starting from the current position.
+* The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the next match after the current position, or
+* <tt>USEARCH_DONE</tt> if there are no more matches.
+* @see #usearch_first
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_next(UStringSearch *strsrch,
+ UErrorCode *status);
+
+/**
+* Returns the index of the previous point at which the string text matches
+* the search pattern, starting at the current position.
+* The iterator is adjusted so that its current
+* index (as returned by <tt>usearch_getOffset</tt>) is the match position if
+* one was found.
+* If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
+* the iterator will be adjusted to the index <tt>USEARCH_DONE</tt>
+* @param strsrch search iterator data struct
+* @param status for errors if it occurs
+* @return The index of the previous match before the current position,
+* or <tt>USEARCH_DONE</tt> if there are no more matches.
+* @see #usearch_last
+* @see #usearch_getOffset
+* @see #USEARCH_DONE
+* @stable ICU 2.4
+*/
+U_STABLE int32_t U_EXPORT2 usearch_previous(UStringSearch *strsrch,
+ UErrorCode *status);
+
+/**
+* Reset the iteration.
+* Search will begin at the start of the text string if a forward iteration
+* is initiated before a backwards iteration. Otherwise if a backwards
+* iteration is initiated before a forwards iteration, the search will begin
+* at the end of the text string.
+* @param strsrch search iterator data struct
+* @see #usearch_first
+* @stable ICU 2.4
+*/
+U_STABLE void U_EXPORT2 usearch_reset(UStringSearch *strsrch);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Simple forward search for the pattern, starting at a specified index,
+ * and using a default set of search options.
+ *
+ * This is an experimental function, and is not an official part of the
+ * ICU API.
+ *
+ * The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored.
+ *
+ * The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
+ * any Break Iterator are ignored.
+ *
+ * Matches obey the following constraints:
+ *
+ * Characters at the start or end positions of a match that are ignorable
+ * for collation are not included as part of the match, unless they
+ * are part of a combining sequence, as described below.
+ *
+ * A match will not include a partial combining sequence. Combining
+ * character sequences are considered to be inseparable units,
+ * and either match the pattern completely, or are considered to not match
+ * at all. Thus, for example, an A followed by a combining accent mark will
+ * not be found when searching for a plain (unaccented) A. (unless
+ * the collation strength has been set to ignore all accents).
+ *
+ * When beginning a search, the initial starting position, startIdx,
+ * is assumed to be an acceptable match boundary with respect to
+ * combining characters. A combining sequence that spans across the
+ * starting point will not suppress a match beginning at startIdx.
+ *
+ * Characters that expand to multiple collation elements
+ * (German sharp-S becoming 'ss', or the composed forms of accented
+ * characters, for example) also must match completely.
+ * Searching for a single 's' in a string containing only a sharp-s will
+ * find no match.
+ *
+ *
+ * @param strsrch the UStringSearch struct, which references both
+ * the text to be searched and the pattern being sought.
+ * @param startIdx The index into the text to begin the search.
+ * @param matchStart An out parameter, the starting index of the matched text.
+ * This parameter may be NULL.
+ * A value of -1 will be returned if no match was found.
+ * @param matchLimit Out parameter, the index of the first position following the matched text.
+ * The matchLimit will be at a suitable position for beginning a subsequent search
+ * in the input text.
+ * This parameter may be NULL.
+ * A value of -1 will be returned if no match was found.
+ *
+ * @param status Report any errors. Note that no match found is not an error.
+ * @return TRUE if a match was found, FALSE otherwise.
+ *
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 usearch_search(UStringSearch *strsrch,
+ int32_t startIdx,
+ int32_t *matchStart,
+ int32_t *matchLimit,
+ UErrorCode *status);
+
+/**
+ * Simple backwards search for the pattern, starting at a specified index,
+ * and using a default set of search options.
+ *
+ * This is an experimental function, and is not an official part of the
+ * ICU API.
+ *
+ * The collator options, such as UCOL_STRENGTH and UCOL_NORMALIZATION_MODE, are honored.
+ *
+ * The UStringSearch options USEARCH_CANONICAL_MATCH, USEARCH_OVERLAP and
+ * any Break Iterator are ignored.
+ *
+ * Matches obey the following constraints:
+ *
+ * Characters at the start or end positions of a match that are ignorable
+ * for collation are not included as part of the match, unless they
+ * are part of a combining sequence, as described below.
+ *
+ * A match will not include a partial combining sequence. Combining
+ * character sequences are considered to be inseparable units,
+ * and either match the pattern completely, or are considered to not match
+ * at all. Thus, for example, an A followed by a combining accent mark will
+ * not be found when searching for a plain (unaccented) A. (unless
+ * the collation strength has been set to ignore all accents).
+ *
+ * When beginning a search, the initial starting position, startIdx,
+ * is assumed to be an acceptable match boundary with respect to
+ * combining characters. A combining sequence that spans across the
+ * starting point will not suppress a match beginning at startIdx.
+ *
+ * Characters that expand to multiple collation elements
+ * (German sharp-S becoming 'ss', or the composed forms of accented
+ * characters, for example) also must match completely.
+ * Searching for a single 's' in a string containing only a sharp-s will
+ * find no match.
+ *
+ *
+ * @param strsrch the UStringSearch struct, which references both
+ * the text to be searched and the pattern being sought.
+ * @param startIdx The index into the text to begin the search.
+ * @param matchStart An out parameter, the starting index of the matched text.
+ * This parameter may be NULL.
+ * A value of -1 will be returned if no match was found.
+ * @param matchLimit Out parameter, the index of the first position following the matched text.
+ * The matchLimit will be at a suitable position for beginning a subsequent search
+ * in the input text.
+ * This parameter may be NULL.
+ * A value of -1 will be returned if no match was found.
+ *
+ * @param status Report any errors. Note that no match found is not an error.
+ * @return TRUE if a match was found, FALSE otherwise.
+ *
+ * @internal
+ */
+U_INTERNAL UBool U_EXPORT2 usearch_searchBackwards(UStringSearch *strsrch,
+ int32_t startIdx,
+ int32_t *matchStart,
+ int32_t *matchLimit,
+ UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uset.h b/Source/WebCore/icu/unicode/uset.h
index 77ab06342..40510cd41 100644
--- a/Source/WebCore/icu/unicode/uset.h
+++ b/Source/WebCore/icu/unicode/uset.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 2002-2010, International Business Machines
+* Copyright (C) 2002-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -89,13 +89,7 @@ enum {
* of each existing element in the set.
* @stable ICU 3.2
*/
- USET_ADD_CASE_MAPPINGS = 4,
-
- /**
- * Enough for any single-code point set
- * @internal
- */
- USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+ USET_ADD_CASE_MAPPINGS = 4
};
/**
@@ -207,6 +201,16 @@ typedef enum USetSpanCondition {
USET_SPAN_CONDITION_COUNT
} USetSpanCondition;
+enum {
+ /**
+ * Capacity of USerializedSet::staticArray.
+ * Enough for any single-code point set.
+ * Also provides padding for nice sizeof(USerializedSet).
+ * @stable ICU 2.4
+ */
+ USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+};
+
/**
* A serialized form of a Unicode set. Limited manipulations are
* possible directly on a serialized set. See below.
@@ -247,7 +251,7 @@ typedef struct USerializedSet {
* @stable ICU 4.2
*/
U_STABLE USet* U_EXPORT2
-uset_openEmpty();
+uset_openEmpty(void);
/**
* Creates a USet object that contains the range of characters
diff --git a/Source/WebCore/icu/unicode/ushape.h b/Source/WebCore/icu/unicode/ushape.h
index 6c3655f7a..9fb5ab4e5 100644
--- a/Source/WebCore/icu/unicode/ushape.h
+++ b/Source/WebCore/icu/unicode/ushape.h
@@ -1,7 +1,7 @@
/*
******************************************************************************
*
-* Copyright (C) 2000-2010, International Business Machines
+* Copyright (C) 2000-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
@@ -461,14 +461,14 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
* Shaping Mode: Only shaping.
* De-shaping Mode: N/A.
* Affects: All Seen options
- * @draft ICU 4.2
+ * @stable ICU 4.8
*/
-#define SHAPE_TAIL_NEW_UNICODE 0x8000000
+#define U_SHAPE_TAIL_NEW_UNICODE 0x8000000
/**
* Bit mask for new Unicode Tail option
- * @draft ICU 4.2
+ * @stable ICU 4.8
*/
-#define SHAPE_TAIL_TYPE_MASK 0x8000000
+#define U_SHAPE_TAIL_TYPE_MASK 0x8000000
#endif
diff --git a/Source/WebCore/icu/unicode/ustring.h b/Source/WebCore/icu/unicode/ustring.h
index 2ee16e99c..d2ea31c67 100644
--- a/Source/WebCore/icu/unicode/ustring.h
+++ b/Source/WebCore/icu/unicode/ustring.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 1998-2010, International Business Machines
+* Copyright (C) 1998-2012, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -20,9 +20,14 @@
#include "unicode/putil.h"
#include "unicode/uiter.h"
-/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
+/**
+ * \def UBRK_TYPEDEF_UBREAK_ITERATOR
+ * @internal
+ */
+
#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
# define UBRK_TYPEDEF_UBREAK_ITERATOR
+/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
typedef struct UBreakIterator UBreakIterator;
#endif
@@ -146,8 +151,8 @@ u_strcat(UChar *dst,
* If <code>n&lt;=0</code> then dst is not modified.
*
* @param dst The destination string.
- * @param src The source string.
- * @param n The maximum number of characters to append.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to append; no-op if <=0.
* @return A pointer to <code>dst</code>.
* @stable ICU 2.0
*/
@@ -550,9 +555,9 @@ u_strCaseCompare(const UChar *s1, int32_t length1,
* Compare two ustrings for bitwise equality.
* Compares at most <code>n</code> characters.
*
- * @param ucs1 A string to compare.
- * @param ucs2 A string to compare.
- * @param n The maximum number of characters to compare.
+ * @param ucs1 A string to compare (can be NULL/invalid if n<=0).
+ * @param ucs2 A string to compare (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to compare; always returns 0 if n<=0.
* @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
* value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
* value if <code>s1</code> is bitwise greater than <code>s2</code>.
@@ -667,8 +672,8 @@ u_strcpy(UChar *dst,
* if the length of <code>src</code> is less than <code>n</code>.
*
* @param dst The destination string.
- * @param src The source string.
- * @param n The maximum number of characters to copy.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to copy; no-op if <=0.
* @return A pointer to <code>dst</code>.
* @stable ICU 2.0
*/
@@ -742,8 +747,8 @@ U_STABLE char* U_EXPORT2 u_austrncpy(char *dst,
/**
* Synonym for memcpy(), but with UChars only.
* @param dest The destination string
- * @param src The source string
- * @param count The number of characters to copy
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to copy; no-op if <=0
* @return A pointer to <code>dest</code>
* @stable ICU 2.0
*/
@@ -753,8 +758,8 @@ u_memcpy(UChar *dest, const UChar *src, int32_t count);
/**
* Synonym for memmove(), but with UChars only.
* @param dest The destination string
- * @param src The source string
- * @param count The number of characters to move
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to move; no-op if <=0
* @return A pointer to <code>dest</code>
* @stable ICU 2.0
*/
@@ -918,7 +923,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
* }
* </pre>
*
- * Note that the macros will NOT consistently work if their argument is another #define.
+ * Note that the macros will NOT consistently work if their argument is another <code>#define</code>.
* The following will not work on all platforms, don't use it.
*
* <pre>
@@ -934,7 +939,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
* @stable ICU 2.0
*/
#if defined(U_DECLARE_UTF16)
-# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=U_DECLARE_UTF16(cs)
+# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
/**@stable ICU 2.0 */
# define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
@@ -942,7 +947,7 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count);
/**@stable ICU 2.0 */
# define U_STRING_INIT(var, cs, length)
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]={ (const UChar *)cs }
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs
/**@stable ICU 2.0 */
# define U_STRING_INIT(var, cs, length)
#else
@@ -1154,10 +1159,12 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
#endif
/**
- * Case-fold the characters in a string.
+ * Case-folds the characters in a string.
+ *
* Case-folding is locale-independent and not context-sensitive,
* but there is an option for whether to include or exclude mappings for dotted I
- * and dotless i that are marked with 'I' in CaseFolding.txt.
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
* The result may be longer or shorter than the original.
* The source string and the destination buffer are allowed to overlap.
*
diff --git a/Source/WebCore/icu/unicode/utext.h b/Source/WebCore/icu/unicode/utext.h
new file mode 100644
index 000000000..d431913d3
--- /dev/null
+++ b/Source/WebCore/icu/unicode/utext.h
@@ -0,0 +1,1600 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utext.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004oct06
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UTEXT_H__
+#define __UTEXT_H__
+
+/**
+ * \file
+ * \brief C API: Abstract Unicode Text API
+ *
+ * The Text Access API provides a means to allow text that is stored in alternative
+ * formats to work with ICU services. ICU normally operates on text that is
+ * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
+ * UnicodeString for C++ APIs.
+ *
+ * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
+ * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
+ *
+ * There are three general classes of usage for UText:
+ *
+ * Application Level Use. This is the simplest usage - applications would
+ * use one of the utext_open() functions on their input text, and pass
+ * the resulting UText to the desired ICU service.
+ *
+ * Second is usage in ICU Services, such as break iteration, that will need to
+ * operate on input presented to them as a UText. These implementations
+ * will need to use the iteration and related UText functions to gain
+ * access to the actual text.
+ *
+ * The third class of UText users are "text providers." These are the
+ * UText implementations for the various text storage formats. An application
+ * or system with a unique text storage format can implement a set of
+ * UText provider functions for that format, which will then allow
+ * ICU services to operate on that format.
+ *
+ *
+ * <em>Iterating over text</em>
+ *
+ * Here is sample code for a forward iteration over the contents of a UText
+ *
+ * \code
+ * UChar32 c;
+ * UText *ut = whatever();
+ *
+ * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
+ * // do whatever with the codepoint c here.
+ * }
+ * \endcode
+ *
+ * And here is similar code to iterate in the reverse direction, from the end
+ * of the text towards the beginning.
+ *
+ * \code
+ * UChar32 c;
+ * UText *ut = whatever();
+ * int64_t textLength = utext_nativeLength(ut);
+ * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
+ * // do whatever with the codepoint c here.
+ * }
+ * \endcode
+ *
+ * <em>Characters and Indexing</em>
+ *
+ * Indexing into text by UText functions is nearly always in terms of the native
+ * indexing of the underlying text storage. The storage format could be UTF-8
+ * or UTF-32, for example. When coding to the UText access API, no assumptions
+ * can be made regarding the size of characters, or how far an index
+ * may move when iterating between characters.
+ *
+ * All indices supplied to UText functions are pinned to the length of the
+ * text. An out-of-bounds index is not considered to be an error, but is
+ * adjusted to be in the range 0 <= index <= length of input text.
+ *
+ *
+ * When an index position is returned from a UText function, it will be
+ * a native index to the underlying text. In the case of multi-unit characters,
+ * it will always refer to the first position of the character,
+ * never to the interior. This is essentially the same thing as saying that
+ * a returned index will always point to a boundary between characters.
+ *
+ * When a native index is supplied to a UText function, all indices that
+ * refer to any part of a multi-unit character representation are considered
+ * to be equivalent. In the case of multi-unit characters, an incoming index
+ * will be logically normalized to refer to the start of the character.
+ *
+ * It is possible to test whether a native index is on a code point boundary
+ * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
+ * If the index is returned unchanged, it was on a code point boundary. If
+ * an adjusted index is returned, the original index referred to the
+ * interior of a character.
+ *
+ * <em>Conventions for calling UText functions</em>
+ *
+ * Most UText access functions have as their first parameter a (UText *) pointer,
+ * which specifies the UText to be used. Unless otherwise noted, the
+ * pointer must refer to a valid, open UText. Attempting to
+ * use a closed UText or passing a NULL pointer is a programming error and
+ * will produce undefined results or NULL pointer exceptions.
+ *
+ * The utext_open family of functions can either open an existing (closed)
+ * UText, or heap allocate a new UText. Here is sample code for creating
+ * a stack-allocated UText.
+ *
+ * \code
+ * char *s = whatever(); // A utf-8 string
+ * UErrorCode status = U_ZERO_ERROR;
+ * UText ut = UTEXT_INITIALIZER;
+ * utext_openUTF8(&ut, s, -1, &status);
+ * if (U_FAILURE(status)) {
+ * // error handling
+ * } else {
+ * // work with the UText
+ * }
+ * \endcode
+ *
+ * Any existing UText passed to an open function _must_ have been initialized,
+ * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
+ * by an open function. Passing NULL will cause the open function to
+ * heap-allocate and fully initialize a new UText.
+ *
+ */
+
+
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#include "unicode/rep.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#endif
+
+
+U_CDECL_BEGIN
+
+struct UText;
+typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
+
+
+/***************************************************************************************
+ *
+ * C Functions for creating UText wrappers around various kinds of text strings.
+ *
+ ****************************************************************************************/
+
+
+/**
+ * Close function for UText instances.
+ * Cleans up, releases any resources being held by an open UText.
+ * <p>
+ * If the UText was originally allocated by one of the utext_open functions,
+ * the storage associated with the utext will also be freed.
+ * If the UText storage originated with the application, as it would with
+ * a local or static instance, the storage will not be deleted.
+ *
+ * An open UText can be reset to refer to new string by using one of the utext_open()
+ * functions without first closing the UText.
+ *
+ * @param ut The UText to be closed.
+ * @return NULL if the UText struct was deleted by the close. If the UText struct
+ * was originally provided by the caller to the open function, it is
+ * returned by this function, and may be safely used again in
+ * a subsequent utext_open.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_close(UText *ut);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUTextPointer
+ * "Smart pointer" class, closes a UText via utext_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Open a read-only UText implementation for UTF-8 strings.
+ *
+ * \htmlonly
+ * Any invalid UTF-8 in the input will be handled in this way:
+ * a sequence of bytes that has the form of a truncated, but otherwise valid,
+ * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD.
+ * Any other illegal bytes will each be replaced by a \uFFFD.
+ * \endhtmlonly
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified UTF-8 string.
+ * @param s A UTF-8 string. Must not be NULL.
+ * @param length The length of the UTF-8 string in bytes, or -1 if the string is
+ * zero terminated.
+ * @param status Errors are returned here.
+ * @return A pointer to the UText. If a pre-allocated UText was provided, it
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
+
+
+/**
+ * Open a read-only UText for UChar * string.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified UChar string.
+ * @param s A UChar (UTF-16) string
+ * @param length The number of UChars in the input string, or -1 if the string is
+ * zero terminated.
+ * @param status Errors are returned here.
+ * @return A pointer to the UText. If a pre-allocated UText was provided, it
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
+
+
+#if U_SHOW_CPLUSPLUS_API
+/**
+ * Open a writable UText for a non-const UnicodeString.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified input string.
+ * @param s A UnicodeString.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a UText for a const UnicodeString. The resulting UText will not be writable.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified input string.
+ * @param s A const UnicodeString to be wrapped.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a writable UText implementation for an ICU Replaceable object.
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to reference the specified replaceable text.
+ * @param rep A Replaceable text object.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
+
+/**
+ * Open a UText implementation over an ICU CharacterIterator.
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to reference the specified character iterator.
+ * @param ci A Character Iterator.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @see CharacterIterator
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
+
+#endif
+
+
+/**
+ * Clone a UText. This is much like opening a UText where the source text is itself
+ * another UText.
+ *
+ * A deep clone will copy both the UText data structures and the underlying text.
+ * The original and cloned UText will operate completely independently; modifications
+ * made to the text in one will not affect the other. Text providers are not
+ * required to support deep clones. The user of clone() must check the status return
+ * and be prepared to handle failures.
+ *
+ * The standard UText implementations for UTF8, UChar *, UnicodeString and
+ * Replaceable all support deep cloning.
+ *
+ * The UText returned from a deep clone will be writable, assuming that the text
+ * provider is able to support writing, even if the source UText had been made
+ * non-writable by means of utext_freeze().
+ *
+ * A shallow clone replicates only the UText data structures; it does not make
+ * a copy of the underlying text. Shallow clones can be used as an efficient way to
+ * have multiple iterators active in a single text string that is not being
+ * modified.
+ *
+ * A shallow clone operation will not fail, barring truly exceptional conditions such
+ * as memory allocation failures.
+ *
+ * Shallow UText clones should be avoided if the UText functions that modify the
+ * text are expected to be used, either on the original or the cloned UText.
+ * Any such modifications can cause unpredictable behavior. Read Only
+ * shallow clones provide some protection against errors of this type by
+ * disabling text modification via the cloned UText.
+ *
+ * A shallow clone made with the readOnly parameter == FALSE will preserve the
+ * utext_isWritable() state of the source object. Note, however, that
+ * write operations must be avoided while more than one UText exists that refer
+ * to the same underlying text.
+ *
+ * A UText and its clone may be safely concurrently accessed by separate threads.
+ * This is true for read access only with shallow clones, and for both read and
+ * write access with deep clones.
+ * It is the responsibility of the Text Provider to ensure that this thread safety
+ * constraint is met.
+ *
+ * @param dest A UText struct to be filled in with the result of the clone operation,
+ * or NULL if the clone function should heap-allocate a new UText struct.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to become the clone.
+ * @param src The UText to be cloned.
+ * @param deep TRUE to request a deep clone, FALSE for a shallow clone.
+ * @param readOnly TRUE to request that the cloned UText have read only access to the
+ * underlying text.
+
+ * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
+ * will be returned if the text provider is unable to clone the
+ * original text.
+ * @return The newly created clone, or NULL if the clone operation failed.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
+
+
+/**
+ * Compare two UText objects for equality.
+ * UTexts are equal if they are iterating over the same text, and
+ * have the same iteration position within the text.
+ * If either or both of the parameters are NULL, the comparison is FALSE.
+ *
+ * @param a The first of the two UTexts to compare.
+ * @param b The other UText to be compared.
+ * @return TRUE if the two UTexts are equal.
+ * @stable ICU 3.6
+ */
+U_STABLE UBool U_EXPORT2
+utext_equals(const UText *a, const UText *b);
+
+
+/*****************************************************************************
+ *
+ * Functions to work with the text represented by a UText wrapper
+ *
+ *****************************************************************************/
+
+/**
+ * Get the length of the text. Depending on the characteristics
+ * of the underlying text representation, this may be expensive.
+ * @see utext_isLengthExpensive()
+ *
+ *
+ * @param ut the text to be accessed.
+ * @return the length of the text, expressed in native units.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE int64_t U_EXPORT2
+utext_nativeLength(UText *ut);
+
+/**
+ * Return TRUE if calculating the length of the text could be expensive.
+ * Finding the length of NUL terminated strings is considered to be expensive.
+ *
+ * Note that the value of this function may change
+ * as the result of other operations on a UText.
+ * Once the length of a string has been discovered, it will no longer
+ * be expensive to report it.
+ *
+ * @param ut the text to be accessed.
+ * @return TRUE if determining the length of the text could be time consuming.
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+utext_isLengthExpensive(const UText *ut);
+
+/**
+ * Returns the code point at the requested index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * If the specified index points to the interior of a multi-unit
+ * character - one of the trail bytes of a UTF-8 sequence, for example -
+ * the complete code point will be returned.
+ *
+ * The iteration position will be set to the start of the returned code point.
+ *
+ * This function is roughly equivalent to the sequence
+ * utext_setNativeIndex(index);
+ * utext_current32();
+ * (There is a subtle difference if the index is out of bounds by being less than zero -
+ * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32()
+ * will return the char at zero. utext_char32At(negative index), on the other hand, will
+ * return the U_SENTINEL value of -1.)
+ *
+ * @param ut the text to be accessed
+ * @param nativeIndex the native index of the character to be accessed. If the index points
+ * to other than the first unit of a multi-unit character, it will be adjusted
+ * to the start of the character.
+ * @return the code point at the specified index.
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_char32At(UText *ut, int64_t nativeIndex);
+
+
+/**
+ *
+ * Get the code point at the current iteration position,
+ * or U_SENTINEL (-1) if the iteration has reached the end of
+ * the input text.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the current iterator position.
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_current32(UText *ut);
+
+
+/**
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ *
+ * If the position is at the end of the text (the index following
+ * the last character, which is also the length of the text),
+ * return U_SENTINEL (-1) and do not advance the index.
+ *
+ * This is a post-increment operation.
+ *
+ * An inline macro version of this function, UTEXT_NEXT32(),
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the iteration position.
+ * @see UTEXT_NEXT32
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_next32(UText *ut);
+
+
+/**
+ * Move the iterator position to the character (code point) whose
+ * index precedes the current position, and return that character.
+ * This is a pre-decrement operation.
+ *
+ * If the initial position is at the start of the text (index of 0)
+ * return U_SENTINEL (-1), and leave the position unchanged.
+ *
+ * An inline macro version of this function, UTEXT_PREVIOUS32(),
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the previous UChar32 code point, or U_SENTINEL (-1)
+ * if the iteration has reached the start of the text.
+ * @see UTEXT_PREVIOUS32
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_previous32(UText *ut);
+
+
+/**
+ * Set the iteration index and return the code point at that index.
+ * Leave the iteration index at the start of the following code point.
+ *
+ * This function is the most efficient and convenient way to
+ * begin a forward iteration. The results are identical to those
+ * from the sequence
+ * \code
+ * utext_setNativeIndex();
+ * utext_next32();
+ * \endcode
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex Iteration index, in the native units of the text provider.
+ * @return Code point which starts at or before index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_next32From(UText *ut, int64_t nativeIndex);
+
+
+
+/**
+ * Set the iteration index, and return the code point preceding the
+ * one specified by the initial index. Leave the iteration position
+ * at the start of the returned code point.
+ *
+ * This function is the most efficient and convenient way to
+ * begin a backwards iteration.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex Iteration index in the native units of the text provider.
+ * @return Code point preceding the one at the initial index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE UChar32 U_EXPORT2
+utext_previous32From(UText *ut, int64_t nativeIndex);
+
+/**
+ * Get the current iterator position, which can range from 0 to
+ * the length of the text.
+ * The position is a native index into the input text, in whatever format it
+ * may have (possibly UTF-8 for example), and may not always be the same as
+ * the corresponding UChar (UTF-16) index.
+ * The returned position will always be aligned to a code point boundary.
+ *
+ * @param ut the text to be accessed.
+ * @return the current index position, in the native units of the text provider.
+ * @stable ICU 3.4
+ */
+U_STABLE int64_t U_EXPORT2
+utext_getNativeIndex(const UText *ut);
+
+/**
+ * Set the current iteration position to the nearest code point
+ * boundary at or preceding the specified index.
+ * The index is in the native units of the original input text.
+ * If the index is out of range, it will be pinned to be within
+ * the range of the input text.
+ * <p>
+ * It will usually be more efficient to begin an iteration
+ * using the functions utext_next32From() or utext_previous32From()
+ * rather than utext_setNativeIndex().
+ * <p>
+ * Moving the index position to an adjacent character is best done
+ * with utext_next32(), utext_previous32() or utext_moveIndex32().
+ * Attempting to do direct arithmetic on the index position is
+ * complicated by the fact that the size (in native units) of a
+ * character depends on the underlying representation of the character
+ * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
+ * easily knowable.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex the native unit index of the new iteration position.
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+utext_setNativeIndex(UText *ut, int64_t nativeIndex);
+
+/**
+ * Move the iterator position by delta code points. The number of code points
+ * is a signed number; a negative delta will move the iterator backwards,
+ * towards the start of the text.
+ * <p>
+ * The index is moved by <code>delta</code> code points
+ * forward or backward, but no further backward than to 0 and
+ * no further forward than to utext_nativeLength().
+ * The resulting index value will be in between 0 and length, inclusive.
+ *
+ * @param ut the text to be accessed.
+ * @param delta the signed number of code points to move the iteration position.
+ * @return TRUE if the position could be moved the requested number of positions while
+ * staying within the range [0 - text length].
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+utext_moveIndex32(UText *ut, int32_t delta);
+
+/**
+ * Get the native index of the character preceding the current position.
+ * If the iteration position is already at the start of the text, zero
+ * is returned.
+ * The value returned is the same as that obtained from the following sequence,
+ * but without the side effect of changing the iteration position.
+ *
+ * \code
+ * UText *ut = whatever;
+ * ...
+ * utext_previous32(ut);
+ * utext_getNativeIndex(ut);
+ * \endcode
+ *
+ * This function is most useful during forwards iteration, where it will get the
+ * native index of the character most recently returned from utext_next32().
+ *
+ * @param ut the text to be accessed
+ * @return the native index of the character preceding the current index position,
+ * or zero if the current position is at the start of the text.
+ * @stable ICU 3.6
+ */
+U_STABLE int64_t U_EXPORT2
+utext_getPreviousNativeIndex(UText *ut);
+
+
+/**
+ *
+ * Extract text from a UText into a UChar buffer. The range of text to be extracted
+ * is specified in the native indices of the UText provider. These may not necessarily
+ * be UTF-16 indices.
+ * <p>
+ * The size (number of 16 bit UChars) of the data to be extracted is returned. The
+ * full number of UChars is returned, even when the extracted text is truncated
+ * because the specified buffer size is too small.
+ * <p>
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer. This
+ * terminating NUL is not included in the returned length.
+ * <p>
+ * The iteration index is left at the position following the last extracted character.
+ *
+ * @param ut the UText from which to extract data.
+ * @param nativeStart the native index of the first character to extract.
+ * If the specified index is out of range,
+ * it will be pinned to be within 0 <= index <= textLength.
+ * @param nativeLimit the native string index of the position following the last
+ * character to extract. If the specified index is out of range,
+ * it will be pinned to be within 0 <= index <= textLength.
+ * nativeLimit must be >= nativeStart.
+ * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param destCapacity The size, in UChars, of the destination buffer. May be zero
+ * for precomputing the required size.
+ * @param status receives any error status.
+ * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
+ * buffer was too small. Returns number of UChars for preflighting.
+ * @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+utext_extract(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *status);
+
+
+
+/************************************************************************************
+ *
+ * #define inline versions of selected performance-critical text access functions
+ * Caution: do not use auto increment++ or decrement-- expressions
+ * as parameters to these macros.
+ *
+ * For most use, where there is no extreme performance constraint, the
+ * normal, non-inline functions are a better choice. The resulting code
+ * will be smaller, and, if the need ever arises, easier to debug.
+ *
+ * These are implemented as #defines rather than real functions
+ * because there is no fully portable way to do inline functions in plain C.
+ *
+ ************************************************************************************/
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * inline version of utext_current32(), for performance-critical situations.
+ *
+ * Get the code point at the current iteration position of the UText, or U_SENTINEL (-1)
+ * if the position is at the end of the text. The unit at chunkOffset is read directly when
+ * it lies inside the chunk and is below 0xd800; otherwise utext_current32() is called.
+ *
+ * @internal ICU 4.4 technology preview
+ */
+#define UTEXT_CURRENT32(ut) \
+ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
+ ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * inline version of utext_next32(), for performance-critical situations.
+ *
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ * This is a post-increment operation.
+ * Returns U_SENTINEL (-1) if the position is at the end of the text.
+ * Falls back to utext_next32() unless the unit at chunkOffset is in the chunk and < 0xd800.
+ *
+ * @stable ICU 3.4
+ */
+#define UTEXT_NEXT32(ut) \
+ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
+ ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
+
+/**
+ * inline version of utext_previous32(), for performance-critical situations.
+ *
+ * Move the iterator position to the character (code point) whose
+ * index precedes the current position, and return that character.
+ * This is a pre-decrement operation. Returns U_SENTINEL (-1) at the start of the text.
+ * Falls back to utext_previous32() unless chunkOffset > 0 and the preceding unit is < 0xd800.
+ *
+ * @stable ICU 3.4
+ */
+#define UTEXT_PREVIOUS32(ut) \
+ ((ut)->chunkOffset > 0 && \
+ (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
+ (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
+
+/**
+ * inline version of utext_getNativeIndex(), for performance-critical situations.
+ *
+ * Get the current iterator position, which can range from 0 to the length of the text.
+ * The position is a native index into the input text, in whatever format it may have (possibly
+ * UTF-8 for example), and may not always be the same as the corresponding UChar (UTF-16) index.
+ * The returned position will always be aligned to a code point boundary.
+ * Yields chunkNativeStart+chunkOffset while chunkOffset <= nativeIndexingLimit;
+ * otherwise the value comes from the provider's pFuncs->mapOffsetToNative(ut).
+ *
+ * @stable ICU 3.6
+ */
+#define UTEXT_GETNATIVEINDEX(ut) \
+ ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
+ (ut)->chunkNativeStart+(ut)->chunkOffset : \
+ (ut)->pFuncs->mapOffsetToNative(ut))
+
+/**
+ * inline version of utext_setNativeIndex(), for performance-critical situations.
+ *
+ * Set the current iteration position to the nearest code point
+ * boundary at or preceding the specified index.
+ * The index is in the native units of the original input text.
+ * If the index is out of range, it will be pinned to be within the range of the input text.
+ * Expands to a braced statement block, not an expression; ix may be evaluated twice.
+ *
+ * @stable ICU 3.8
+ */
+#define UTEXT_SETNATIVEINDEX(ut, ix) \
+ { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+ if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
+ (ut)->chunkOffset=(int32_t)__offset; \
+ } else { \
+ utext_setNativeIndex((ut), (ix)); } }
+
+
+
+/************************************************************************************
+ *
+ * Functions related to writing or modifying the text.
+ * These will work only with modifiable UTexts. Attempting to
+ * modify a read-only UText will return an error status.
+ *
+ ************************************************************************************/
+
+
+/**
+ * Return TRUE if the text can be written (modified) with utext_replace() or
+ * utext_copy(). For the text to be writable, the text provider must
+ * be of a type that supports writing and the UText must not be frozen.
+ *
+ * Attempting to modify text when utext_isWritable() is FALSE will fail -
+ * the text will not be modified, and an error will be returned from the function
+ * that attempted the modification.
+ *
+ * @param ut the UText to be tested.
+ * @return TRUE if the text is modifiable.
+ *
+ * @see utext_freeze()
+ * @see utext_replace()
+ * @see utext_copy()
+ * @stable ICU 3.4
+ *
+ */
+U_STABLE UBool U_EXPORT2
+utext_isWritable(const UText *ut);
+
+
+/**
+ * Test whether there is meta data associated with the text.
+ * @see Replaceable::hasMetaData()
+ *
+ * @param ut The UText to be tested
+ * @return TRUE if the underlying text includes meta data.
+ * @stable ICU 3.4
+ */
+U_STABLE UBool U_EXPORT2
+utext_hasMetaData(const UText *ut);
+
+
+/**
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ * newly inserted replacement text.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns TRUE.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. Behavior after a replace operation
+ * on a UText is undefined for any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut the UText representing the text to be operated on.
+ * @param nativeStart the native index of the start of the region to be replaced
+ * @param nativeLimit the native index of the character following the region to be replaced.
+ * @param replacementText pointer to the replacement text
+ * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
+ * @param status receives any error status. Possible errors include
+ * U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ * the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE int32_t U_EXPORT2
+utext_replace(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ const UChar *replacementText, int32_t replacementLength,
+ UErrorCode *status);
+
+
+
+/**
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * The iteration position is left following the newly inserted text
+ * at the destination position.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns TRUE.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. Behavior after a copy operation
+ * on a UText is undefined in any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut The UText representing the text to be operated on.
+ * @param nativeStart The native index of the start of the region to be copied or moved
+ * @param nativeLimit The native index of the character position following the region
+ * to be copied.
+ * @param destIndex The native destination index to which the source substring is
+ * copied or moved.
+ * @param move If TRUE, then the substring is moved, not copied/duplicated.
+ * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+U_STABLE void U_EXPORT2
+utext_copy(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ int64_t destIndex,
+ UBool move,
+ UErrorCode *status);
+
+
+/**
+ * <p>
+ * Freeze a UText. This prevents any modification to the underlying text itself
+ * by means of functions operating on this UText.
+ * </p>
+ * <p>
+ * Once frozen, a UText can not be unfrozen. The intent is to ensure
+ * that the text underlying a frozen UText wrapper cannot be modified via that UText.
+ * </p>
+ * <p>
+ * Caution: freezing a UText will disable changes made via the specific
+ * frozen UText wrapper only; it will not have any effect on the ability to
+ * directly modify the text by bypassing the UText. Any such backdoor modifications
+ * are always an error while UText access is occurring because the underlying
+ * text can get out of sync with UText's buffering.
+ * </p>
+ *
+ * @param ut The UText to be frozen.
+ * @see utext_isWritable()
+ * @stable ICU 3.6
+ */
+U_STABLE void U_EXPORT2
+utext_freeze(UText *ut);
+
+
+/**
+ * UText provider properties (bit field indexes).
+ *
+ * @see UText
+ * @stable ICU 3.4
+ */
+enum {
+ /**
+ * It is potentially time consuming for the provider to determine the length of the text.
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
+ /**
+ * Text chunks remain valid and usable until the text object is modified or
+ * deleted, not just until the next time the access() function is called
+ * (which is the default).
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_STABLE_CHUNKS = 2,
+ /**
+ * The provider supports modifying the text via the replace() and copy()
+ * functions.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_WRITABLE = 3,
+ /**
+ * There is meta data associated with the text.
+ * @see Replaceable::hasMetaData()
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_HAS_META_DATA = 4,
+ /**
+ * Text provider owns the text storage.
+ * Generally occurs as the result of a deep clone of the UText.
+ * When closing the UText, the associated text must
+ * also be closed/deleted/freed, whichever is appropriate.
+ * @stable ICU 3.6
+ */
+ UTEXT_PROVIDER_OWNS_TEXT = 5
+};
+
+/**
+ * Function type declaration for UText.clone().
+ *
+ * clone a UText. Much like opening a UText where the source text is itself
+ * another UText.
+ *
+ * A deep clone will copy both the UText data structures and the underlying text.
+ * The original and cloned UText will operate completely independently; modifications
+ * made to the text in one will not affect the other. Text providers are not
+ * required to support deep clones. The user of clone() must check the status return
+ * and be prepared to handle failures.
+ *
+ * A shallow clone replicates only the UText data structures; it does not make
+ * a copy of the underlying text. Shallow clones can be used as an efficient way to
+ * have multiple iterators active in a single text string that is not being
+ * modified.
+ *
+ * A shallow clone operation must not fail except for truly exceptional conditions such
+ * as memory allocation failures.
+ *
+ * A UText and its clone may be safely concurrently accessed by separate threads.
+ * This is true for both shallow and deep clones.
+ * It is the responsibility of the Text Provider to ensure that this thread safety
+ * constraint is met.
+
+ *
+ * @param dest A UText struct to be filled in with the result of the clone operation,
+ * or NULL if the clone function should heap-allocate a new UText struct.
+ * @param src The UText to be cloned.
+ * @param deep TRUE to request a deep clone, FALSE for a shallow clone.
+ * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
+ * should be returned if the text provider is unable to clone the
+ * original text.
+ * @return The newly created clone, or NULL if the clone operation failed.
+ *
+ * @stable ICU 3.4
+ */
+typedef UText * U_CALLCONV
+UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
+
+
+/**
+ * Function type declaration for UText.nativeLength().
+ *
+ * @param ut the UText to get the length of.
+ * @return the length, in the native units of the original text string.
+ * @see UText
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextNativeLength(UText *ut);
+
+/**
+ * Function type declaration for UText.access(). Get the description of the text chunk
+ * containing the text at a requested native index. The UText's iteration
+ * position will be left at the requested index. If the index is out
+ * of bounds, the iteration position will be left at the start or end
+ * of the string, as appropriate.
+ *
+ * Chunks must begin and end on code point boundaries. A single code point
+ * comprised of multiple storage units must never span a chunk boundary.
+ *
+ *
+ * @param ut the UText being accessed.
+ * @param nativeIndex Requested index of the text to be accessed.
+ * @param forward If TRUE, then the returned chunk must contain text
+ * starting from the index, so that start<=index<limit.
+ * If FALSE, then the returned chunk must contain text
+ * before the index, so that start<index<=limit.
+ * @return True if the requested index could be accessed. The chunk
+ * will contain the requested text.
+ * False value if a chunk cannot be accessed
+ * (the requested index is out of bounds).
+ *
+ * @see UText
+ * @stable ICU 3.4
+ */
+typedef UBool U_CALLCONV
+UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
+
+/**
+ * Function type declaration for UText.extract().
+ *
+ * Extract text from a UText into a UChar buffer. The range of text to be extracted
+ * is specified in the native indices of the UText provider. These may not necessarily
+ * be UTF-16 indices.
+ * <p>
+ * The size (number of 16 bit UChars) in the data to be extracted is returned. The
+ * full amount is returned, even when the specified buffer size is smaller.
+ * <p>
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer.
+ *
+ * @param ut the UText from which to extract data.
+ * @param nativeStart the native index of the first character to extract.
+ * @param nativeLimit the native string index of the position following the last
+ * character to extract.
+ * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param destCapacity The size, in UChars, of the destination buffer. May be zero
+ * for precomputing the required size.
+ * @param status receives any error status.
+ * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
+ * preflighting.
+ * @return Number of UChars in the data. Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextExtract(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.replace().
+ *
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ * newly inserted replacement text.
+ *
+ * This function need only be implemented on UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the replace operation.
+ *
+ * @param ut the UText representing the text to be operated on.
+ * @param nativeStart the index of the start of the region to be replaced
+ * @param nativeLimit the index of the character following the region to be replaced.
+ * @param replacementText pointer to the replacement text
+ * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
+ * @param status receives any error status. Possible errors include
+ * U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ * the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextReplace(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ const UChar *replacementText, int32_t replacmentLength,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.copy().
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * This function need only be implemented for UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the copy operation.
+ *
+ * @param ut The UText representing the text to be operated on.
+ * @param nativeStart The index of the start of the region to be copied or moved
+ * @param nativeLimit The index of the character following the region to be copied or moved.
+ * @param nativeDest The destination index to which the source substring is copied or moved.
+ * @param move If TRUE, then the substring is moved, not copied/duplicated.
+ * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextCopy(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ int64_t nativeDest,
+ UBool move,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.mapOffsetToNative().
+ * Map from the current UChar offset within the current text chunk to
+ * the corresponding native index in the original source text.
+ *
+ * This is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut the UText.
+ * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
+ * The returned native index should always be to a code point boundary.
+ *
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextMapOffsetToNative(const UText *ut);
+
+/**
+ * Function type declaration for UText.mapNativeIndexToUTF16().
+ * Map from a native index to a UChar offset within a text chunk.
+ * Behavior is undefined if the native index does not fall within the
+ * current chunk.
+ *
+ * This function is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut The UText containing the text chunk.
+ * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
+ * @return Chunk-relative UTF-16 offset corresponding to the specified native
+ * index.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
+
+
+/**
+ * Function type declaration for UText.close().
+ *
+ * A Text Provider close function is only required for provider types that make
+ * allocations in their open function (or other functions) that must be
+ * cleaned when the UText is closed.
+ *
+ * The allocation of the UText struct itself and any "extra" storage
+ * associated with the UText is handled by the common UText implementation
+ * and does not require provider specific cleanup in a close function.
+ *
+ * Most UText provider implementations do not need to implement this function.
+ *
+ * @param ut A UText object to be closed.
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextClose(UText *ut);
+
+
+/**
+ * (public) Function dispatch table for UText.
+ * Conceptually very much like a C++ Virtual Function Table.
+ * This struct defines the organization of the table.
+ * Each text provider implementation must provide an
+ * actual table that is initialized with the appropriate functions
+ * for the type of text being handled.
+ * @stable ICU 3.6
+ */
+struct UTextFuncs {
+ /**
+ * (public) Function table size, sizeof(UTextFuncs)
+ * Intended for use should the table grow to accommodate added
+ * functions in the future, to allow tests for older format
+ * function tables that do not contain the extensions.
+ *
+ * Fields are placed for optimal alignment on
+ * 32/64/128-bit-pointer machines, by normally grouping together
+ * 4 32-bit fields,
+ * 4 pointers,
+ * 2 64-bit fields
+ * in sequence.
+ * @stable ICU 3.6
+ */
+ int32_t tableSize;
+
+ /**
+ * (private) Alignment padding.
+ * Do not use, reserved for use by the UText framework only.
+ * @internal
+ */
+ int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
+
+
+ /**
+ * (public) Function pointer for UTextClone
+ *
+ * @see UTextClone
+ * @stable ICU 3.6
+ */
+ UTextClone *clone;
+
+ /**
+ * (public) function pointer for UTextNativeLength
+ * May be expensive to compute!
+ *
+ * @see UTextNativeLength
+ * @stable ICU 3.6
+ */
+ UTextNativeLength *nativeLength;
+
+ /**
+ * (public) Function pointer for UTextAccess.
+ *
+ * @see UTextAccess
+ * @stable ICU 3.6
+ */
+ UTextAccess *access;
+
+ /**
+ * (public) Function pointer for UTextExtract.
+ *
+ * @see UTextExtract
+ * @stable ICU 3.6
+ */
+ UTextExtract *extract;
+
+ /**
+ * (public) Function pointer for UTextReplace.
+ *
+ * @see UTextReplace
+ * @stable ICU 3.6
+ */
+ UTextReplace *replace;
+
+ /**
+ * (public) Function pointer for UTextCopy.
+ *
+ * @see UTextCopy
+ * @stable ICU 3.6
+ */
+ UTextCopy *copy;
+
+ /**
+ * (public) Function pointer for UTextMapOffsetToNative.
+ *
+ * @see UTextMapOffsetToNative
+ * @stable ICU 3.6
+ */
+ UTextMapOffsetToNative *mapOffsetToNative;
+
+ /**
+ * (public) Function pointer for UTextMapNativeIndexToUTF16.
+ *
+ * @see UTextMapNativeIndexToUTF16
+ * @stable ICU 3.6
+ */
+ UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
+
+ /**
+ * (public) Function pointer for UTextClose.
+ *
+ * @see UTextClose
+ * @stable ICU 3.6
+ */
+ UTextClose *close;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare1;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare2;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare3;
+
+};
+/**
+ * Function dispatch table for UText
+ * @see UTextFuncs
+ */
+typedef struct UTextFuncs UTextFuncs;
+
+ /**
+ * UText struct. Provides the interface between the generic UText access code
+ * and the UText provider code that works on specific kinds of
+ * text (UTF-8, noncontiguous UTF-16, whatever.)
+ *
+ * Applications that are using predefined types of text providers
+ * to pass text data to ICU services will have no need to view the
+ * internals of the UText structs that they open.
+ *
+ * @stable ICU 3.6
+ */
+struct UText {
+ /**
+ * (private) Magic. Used to help detect when UText functions are handed
+ * invalid or uninitialized UText structs.
+ * utext_openXYZ() functions take an initialized,
+ * but not necessarily open, UText struct as an
+ * optional fill-in parameter. This magic field
+ * is used to check for that initialization.
+ * Text provider close functions must NOT clear
+ * the magic field because that would prevent
+ * reuse of the UText struct.
+ * @internal
+ */
+ uint32_t magic;
+
+
+ /**
+ * (private) Flags for managing the allocation and freeing of
+ * memory associated with this UText.
+ * @internal
+ */
+ int32_t flags;
+
+
+ /**
+ * Text provider properties. This set of flags is maintained by the
+ * text provider implementation.
+ * @stable ICU 3.4
+ */
+ int32_t providerProperties;
+
+ /**
+ * (public) sizeOfStruct=sizeof(UText)
+ * Allows possible backward compatible extension.
+ *
+ * @stable ICU 3.4
+ */
+ int32_t sizeOfStruct;
+
+ /* ------ 16 byte alignment boundary ----------- */
+
+
+ /**
+ * (protected) Native index of the first character position following
+ * the current chunk.
+ * @stable ICU 3.6
+ */
+ int64_t chunkNativeLimit;
+
+ /**
+ * (protected) Size in bytes of the extra space (pExtra).
+ * @stable ICU 3.4
+ */
+ int32_t extraSize;
+
+ /**
+ * (protected) The highest chunk offset where native indexing and
+ * chunk (UTF-16) indexing correspond. For UTF-16 sources, value
+ * will be equal to chunkLength.
+ *
+ * @stable ICU 3.6
+ */
+ int32_t nativeIndexingLimit;
+
+ /* ---- 16 byte alignment boundary------ */
+
+ /**
+ * (protected) Native index of the first character in the text chunk.
+ * @stable ICU 3.6
+ */
+ int64_t chunkNativeStart;
+
+ /**
+ * (protected) Current iteration position within the text chunk (UTF-16 buffer).
+ * This is the index to the character that will be returned by utext_next32().
+ * @stable ICU 3.6
+ */
+ int32_t chunkOffset;
+
+ /**
+ * (protected) Length of the text chunk (UTF-16 buffer), in UChars.
+ * @stable ICU 3.6
+ */
+ int32_t chunkLength;
+
+ /* ---- 16 byte alignment boundary-- */
+
+
+ /**
+ * (protected) pointer to a chunk of text in UTF-16 format.
+ * May refer either to original storage of the source of the text, or
+ * if conversion was required, to a buffer owned by the UText.
+ * @stable ICU 3.6
+ */
+ const UChar *chunkContents;
+
+ /**
+ * (public) Pointer to Dispatch table for accessing functions for this UText.
+ * @stable ICU 3.6
+ */
+ const UTextFuncs *pFuncs;
+
+ /**
+ * (protected) Pointer to additional space requested by the
+ * text provider during the utext_open operation.
+ * @stable ICU 3.4
+ */
+ void *pExtra;
+
+ /**
+ * (protected) Pointer to string or text-containing object or similar.
+ * This is the source of the text that this UText is wrapping, in a format
+ * that is known to the text provider functions.
+ * @stable ICU 3.4
+ */
+ const void *context;
+
+ /* --- 16 byte alignment boundary--- */
+
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *p;
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *q;
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *r;
+
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ void *privP;
+
+
+ /* --- 16 byte alignment boundary--- */
+
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int64_t a;
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int32_t b;
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int32_t c;
+
+ /* ---- 16 byte alignment boundary---- */
+
+
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int64_t privA;
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int32_t privB;
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int32_t privC;
+};
+
+
+/**
+ * Common function for use by Text Provider implementations to allocate and/or initialize
+ * a new UText struct. To be called in the implementation of utext_open() functions.
+ * If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
+ * If the supplied UText is already open, the provider's close function will be called
+ * so that the struct can be reused by the open that is in progress.
+ *
+ * @param ut pointer to a UText struct to be re-used, or null if a new UText
+ * should be allocated.
+ * @param extraSpace The amount of additional space to be allocated as part
+ * of this UText, for use by types of providers that require
+ * additional storage.
+ * @param status Errors are returned here.
+ * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
+ * @stable ICU 3.4
+ */
+U_STABLE UText * U_EXPORT2
+utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * @internal
+ * Value used to help identify correctly initialized UText structs.
+ * Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
+ */
+enum {
+ UTEXT_MAGIC = 0x345ad82c
+};
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * initializer to be used with local (stack) instances of a UText
+ * struct. UText structs must be initialized before passing
+ * them to one of the utext_open functions.
+ *
+ * @stable ICU 3.6
+ */
+#define UTEXT_INITIALIZER { \
+ UTEXT_MAGIC, /* magic */ \
+ 0, /* flags */ \
+ 0, /* providerProps */ \
+ sizeof(UText), /* sizeOfStruct */ \
+ 0, /* chunkNativeLimit */ \
+ 0, /* extraSize */ \
+ 0, /* nativeIndexingLimit */ \
+ 0, /* chunkNativeStart */ \
+ 0, /* chunkOffset */ \
+ 0, /* chunkLength */ \
+ NULL, /* chunkContents */ \
+ NULL, /* pFuncs */ \
+ NULL, /* pExtra */ \
+ NULL, /* context */ \
+ NULL, NULL, NULL, /* p, q, r */ \
+ NULL, /* privP */ \
+ 0, 0, 0, /* a, b, c */ \
+ 0, 0, 0 /* privA,B,C, */ \
+ }
+
+
+U_CDECL_END
+
+
+
+#endif
diff --git a/Source/WebCore/icu/unicode/utf.h b/Source/WebCore/icu/unicode/utf.h
index f79479935..f5954fe9f 100644
--- a/Source/WebCore/icu/unicode/utf.h
+++ b/Source/WebCore/icu/unicode/utf.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2010, International Business Machines
+* Copyright (C) 1999-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -22,15 +22,20 @@
* a surrogate or a non-character etc.
*
* The UChar and UChar32 data types for Unicode code units and code points
- * are defined in umachines.h because they can be machine-dependent.
+ * are defined in umachine.h because they can be machine-dependent.
*
- * utf.h is included by utypes.h and itself includes utf8.h and utf16.h after some
- * common definitions. Those files define macros for efficiently getting code points
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
+ * and itself includes utf8.h and utf16.h after some
+ * common definitions.
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be
+ * included explicitly if their definitions are used.
+ *
+ * utf8.h and utf16.h define macros for efficiently getting code points
* in and out of UTF-8/16 strings.
* utf16.h macros have "U16_" prefixes.
* utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
*
- * ICU processes 16-bit Unicode strings.
+ * ICU mostly processes 16-bit Unicode strings.
* Most of the time, such strings are well-formed UTF-16.
* Single, unpaired surrogates must be handled as well, and are treated in ICU
* like regular code points where possible.
@@ -42,15 +47,16 @@
* ICU functions handle supplementary code points (U+10000..U+10ffff)
* but are optimized for the much more frequently occurring BMP code points.
*
- * utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
- * UChar is defined to be exactly wchar_t, otherwise uint16_t.
+ * umachine.h defines UChar to be an unsigned 16-bit integer.
+ * Where available, UChar is defined to be a char16_t
+ * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t.
*
* UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
* Unicode code point (Unicode scalar value, 0..0x10ffff).
* Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
* the definition of UChar. For details see the documentation for UChar32 itself.
*
- * utf.h also defines a small number of C macros for single Unicode code points.
+ * utf.h defines a small number of C macros for single Unicode code points.
* These are simple checks for surrogates and non-characters.
* For actual Unicode character properties see uchar.h.
*
@@ -59,9 +65,6 @@
* The macros will detect if a surrogate code unit is unpaired
* (lead unit without trail unit or vice versa) and just return the unit itself
* as the code point.
- * (It is an accidental property of Unicode and UTF-16 that all
- * malformed sequences can be expressed unambiguously with a distinct subrange
- * of Unicode code points.)
*
* The regular "safe" macros require that the initial, passed-in string index
* is within bounds. They only check the index when they read more than one
@@ -95,7 +98,7 @@
* code point values (0..U+10ffff). They are indicated with negative values instead.
*
* For more information see the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
+ * (http://userguide.icu-project.org/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
@@ -108,32 +111,12 @@
#ifndef __UTF_H__
#define __UTF_H__
-#include "unicode/utypes.h"
+#include "unicode/umachine.h"
/* include the utfXX.h after the following definitions */
/* single-code point definitions -------------------------------------------- */
/**
- * This value is intended for sentinel values for APIs that
- * (take or) return single code points (UChar32).
- * It is outside of the Unicode code point range 0..0x10ffff.
- *
- * For example, a "done" or "error" value in a new API
- * could be indicated with U_SENTINEL.
- *
- * ICU APIs designed before ICU 2.4 usually define service-specific "done"
- * values, mostly 0xffff.
- * Those may need to be distinguished from
- * actual U+ffff text contents by calling functions like
- * CharacterIterator::hasNext() or UnicodeString::length().
- *
- * @return -1
- * @see UChar32
- * @stable ICU 2.4
- */
-#define U_SENTINEL (-1)
-
-/**
* Is this code point a Unicode noncharacter?
* @param c 32-bit code point
* @return TRUE or FALSE
@@ -227,10 +210,14 @@
/* include the utfXX.h ------------------------------------------------------ */
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+
#include "unicode/utf8.h"
#include "unicode/utf16.h"
/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
#include "unicode/utf_old.h"
-#endif
+#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
+
+#endif /* __UTF_H__ */
diff --git a/Source/WebCore/icu/unicode/utf16.h b/Source/WebCore/icu/unicode/utf16.h
index 5079c1146..bdd88a8b9 100644
--- a/Source/WebCore/icu/unicode/utf16.h
+++ b/Source/WebCore/icu/unicode/utf16.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2010, International Business Machines
+* Copyright (C) 1999-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -19,11 +19,9 @@
* \brief C API: 16-bit Unicode handling macros
*
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
- * utf16.h is included by utf.h after unicode/umachine.h
- * and some common definitions.
*
* For more information see utf.h and the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
+ * (http://userguide.icu-project.org/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
@@ -34,7 +32,7 @@
#ifndef __UTF16_H__
#define __UTF16_H__
-/* utf.h must be included first. */
+#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
@@ -182,6 +180,9 @@
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
* If the offset points to a single, unpaired surrogate, then that itself
* will be returned as the code point.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
@@ -199,7 +200,7 @@
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
- if((i)+1<(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} else { \
@@ -244,6 +245,8 @@
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
+ * The length can be negative for a NUL-terminated string.
+ *
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
@@ -262,7 +265,7 @@
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
uint16_t __c2; \
- if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+ if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
@@ -340,6 +343,8 @@
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
+ * The length can be negative for a NUL-terminated string.
+ *
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
@@ -347,7 +352,7 @@
* @stable ICU 2.4
*/
#define U16_FWD_1(s, i, length) { \
- if(U16_IS_LEAD((s)[(i)++]) && (i)<(length) && U16_IS_TRAIL((s)[i])) { \
+ if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
++(i); \
} \
}
@@ -378,16 +383,18 @@
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
+ * The length can be negative for a NUL-terminated string.
+ *
* @param s const UChar * string
- * @param i string offset, must be i<length
- * @param length string length
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
* @param n number of code points to skip
* @see U16_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_N(s, i, length, n) { \
int32_t __N=(n); \
- while(__N>0 && (i)<(length)) { \
+ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
U16_FWD_1(s, i, length); \
--__N; \
} \
@@ -598,15 +605,17 @@
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
+ * The length can be negative for a NUL-terminated string.
+ *
* @param s const UChar * string
- * @param start starting string offset (usually 0)
- * @param i string offset, start<=i<=length
- * @param length string length
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, start<=i<=length
+ * @param length int32_t string length
* @see U16_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT(s, start, i, length) { \
- if((start)<(i) && (i)<(length) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
+ if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
++(i); \
} \
}
diff --git a/Source/WebCore/icu/unicode/utf8.h b/Source/WebCore/icu/unicode/utf8.h
index 6405795a5..21e5f3d04 100644
--- a/Source/WebCore/icu/unicode/utf8.h
+++ b/Source/WebCore/icu/unicode/utf8.h
@@ -1,7 +1,7 @@
/*
*******************************************************************************
*
-* Copyright (C) 1999-2009, International Business Machines
+* Copyright (C) 1999-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
@@ -19,11 +19,9 @@
* \brief C API: 8-bit Unicode handling macros
*
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
- * utf8.h is included by utf.h after unicode/umachine.h
- * and some common definitions.
*
* For more information see utf.h and the ICU User Guide Strings chapter
- * (http://icu-project.org/userguide/strings.html).
+ * (http://userguide.icu-project.org/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
@@ -34,7 +32,7 @@
#ifndef __UTF8_H__
#define __UTF8_H__
-/* utf.h must be included first. */
+#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
@@ -62,13 +60,41 @@ U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/
utf8_countTrailBytes[256];
/**
- * Count the trail bytes for a UTF-8 lead byte.
+ * Counts the trail bytes for a UTF-8 lead byte.
+ * Returns 0 for 0..0xbf as well as for 0xfe and 0xff.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
+ *
+ * Note: Beginning with ICU 50, the implementation uses a multi-condition expression
+ * which was shown in 2012 (on x86-64) to compile to fast, branch-free code.
+ * leadByte is evaluated multiple times.
+ *
+ * The pre-ICU 50 implementation used the exported array utf8_countTrailBytes:
+ * #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte])
+ * leadByte was evaluated exactly once.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
-#define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
+#define U8_COUNT_TRAIL_BYTES(leadByte) \
+ ((leadByte)<0xf0 ? \
+ ((leadByte)>=0xc0)+((leadByte)>=0xe0) : \
+ (leadByte)<0xfe ? 3+((leadByte)>=0xf8)+((leadByte)>=0xfc) : 0)
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
+ * The maximum supported lead byte is 0xf4 corresponding to U+10FFFF.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
+ (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0))
/**
* Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
@@ -206,24 +232,60 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to a negative value.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
- * @param start starting string offset
- * @param i string offset, must be start<=i<length
- * @param length string length
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_GET_UNSAFE
* @stable ICU 2.4
*/
#define U8_GET(s, start, i, length, c) { \
- int32_t _u8_get_index=(int32_t)(i); \
+ int32_t _u8_get_index=(i); \
U8_SET_CP_START(s, start, _u8_get_index); \
U8_NEXT(s, _u8_get_index, length, c); \
}
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_GET() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_GET
+ * @draft ICU 51
+ */
+#define U8_GET_OR_FFFD(s, start, i, length, c) { \
+ int32_t _u8_get_index=(i); \
+ U8_SET_CP_START(s, start, _u8_get_index); \
+ U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
+}
+#endif /* U_HIDE_DRAFT_API */
+
/* definitions with forward iteration --------------------------------------- */
/**
@@ -245,19 +307,16 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
*/
#define U8_NEXT_UNSAFE(s, i, c) { \
(c)=(uint8_t)(s)[(i)++]; \
- if((uint8_t)((c)-0xc0)<0x35) { \
- uint8_t __count=U8_COUNT_TRAIL_BYTES(c); \
- U8_MASK_LEAD_BYTE(c, __count); \
- switch(__count) { \
- /* each following branch falls through to the next one */ \
- case 3: \
- (c)=((c)<<6)|((s)[(i)++]&0x3f); \
- case 2: \
- (c)=((c)<<6)|((s)[(i)++]&0x3f); \
- case 1: \
- (c)=((c)<<6)|((s)[(i)++]&0x3f); \
- /* no other branches to optimize switch() */ \
- break; \
+ if((c)>=0x80) { \
+ if((c)<0xe0) { \
+ (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
+ } else if((c)<0xf0) { \
+ /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+ (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
+ (i)+=2; \
+ } else { \
+ (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
+ (i)+=3; \
} \
} \
}
@@ -268,14 +327,16 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
+ * The length can be negative for a NUL-terminated string.
+ *
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to a negative value.
*
* @param s const uint8_t * string
- * @param i string offset, must be i<length
- * @param length string length
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_NEXT_UNSAFE
* @stable ICU 2.4
@@ -286,7 +347,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
uint8_t __t1, __t2; \
if( /* handle U+1000..U+CFFF inline */ \
(0xe0<(c) && (c)<=0xec) && \
- (((i)+1)<(length)) && \
+ (((i)+1)<(length) || (length)<0) && \
(__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
(__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
) { \
@@ -295,19 +356,70 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
(i)+=2; \
} else if( /* handle U+0080..U+07FF inline */ \
((c)<0xe0 && (c)>=0xc2) && \
- ((i)<(length)) && \
+ ((i)!=(length)) && \
(__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
) { \
- (c)=(UChar)((((c)&0x1f)<<6)|__t1); \
+ (c)=(((c)&0x1f)<<6)|__t1; \
++(i); \
- } else if(U8_IS_LEAD(c)) { \
+ } else { \
/* function call for "complicated" and error cases */ \
- (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (int32_t)(length), c, -1); \
+ (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
+ } \
+ } \
+}
+
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_NEXT() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_NEXT
+ * @draft ICU 51
+ */
+#define U8_NEXT_OR_FFFD(s, i, length, c) { \
+ (c)=(uint8_t)(s)[(i)++]; \
+ if((c)>=0x80) { \
+ uint8_t __t1, __t2; \
+ if( /* handle U+1000..U+CFFF inline */ \
+ (0xe0<(c) && (c)<=0xec) && \
+ (((i)+1)<(length) || (length)<0) && \
+ (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
+ (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
+ ) { \
+ /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+ (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+ (i)+=2; \
+ } else if( /* handle U+0080..U+07FF inline */ \
+ ((c)<0xe0 && (c)>=0xc2) && \
+ ((i)!=(length)) && \
+ (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
+ ) { \
+ (c)=(((c)&0x1f)<<6)|__t1; \
+ ++(i); \
} else { \
- (c)=U_SENTINEL; \
+ /* function call for "complicated" and error cases */ \
+ (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
} \
} \
}
+#endif /* U_HIDE_DRAFT_API */
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
@@ -351,9 +463,9 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* then isError is set to TRUE.
*
* @param s const uint8_t * string buffer
- * @param i string offset, must be i<capacity
- * @param capacity size of the string buffer
- * @param c code point to append
+ * @param i int32_t string offset, must be i<capacity
+ * @param capacity int32_t size of the string buffer
+ * @param c UChar32 code point to append
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
* @see U8_APPEND_UNSAFE
* @stable ICU 2.4
@@ -369,7 +481,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
(s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
(s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
} else { \
- (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(capacity), c, &(isError)); \
+ (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
} \
}
@@ -384,7 +496,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_FWD_1_UNSAFE(s, i) { \
- (i)+=1+U8_COUNT_TRAIL_BYTES((s)[i]); \
+ (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \
}
/**
@@ -392,9 +504,11 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
+ * The length can be negative for a NUL-terminated string.
+ *
* @param s const uint8_t * string
- * @param i string offset, must be i<length
- * @param length string length
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
* @see U8_FWD_1_UNSAFE
* @stable ICU 2.4
*/
@@ -402,7 +516,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
uint8_t __b=(uint8_t)(s)[(i)++]; \
if(U8_IS_LEAD(__b)) { \
uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
- if((i)+__count>(length)) { \
+ if((i)+__count>(length) && (length)>=0) { \
__count=(uint8_t)((length)-(i)); \
} \
while(__count>0 && U8_IS_TRAIL((s)[i])) { \
@@ -438,16 +552,18 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
+ * The length can be negative for a NUL-terminated string.
+ *
* @param s const uint8_t * string
- * @param i string offset, must be i<length
- * @param length string length
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
* @param n number of code points to skip
* @see U8_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_FWD_N(s, i, length, n) { \
int32_t __N=(n); \
- while(__N>0 && (i)<(length)) { \
+ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
U8_FWD_1(s, i, length); \
--__N; \
} \
@@ -479,14 +595,14 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
* @stable ICU 2.4
*/
#define U8_SET_CP_START(s, start, i) { \
if(U8_IS_TRAIL((s)[(i)])) { \
- (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
+ (i)=utf8_back1SafeBody(s, start, (i)); \
} \
}
@@ -547,8 +663,8 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
*
* @param s const uint8_t * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_PREV_UNSAFE
* @stable ICU 2.4
@@ -556,13 +672,42 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
#define U8_PREV(s, start, i, c) { \
(c)=(uint8_t)(s)[--(i)]; \
if((c)>=0x80) { \
- if((c)<=0xbf) { \
- (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
- } else { \
- (c)=U_SENTINEL; \
- } \
+ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
+ } \
+}
+
+#ifndef U_HIDE_DRAFT_API
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_PREV() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_PREV
+ * @draft ICU 51
+ */
+#define U8_PREV_OR_FFFD(s, start, i, c) { \
+ (c)=(uint8_t)(s)[--(i)]; \
+ if((c)>=0x80) { \
+ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
} \
}
+#endif /* U_HIDE_DRAFT_API */
/**
* Move the string offset from one code point boundary to the previous one.
@@ -586,14 +731,14 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<i
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
* @see U8_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_1(s, start, i) { \
if(U8_IS_TRAIL((s)[--(i)])) { \
- (i)=utf8_back1SafeBody(s, start, (int32_t)(i)); \
+ (i)=utf8_back1SafeBody(s, start, (i)); \
} \
}
@@ -626,8 +771,8 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
- * @param start index of the start of the string
- * @param i string offset, must be start<i
+ * @param start int32_t index of the start of the string
+ * @param i int32_t string offset, must be start<i
* @param n number of code points to skip
* @see U8_BACK_N_UNSAFE
* @stable ICU 2.4
@@ -666,15 +811,17 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
+ * The length can be negative for a NUL-terminated string.
+ *
* @param s const uint8_t * string
- * @param start starting string offset (usually 0)
- * @param i string offset, must be start<=i<=length
- * @param length string length
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i<=length
+ * @param length int32_t string length
* @see U8_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U8_SET_CP_LIMIT(s, start, i, length) { \
- if((start)<(i) && (i)<(length)) { \
+ if((start)<(i) && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
U8_BACK_1(s, start, i); \
U8_FWD_1(s, i, length); \
} \
diff --git a/Source/WebCore/icu/unicode/utf_old.h b/Source/WebCore/icu/unicode/utf_old.h
index 8504a030d..f9125b1dd 100644
--- a/Source/WebCore/icu/unicode/utf_old.h
+++ b/Source/WebCore/icu/unicode/utf_old.h
@@ -1 +1,1169 @@
-/* This file is intentionally left blank. */
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf_old.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep21
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Deprecated macros for Unicode string handling
+ */
+
+/**
+ *
+ * The macros in utf_old.h are all deprecated and their use discouraged.
+ * Some of the design principles behind the set of UTF macros
+ * have changed or proved impractical.
+ * Almost all of the old "UTF macros" are at least renamed.
+ * If you are looking for a new equivalent to an old macro, please see the
+ * comment at the old one.
+ *
+ * Brief summary of reasons for deprecation:
+ * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
+ * was impractical.
+ * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing)
+ * was of little use and impractical.
+ * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE
+ * selection framework: UTF32_ macros (all trivial)
+ * and UTF_ default and intermediate macros (all aliases).
+ * - The selection framework also caused many macro aliases.
+ * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2).
+ * - Change of language in Unicode standard:
+ * Growing distinction between internal x-bit Unicode strings and external UTF-x
+ * forms, with the former more lenient.
+ * Suggests renaming of UTF16_ macros to U16_.
+ * - The prefix "UTF_" without a width number confused some users.
+ * - "Safe" append macros needed the addition of an error indicator output.
+ * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values
+ * to indicate error conditions.
+ * - The use of the "_CHAR" infix for code point operations confused some users.
+ *
+ * More details:
+ *
+ * Until ICU 2.2, utf.h theoretically allowed choosing among UTF-8/16/32
+ * for string processing, and among unsafe/safe/strict default macros for that.
+ *
+ * It proved nearly impossible to write non-trivial, high-performance code
+ * that is UTF-generic.
+ * Unsafe default macros would be dangerous for default string processing,
+ * and the main reason for the "strict" versions disappeared:
+ * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal.
+ * The only other conditions that "strict" checked for were non-characters,
+ * which are valid during processing. Only during text input/output should they
+ * be checked, and at that time other well-formedness checks may be
+ * necessary or useful as well.
+ * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR
+ * or U_IS_UNICODE_CHAR.
+ *
+ * The old UTF8_..._SAFE macros also used some normal Unicode code points
+ * to indicate malformed sequences.
+ * The new U8_ macros (which have no _SAFE suffix) use negative values instead.
+ *
+ * The entire contents of utf32.h was moved here without replacement
+ * because all those macros were trivial and
+ * were meaningful only in the framework of choosing the UTF size.
+ *
+ * See Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ *
+ * <hr>
+ *
+ * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation:
+ *
+ * <p>The original concept for these files was for ICU to allow,
+ * in principle, selecting which UTF (UTF-8/16/32) is used internally
+ * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
+ * accordingly. UTF-16 was the default.</p>
+ *
+ * <p>This concept has been abandoned.
+ * A lot of the ICU source code assumes UChar strings are in UTF-16.
+ * This is especially true for low-level code like
+ * conversion, normalization, and collation.
+ * The utf.h header enforces the default of UTF-16.
+ * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p>
+ *
+ * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
+ *
+ * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.</p>
+ *
+ * <p>utf.h also defines a number of C macros for handling single Unicode code points and
+ * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
+ * implementations of those macros and then aliases one set of them (for UTF-16) for general use.
+ * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
+ * the general alias macros always begin with UTF_...</p>
+ *
+ * <p>Many string operations can be done with or without error checking.
+ * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
+ * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
+ * program failures if the strings are not well-formed. The safe macros have an additional, boolean
+ * parameter "strict". If strict is FALSE, then only illegal sequences are detected.
+ * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
+ * Safe macros return special error code points for illegal/irregular sequences:
+ * Typically, U+ffff, or values that would result in a code unit sequence of the same length
+ * as the erroneous input sequence.<br>
+ * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
+ * they do not have start/length parameters for boundary checking.</p>
+ *
+ * <p>Here, the macros are aliased in two steps:
+ * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
+ * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
+ * Then, in a second step, the default, general alias macros are set to use either the unsafe or
+ * the safe/not strict (default) or the safe/strict macro;
+ * these general macros do not have a strictness parameter.</p>
+ *
+ * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
+ * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
+ * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p>
+ *
+ * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
+ * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
+ * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
+ * then the _UNSAFE version may be used.
+ * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p>
+ *
+ * <hr>
+ *
+ * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead.
+ */
+
+#ifndef __UTF_OLD_H__
+#define __UTF_OLD_H__
+
+#ifndef U_HIDE_DEPRECATED_API
+
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* Formerly utf.h, part 1 --------------------------------------------------- */
+
+#ifdef U_USE_UTF_DEPRECATES
+/**
+ * Unicode string and array offset and index type.
+ * ICU always counts Unicode code units (UChars) for
+ * string offsets, indexes, and lengths, not Unicode code points.
+ *
+ * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release.
+ */
+typedef int32_t UTextOffset;
+#endif
+
+/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF_SIZE 16
+
+/**
+ * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
+ * with strict=FALSE.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_SAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_UNSAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_STRICT
+
+/**
+ * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:
+ * \code
+ * U+0015 = NAK = Negative Acknowledge, C0 control character
+ * U+009f = highest C1 control character
+ * \endcode
+ *
+ * These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_1 0x15
+
+/**
+ * See documentation on UTF8_ERROR_VALUE_1 for details.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_2 0x9f
+
+/**
+ * Error value for all UTFs. This code point value will be set by macros with error
+ * checking if an error is detected.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ERROR_VALUE 0xffff
+
+/**
+ * Is a given 32-bit code an error value
+ * as returned by one of the macros for any UTF?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_ERROR(c) \
+ (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
+
+/**
+ * This is a combined macro: Is c a valid Unicode value _and_ not an error code?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_VALID(c) \
+ (UTF_IS_UNICODE_CHAR(c) && \
+ (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
+
+/**
+ * Is this code unit or code point a surrogate (U+d800..U+dfff)?
+ * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h.
+ */
+#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
+
+/**
+ * Is a given 32-bit code point a Unicode noncharacter?
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_NONCHAR(c) \
+ ((c)>=0xfdd0 && \
+ ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+ (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is a given 32-bit value a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_CHAR(c) \
+ ((uint32_t)(c)<0xd800 || \
+ ((uint32_t)(c)>0xdfff && \
+ (uint32_t)(c)<=0x10ffff && \
+ !UTF_IS_UNICODE_NONCHAR(c)))
+
+/* Formerly utf8.h ---------------------------------------------------------- */
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte by indexing utf8_countTrailBytes[]; leadByte is evaluated once.
+ * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h.
+ */
+#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.
+ */
+#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/** Is this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */
+#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
+/** Is this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */
+#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
+/** Is this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */
+#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
+
+/**
+ * Does the scalar value c take more than one UTF-8 byte, i.e. is it outside 0..0x7f?
+ * The comparison is done on (uint32_t)(c), so negative values also yield true.
+ * @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h.
+ */
+#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>=0x80)
+
+/**
+ * Number of bytes that the code point c occupies in UTF-8.
+ * The active definition yields 1 for c<=0x7f, 2 for c<=0x7ff,
+ * 4 for 0x10000..0x10ffff and 3 for every other value.
+ * ICU does not deal with code points >0x10ffff
+ * unless necessary for advancing in the byte stream.
+ *
+ * Values >0x10ffff count as 3 bytes because the UTF8_APPEND_CHAR_SAFE
+ * macros would write the error code point 0xffff, which takes 3 bytes.
+ * All comparisons are made on uint32_t because UChar32
+ * may be a signed type, and negative values must be recognized.
+ *
+ * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h.
+ */
+#if 1
+# define UTF8_CHAR_LENGTH(c) \
+ ((uint32_t)(c)>0x7f ? \
+ ((uint32_t)(c)>0x7ff ? \
+ ((uint32_t)((c)-0x10000)<=0xfffff ? 4 : 3) \
+ : 2) \
+ : 1)
+#else
+# define UTF8_CHAR_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ (uint32_t)(c)<=0x7ff ? 2 : \
+ (uint32_t)(c)<=0xffff ? 3 : \
+ (uint32_t)(c)<=0x10ffff ? 4 : \
+ (uint32_t)(c)<=0x3ffffff ? 5 : \
+ (uint32_t)(c)<=0x7fffffff ? 6 : 3)
+#endif
+
+/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */
+#define UTF8_MAX_CHAR_LENGTH 4
+
+/**
+ * Average number of code units compared to UTF-16.
+ * Expands to (5*size)/2, i.e. 2.5 bytes per UTF-16 code unit; with an integer
+ * size the division truncates.
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
+
+/**
+ * Get the code point whose byte sequence contains offset i, without bounds checks.
+ * A private int32_t copy of i is moved back to the start of the sequence
+ * (UTF8_SET_CHAR_START_UNSAFE) and the code point is decoded from there into c
+ * (UTF8_NEXT_CHAR_UNSAFE); i itself is not modified.
+ * @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h.
+ */
+#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
+ int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
+ UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
+ UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
+}
+
+/**
+ * Get the code point whose byte sequence contains offset i, with bounds checks.
+ * A private int32_t copy of i is moved back to the start of the sequence, but not
+ * before start (UTF8_SET_CHAR_START_SAFE), and the code point is decoded from there
+ * into c with UTF8_NEXT_CHAR_SAFE, which receives length and strict unchanged.
+ * i itself is not modified.
+ * @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h.
+ */
+#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+ int32_t _utf8_get_char_safe_index=(int32_t)(i); \
+ UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
+ UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
+}
+
+/**
+ * Decode the code point starting at s[i] into c and advance i past it, without any checks.
+ * The first byte is read with post-increment. If it lies in 0xc0..0xf4
+ * (the (uint8_t)(c-0xc0)<0x35 test), its trail count is looked up, the lead byte is
+ * masked in place, and one to three following bytes are folded in, 6 bits each.
+ * Trail bytes are not validated and no length limit is applied.
+ * c is assigned several times, so it must be a modifiable lvalue.
+ * @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h.
+ */
+#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+ if((uint8_t)((c)-0xc0)<0x35) { \
+ uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
+ UTF8_MASK_LEAD_BYTE(c, __count); \
+ switch(__count) { \
+ /* each following branch falls through to the next one */ \
+ case 3: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 2: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 1: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ /* no other branches to optimize switch() */ \
+ break; \
+ } \
+ } \
+}
+
+/**
+ * Write the UTF-8 encoding of c to s starting at index i and advance i past it.
+ * Writes 1 byte for c<=0x7f, 2 bytes for c<=0x7ff, 3 bytes for c<=0xffff and
+ * 4 bytes otherwise. The nested else-branches share the trailing byte stores:
+ * each level emits its own lead byte(s) and then falls out to the common
+ * continuation-byte writes below it.
+ * There is no capacity parameter and c is not checked against 0x10ffff.
+ * @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h.
+ */
+#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ if((uint32_t)(c)<=0x7ff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+ } else { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+ } else { \
+ (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ } \
+}
+
+/**
+ * Advance i by one code point: adds 1 plus the trail-byte count of the byte at s[i].
+ * No bounds check and no validation of the skipped bytes.
+ * @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h.
+ */
+#define UTF8_FWD_1_UNSAFE(s, i) { \
+ (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
+}
+
+/**
+ * Advance i by n code points by applying UTF8_FWD_1_UNSAFE n times.
+ * n is evaluated once into a local int32_t counter; nothing happens when n<=0.
+ * @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h.
+ */
+#define UTF8_FWD_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF8_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Move i back while s[i] is a trail byte, so that it ends on a non-trail byte.
+ * There is no lower bound: the loop only stops at a byte that is not a trail byte.
+ * @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h.
+ */
+#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
+ while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
+}
+
+/**
+ * Decode the code point starting at s[i] into c and advance i.
+ * The first byte is read with post-increment. Values below 0x80 are returned as is.
+ * A lead byte (UTF8_IS_LEAD) is handed, together with &i, length and strict, to
+ * utf8_nextCharSafeBody(), whose result becomes c. Any other byte >=0x80 sets c to
+ * UTF8_ERROR_VALUE_1.
+ * NOTE(review): the (c)>=0x80 test presumably relies on s yielding unsigned bytes; confirm at call sites.
+ * @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h.
+ */
+#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+ (c)=(s)[(i)++]; \
+ if((c)>=0x80) { \
+ if(UTF8_IS_LEAD(c)) { \
+ (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
+ } else { \
+ (c)=UTF8_ERROR_VALUE_1; \
+ } \
+ } \
+}
+
+/**
+ * Append code point c to s at index i and update i.
+ * c<=0x7f is stored directly as one byte; this branch does not consult length.
+ * Every other value is delegated to utf8_appendCharSafeBody(), which receives i and
+ * length as int32_t and a NULL last argument, and whose return value replaces i.
+ * @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h.
+ */
+#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
+ } \
+}
+
+/** Thin alias: forwards all arguments unchanged to U8_FWD_1. @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */
+#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
+
+/** Thin alias: forwards all arguments unchanged to U8_FWD_N. @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */
+#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
+
+/** Thin alias: forwards all arguments unchanged to U8_SET_CP_START. @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */
+#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
+
+/**
+ * Decode the code point that ends just before index i into c and move i to its first byte.
+ * i is pre-decremented and the byte read. If it is a trail byte, the macro keeps stepping
+ * back, accumulating the low 6 bits of each trail byte at an increasing shift, until it
+ * reaches a byte >=0xc0; that byte is masked according to the number of trail bytes seen
+ * and supplies the top bits of c.
+ * There is no lower bound and no validation: the loop ends only at a byte >=0xc0.
+ * @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h.
+ */
+#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(UTF8_IS_TRAIL(c)) { \
+ uint8_t __b, __count=1, __shift=6; \
+\
+ /* c is a trail byte */ \
+ (c)&=0x3f; \
+ for(;;) { \
+ __b=(s)[--(i)]; \
+ if(__b>=0xc0) { \
+ UTF8_MASK_LEAD_BYTE(__b, __count); \
+ (c)|=(UChar32)__b<<__shift; \
+ break; \
+ } else { \
+ (c)|=(UChar32)(__b&0x3f)<<__shift; \
+ ++__count; \
+ __shift+=6; \
+ } \
+ } \
+ } \
+}
+
+/**
+ * Move i back by one code point: pre-decrements i until s[i] is not a trail byte.
+ * i is always decremented at least once; there is no lower bound.
+ * @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h.
+ */
+#define UTF8_BACK_1_UNSAFE(s, i) { \
+ while(UTF8_IS_TRAIL((s)[--(i)])) {} \
+}
+
+/**
+ * Move i back by n code points by applying UTF8_BACK_1_UNSAFE n times.
+ * n is evaluated once into a local int32_t counter; nothing happens when n<=0.
+ * @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h.
+ */
+#define UTF8_BACK_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF8_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * Adjust i to a code point limit by stepping back one code point
+ * (UTF8_BACK_1_UNSAFE) and then forward one code point (UTF8_FWD_1_UNSAFE).
+ * @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h.
+ */
+#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+ UTF8_BACK_1_UNSAFE(s, i); \
+ UTF8_FWD_1_UNSAFE(s, i); \
+}
+
+/**
+ * Decode the code point that ends just before index i into c and move i back.
+ * i is pre-decremented and the byte read. Values below 0x80 are returned as is.
+ * A byte in 0x80..0xbf (a trail byte) is handed, together with start, &i and strict,
+ * to utf8_prevCharSafeBody(), whose result becomes c. A byte above 0xbf in this
+ * position sets c to UTF8_ERROR_VALUE_1.
+ * NOTE(review): the numeric comparisons presumably rely on s yielding unsigned bytes; confirm at call sites.
+ * @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h.
+ */
+#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+ (c)=(s)[--(i)]; \
+ if((c)>=0x80) { \
+ if((c)<=0xbf) { \
+ (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
+ } else { \
+ (c)=UTF8_ERROR_VALUE_1; \
+ } \
+ } \
+}
+
+/** Thin alias: forwards all arguments unchanged to U8_BACK_1. @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */
+#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
+
+/** Thin alias: forwards all arguments unchanged to U8_BACK_N. @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */
+#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
+
+/** Thin alias: forwards all arguments unchanged to U8_SET_CP_LIMIT. @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf16.h --------------------------------------------------------- */
+
+/** Is uchar a first/lead surrogate? True when all bits above the low 10 equal 0xd800, i.e. for 0xd800..0xdbff. @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */
+#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
+
+/** Is uchar a second/trail surrogate? True when all bits above the low 10 equal 0xdc00, i.e. for 0xdc00..0xdfff. @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */
+#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
+
+/** Assuming c is a surrogate, is it a first/lead surrogate? Only bit 0x400 is tested, so the result is meaningless for non-surrogates. @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */
+#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
+
+/**
+ * Helper constant for UTF16_GET_PAIR_VALUE: the combined bias
+ * (0xd800<<10)+0xdc00-0x10000 that is subtracted after the lead surrogate has been shifted.
+ * @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h.
+ */
+#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get the UTF-32 value from the surrogate code units.
+ * Computes (first<<10)+second-UTF_SURROGATE_OFFSET; the macro itself does not
+ * verify that first and second are lead and trail surrogates.
+ * @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h.
+ */
+#define UTF16_GET_PAIR_VALUE(first, second) \
+ (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
+
+/** Lead surrogate for a supplementary code point: (supplementary>>10)+0xd7c0, cast to UChar. @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/** Trail surrogate for a supplementary code point: low 10 bits of supplementary or-ed with 0xdc00, cast to UChar. @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/** Alias for UTF_FIRST_SURROGATE. @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
+
+/** Alias for UTF_SECOND_SURROGATE. @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
+
+/** Negation of UTF_IS_SURROGATE: is uchar a code unit that is not a surrogate? @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */
+#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
+
+/** Alias for UTF_IS_FIRST_SURROGATE. @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */
+#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
+
+/** Alias for UTF_IS_SECOND_SURROGATE. @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */
+#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
+
+/**
+ * Does the scalar value c need two UTF-16 code units, i.e. is it above 0xffff?
+ * The comparison is done on (uint32_t)(c), so negative values also yield true.
+ * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h.
+ */
+#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>=0x10000)
+
+/**
+ * Number of UTF-16 code units for c: 2 when (uint32_t)(c) is above 0xffff, otherwise 1.
+ * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
+ */
+#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
+
+/** The maximum number of code units per code point in UTF-16. @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */
+#define UTF16_MAX_CHAR_LENGTH 2
+
+/** Average number of code units compared to UTF-16; for UTF-16 itself this is size unchanged. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF16_ARRAY_SIZE(size) (size)
+
+/**
+ * Get a single code point from an offset that points to any
+ * of the code units that belong to that code point.
+ * Assume 0<=i<length.
+ *
+ * If s[i] is a surrogate, it is combined with s[i+1] (when it is a lead) or
+ * with s[i-1] (when it is a trail). The neighbouring unit is neither
+ * bounds-checked nor tested for being a surrogate. i is not modified.
+ *
+ * This could be used for iteration together with
+ * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
+ * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and
+ * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.
+ * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.
+ */
+#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[i]; \
+ if(UTF_IS_SURROGATE(c)) { \
+ if(UTF_IS_SURROGATE_FIRST(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
+ } else { \
+ (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
+ } \
+ } \
+}
+
+/**
+ * Get the code point that contains the code unit at offset i, with bounds checks.
+ * i is not modified.
+ * - A lead surrogate at i is paired with s[i+1] only if i+1<length and that unit
+ * is a trail surrogate.
+ * - A trail surrogate at i is paired with s[i-1] only if i-1>=start and that unit
+ * is a lead surrogate.
+ * - An unpaired surrogate is left in c as is, unless strict is true, in which case
+ * c becomes UTF_ERROR_VALUE.
+ * - A non-surrogate that fails UTF_IS_UNICODE_CHAR becomes UTF_ERROR_VALUE when
+ * strict is true.
+ * @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h.
+ */
+#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+ (c)=(s)[i]; \
+ if(UTF_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(UTF_IS_SURROGATE_FIRST(c)) { \
+ if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched first surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else { \
+ if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched second surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/**
+ * Read the code point starting at s[i] into c and advance i past it, without checks.
+ * If the first unit is a lead surrogate, the next unit is consumed as its trail
+ * without testing it or checking any length limit.
+ * @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h.
+ */
+#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+ if(UTF_IS_FIRST_SURROGATE(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
+ } \
+}
+
+/**
+ * Write code point c to s at index i and advance i past it.
+ * c<=0xffff is stored as one code unit; any larger value is stored as a
+ * lead/trail surrogate pair. There is no capacity parameter and c is not
+ * checked against 0x10ffff.
+ * @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h.
+ */
+#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } \
+}
+
+/**
+ * Advance i by one code point: by two units if s[i] is a lead surrogate, otherwise by one.
+ * The unit after a lead surrogate is skipped without being examined.
+ * @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h.
+ */
+#define UTF16_FWD_1_UNSAFE(s, i) { \
+ if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
+ ++(i); \
+ } \
+}
+
+/**
+ * Advance i by n code points by applying UTF16_FWD_1_UNSAFE n times.
+ * n is evaluated once into a local int32_t counter; nothing happens when n<=0.
+ * @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h.
+ */
+#define UTF16_FWD_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF16_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * If s[i] is a trail surrogate, decrement i by one; otherwise leave i alone.
+ * The preceding unit is not examined and there is no lower bound.
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h.
+ */
+#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
+ if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
+ --(i); \
+ } \
+}
+
+/**
+ * Read the code point starting at s[i] into c and advance i, with bounds checks.
+ * A lead surrogate is combined with the following unit only if i<length after the
+ * first read and that unit is a trail surrogate; i then advances past the trail too.
+ * An unmatched lead surrogate stays in c unless strict is true, in which case c
+ * becomes UTF_ERROR_VALUE. Any other unit that fails UTF_IS_UNICODE_CHAR (for
+ * example an unmatched trail surrogate) becomes UTF_ERROR_VALUE when strict is true.
+ * @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h.
+ */
+#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+ (c)=(s)[(i)++]; \
+ if(UTF_IS_FIRST_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched first surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ /* unmatched second surrogate or other non-character */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/**
+ * Append code point c to s at index i and advance i.
+ * - c<=0xffff is stored as one code unit; this branch does not consult length.
+ * - c in 0x10000..0x10ffff is stored as a surrogate pair when i+1<length;
+ * otherwise a single UTF_ERROR_VALUE is stored instead.
+ * - c>0x10ffff stores a single UTF_ERROR_VALUE.
+ * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
+ */
+#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else if((uint32_t)(c)<=0x10ffff) { \
+ if((i)+1<(length)) { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* not enough space */ { \
+ (s)[(i)++]=UTF_ERROR_VALUE; \
+ } \
+ } else /* c>0x10ffff, write error value */ { \
+ (s)[(i)++]=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** Thin alias: forwards all arguments unchanged to U16_FWD_1. @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
+
+/** Thin alias: forwards all arguments unchanged to U16_FWD_N. @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/** Thin alias: forwards all arguments unchanged to U16_SET_CP_START. @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
+
+/**
+ * Read the code point that ends just before index i into c and move i to its first unit.
+ * If the unit read is a trail surrogate, the unit before it is consumed as its lead
+ * without being tested and without any lower bound.
+ * @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h.
+ */
+#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(UTF_IS_SECOND_SURROGATE(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
+ } \
+}
+
+/**
+ * Move i back by one code point: by two units if the unit before i is a trail
+ * surrogate, otherwise by one. The unit before a trail surrogate is not examined.
+ * @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h.
+ */
+#define UTF16_BACK_1_UNSAFE(s, i) { \
+ if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
+ --(i); \
+ } \
+}
+
+/**
+ * Move i back by n code points by applying UTF16_BACK_1_UNSAFE n times.
+ * n is evaluated once into a local int32_t counter; nothing happens when n<=0.
+ * @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h.
+ */
+#define UTF16_BACK_N_UNSAFE(s, i, n) { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF16_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+}
+
+/**
+ * If the unit just before i (s[i-1]) is a lead surrogate, increment i by one;
+ * otherwise leave i alone. The unit at s[i] itself is not examined.
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h.
+ */
+#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+ if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
+ ++(i); \
+ } \
+}
+
+/**
+ * Read the code point that ends just before index i into c and move i back, with bounds checks.
+ * A trail surrogate is combined with the preceding unit only if i>start after the
+ * first decrement and that unit is a lead surrogate; i then moves back past the lead too.
+ * An unmatched trail surrogate stays in c unless strict is true, in which case c
+ * becomes UTF_ERROR_VALUE. Any other unit that fails UTF_IS_UNICODE_CHAR (for
+ * example an unmatched lead surrogate) becomes UTF_ERROR_VALUE when strict is true.
+ * @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h.
+ */
+#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+ (c)=(s)[--(i)]; \
+ if(UTF_IS_SECOND_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched second surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ /* unmatched first surrogate or other non-character */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** Thin alias: forwards all arguments unchanged to U16_BACK_1. @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
+
+/** Thin alias: forwards all arguments unchanged to U16_BACK_N. @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/** Thin alias: forwards all arguments unchanged to U16_SET_CP_LIMIT. @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf32.h --------------------------------------------------------- */
+
+/*
+* Old documentation:
+*
+* This file defines macros to deal with UTF-32 code units and code points.
+* Signatures and semantics are the same as for the similarly named macros
+* in utf16.h.
+* utf32.h is included by utf.h after unicode/umachine.h
+* and some common definitions.
+* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
+* Compound statements (curly braces {}) must be used for if-else-while...
+* bodies and all macro statements should be terminated with semicolon.</p>
+*/
+
+/* internal definitions ----------------------------------------------------- */
+
+/**
+ * Is c acceptable as a UTF-32 code unit?
+ * With strict true, c must satisfy UTF_IS_UNICODE_CHAR; otherwise it only has to
+ * be no greater than 0x10ffff when compared as uint32_t.
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF32_IS_SAFE(c, strict) \
+ ((strict) ? \
+ UTF_IS_UNICODE_CHAR(c) : \
+ (uint32_t)(c)<=0x10ffff)
+
+/*
+ * For the semantics of all of these macros, see utf16.h.
+ * The UTF-32 versions are trivial because any code point is
+ * encoded using exactly one code unit.
+ */
+
+/* single-code point definitions -------------------------------------------- */
+
+/* classes of code unit values */
+
+/** Always 1: every UTF-32 code unit encodes a whole code point. The argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SINGLE(uchar) 1
+/** Always 0: UTF-32 has no lead code units. The argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_LEAD(uchar) 0
+/** Always 0: UTF-32 has no trail code units. The argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_TRAIL(uchar) 0
+
+/* number of code units per code point */
+
+/** Always 0: no code point needs more than one UTF-32 code unit. The argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
+/** Always 1 code unit per code point. The argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_CHAR_LENGTH(c) 1
+/** The maximum number of code units per code point in UTF-32. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_MAX_CHAR_LENGTH 1
+
+/* average number of code units compared to UTF-16 */
+
+/** Expands to size unchanged. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_ARRAY_SIZE(size) (size)
+
+/** Read s[i] into c; i is not modified and nothing is checked. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[i]; \
+}
+
+/**
+ * Read s[i] into c and replace it with UTF_ERROR_VALUE if UTF32_IS_SAFE(c, strict) fails.
+ * i is not modified; start and length are accepted but not used.
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
+ (c)=(s)[i]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/* definitions with forward iteration --------------------------------------- */
+
+/** Read s[i] into c and increment i. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[(i)++]; \
+}
+
+/** Store c at s[i] and increment i; c is not range-checked. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
+ (s)[(i)++]=(c); \
+}
+
+/** Increment i by one; s is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_UNSAFE(s, i) { \
+ ++(i); \
+}
+
+/** Add n to i; s is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_UNSAFE(s, i, n) { \
+ (i)+=(n); \
+}
+
+/** Does nothing: any UTF-32 index is already at a code point start. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
+}
+
+/**
+ * Read s[i] into c, increment i, and replace c with UTF_ERROR_VALUE if
+ * UTF32_IS_SAFE(c, strict) fails. length is accepted but not used.
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
+ (c)=(s)[(i)++]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/**
+ * Store c at s[i] and increment i; values above 0x10ffff (compared as uint32_t)
+ * are replaced by 0xfffd. length is accepted but not used.
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
+ if((uint32_t)(c)<=0x10ffff) { \
+ (s)[(i)++]=(c); \
+ } else /* c>0x10ffff, write 0xfffd */ { \
+ (s)[(i)++]=0xfffd; \
+ } \
+}
+
+/** Increment i by one; s and length are accepted but not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_SAFE(s, i, length) { \
+ ++(i); \
+}
+
+/** Add n to i, then clamp i to length if it went past it. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_SAFE(s, i, length, n) { \
+ if(((i)+=(n))>(length)) { \
+ (i)=(length); \
+ } \
+}
+
+/** Does nothing: any UTF-32 index is already at a code point start. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
+}
+
+/* definitions with backward iteration -------------------------------------- */
+
+/** Decrement i and read s[i] into c. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
+ (c)=(s)[--(i)]; \
+}
+
+/** Decrement i by one; s is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_UNSAFE(s, i) { \
+ --(i); \
+}
+
+/** Subtract n from i; s is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_UNSAFE(s, i, n) { \
+ (i)-=(n); \
+}
+
+/** Does nothing: any UTF-32 index is already at a code point limit. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
+}
+
+/**
+ * Decrement i, read s[i] into c, and replace c with UTF_ERROR_VALUE if
+ * UTF32_IS_SAFE(c, strict) fails. start is accepted but not used.
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
+ (c)=(s)[--(i)]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+}
+
+/** Decrement i by one; s and start are accepted but not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_SAFE(s, start, i) { \
+ --(i); \
+}
+
+/** Subtract n from i, then clamp i to start if it went below it. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_SAFE(s, start, i, n) { \
+ (i)-=(n); \
+ if((i)<(start)) { \
+ (i)=(start); \
+ } \
+}
+
+/**
+ * Does nothing: any UTF-32 index is already at a code point limit.
+ * Note that this macro takes (s, i, length), whereas UTF8_SET_CHAR_LIMIT_SAFE and
+ * UTF16_SET_CHAR_LIMIT_SAFE take (s, start, i, length).
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
+}
+
+/* Formerly utf.h, part 2 --------------------------------------------------- */
+
+/*
+ * Every UTF_* macro in this part forwards its arguments unchanged to the
+ * UTF16_* macro of the same name.
+ */
+
+/**
+ * Estimate the number of code units for a string based on the number of UTF-16 code units.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
+
+/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */
+#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
+
+/* Define default macros (UTF-16 "safe") ------------------------------------ */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * Same as UTF16_IS_SINGLE.
+ * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.
+ */
+#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
+
+/**
+ * Is this code unit the first one of several (a lead surrogate)?
+ * Same as UTF16_IS_LEAD.
+ * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
+ */
+#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
+
+/**
+ * Is this code unit one of several but not the first one (a trail surrogate)?
+ * Same as UTF16_IS_TRAIL.
+ * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
+ */
+#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
+
+/**
+ * Does this code point require multiple code units (is it a supplementary code point)?
+ * Same as UTF16_NEED_MULTIPLE_UCHAR.
+ * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.
+ */
+#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
+
+/**
+ * How many code units are used to encode this code point (1 or 2)?
+ * Same as UTF16_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
+ */
+#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
+
+/**
+ * How many code units are used at most for any Unicode code point (2)?
+ * Same as UTF16_MAX_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.
+ */
+#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
+
+/**
+ * Set c to the code point that contains the code unit i.
+ * i could point to the lead or the trail surrogate for the code point.
+ * i is not modified.
+ * Same as UTF16_GET_CHAR.
+ * \pre 0<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
+ */
+#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
+
+/**
+ * Set c to the code point that starts at code unit i
+ * and advance i to beyond the code units of this code point (post-increment).
+ * i must point to the first code unit of a code point.
+ * Otherwise c is set to the trail unit (surrogate) itself.
+ * Same as UTF16_NEXT_CHAR.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h.
+ */
+#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
+
+/**
+ * Append the code units of code point c to the string at index i
+ * and advance i to beyond the new code units (post-increment).
+ * The code units beginning at index i will be overwritten.
+ * Same as UTF16_APPEND_CHAR.
+ * \pre 0<=c<=0x10ffff
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
+ */
+#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+/**
+ * Advance i to beyond the code units of the code point that begins at i.
+ * I.e., advance i by one code point.
+ * Same as UTF16_FWD_1.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
+ */
+#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
+
+/**
+ * Advance i to beyond the code units of the n code points where the first one begins at i.
+ * I.e., advance i by n code points.
+ * Same as UTF16_FWD_N.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.
+ */
+#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points to the beginning
+ * of a code point.
+ * The input index points to any code unit of a code point and is moved to point to
+ * the first code unit of the same code point. i is never incremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is decremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_START.
+ * \pre start<=i<length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.
+ */
+#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
+
+/**
+ * Set c to the code point that has code units before i
+ * and move i backward (towards the beginning of the string)
+ * to the first code unit of this code point (pre-decrement).
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_PREV_CHAR.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h.
+ */
+#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the code point that has code units before i.
+ * I.e., move i backward by one code point.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_1.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.
+ */
+#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the n code points that have code units before i.
+ * I.e., move i backward by n code points.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_N.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
+ */
+#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points beyond
+ * a code point. The input index points beyond any code unit
+ * of a code point and is moved to point beyond the last code unit of the same
+ * code point. i is never decremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is incremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_LIMIT.
+ * \pre start<i<=length
+ * \post start<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
+ */
+#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif
+
diff --git a/Source/WebCore/icu/unicode/utypes.h b/Source/WebCore/icu/unicode/utypes.h
index 00bf14ce8..8f924c9d1 100644
--- a/Source/WebCore/icu/unicode/utypes.h
+++ b/Source/WebCore/icu/unicode/utypes.h
@@ -1,6 +1,6 @@
/*
**********************************************************************
-* Copyright (C) 1996-2010, International Business Machines
+* Copyright (C) 1996-2012, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
@@ -34,9 +34,13 @@
#include "unicode/umachine.h"
-#include "unicode/utf.h"
#include "unicode/uversion.h"
#include "unicode/uconfig.h"
+#include <float.h>
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+# include "unicode/utf.h"
+#endif
/*!
* \file
@@ -53,9 +57,9 @@
* \def U_SHOW_CPLUSPLUS_API
* @internal
*/
-#ifdef XP_CPLUSPLUS
+#ifdef __cplusplus
# ifndef U_SHOW_CPLUSPLUS_API
-# define U_SHOW_CPLUSPLUS_API 1
+# define U_SHOW_CPLUSPLUS_API 0
# endif
#else
# undef U_SHOW_CPLUSPLUS_API
@@ -67,6 +71,12 @@
/**
* \def U_HIDE_DRAFT_API
* Define this to 1 to request that draft API be "hidden"
+ * @internal
+ */
+/**
+ * \def U_HIDE_INTERNAL_API
+ * Define this to 1 to request that internal API be "hidden"
+ * @internal
*/
#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
#define U_HIDE_DRAFT_API 1
@@ -75,116 +85,8 @@
#define U_HIDE_INTERNAL_API 1
#endif
-#ifdef U_HIDE_DRAFT_API
-#include "unicode/udraft.h"
-#endif
-
-#ifdef U_HIDE_DEPRECATED_API
-#include "unicode/udeprctd.h"
-#endif
-
-#ifdef U_HIDE_DEPRECATED_API
-#include "unicode/uobslete.h"
-#endif
-
-#ifdef U_HIDE_INTERNAL_API
-#include "unicode/uintrnal.h"
-#endif
-
-#ifdef U_HIDE_SYSTEM_API
-#include "unicode/usystem.h"
-#endif
-
/** @} */
-
-/*===========================================================================*/
-/* char Character set family */
-/*===========================================================================*/
-
-/**
- * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
- * @stable ICU 2.0
- */
-#define U_ASCII_FAMILY 0
-
-/**
- * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
- * @stable ICU 2.0
- */
-#define U_EBCDIC_FAMILY 1
-
-/**
- * \def U_CHARSET_FAMILY
- *
- * <p>These definitions allow to specify the encoding of text
- * in the char data type as defined by the platform and the compiler.
- * It is enough to determine the code point values of "invariant characters",
- * which are the ones shared by all encodings that are in use
- * on a given platform.</p>
- *
- * <p>Those "invariant characters" should be all the uppercase and lowercase
- * latin letters, the digits, the space, and "basic punctuation".
- * Also, '\\n', '\\r', '\\t' should be available.</p>
- *
- * <p>The list of "invariant characters" is:<br>
- * \code
- * A-Z a-z 0-9 SPACE " % &amp; ' ( ) * + , - . / : ; < = > ? _
- * \endcode
- * <br>
- * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
- *
- * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
- *
- * <p>In other words, all the graphic characters in 7-bit ASCII should
- * be safely accessible except the following:</p>
- *
- * \code
- * '\' <backslash>
- * '[' <left bracket>
- * ']' <right bracket>
- * '{' <left brace>
- * '}' <right brace>
- * '^' <circumflex>
- * '~' <tilde>
- * '!' <exclamation mark>
- * '#' <number sign>
- * '|' <vertical line>
- * '$' <dollar sign>
- * '@' <commercial at>
- * '`' <grave accent>
- * \endcode
- * @stable ICU 2.0
- */
-
-#ifndef U_CHARSET_FAMILY
-# define U_CHARSET_FAMILY 0
-#endif
-
-/**
- * \def U_CHARSET_IS_UTF8
- *
- * Hardcode the default charset to UTF-8.
- *
- * If this is set to 1, then
- * - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
- * contain UTF-8 text, regardless of what the system API uses
- * - some ICU code will use fast functions like u_strFromUTF8()
- * rather than the more general and more heavy-weight conversion API (ucnv.h)
- * - ucnv_getDefaultName() always returns "UTF-8"
- * - ucnv_setDefaultName() is disabled and will not change the default charset
- * - static builds of ICU are smaller
- * - more functionality is available with the UCONFIG_NO_CONVERSION build-time
- * configuration option (see unicode/uconfig.h)
- * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
- *
- * @stable ICU 4.2
- * @see UCONFIG_NO_CONVERSION
- */
-#ifndef U_CHARSET_IS_UTF8
-# define U_CHARSET_IS_UTF8 0
-#endif
-
/*===========================================================================*/
/* ICUDATA naming scheme */
/*===========================================================================*/
@@ -232,9 +134,11 @@
* ICU 1.8.x on EBCDIC, etc..
* @stable ICU 2.0
*/
-#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
+#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
+#ifndef U_HIDE_INTERNAL_API
#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
-#define U_USE_USRDATA 0 /**< @internal */
+#define U_USE_USRDATA 1 /**< @internal */
+#endif /* U_HIDE_INTERNAL_API */
/**
* U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
@@ -247,38 +151,28 @@
* \#define U_ICU_ENTRY_POINT icudt19_dat
* @stable ICU 2.4
*/
-#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM, U_ICU_VERSION_MINOR_NUM)
+#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
+#ifndef U_HIDE_INTERNAL_API
/**
- * Do not use.
+ * Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
* @internal
*/
-#define U_DEF2_ICUDATA_ENTRY_POINT(major, minor) U_DEF_ICUDATA_ENTRY_POINT(major, minor)
+#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
+
/**
* Do not use.
* @internal
*/
#ifndef U_DEF_ICUDATA_ENTRY_POINT
/* affected by symbol renaming. See platform.h */
-#define U_DEF_ICUDATA_ENTRY_POINT(major, minor) icudt##major##minor##_dat
-#endif
-
-/**
- * \def U_CALLCONV
- * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
- * in callback function typedefs to make sure that the calling convention
- * is compatible.
- *
- * This is only used for non-ICU-API functions.
- * When a function is a public ICU API,
- * you must use the U_CAPI and U_EXPORT2 qualifiers.
- * @stable ICU 2.0
- */
-#if defined(OS390) && defined(XP_CPLUSPLUS)
-# define U_CALLCONV __cdecl
+#ifndef U_LIB_SUFFIX_C_NAME
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
#else
-# define U_CALLCONV U_EXPORT2
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
+#endif
#endif
+#endif /* U_HIDE_INTERNAL_API */
/**
* \def NULL
@@ -286,7 +180,7 @@
* @stable ICU 2.0
*/
#ifndef NULL
-#ifdef XP_CPLUSPLUS
+#ifdef __cplusplus
#define NULL 0
#else
#define NULL ((void *)0)
@@ -315,54 +209,17 @@ typedef double UDate;
/** The number of milliseconds per day @stable ICU 2.0 */
#define U_MILLIS_PER_DAY (86400000)
-
-/*===========================================================================*/
-/* UClassID-based RTTI */
-/*===========================================================================*/
+/**
+ * Maximum UDate value
+ * @stable ICU 4.8
+ */
+#define U_DATE_MAX DBL_MAX
/**
- * UClassID is used to identify classes without using RTTI, since RTTI
- * is not yet supported by all C++ compilers. Each class hierarchy which needs
- * to implement polymorphic clone() or operator==() defines two methods,
- * described in detail below. UClassID values can be compared using
- * operator==(). Nothing else should be done with them.
- *
- * \par
- * getDynamicClassID() is declared in the base class of the hierarchy as
- * a pure virtual. Each concrete subclass implements it in the same way:
- *
- * \code
- * class Base {
- * public:
- * virtual UClassID getDynamicClassID() const = 0;
- * }
- *
- * class Derived {
- * public:
- * virtual UClassID getDynamicClassID() const
- * { return Derived::getStaticClassID(); }
- * }
- * \endcode
- *
- * Each concrete class implements getStaticClassID() as well, which allows
- * clients to test for a specific type.
- *
- * \code
- * class Derived {
- * public:
- * static UClassID U_EXPORT2 getStaticClassID();
- * private:
- * static char fgClassID;
- * }
- *
- * // In Derived.cpp:
- * UClassID Derived::getStaticClassID()
- * { return (UClassID)&Derived::fgClassID; }
- * char Derived::fgClassID = 0; // Value is irrelevant
- * \endcode
- * @stable ICU 2.0
- */
-typedef void* UClassID;
+ * Minimum UDate value
+ * @stable ICU 4.8
+ */
+#define U_DATE_MIN -U_DATE_MAX
/*===========================================================================*/
/* Shared library/DLL import-export API control */
@@ -373,7 +230,7 @@ typedef void* UClassID;
* ICU is separated into three libraries.
*/
-/*
+/**
* \def U_COMBINED_IMPLEMENTATION
* Set to export library symbols from inside the ICU library
* when all of ICU is in a single library.
@@ -540,9 +397,9 @@ typedef void* UClassID;
*
* Note: This is currently only done on Windows because
* some Linux/Unix compilers have problems with defining global new/delete.
- * On Windows, U_WINDOWS is defined, and it is _MSC_VER>=1200 for MSVC 6.0 and higher.
+ * On Windows, it is _MSC_VER>=1200 for MSVC 6.0 and higher.
*/
-#if defined(XP_CPLUSPLUS) && defined(WIN32) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION))
+#if defined(__cplusplus) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION))
#ifndef U_HIDE_INTERNAL_API
/**
@@ -743,6 +600,7 @@ typedef enum UErrorCode {
U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
+ U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */
/*
@@ -826,7 +684,7 @@ typedef enum UErrorCode {
/* Use the following to determine if an UErrorCode represents */
/* operational success or failure. */
-#ifdef XP_CPLUSPLUS
+#ifdef __cplusplus
/**
* Does the error code indicate success?
* @stable ICU 2.0
diff --git a/Source/WebCore/icu/unicode/uvernum.h b/Source/WebCore/icu/unicode/uvernum.h
new file mode 100644
index 000000000..bd0b0c989
--- /dev/null
+++ b/Source/WebCore/icu/unicode/uvernum.h
@@ -0,0 +1,167 @@
+/*
+*******************************************************************************
+* Copyright (C) 2000-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uvernum.h
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Vladimir Weinstein
+* Updated by: Steven R. Loomis
+*
+*/
+
+/**
+ * \file
+ * \brief C API: definitions of ICU version numbers
+ *
+ * This file is included by uversion.h and other files. This file contains only
+ * macros and definitions. The actual version numbers are defined here.
+ */
+
+ /*
+ * IMPORTANT: When updating version, the following things need to be done:
+ * source/common/unicode/uvernum.h - this file: update major, minor,
+ * patchlevel, suffix, version, short version constants, namespace,
+ * renaming macro, and copyright
+ *
+ * The following files need to be updated as well, which can be done
+ * by running the UNIX makefile target 'update-windows-makefiles' in icu/source.
+ *
+ *
+ * source/common/common.vcproj - update 'Output file name' on the link tab so
+ * that it contains the new major/minor combination
+ * source/i18n/i18n.vcproj - same as for the common.vcproj
+ * source/layout/layout.vcproj - same as for the common.vcproj
+ * source/layoutex/layoutex.vcproj - same
+ * source/stubdata/stubdata.vcproj - same as for the common.vcproj
+ * source/io/io.vcproj - same as for the common.vcproj
+ * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
+ * the new major/minor combination and the Unicode version.
+ */
+
+#ifndef UVERNUM_H
+#define UVERNUM_H
+
+/** The standard copyright notice that gets compiled into each library.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING \
+ " Copyright (C) 2013, International Business Machines Corporation and others. All Rights Reserved. "
+
+/** The current ICU major version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_MAJOR_NUM 52
+
+/** The current ICU minor version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_MINOR_NUM 1
+
+/** The current ICU patchlevel version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_PATCHLEVEL_NUM 0
+
+/** The current ICU build level version as an integer.
+ * This value is for use by ICU clients. It defaults to 0.
+ * @stable ICU 4.0
+ */
+#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
+#define U_ICU_VERSION_BUILDLEVEL_NUM 0
+#endif
+
+/** Glued version suffix for renamers
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SUFFIX _52
+
+/**
+ * \def U_DEF2_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/**
+ * \def U_DEF_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/** Glued version suffix function for renamers
+ * This value will change in the subsequent releases of ICU.
+ * If a custom suffix (such as matching library suffixes) is desired, this can be modified.
+ * Note that if present, platform.h may contain an earlier definition of this macro.
+ * \def U_ICU_ENTRY_POINT_RENAME
+ * @stable ICU 4.2
+ */
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#ifdef U_HAVE_LIB_SUFFIX
+#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
+#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
+#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
+#else
+#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
+#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
+#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
+#endif
+#endif
+
+/** The current ICU library version as a dotted-decimal string. The patchlevel
+ * only appears in this string if it is non-zero.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION "52.1"
+
+/** The current ICU library major/minor version as a string without dots, for library name suffixes.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SHORT "52"
+
+#ifndef U_HIDE_INTERNAL_API
+/** Data version in ICU4C.
+ * @internal ICU 4.4 Internal Use Only
+ **/
+#define U_ICU_DATA_VERSION "52.1"
+#endif /* U_HIDE_INTERNAL_API */
+
+/*===========================================================================
+ * ICU collation framework version information
+ * Version info that can be obtained from a collator is affected by these
+ * numbers in a secret and magic way. Please use the collator version as a whole
+ *===========================================================================
+ */
+
+/**
+ * Collation runtime version (sort key generator, strcoll).
+ * If the version is different, sort keys for the same string could be different.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_RUNTIME_VERSION 7
+
+/**
+ * Collation builder code version.
+ * When this is different, the same tailoring might result
+ * in assigning different collation elements to code points.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_BUILDER_VERSION 8
+
+/**
+ * This is the version of collation tailorings.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_TAILORINGS_VERSION 1
+
+#endif
diff --git a/Source/WebCore/icu/unicode/uversion.h b/Source/WebCore/icu/unicode/uversion.h
index e54cd55a6..74e309105 100644
--- a/Source/WebCore/icu/unicode/uversion.h
+++ b/Source/WebCore/icu/unicode/uversion.h
@@ -1,6 +1,6 @@
/*
*******************************************************************************
-* Copyright (C) 2000-2010, International Business Machines
+* Copyright (C) 2000-2011, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*
@@ -67,7 +67,8 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* When compiling for C++, it begins an extern "C++" linkage block (to protect
* against cases in which an external client includes ICU header files inside
* an extern "C" linkage block).
- * If the C++ compiler supports namespaces, it also begins a namespace block.
+ *
+ * It also begins a versioned-ICU-namespace block.
* @stable ICU 2.4
*/
@@ -77,8 +78,8 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* When not compiling for C++, it does nothing.
* When compiling for C++, it ends the extern "C++" block begun by
* U_NAMESPACE_BEGIN.
- * If the C++ compiler supports namespaces, it also ends the namespace block
- * begun by U_NAMESPACE_BEGIN.
+ *
+ * It also ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
* @stable ICU 2.4
*/
@@ -86,7 +87,9 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* \def U_NAMESPACE_USE
* This is used to specify that the rest of the code uses the
* public ICU C++ API namespace.
- * If the compiler doesn't support namespaces, this does nothing.
+ * This is invoked by default; we recommend that you turn it off:
+ * See the "Recommended Build Options" section of the ICU4C readme
+ * (http://source.icu-project.org/repos/icu/icu/trunk/readme.html#RecBuild)
* @stable ICU 2.4
*/
@@ -94,13 +97,14 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
* \def U_NAMESPACE_QUALIFIER
* This is used to qualify that a function or class is part of
* the public ICU C++ API namespace.
- * If the compiler doesn't support namespaces, this does nothing.
+ *
+ * This macro is unnecessary since ICU 49 requires namespace support.
+ * You can just use "icu::" instead.
* @stable ICU 2.4
*/
/* Define namespace symbols if the compiler supports it. */
-#ifdef XP_CPLUSPLUS
-#if U_HAVE_NAMESPACE
+#ifdef __cplusplus
# if U_DISABLE_RENAMING
# define U_ICU_NAMESPACE icu
namespace U_ICU_NAMESPACE { }
@@ -122,12 +126,6 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
U_NAMESPACE_USE
# endif
#else
-# define U_NAMESPACE_BEGIN extern "C++" {
-# define U_NAMESPACE_END }
-# define U_NAMESPACE_USE
-# define U_NAMESPACE_QUALIFIER
-#endif
-#else
# define U_NAMESPACE_BEGIN
# define U_NAMESPACE_END
# define U_NAMESPACE_USE
@@ -180,7 +178,7 @@ u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
* @stable ICU 2.4
*/
U_STABLE void U_EXPORT2
-u_versionToString(UVersionInfo versionArray, char *versionString);
+u_versionToString(const UVersionInfo versionArray, char *versionString);
/**
* Gets the ICU release version. The version array stores the version information