diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-05-24 08:28:08 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-05-24 08:28:08 +0000 |
commit | a4e969f4965059196ca948db781e52f7cfebf19e (patch) | |
tree | 6ca352808c8fdc52006a0f33f6ae3c593b23867d /Source/WTF/icu | |
parent | 41386e9cb918eed93b3f13648cbef387e371e451 (diff) | |
download | WebKitGtk-tarball-a4e969f4965059196ca948db781e52f7cfebf19e.tar.gz |
webkitgtk-2.12.3webkitgtk-2.12.3
Diffstat (limited to 'Source/WTF/icu')
36 files changed, 27651 insertions, 0 deletions
diff --git a/Source/WTF/icu/LICENSE b/Source/WTF/icu/LICENSE new file mode 100644 index 000000000..385d130cd --- /dev/null +++ b/Source/WTF/icu/LICENSE @@ -0,0 +1,25 @@ +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2006 International Business Machines Corporation and others + +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, +merge, publish, distribute, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above copyright notice(s) +and this permission notice appear in all copies of the Software and that both the above +copyright notice(s) and this permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR +PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER +OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR +CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR +PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING +OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder shall not be used in +advertising or otherwise to promote the sale, use or other dealings in this Software +without prior written authorization of the copyright holder. diff --git a/Source/WTF/icu/README b/Source/WTF/icu/README new file mode 100644 index 000000000..389e2e801 --- /dev/null +++ b/Source/WTF/icu/README @@ -0,0 +1,4 @@ +The headers in this directory are for compiling on Mac OS X 10.4. +The Mac OS X 10.4 release includes the ICU binary, but not ICU headers. +For other platforms, installed ICU headers should be used rather than these. +They are specific to Mac OS X 10.4. diff --git a/Source/WTF/icu/unicode/bytestream.h b/Source/WTF/icu/unicode/bytestream.h new file mode 100644 index 000000000..174aa38af --- /dev/null +++ b/Source/WTF/icu/unicode/bytestream.h @@ -0,0 +1,257 @@ +// Copyright (C) 2009-2012, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2007 Google Inc. All Rights Reserved. +// Author: sanjay@google.com (Sanjay Ghemawat) +// +// Abstract interface that consumes a sequence of bytes (ByteSink). +// +// Used so that we can write a single piece of code that can operate +// on a variety of output string types. +// +// Various implementations of this interface are provided: +// ByteSink: +// CheckedArrayByteSink Write to a flat array, with bounds checking +// StringByteSink Write to an STL string + +// This code is a contribution of Google code, and the style used here is +// a compromise between the original Google code and the ICU coding guidelines. +// For example, data types are ICU-ified (size_t,int->int32_t), +// and API comments doxygen-ified, but function names and behavior are +// as in the original, if possible. +// Assertion-style error handling, not available in ICU, was changed to +// parameter "pinning" similar to UnicodeString. +// +// In addition, this is only a partial port of the original Google code, +// limited to what was needed so far. The (nearly) complete original code +// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib +// (see ICU ticket 6765, r25517). + +#ifndef __BYTESTREAM_H__ +#define __BYTESTREAM_H__ + +/** + * \file + * \brief C++ API: Interface for writing bytes, and implementation classes. + */ + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/std_string.h" + +U_NAMESPACE_BEGIN + +/** + * A ByteSink can be filled with bytes. + * @stable ICU 4.2 + */ +class U_COMMON_API ByteSink : public UMemory { +public: + /** + * Default constructor. + * @stable ICU 4.2 + */ + ByteSink() { } + /** + * Virtual destructor. + * @stable ICU 4.2 + */ + virtual ~ByteSink(); + + /** + * Append "bytes[0,n-1]" to this. + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* bytes, int32_t n) = 0; + + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *result_capacity. Guarantees *result_capacity>=min_capacity. + * May return a pointer to the caller-owned scratch buffer which must have + * scratch_capacity>=min_capacity. + * The returned buffer is only valid until the next operation + * on this ByteSink. + * + * After writing at most *result_capacity bytes, call Append() with the + * pointer returned from this function and the number of bytes written. + * Many Append() implementations will avoid copying bytes if this function + * returned an internal buffer. + * + * Partial usage example: + * int32_t capacity; + * char* buffer = sink->GetAppendBuffer(..., &capacity); + * ... Write n bytes into buffer, with n <= capacity. + * sink->Append(buffer, n); + * In many implementations, that call to Append will avoid copying bytes. + * + * If the ByteSink allocates or reallocates an internal buffer, it should use + * the desired_capacity_hint if appropriate. + * If a caller cannot provide a reasonable guess at the desired capacity, + * it should pass desired_capacity_hint=0. + * + * If a non-scratch buffer is returned, the caller may only pass + * a prefix to it to Append(). + * That is, it is not correct to pass an interior pointer to Append(). + * + * The default implementation always returns the scratch buffer. + * + * @param min_capacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desired_capacity_hint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratch_capacity capacity of the scratch buffer + * @param result_capacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *result_capacity>=min_capacity + * @stable ICU 4.2 + */ + virtual char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity); + + /** + * Flush internal buffers. + * Some byte sinks use internal buffers or provide buffering + * and require calling Flush() at the end of the stream. + * The ByteSink should be ready for further Append() calls after Flush(). + * The default implementation of Flush() does nothing. + * @stable ICU 4.2 + */ + virtual void Flush(); + +private: + ByteSink(const ByteSink &); // copy constructor not implemented + ByteSink &operator=(const ByteSink &); // assignment operator not implemented +}; + +// ------------------------------------------------------------- +// Some standard implementations + +/** + * Implementation of ByteSink that writes to a flat byte array, + * with bounds-checking: + * This sink will not write more than capacity bytes to outbuf. + * If more than capacity bytes are Append()ed, then excess bytes are ignored, + * and Overflowed() will return true. + * Overflow does not cause a runtime error. + * @stable ICU 4.2 + */ +class U_COMMON_API CheckedArrayByteSink : public ByteSink { +public: + /** + * Constructs a ByteSink that will write to outbuf[0..capacity-1]. + * @param outbuf buffer to write to + * @param capacity size of the buffer + * @stable ICU 4.2 + */ + CheckedArrayByteSink(char* outbuf, int32_t capacity); + /** + * Destructor. + * @stable ICU 4.2 + */ + virtual ~CheckedArrayByteSink(); + /** + * Returns the sink to its original state, without modifying the buffer. + * Useful for reusing both the buffer and the sink for multiple streams. + * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0 + * and Overflowed()=FALSE. + * @return *this + * @stable ICU 4.6 + */ + virtual CheckedArrayByteSink& Reset(); + /** + * Append "bytes[0,n-1]" to this. + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* bytes, int32_t n); + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *result_capacity. For details see the base class documentation. + * @param min_capacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desired_capacity_hint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratch_capacity capacity of the scratch buffer + * @param result_capacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *result_capacity>=min_capacity + * @stable ICU 4.2 + */ + virtual char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity); + /** + * Returns the number of bytes actually written to the sink. + * @return number of bytes written to the buffer + * @stable ICU 4.2 + */ + int32_t NumberOfBytesWritten() const { return size_; } + /** + * Returns true if any bytes were discarded, i.e., if there was an + * attempt to write more than 'capacity' bytes. + * @return TRUE if more than 'capacity' bytes were Append()ed + * @stable ICU 4.2 + */ + UBool Overflowed() const { return overflowed_; } + /** + * Returns the number of bytes appended to the sink. + * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten() + * else they return the same number. + * @return number of bytes written to the buffer + * @stable ICU 4.6 + */ + int32_t NumberOfBytesAppended() const { return appended_; } +private: + char* outbuf_; + const int32_t capacity_; + int32_t size_; + int32_t appended_; + UBool overflowed_; + CheckedArrayByteSink(); ///< default constructor not implemented + CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented + CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented +}; + +#if U_HAVE_STD_STRING + +/** + * Implementation of ByteSink that writes to a "string". + * The StringClass is usually instantiated with a std::string. + * @stable ICU 4.2 + */ +template<typename StringClass> +class StringByteSink : public ByteSink { + public: + /** + * Constructs a ByteSink that will append bytes to the dest string. + * @param dest pointer to string object to append to + * @stable ICU 4.2 + */ + StringByteSink(StringClass* dest) : dest_(dest) { } + /** + * Append "bytes[0,n-1]" to this. + * @param data the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* data, int32_t n) { dest_->append(data, n); } + private: + StringClass* dest_; + StringByteSink(); ///< default constructor not implemented + StringByteSink(const StringByteSink &); ///< copy constructor not implemented + StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented +}; + +#endif + +U_NAMESPACE_END + +#endif // __BYTESTREAM_H__ diff --git a/Source/WTF/icu/unicode/localpointer.h b/Source/WTF/icu/unicode/localpointer.h new file mode 100644 index 000000000..e3ccb2581 --- /dev/null +++ b/Source/WTF/icu/unicode/localpointer.h @@ -0,0 +1,304 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: localpointer.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov13 +* created by: Markus W. Scherer +*/ + +#ifndef __LOCALPOINTER_H__ +#define __LOCALPOINTER_H__ + +/** + * \file + * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code. + * + * These classes are inspired by + * - std::auto_ptr + * - boost::scoped_ptr & boost::scoped_array + * - Taligent Safe Pointers (TOnlyPointerTo) + * + * but none of those provide for all of the goals for ICU smart pointers: + * - Smart pointer owns the object and releases it when it goes out of scope. + * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust. + * - ICU-compatible: No exceptions. + * - Need to be able to orphan/release the pointer and its ownership. + * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects. + * + * For details see http://site.icu-project.org/design/cpp/scoped_ptr + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * "Smart pointer" base class; do not use directly: use LocalPointer etc. + * + * Base class for smart pointer classes that do not throw exceptions. + * + * Do not use this base class directly, since it does not delete its pointer. + * A subclass must implement methods that delete the pointer: + * Destructor and adoptInstead(). + * + * There is no operator T *() provided because the programmer must decide + * whether to use getAlias() (without transfer of ownership) or orpan() + * (with transfer of ownership and NULLing of the pointer). + * + * @see LocalPointer + * @see LocalArray + * @see U_DEFINE_LOCAL_OPEN_POINTER + * @stable ICU 4.4 + */ +template<typename T> +class LocalPointerBase { +public: + /** + * Constructor takes ownership. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + explicit LocalPointerBase(T *p=NULL) : ptr(p) {} + /** + * Destructor deletes the object it owns. + * Subclass must override: Base class does nothing. + * @stable ICU 4.4 + */ + ~LocalPointerBase() { /* delete ptr; */ } + /** + * NULL check. + * @return TRUE if ==NULL + * @stable ICU 4.4 + */ + UBool isNull() const { return ptr==NULL; } + /** + * NULL check. + * @return TRUE if !=NULL + * @stable ICU 4.4 + */ + UBool isValid() const { return ptr!=NULL; } + /** + * Comparison with a simple pointer, so that existing code + * with ==NULL need not be changed. + * @param other simple pointer for comparison + * @return true if this pointer value equals other + * @stable ICU 4.4 + */ + bool operator==(const T *other) const { return ptr==other; } + /** + * Comparison with a simple pointer, so that existing code + * with !=NULL need not be changed. + * @param other simple pointer for comparison + * @return true if this pointer value differs from other + * @stable ICU 4.4 + */ + bool operator!=(const T *other) const { return ptr!=other; } + /** + * Access without ownership change. + * @return the pointer value + * @stable ICU 4.4 + */ + T *getAlias() const { return ptr; } + /** + * Access without ownership change. + * @return the pointer value as a reference + * @stable ICU 4.4 + */ + T &operator*() const { return *ptr; } + /** + * Access without ownership change. + * @return the pointer value + * @stable ICU 4.4 + */ + T *operator->() const { return ptr; } + /** + * Gives up ownership; the internal pointer becomes NULL. + * @return the pointer value; + * caller becomes responsible for deleting the object + * @stable ICU 4.4 + */ + T *orphan() { + T *p=ptr; + ptr=NULL; + return p; + } + /** + * Deletes the object it owns, + * and adopts (takes ownership of) the one passed in. + * Subclass must override: Base class does not delete the object. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + // delete ptr; + ptr=p; + } +protected: + /** + * Actual pointer. + * @internal + */ + T *ptr; +private: + // No comparison operators with other LocalPointerBases. + bool operator==(const LocalPointerBase &other); + bool operator!=(const LocalPointerBase &other); + // No ownership transfer: No copy constructor, no assignment operator. + LocalPointerBase(const LocalPointerBase &other); + void operator=(const LocalPointerBase &other); + // No heap allocation. Use only on the stack. + static void * U_EXPORT2 operator new(size_t size); + static void * U_EXPORT2 operator new[](size_t size); +#if U_HAVE_PLACEMENT_NEW + static void * U_EXPORT2 operator new(size_t, void *ptr); +#endif +}; + +/** + * "Smart pointer" class, deletes objects via the standard C++ delete operator. + * For most methods see the LocalPointerBase base class. + * + * Usage example: + * \code + * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005)); + * int32_t length=s->length(); // 2 + * UChar lead=s->charAt(0); // 0xd900 + * if(some condition) { return; } // no need to explicitly delete the pointer + * s.adoptInstead(new UnicodeString((UChar)0xfffc)); + * length=s->length(); // 1 + * // no need to explicitly delete the pointer + * \endcode + * + * @see LocalPointerBase + * @stable ICU 4.4 + */ +template<typename T> +class LocalPointer : public LocalPointerBase<T> { +public: + /** + * Constructor takes ownership. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {} + /** + * Destructor deletes the object it owns. + * @stable ICU 4.4 + */ + ~LocalPointer() { + delete LocalPointerBase<T>::ptr; + } + /** + * Deletes the object it owns, + * and adopts (takes ownership of) the one passed in. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + delete LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + } +}; + +/** + * "Smart pointer" class, deletes objects via the C++ array delete[] operator. + * For most methods see the LocalPointerBase base class. + * Adds operator[] for array item access. + * + * Usage example: + * \code + * LocalArray<UnicodeString> a(new UnicodeString[2]); + * a[0].append((UChar)0x61); + * if(some condition) { return; } // no need to explicitly delete the array + * a.adoptInstead(new UnicodeString[4]); + * a[3].append((UChar)0x62).append((UChar)0x63).reverse(); + * // no need to explicitly delete the array + * \endcode + * + * @see LocalPointerBase + * @stable ICU 4.4 + */ +template<typename T> +class LocalArray : public LocalPointerBase<T> { +public: + /** + * Constructor takes ownership. + * @param p simple pointer to an array of T objects that is adopted + * @stable ICU 4.4 + */ + explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {} + /** + * Destructor deletes the array it owns. + * @stable ICU 4.4 + */ + ~LocalArray() { + delete[] LocalPointerBase<T>::ptr; + } + /** + * Deletes the array it owns, + * and adopts (takes ownership of) the one passed in. + * @param p simple pointer to an array of T objects that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + delete[] LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + } + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + * @stable ICU 4.4 + */ + T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; } +}; + +/** + * \def U_DEFINE_LOCAL_OPEN_POINTER + * "Smart pointer" definition macro, deletes objects via the closeFunction. + * Defines a subclass of LocalPointerBase which works just + * like LocalPointer<Type> except that this subclass will use the closeFunction + * rather than the C++ delete operator. + * + * Requirement: The closeFunction must tolerate a NULL pointer. + * (We could add a NULL check here but it is normally redundant.) + * + * Usage example: + * \code + * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode)); + * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), + * utf8Out, (int32_t)sizeof(utf8Out), + * utf8In, utf8InLength, &errorCode); + * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap + * \endcode + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ + class LocalPointerClassName : public LocalPointerBase<Type> { \ + public: \ + explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \ + ~LocalPointerClassName() { closeFunction(ptr); } \ + void adoptInstead(Type *p) { \ + closeFunction(ptr); \ + ptr=p; \ + } \ + } + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ +#endif /* __LOCALPOINTER_H__ */ diff --git a/Source/WTF/icu/unicode/parseerr.h b/Source/WTF/icu/unicode/parseerr.h new file mode 100644 index 000000000..44ff00811 --- /dev/null +++ b/Source/WTF/icu/unicode/parseerr.h @@ -0,0 +1,92 @@ +/* +********************************************************************** +* Copyright (C) 1999-2005, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 03/14/00 aliu Creation. +* 06/27/00 aliu Change from C++ class to C struct +********************************************************************** +*/ +#ifndef PARSEERR_H +#define PARSEERR_H + +#include "unicode/utypes.h" + + +/** + * \file + * \brief C API: Parse Error Information + */ +/** + * The capacity of the context strings in UParseError. + * @stable ICU 2.0 + */ +enum { U_PARSE_CONTEXT_LEN = 16 }; + +/** + * A UParseError struct is used to returned detailed information about + * parsing errors. It is used by ICU parsing engines that parse long + * rules, patterns, or programs, where the text being parsed is long + * enough that more information than a UErrorCode is needed to + * localize the error. + * + * <p>The line, offset, and context fields are optional; parsing + * engines may choose not to use to use them. + * + * <p>The preContext and postContext strings include some part of the + * context surrounding the error. If the source text is "let for=7" + * and "for" is the error (e.g., because it is a reserved word), then + * some examples of what a parser might produce are the following: + * + * <pre> + * preContext postContext + * "" "" The parser does not support context + * "let " "=7" Pre- and post-context only + * "let " "for=7" Pre- and post-context and error text + * "" "for" Error text only + * </pre> + * + * <p>Examples of engines which use UParseError (or may use it in the + * future) are Transliterator, RuleBasedBreakIterator, and + * RegexPattern. + * + * @stable ICU 2.0 + */ +typedef struct UParseError { + + /** + * The line on which the error occured. If the parser uses this + * field, it sets it to the line number of the source text line on + * which the error appears, which will be be a value >= 1. If the + * parse does not support line numbers, the value will be <= 0. + * @stable ICU 2.0 + */ + int32_t line; + + /** + * The character offset to the error. If the line field is >= 1, + * then this is the offset from the start of the line. Otherwise, + * this is the offset from the start of the text. If the parser + * does not support this field, it will have a value < 0. + * @stable ICU 2.0 + */ + int32_t offset; + + /** + * Textual context before the error. Null-terminated. The empty + * string if not supported by parser. + * @stable ICU 2.0 + */ + UChar preContext[U_PARSE_CONTEXT_LEN]; + + /** + * The error itself and/or textual context after the error. + * Null-terminated. The empty string if not supported by parser. + * @stable ICU 2.0 + */ + UChar postContext[U_PARSE_CONTEXT_LEN]; + +} UParseError; + +#endif diff --git a/Source/WTF/icu/unicode/platform.h b/Source/WTF/icu/unicode/platform.h new file mode 100644 index 000000000..1b2ab306e --- /dev/null +++ b/Source/WTF/icu/unicode/platform.h @@ -0,0 +1,755 @@ +/* +****************************************************************************** +* +* Copyright (C) 1997-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : platform.h +* +* Date Name Description +* 05/13/98 nos Creation (content moved here from ptypes.h). +* 03/02/99 stephen Added AS400 support. +* 03/30/99 stephen Added Linux support. +* 04/13/99 stephen Reworked for autoconf. +****************************************************************************** +*/ + +#ifndef _PLATFORM_H +#define _PLATFORM_H + +#include "unicode/uconfig.h" +#include "unicode/uvernum.h" + +/** + * \file + * \brief Basic types for the platform. + * + * This file used to be generated by autoconf/configure. + * Starting with ICU 49, platform.h is a normal source file, + * to simplify cross-compiling and working with non-autoconf/make build systems. + * + * When a value in this file does not work on a platform, then please + * try to derive it from the U_PLATFORM value + * (for which we might need a new value constant in rare cases) + * and/or from other macros that are predefined by the compiler + * or defined in standard (POSIX or platform or compiler) headers. + * + * As a temporary workaround, you can add an explicit <code>#define</code> for some macros + * before it is first tested, or add an equivalent -D macro definition + * to the compiler's command line. + * + * Note: Some compilers provide ways to show the predefined macros. + * For example, with gcc you can compile an empty .c file and have the compiler + * print the predefined macros with + * \code + * gcc -E -dM -x c /dev/null | sort + * \endcode + * (You can provide an actual empty .c file rather than /dev/null. + * <code>-x c++</code> is for C++.) + */ + +/** + * Define some things so that they can be documented. + * @internal + */ +#ifdef U_IN_DOXYGEN +/* + * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented. + * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented. + */ + +/* None for now. */ +#endif + +/** + * \def U_PLATFORM + * The U_PLATFORM macro defines the platform we're on. + * + * We used to define one different, value-less macro per platform. + * That made it hard to know the set of relevant platforms and macros, + * and hard to deal with variants of platforms. + * + * Starting with ICU 49, we define platforms as numeric macros, + * with ranges of values for related platforms and their variants. + * The U_PLATFORM macro is set to one of these values. + * + * Historical note from the Solaris Wikipedia article: + * AT&T and Sun collaborated on a project to merge the most popular Unix variants + * on the market at that time: BSD, System V, and Xenix. + * This became Unix System V Release 4 (SVR4). + * + * @internal + */ + +/** Unknown platform. @internal */ +#define U_PF_UNKNOWN 0 +/** Windows @internal */ +#define U_PF_WINDOWS 1000 +/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */ +#define U_PF_MINGW 1800 +/** + * Cygwin. Windows, calls to cygwin1.dll for Posix functions, + * using MSVC or GNU gcc and binutils. + * @internal + */ +#define U_PF_CYGWIN 1900 +/* Reserve 2000 for U_PF_UNIX? */ +/** HP-UX is based on UNIX System V. @internal */ +#define U_PF_HPUX 2100 +/** Solaris is a Unix operating system based on SVR4. @internal */ +#define U_PF_SOLARIS 2600 +/** BSD is a UNIX operating system derivative. @internal */ +#define U_PF_BSD 3000 +/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */ +#define U_PF_AIX 3100 +/** IRIX is based on UNIX System V with BSD extensions. @internal */ +#define U_PF_IRIX 3200 +/** + * Darwin is a POSIX-compliant operating system, composed of code developed by Apple, + * as well as code derived from NeXTSTEP, BSD, and other projects, + * built around the Mach kernel. + * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based. + * (Original description modified from WikiPedia.) + * @internal + */ +#define U_PF_DARWIN 3500 +/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */ +#define U_PF_IPHONE 3550 +/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */ +#define U_PF_QNX 3700 +/** Linux is a Unix-like operating system. @internal */ +#define U_PF_LINUX 4000 +/** Android is based on Linux. @internal */ +#define U_PF_ANDROID 4050 +/** "Classic" Mac OS (1984-2001) @internal */ +#define U_PF_CLASSIC_MACOS 8000 +/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ +#define U_PF_OS390 9000 +/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */ +#define U_PF_OS400 9400 + +#ifdef U_PLATFORM + /* Use the predefined value. */ +#elif defined(__MINGW32__) +# define U_PLATFORM U_PF_MINGW +#elif defined(__CYGWIN__) +# define U_PLATFORM U_PF_CYGWIN +#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +# define U_PLATFORM U_PF_WINDOWS +#elif defined(__ANDROID__) +# define U_PLATFORM U_PF_ANDROID + /* Android wchar_t support depends on the API level. */ +# include <android/api-level.h> +#elif defined(linux) || defined(__linux__) || defined(__linux) +# define U_PLATFORM U_PF_LINUX +#elif defined(__APPLE__) && defined(__MACH__) +# include <TargetConditionals.h> +# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */ +# define U_PLATFORM U_PF_IPHONE +# else +# define U_PLATFORM U_PF_DARWIN +# endif +#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__) +# define U_PLATFORM U_PF_BSD +#elif defined(sun) || defined(__sun) + /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */ +# define U_PLATFORM U_PF_SOLARIS +# if defined(__GNUC__) + /* Solaris/GCC needs this header file to get the proper endianness. Normally, this + * header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h + * is included which does not include this header file. + */ +# include <sys/isa_defs.h> +# endif +#elif defined(_AIX) || defined(__TOS_AIX__) +# define U_PLATFORM U_PF_AIX +#elif defined(_hpux) || defined(hpux) || defined(__hpux) +# define U_PLATFORM U_PF_HPUX +#elif defined(sgi) || defined(__sgi) +# define U_PLATFORM U_PF_IRIX +#elif defined(macintosh) +# define U_PLATFORM U_PF_CLASSIC_MACOS +#elif defined(__QNX__) || defined(__QNXNTO__) +# define U_PLATFORM U_PF_QNX +#elif defined(__TOS_MVS__) +# define U_PLATFORM U_PF_OS390 +#elif defined(__OS400__) || defined(__TOS_OS400__) +# define U_PLATFORM U_PF_OS400 +#else +# define U_PLATFORM U_PF_UNKNOWN +#endif + +/** + * \def CYGWINMSVC + * Defined if this is Windows with Cygwin, but using MSVC rather than gcc. + * Otherwise undefined. + * @internal + */ +/* Commented out because this is already set in mh-cygwin-msvc +#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER) +# define CYGWINMSVC +#endif +*/ + +/** + * \def U_PLATFORM_USES_ONLY_WIN32_API + * Defines whether the platform uses only the Win32 API. + * Set to 1 for Windows/MSVC and MinGW but not Cygwin. + * @internal + */ +#ifdef U_PLATFORM_USES_ONLY_WIN32_API + /* Use the predefined value. */ +#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC) +# define U_PLATFORM_USES_ONLY_WIN32_API 1 +#else + /* Cygwin implements POSIX. */ +# define U_PLATFORM_USES_ONLY_WIN32_API 0 +#endif + +/** + * \def U_PLATFORM_HAS_WIN32_API + * Defines whether the Win32 API is available on the platform. + * Set to 1 for Windows/MSVC, MinGW and Cygwin. + * @internal + */ +#ifdef U_PLATFORM_HAS_WIN32_API + /* Use the predefined value. */ +#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +# define U_PLATFORM_HAS_WIN32_API 1 +#else +# define U_PLATFORM_HAS_WIN32_API 0 +#endif + +/** + * \def U_PLATFORM_IMPLEMENTS_POSIX + * Defines whether the platform implements (most of) the POSIX API. + * Set to 1 for Cygwin and most other platforms. + * @internal + */ +#ifdef U_PLATFORM_IMPLEMENTS_POSIX + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS +# define U_PLATFORM_IMPLEMENTS_POSIX 0 +#else +# define U_PLATFORM_IMPLEMENTS_POSIX 1 +#endif + +/** + * \def U_PLATFORM_IS_LINUX_BASED + * Defines whether the platform is Linux or one of its derivatives. + * @internal + */ +#ifdef U_PLATFORM_IS_LINUX_BASED + /* Use the predefined value. */ +#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= U_PF_ANDROID +# define U_PLATFORM_IS_LINUX_BASED 1 +#else +# define U_PLATFORM_IS_LINUX_BASED 0 +#endif + +/** + * \def U_PLATFORM_IS_DARWIN_BASED + * Defines whether the platform is Darwin or one of its derivatives. + * @internal + */ +#ifdef U_PLATFORM_IS_DARWIN_BASED + /* Use the predefined value. */ +#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE +# define U_PLATFORM_IS_DARWIN_BASED 1 +#else +# define U_PLATFORM_IS_DARWIN_BASED 0 +#endif + +/** + * \def U_HAVE_STDINT_H + * Defines whether stdint.h is available. It is a C99 standard header. + * We used to include inttypes.h which includes stdint.h but we usually do not need + * the additional definitions from inttypes.h. + * @internal + */ +#ifdef U_HAVE_STDINT_H + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API +# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600) + /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */ +# define U_HAVE_STDINT_H 1 +# else +# define U_HAVE_STDINT_H 0 +# endif +#elif U_PLATFORM == U_PF_SOLARIS + /* Solaris has inttypes.h but not stdint.h. */ +# define U_HAVE_STDINT_H 0 +#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) + /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ +# define U_HAVE_STDINT_H 0 +#else +# define U_HAVE_STDINT_H 1 +#endif + +/** + * \def U_HAVE_INTTYPES_H + * Defines whether inttypes.h is available. It is a C99 standard header. + * We include inttypes.h where it is available but stdint.h is not. + * @internal + */ +#ifdef U_HAVE_INTTYPES_H + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_SOLARIS + /* Solaris has inttypes.h but not stdint.h. */ +# define U_HAVE_INTTYPES_H 1 +#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) + /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ +# define U_HAVE_INTTYPES_H 1 +#else + /* Most platforms have both inttypes.h and stdint.h, or neither. */ +# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H +#endif + +/** + * \def U_IOSTREAM_SOURCE + * Defines what support for C++ streams is available. + * + * If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available + * (the ISO/IEC C++ FDIS was published in November 1997), and then + * one should qualify streams using the std namespace in ICU header + * files. + * Starting with ICU 49, this is the only supported version. + * + * If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is + * available instead (in June 1985 Stroustrup published + * "An Extensible I/O Facility for C++" at the summer USENIX conference). + * Starting with ICU 49, this version is not supported any more. + * + * If U_IOSTREAM_SOURCE is 0 (or any value less than 199711), + * then C++ streams are not available and + * support for them will be silently suppressed in ICU. + * + * @internal + */ +#ifndef U_IOSTREAM_SOURCE +#define U_IOSTREAM_SOURCE 199711 +#endif + +/** + * \def U_HAVE_STD_STRING + * Defines whether the standard C++ (STL) <string> header is available. + * @internal + */ +#ifdef U_HAVE_STD_STRING + /* Use the predefined value. */ +#else +# define U_HAVE_STD_STRING 1 +#endif + +/*===========================================================================*/ +/** @{ Compiler and environment features */ +/*===========================================================================*/ + +/** + * \def U_GCC_MAJOR_MINOR + * Indicates whether the compiler is gcc (test for != 0), + * and if so, contains its major (times 100) and minor version numbers. + * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0. + * + * For example, for testing for whether we have gcc, and whether it's 4.6 or higher, + * use "#if U_GCC_MAJOR_MINOR >= 406". + * @internal + */ +#ifdef __GNUC__ +# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__) +#else +# define U_GCC_MAJOR_MINOR 0 +#endif + +/** + * \def U_IS_BIG_ENDIAN + * Determines the endianness of the platform. + * @internal + */ +#ifdef U_IS_BIG_ENDIAN + /* Use the predefined value. */ +#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) +# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN) +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + /* gcc */ +# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) +# define U_IS_BIG_ENDIAN 1 +#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN) +# define U_IS_BIG_ENDIAN 0 +#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__) + /* These platforms do not appear to predefine any endianness macros. */ +# define U_IS_BIG_ENDIAN 1 +#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0) + /* HPPA do not appear to predefine any endianness macros. */ +# define U_IS_BIG_ENDIAN 1 +#elif defined(sparc) || defined(__sparc) || defined(__sparc__) + /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */ +# define U_IS_BIG_ENDIAN 1 +#else +# define U_IS_BIG_ENDIAN 0 +#endif + +/** + * \def U_HAVE_PLACEMENT_NEW + * Determines whether to override placement new and delete for STL. + * @stable ICU 2.6 + */ +#ifdef U_HAVE_PLACEMENT_NEW + /* Use the predefined value. */ +#elif defined(__BORLANDC__) +# define U_HAVE_PLACEMENT_NEW 0 +#else +# define U_HAVE_PLACEMENT_NEW 1 +#endif + +/** + * \def U_HAVE_DEBUG_LOCATION_NEW + * Define this to define the MFC debug version of the operator new. + * + * @stable ICU 3.4 + */ +#ifdef U_HAVE_DEBUG_LOCATION_NEW + /* Use the predefined value. */ +#elif defined(_MSC_VER) +# define U_HAVE_DEBUG_LOCATION_NEW 1 +#else +# define U_HAVE_DEBUG_LOCATION_NEW 0 +#endif + +/* Compatibility with non clang compilers */ +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif + +/** + * \def U_MALLOC_ATTR + * Attribute to mark functions as malloc-like + * @internal + */ +#if defined(__GNUC__) && __GNUC__>=3 +# define U_MALLOC_ATTR __attribute__ ((__malloc__)) +#else +# define U_MALLOC_ATTR +#endif + +/** + * \def U_ALLOC_SIZE_ATTR + * Attribute to specify the size of the allocated buffer for malloc-like functions + * @internal + */ +#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_attribute(alloc_size) +# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X))) +# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y))) +#else +# define U_ALLOC_SIZE_ATTR(X) +# define U_ALLOC_SIZE_ATTR2(X,Y) +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Character data types */ +/*===========================================================================*/ + +/** + * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform. + * @stable ICU 2.0 + */ +#define U_ASCII_FAMILY 0 + +/** + * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform. + * @stable ICU 2.0 + */ +#define U_EBCDIC_FAMILY 1 + +/** + * \def U_CHARSET_FAMILY + * + * <p>These definitions allow to specify the encoding of text + * in the char data type as defined by the platform and the compiler. + * It is enough to determine the code point values of "invariant characters", + * which are the ones shared by all encodings that are in use + * on a given platform.</p> + * + * <p>Those "invariant characters" should be all the uppercase and lowercase + * latin letters, the digits, the space, and "basic punctuation". + * Also, '\\n', '\\r', '\\t' should be available.</p> + * + * <p>The list of "invariant characters" is:<br> + * \code + * A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _ + * \endcode + * <br> + * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p> + * + * <p>This matches the IBM Syntactic Character Set (CS 640).</p> + * + * <p>In other words, all the graphic characters in 7-bit ASCII should + * be safely accessible except the following:</p> + * + * \code + * '\' <backslash> + * '[' <left bracket> + * ']' <right bracket> + * '{' <left brace> + * '}' <right brace> + * '^' <circumflex> + * '~' <tilde> + * '!' <exclamation mark> + * '#' <number sign> + * '|' <vertical line> + * '$' <dollar sign> + * '@' <commercial at> + * '`' <grave accent> + * \endcode + * @stable ICU 2.0 + */ +#ifdef U_CHARSET_FAMILY + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB) +# define U_CHARSET_FAMILY U_EBCDIC_FAMILY +#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__) +# define U_CHARSET_FAMILY U_EBCDIC_FAMILY +#else +# define U_CHARSET_FAMILY U_ASCII_FAMILY +#endif + +/** + * \def U_CHARSET_IS_UTF8 + * + * Hardcode the default charset to UTF-8. + * + * If this is set to 1, then + * - ICU will assume that all non-invariant char*, StringPiece, std::string etc. + * contain UTF-8 text, regardless of what the system API uses + * - some ICU code will use fast functions like u_strFromUTF8() + * rather than the more general and more heavy-weight conversion API (ucnv.h) + * - ucnv_getDefaultName() always returns "UTF-8" + * - ucnv_setDefaultName() is disabled and will not change the default charset + * - static builds of ICU are smaller + * - more functionality is available with the UCONFIG_NO_CONVERSION build-time + * configuration option (see unicode/uconfig.h) + * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable + * + * @stable ICU 4.2 + * @see UCONFIG_NO_CONVERSION + */ +#ifdef U_CHARSET_IS_UTF8 + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED +# define U_CHARSET_IS_UTF8 1 +#else +# define U_CHARSET_IS_UTF8 0 +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Information about wchar support */ +/*===========================================================================*/ + +/** + * \def U_HAVE_WCHAR_H + * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default. + * + * @stable ICU 2.0 + */ +#ifdef U_HAVE_WCHAR_H + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9 + /* + * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t. + * The type and header existed, but the library functions did not work as expected. + * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway. + */ +# define U_HAVE_WCHAR_H 0 +#else +# define U_HAVE_WCHAR_H 1 +#endif + +/** + * \def U_SIZEOF_WCHAR_T + * U_SIZEOF_WCHAR_T==sizeof(wchar_t) + * + * @stable ICU 2.0 + */ +#ifdef U_SIZEOF_WCHAR_T + /* Use the predefined value. */ +#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9) || U_PLATFORM == U_PF_CLASSIC_MACOS + /* + * Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring. + * Newer Mac OS X has size 4. + */ +# define U_SIZEOF_WCHAR_T 1 +#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN +# define U_SIZEOF_WCHAR_T 2 +#elif U_PLATFORM == U_PF_AIX + /* + * AIX 6.1 information, section "Wide character data representation": + * "... the wchar_t datatype is 32-bit in the 64-bit environment and + * 16-bit in the 32-bit environment." + * and + * "All locales use Unicode for their wide character code values (process code), + * except the IBM-eucTW codeset." + */ +# ifdef __64BIT__ +# define U_SIZEOF_WCHAR_T 4 +# else +# define U_SIZEOF_WCHAR_T 2 +# endif +#elif U_PLATFORM == U_PF_OS390 + /* + * z/OS V1R11 information center, section "LP64 | ILP32": + * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes. + * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes." + */ +# ifdef _LP64 +# define U_SIZEOF_WCHAR_T 4 +# else +# define U_SIZEOF_WCHAR_T 2 +# endif +#elif U_PLATFORM == U_PF_OS400 +# if defined(__UTF32__) + /* + * LOCALETYPE(*LOCALEUTF) is specified. + * Wide-character strings are in UTF-32, + * narrow-character strings are in UTF-8. + */ +# define U_SIZEOF_WCHAR_T 4 +# elif defined(__UCS2__) + /* + * LOCALETYPE(*LOCALEUCS2) is specified. + * Wide-character strings are in UCS-2, + * narrow-character strings are in EBCDIC. + */ +# define U_SIZEOF_WCHAR_T 2 +#else + /* + * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified. + * Wide-character strings are in 16-bit EBCDIC, + * narrow-character strings are in EBCDIC. + */ +# define U_SIZEOF_WCHAR_T 2 +# endif +#else +# define U_SIZEOF_WCHAR_T 4 +#endif + +#ifndef U_HAVE_WCSCPY +#define U_HAVE_WCSCPY U_HAVE_WCHAR_H +#endif + +/** @} */ + +/** + * \def U_HAVE_CHAR16_T + * Defines whether the char16_t type is available for UTF-16 + * and u"abc" UTF-16 string literals are supported. + * This is a new standard type and standard string literal syntax in C++0x + * but has been available in some compilers before. + * @internal + */ +#ifdef U_HAVE_CHAR16_T + /* Use the predefined value. */ +#else + /* + * Notes: + * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but + * does not support u"abc" string literals. + * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but + * does not support u"abc" string literals. + * C++11 and C11 require support for UTF-16 literals + */ +# if (defined(__cplusplus) && __cplusplus >= 201103L) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) +# define U_HAVE_CHAR16_T 1 +# else +# define U_HAVE_CHAR16_T 0 +# endif +#endif + +/** + * @{ + * \def U_DECLARE_UTF16 + * Do not use this macro because it is not defined on all platforms. + * Use the UNICODE_STRING or U_STRING_DECL macros instead. + * @internal + */ +#ifdef U_DECLARE_UTF16 + /* Use the predefined value. */ +#elif U_HAVE_CHAR16_T \ + || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \ + || (defined(__HP_aCC) && __HP_aCC >= 035000) \ + || (defined(__HP_cc) && __HP_cc >= 111106) +# define U_DECLARE_UTF16(string) u ## string +#elif U_SIZEOF_WCHAR_T == 2 \ + && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__))) +# define U_DECLARE_UTF16(string) L ## string +#else + /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */ +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Symbol import-export control */ +/*===========================================================================*/ + +#ifdef U_EXPORT + /* Use the predefined value. */ +#elif defined(U_STATIC_IMPLEMENTATION) +# define U_EXPORT +#elif defined(__GNUC__) +# define U_EXPORT __attribute__((visibility("default"))) +#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \ + || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) +# define U_EXPORT __global +/*#elif defined(__HP_aCC) || defined(__HP_cc) +# define U_EXPORT __declspec(dllexport)*/ +#elif defined(_MSC_VER) +# define U_EXPORT __declspec(dllexport) +#else +# define U_EXPORT +#endif + +/* U_CALLCONV is releated to U_EXPORT2 */ +#ifdef U_EXPORT2 + /* Use the predefined value. */ +#elif defined(_MSC_VER) +# define U_EXPORT2 __cdecl +#else +# define U_EXPORT2 +#endif + +#ifdef U_IMPORT + /* Use the predefined value. */ +#elif defined(_MSC_VER) + /* Windows needs to export/import data. */ +# define U_IMPORT __declspec(dllimport) +#else +# define U_IMPORT +#endif + +/** + * \def U_CALLCONV + * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary + * in callback function typedefs to make sure that the calling convention + * is compatible. + * + * This is only used for non-ICU-API functions. + * When a function is a public ICU API, + * you must use the U_CAPI and U_EXPORT2 qualifiers. + * @stable ICU 2.0 + */ +#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) +# define U_CALLCONV __cdecl +#else +# define U_CALLCONV U_EXPORT2 +#endif + +/* @} */ + +#endif diff --git a/Source/WTF/icu/unicode/ptypes.h b/Source/WTF/icu/unicode/ptypes.h new file mode 100644 index 000000000..b7f711603 --- /dev/null +++ b/Source/WTF/icu/unicode/ptypes.h @@ -0,0 +1,126 @@ +/* +****************************************************************************** +* +* Copyright (C) 1997-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : ptypes.h +* +* Date Name Description +* 05/13/98 nos Creation (content moved here from ptypes.h). +* 03/02/99 stephen Added AS400 support. +* 03/30/99 stephen Added Linux support. +* 04/13/99 stephen Reworked for autoconf. +* 09/18/08 srl Moved basic types back to ptypes.h from platform.h +****************************************************************************** +*/ + +/** + * \file + * \brief C API: Definitions of integer types of various widths + */ + +#ifndef _PTYPES_H +#define _PTYPES_H + +/** + * \def __STDC_LIMIT_MACROS + * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations + * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested. + * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code + * that uses such limit macros. + * @internal + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + +/* NULL, size_t, wchar_t */ +#include <stddef.h> + +/* + * If all compilers provided all of the C99 headers and types, + * we would just unconditionally #include <stdint.h> here + * and not need any of the stuff after including platform.h. + */ + +/* Find out if we have stdint.h etc. */ +#include "unicode/platform.h" + +/*===========================================================================*/ +/* Generic data types */ +/*===========================================================================*/ + +/* If your platform does not have the <stdint.h> header, you may + need to edit the typedefs in the #else section below. + Use #if...#else...#endif with predefined compiler macros if possible. */ +#if U_HAVE_STDINT_H + +/* + * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>. + * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc. + * which we almost never use, plus stuff like imaxabs() which we never use. + */ +#include <stdint.h> + +#if U_PLATFORM == U_PF_OS390 +/* The features header is needed to get (u)int64_t sometimes. */ +#include <features.h> +/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */ +#if !defined(__uint8_t) +#define __uint8_t 1 +typedef unsigned char uint8_t; +#endif +#endif /* U_PLATFORM == U_PF_OS390 */ + +#elif U_HAVE_INTTYPES_H + +# include <inttypes.h> + +#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */ + +#if ! U_HAVE_INT8_T +typedef signed char int8_t; +#endif + +#if ! U_HAVE_UINT8_T +typedef unsigned char uint8_t; +#endif + +#if ! U_HAVE_INT16_T +typedef signed short int16_t; +#endif + +#if ! U_HAVE_UINT16_T +typedef unsigned short uint16_t; +#endif + +#if ! U_HAVE_INT32_T +typedef signed int int32_t; +#endif + +#if ! U_HAVE_UINT32_T +typedef unsigned int uint32_t; +#endif + +#if ! U_HAVE_INT64_T +#ifdef _MSC_VER + typedef signed __int64 int64_t; +#else + typedef signed long long int64_t; +#endif +#endif + +#if ! U_HAVE_UINT64_T +#ifdef _MSC_VER + typedef unsigned __int64 uint64_t; +#else + typedef unsigned long long uint64_t; +#endif +#endif + +#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */ + +#endif /* _PTYPES_H */ diff --git a/Source/WTF/icu/unicode/putil.h b/Source/WTF/icu/unicode/putil.h new file mode 100644 index 000000000..6fc7e9cd5 --- /dev/null +++ b/Source/WTF/icu/unicode/putil.h @@ -0,0 +1,164 @@ +/* +****************************************************************************** +* +* Copyright (C) 1997-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : putil.h +* +* Date Name Description +* 05/14/98 nos Creation (content moved here from utypes.h). +* 06/17/99 erm Added IEEE_754 +* 07/22/98 stephen Added IEEEremainder, max, min, trunc +* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity +* 08/24/98 stephen Added longBitsFromDouble +* 03/02/99 stephen Removed openFile(). Added AS400 support. +* 04/15/99 stephen Converted to C +* 11/15/99 helena Integrated S/390 changes for IEEE support. +* 01/11/00 helena Added u_getVersion. +****************************************************************************** +*/ + +#ifndef PUTIL_H +#define PUTIL_H + +#include "unicode/utypes.h" + /** + * \file + * \brief C API: Platform Utilities + */ + +/*==========================================================================*/ +/* Platform utilities */ +/*==========================================================================*/ + +/** + * Platform utilities isolates the platform dependencies of the + * libarary. For each platform which this code is ported to, these + * functions may have to be re-implemented. + */ + +/** + * Return the ICU data directory. + * The data directory is where common format ICU data files (.dat files) + * are loaded from. Note that normal use of the built-in ICU + * facilities does not require loading of an external data file; + * unless you are adding custom data to ICU, the data directory + * does not need to be set. + * + * The data directory is determined as follows: + * If u_setDataDirectory() has been called, that is it, otherwise + * if the ICU_DATA environment variable is set, use that, otherwise + * If a data directory was specifed at ICU build time + * <code> + * \code + * #define ICU_DATA_DIR "path" + * \endcode + * </code> use that, + * otherwise no data directory is available. + * + * @return the data directory, or an empty string ("") if no data directory has + * been specified. + * + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 u_getDataDirectory(void); + +/** + * Set the ICU data directory. + * The data directory is where common format ICU data files (.dat files) + * are loaded from. Note that normal use of the built-in ICU + * facilities does not require loading of an external data file; + * unless you are adding custom data to ICU, the data directory + * does not need to be set. + * + * This function should be called at most once in a process, before the + * first ICU operation (e.g., u_init()) that will require the loading of an + * ICU data file. + * This function is not thread-safe. Use it before calling ICU APIs from + * multiple threads. + * + * @param directory The directory to be set. + * + * @see u_init + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); + +/** + * @{ + * Filesystem file and path separator characters. + * Example: '/' and ':' on Unix, '\\' and ';' on Windows. + * @stable ICU 2.0 + */ +#if U_PLATFORM == U_PF_CLASSIC_MACOS +# define U_FILE_SEP_CHAR ':' +# define U_FILE_ALT_SEP_CHAR ':' +# define U_PATH_SEP_CHAR ';' +# define U_FILE_SEP_STRING ":" +# define U_FILE_ALT_SEP_STRING ":" +# define U_PATH_SEP_STRING ";" +#elif U_PLATFORM_USES_ONLY_WIN32_API +# define U_FILE_SEP_CHAR '\\' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ';' +# define U_FILE_SEP_STRING "\\" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ";" +#else +# define U_FILE_SEP_CHAR '/' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ':' +# define U_FILE_SEP_STRING "/" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ":" +#endif + +/** @} */ + +/** + * Convert char characters to UChar characters. + * This utility function is useful only for "invariant characters" + * that are encoded in the platform default encoding. + * They are a small, constant subset of the encoding and include + * just the latin letters, digits, and some punctuation. + * For details, see U_CHARSET_FAMILY. + * + * @param cs Input string, points to <code>length</code> + * character bytes from a subset of the platform encoding. + * @param us Output string, points to memory for <code>length</code> + * Unicode characters. + * @param length The number of characters to convert; this may + * include the terminating <code>NUL</code>. + * + * @see U_CHARSET_FAMILY + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_charsToUChars(const char *cs, UChar *us, int32_t length); + +/** + * Convert UChar characters to char characters. + * This utility function is useful only for "invariant characters" + * that can be encoded in the platform default encoding. + * They are a small, constant subset of the encoding and include + * just the latin letters, digits, and some punctuation. + * For details, see U_CHARSET_FAMILY. + * + * @param us Input string, points to <code>length</code> + * Unicode characters that can be encoded with the + * codepage-invariant subset of the platform encoding. + * @param cs Output string, points to memory for <code>length</code> + * character bytes. + * @param length The number of characters to convert; this may + * include the terminating <code>NUL</code>. + * + * @see U_CHARSET_FAMILY + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_UCharsToChars(const UChar *us, char *cs, int32_t length); + +#endif diff --git a/Source/WTF/icu/unicode/rep.h b/Source/WTF/icu/unicode/rep.h new file mode 100644 index 000000000..4c7eae140 --- /dev/null +++ b/Source/WTF/icu/unicode/rep.h @@ -0,0 +1,261 @@ +/* +************************************************************************** +* Copyright (C) 1999-2012, International Business Machines Corporation and +* others. All Rights Reserved. +************************************************************************** +* Date Name Description +* 11/17/99 aliu Creation. Ported from java. Modified to +* match current UnicodeString API. Forced +* to use name "handleReplaceBetween" because +* of existing methods in UnicodeString. +************************************************************************** +*/ + +#ifndef REP_H +#define REP_H + +#include "unicode/uobject.h" + +/** + * \file + * \brief C++ API: Replaceable String + */ + +U_NAMESPACE_BEGIN + +class UnicodeString; + +/** + * <code>Replaceable</code> is an abstract base class representing a + * string of characters that supports the replacement of a range of + * itself with a new string of characters. It is used by APIs that + * change a piece of text while retaining metadata. Metadata is data + * other than the Unicode characters returned by char32At(). One + * example of metadata is style attributes; another is an edit + * history, marking each character with an author and revision number. + * + * <p>An implicit aspect of the <code>Replaceable</code> API is that + * during a replace operation, new characters take on the metadata of + * the old characters. For example, if the string "the <b>bold</b> + * font" has range (4, 8) replaced with "strong", then it becomes "the + * <b>strong</b> font". + * + * <p><code>Replaceable</code> specifies ranges using a start + * offset and a limit offset. The range of characters thus specified + * includes the characters at offset start..limit-1. That is, the + * start offset is inclusive, and the limit offset is exclusive. + * + * <p><code>Replaceable</code> also includes API to access characters + * in the string: <code>length()</code>, <code>charAt()</code>, + * <code>char32At()</code>, and <code>extractBetween()</code>. + * + * <p>For a subclass to support metadata, typical behavior of + * <code>replace()</code> is the following: + * <ul> + * <li>Set the metadata of the new text to the metadata of the first + * character replaced</li> + * <li>If no characters are replaced, use the metadata of the + * previous character</li> + * <li>If there is no previous character (i.e. start == 0), use the + * following character</li> + * <li>If there is no following character (i.e. the replaceable was + * empty), use default metadata.<br> + * <li>If the code point U+FFFF is seen, it should be interpreted as + * a special marker having no metadata<li> + * </li> + * </ul> + * If this is not the behavior, the subclass should document any differences. + * @author Alan Liu + * @stable ICU 2.0 + */ +class U_COMMON_API Replaceable : public UObject { + +public: + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~Replaceable(); + + /** + * Returns the number of 16-bit code units in the text. + * @return number of 16-bit code units in text + * @stable ICU 1.8 + */ + inline int32_t length() const; + + /** + * Returns the 16-bit code unit at the given offset into the text. + * @param offset an integer between 0 and <code>length()</code>-1 + * inclusive + * @return 16-bit code unit of text at given offset + * @stable ICU 1.8 + */ + inline UChar charAt(int32_t offset) const; + + /** + * Returns the 32-bit code point at the given 16-bit offset into + * the text. This assumes the text is stored as 16-bit code units + * with surrogate pairs intermixed. If the offset of a leading or + * trailing code unit of a surrogate pair is given, return the + * code point of the surrogate pair. + * + * @param offset an integer between 0 and <code>length()</code>-1 + * inclusive + * @return 32-bit code point of text at given offset + * @stable ICU 1.8 + */ + inline UChar32 char32At(int32_t offset) const; + + /** + * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) + * into the UnicodeString <tt>target</tt>. + * @param start offset of first character which will be copied + * @param limit offset immediately following the last character to + * be copied + * @param target UnicodeString into which to copy characters. + * @return A reference to <TT>target</TT> + * @stable ICU 2.1 + */ + virtual void extractBetween(int32_t start, + int32_t limit, + UnicodeString& target) const = 0; + + /** + * Replaces a substring of this object with the given text. If the + * characters being replaced have metadata, the new characters + * that replace them should be given the same metadata. + * + * <p>Subclasses must ensure that if the text between start and + * limit is equal to the replacement text, that replace has no + * effect. That is, any metadata + * should be unaffected. In addition, subclasses are encouraged to + * check for initial and trailing identical characters, and make a + * smaller replacement if possible. This will preserve as much + * metadata as possible. + * @param start the beginning index, inclusive; <code>0 <= start + * <= limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit + * <= length()</code>. + * @param text the text to replace characters <code>start</code> + * to <code>limit - 1</code> + * @stable ICU 2.0 + */ + virtual void handleReplaceBetween(int32_t start, + int32_t limit, + const UnicodeString& text) = 0; + // Note: All other methods in this class take the names of + // existing UnicodeString methods. This method is the exception. + // It is named differently because all replace methods of + // UnicodeString return a UnicodeString&. The 'between' is + // required in order to conform to the UnicodeString naming + // convention; API taking start/length are named <operation>, and + // those taking start/limit are named <operationBetween>. The + // 'handle' is added because 'replaceBetween' and + // 'doReplaceBetween' are already taken. + + /** + * Copies a substring of this object, retaining metadata. + * This method is used to duplicate or reorder substrings. + * The destination index must not overlap the source range. + * + * @param start the beginning index, inclusive; <code>0 <= start <= + * limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit <= + * length()</code>. + * @param dest the destination index. The characters from + * <code>start..limit-1</code> will be copied to <code>dest</code>. + * Implementations of this method may assume that <code>dest <= start || + * dest >= limit</code>. + * @stable ICU 2.0 + */ + virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0; + + /** + * Returns true if this object contains metadata. If a + * Replaceable object has metadata, calls to the Replaceable API + * must be made so as to preserve metadata. If it does not, calls + * to the Replaceable API may be optimized to improve performance. + * The default implementation returns true. + * @return true if this object contains metadata + * @stable ICU 2.2 + */ + virtual UBool hasMetaData() const; + + /** + * Clone this object, an instance of a subclass of Replaceable. + * Clones can be used concurrently in multiple threads. + * If a subclass does not implement clone(), or if an error occurs, + * then NULL is returned. + * The clone functions in all subclasses return a pointer to a Replaceable + * because some compilers do not support covariant (same-as-this) + * return types; cast to the appropriate subclass if necessary. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.6 + */ + virtual Replaceable *clone() const; + +protected: + + /** + * Default constructor. + * @stable ICU 2.4 + */ + inline Replaceable(); + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + Replaceable &Replaceable::operator=(const Replaceable &); + */ + + /** + * Virtual version of length(). + * @stable ICU 2.4 + */ + virtual int32_t getLength() const = 0; + + /** + * Virtual version of charAt(). + * @stable ICU 2.4 + */ + virtual UChar getCharAt(int32_t offset) const = 0; + + /** + * Virtual version of char32At(). + * @stable ICU 2.4 + */ + virtual UChar32 getChar32At(int32_t offset) const = 0; +}; + +inline Replaceable::Replaceable() {} + +inline int32_t +Replaceable::length() const { + return getLength(); +} + +inline UChar +Replaceable::charAt(int32_t offset) const { + return getCharAt(offset); +} + +inline UChar32 +Replaceable::char32At(int32_t offset) const { + return getChar32At(offset); +} + +// There is no rep.cpp, see unistr.cpp for Replaceable function implementations. + +U_NAMESPACE_END + +#endif diff --git a/Source/WTF/icu/unicode/std_string.h b/Source/WTF/icu/unicode/std_string.h new file mode 100644 index 000000000..67b1d6c5a --- /dev/null +++ b/Source/WTF/icu/unicode/std_string.h @@ -0,0 +1,34 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: std_string.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009feb19 +* created by: Markus W. Scherer +*/ + +#ifndef __STD_STRING_H__ +#define __STD_STRING_H__ + +/** + * \file + * \brief C++ API: Central ICU header for including the C++ standard <string> + * header and for related definitions. + */ + +#include "unicode/utypes.h" + +#if U_HAVE_STD_STRING + +#include <string> + +#endif // U_HAVE_STD_STRING + +#endif // __STD_STRING_H__ diff --git a/Source/WTF/icu/unicode/strenum.h b/Source/WTF/icu/unicode/strenum.h new file mode 100644 index 000000000..3dbe21c6b --- /dev/null +++ b/Source/WTF/icu/unicode/strenum.h @@ -0,0 +1,276 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +*/ + +#ifndef STRENUM_H +#define STRENUM_H + +#include "unicode/uobject.h" +#include "unicode/unistr.h" + +/** + * \file + * \brief C++ API: String Enumeration + */ + +U_NAMESPACE_BEGIN + +/** + * Base class for 'pure' C++ implementations of uenum api. Adds a + * method that returns the next UnicodeString since in C++ this can + * be a common storage format for strings. + * + * <p>The model is that the enumeration is over strings maintained by + * a 'service.' At any point, the service might change, invalidating + * the enumerator (though this is expected to be rare). The iterator + * returns an error if this has occurred. Lack of the error is no + * guarantee that the service didn't change immediately after the + * call, so the returned string still might not be 'valid' on + * subsequent use.</p> + * + * <p>Strings may take the form of const char*, const UChar*, or const + * UnicodeString*. The type you get is determine by the variant of + * 'next' that you call. In general the StringEnumeration is + * optimized for one of these types, but all StringEnumerations can + * return all types. Returned strings are each terminated with a NUL. + * Depending on the service data, they might also include embedded NUL + * characters, so API is provided to optionally return the true + * length, counting the embedded NULs but not counting the terminating + * NUL.</p> + * + * <p>The pointers returned by next, unext, and snext become invalid + * upon any subsequent call to the enumeration's destructor, next, + * unext, snext, or reset.</p> + * + * ICU 2.8 adds some default implementations and helper functions + * for subclasses. + * + * @stable ICU 2.4 + */ +class U_COMMON_API StringEnumeration : public UObject { +public: + /** + * Destructor. + * @stable ICU 2.4 + */ + virtual ~StringEnumeration(); + + /** + * Clone this object, an instance of a subclass of StringEnumeration. + * Clones can be used concurrently in multiple threads. + * If a subclass does not implement clone(), or if an error occurs, + * then NULL is returned. + * The clone functions in all subclasses return a base class pointer + * because some compilers do not support covariant (same-as-this) + * return types; cast to the appropriate subclass if necessary. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.8 + */ + virtual StringEnumeration *clone() const; + + /** + * <p>Return the number of elements that the iterator traverses. If + * the iterator is out of sync with its service, status is set to + * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p> + * + * <p>The return value will not change except possibly as a result of + * a subsequent call to reset, or if the iterator becomes out of sync.</p> + * + * <p>This is a convenience function. It can end up being very + * expensive as all the items might have to be pre-fetched + * (depending on the storage format of the data being + * traversed).</p> + * + * @param status the error code. + * @return number of elements in the iterator. + * + * @stable ICU 2.4 */ + virtual int32_t count(UErrorCode& status) const = 0; + + /** + * <p>Returns the next element as a NUL-terminated char*. If there + * are no more elements, returns NULL. If the resultLength pointer + * is not NULL, the length of the string (not counting the + * terminating NUL) is returned at that address. If an error + * status is returned, the value at resultLength is undefined.</p> + * + * <p>The returned pointer is owned by this iterator and must not be + * deleted by the caller. The pointer is valid until the next call + * to next, unext, snext, reset, or the enumerator's destructor.</p> + * + * <p>If the iterator is out of sync with its service, status is set + * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> + * + * <p>If the native service string is a UChar* string, it is + * converted to char* with the invariant converter. If the + * conversion fails (because a character cannot be converted) then + * status is set to U_INVARIANT_CONVERSION_ERROR and the return + * value is undefined (though not NULL).</p> + * + * Starting with ICU 2.8, the default implementation calls snext() + * and handles the conversion. + * Either next() or snext() must be implemented differently by a subclass. + * + * @param status the error code. + * @param resultLength a pointer to receive the length, can be NULL. + * @return a pointer to the string, or NULL. + * + * @stable ICU 2.4 + */ + virtual const char* next(int32_t *resultLength, UErrorCode& status); + + /** + * <p>Returns the next element as a NUL-terminated UChar*. If there + * are no more elements, returns NULL. If the resultLength pointer + * is not NULL, the length of the string (not counting the + * terminating NUL) is returned at that address. If an error + * status is returned, the value at resultLength is undefined.</p> + * + * <p>The returned pointer is owned by this iterator and must not be + * deleted by the caller. The pointer is valid until the next call + * to next, unext, snext, reset, or the enumerator's destructor.</p> + * + * <p>If the iterator is out of sync with its service, status is set + * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> + * + * Starting with ICU 2.8, the default implementation calls snext() + * and handles the conversion. + * + * @param status the error code. + * @param resultLength a ponter to receive the length, can be NULL. + * @return a pointer to the string, or NULL. + * + * @stable ICU 2.4 + */ + virtual const UChar* unext(int32_t *resultLength, UErrorCode& status); + + /** + * <p>Returns the next element a UnicodeString*. If there are no + * more elements, returns NULL.</p> + * + * <p>The returned pointer is owned by this iterator and must not be + * deleted by the caller. The pointer is valid until the next call + * to next, unext, snext, reset, or the enumerator's destructor.</p> + * + * <p>If the iterator is out of sync with its service, status is set + * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> + * + * Starting with ICU 2.8, the default implementation calls next() + * and handles the conversion. + * Either next() or snext() must be implemented differently by a subclass. + * + * @param status the error code. + * @return a pointer to the string, or NULL. + * + * @stable ICU 2.4 + */ + virtual const UnicodeString* snext(UErrorCode& status); + + /** + * <p>Resets the iterator. This re-establishes sync with the + * service and rewinds the iterator to start at the first + * element.</p> + * + * <p>Previous pointers returned by next, unext, or snext become + * invalid, and the value returned by count might change.</p> + * + * @param status the error code. + * + * @stable ICU 2.4 + */ + virtual void reset(UErrorCode& status) = 0; + + /** + * Compares this enumeration to other to check if both are equal + * + * @param that The other string enumeration to compare this object to + * @return TRUE if the enumerations are equal. FALSE if not. + * @stable ICU 3.6 + */ + virtual UBool operator==(const StringEnumeration& that)const; + /** + * Compares this enumeration to other to check if both are not equal + * + * @param that The other string enumeration to compare this object to + * @return TRUE if the enumerations are equal. FALSE if not. + * @stable ICU 3.6 + */ + virtual UBool operator!=(const StringEnumeration& that)const; + +protected: + /** + * UnicodeString field for use with default implementations and subclasses. + * @stable ICU 2.8 + */ + UnicodeString unistr; + /** + * char * default buffer for use with default implementations and subclasses. + * @stable ICU 2.8 + */ + char charsBuffer[32]; + /** + * char * buffer for use with default implementations and subclasses. + * Allocated in constructor and in ensureCharsCapacity(). + * @stable ICU 2.8 + */ + char *chars; + /** + * Capacity of chars, for use with default implementations and subclasses. + * @stable ICU 2.8 + */ + int32_t charsCapacity; + + /** + * Default constructor for use with default implementations and subclasses. + * @stable ICU 2.8 + */ + StringEnumeration(); + + /** + * Ensures that chars is at least as large as the requested capacity. + * For use with default implementations and subclasses. + * + * @param capacity Requested capacity. + * @param status ICU in/out error code. + * @stable ICU 2.8 + */ + void ensureCharsCapacity(int32_t capacity, UErrorCode &status); + + /** + * Converts s to Unicode and sets unistr to the result. + * For use with default implementations and subclasses, + * especially for implementations of snext() in terms of next(). + * This is provided with a helper function instead of a default implementation + * of snext() to avoid potential infinite loops between next() and snext(). + * + * For example: + * \code + * const UnicodeString* snext(UErrorCode& status) { + * int32_t resultLength=0; + * const char *s=next(&resultLength, status); + * return setChars(s, resultLength, status); + * } + * \endcode + * + * @param s String to be converted to Unicode. + * @param length Length of the string. + * @param status ICU in/out error code. + * @return A pointer to unistr. + * @stable ICU 2.8 + */ + UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status); +}; + +U_NAMESPACE_END + +/* STRENUM_H */ +#endif diff --git a/Source/WTF/icu/unicode/stringpiece.h b/Source/WTF/icu/unicode/stringpiece.h new file mode 100644 index 000000000..b29571d4a --- /dev/null +++ b/Source/WTF/icu/unicode/stringpiece.h @@ -0,0 +1,224 @@ +// Copyright (C) 2009-2013, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2001 and onwards Google Inc. +// Author: Sanjay Ghemawat + +// This code is a contribution of Google code, and the style used here is +// a compromise between the original Google code and the ICU coding guidelines. +// For example, data types are ICU-ified (size_t,int->int32_t), +// and API comments doxygen-ified, but function names and behavior are +// as in the original, if possible. +// Assertion-style error handling, not available in ICU, was changed to +// parameter "pinning" similar to UnicodeString. +// +// In addition, this is only a partial port of the original Google code, +// limited to what was needed so far. The (nearly) complete original code +// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib +// (see ICU ticket 6765, r25517). + +#ifndef __STRINGPIECE_H__ +#define __STRINGPIECE_H__ + +/** + * \file + * \brief C++ API: StringPiece: Read-only byte string wrapper class. + */ + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/std_string.h" + +// Arghh! I wish C++ literals were "string". + +U_NAMESPACE_BEGIN + +/** + * A string-like object that points to a sized piece of memory. + * + * We provide non-explicit singleton constructors so users can pass + * in a "const char*" or a "string" wherever a "StringPiece" is + * expected. + * + * Functions or methods may use const StringPiece& parameters to accept either + * a "const char*" or a "string" value that will be implicitly converted to + * a StringPiece. + * + * Systematic usage of StringPiece is encouraged as it will reduce unnecessary + * conversions from "const char*" to "string" and back again. + * + * @stable ICU 4.2 + */ +class U_COMMON_API StringPiece : public UMemory { + private: + const char* ptr_; + int32_t length_; + + public: + /** + * Default constructor, creates an empty StringPiece. + * @stable ICU 4.2 + */ + StringPiece() : ptr_(NULL), length_(0) { } + /** + * Constructs from a NUL-terminated const char * pointer. + * @param str a NUL-terminated const char * pointer + * @stable ICU 4.2 + */ + StringPiece(const char* str); +#if U_HAVE_STD_STRING + /** + * Constructs from a std::string. + * @stable ICU 4.2 + */ + StringPiece(const std::string& str) + : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } +#endif + /** + * Constructs from a const char * pointer and a specified length. + * @param offset a const char * pointer (need not be terminated) + * @param len the length of the string; must be non-negative + * @stable ICU 4.2 + */ + StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } + /** + * Substring of another StringPiece. + * @param x the other StringPiece + * @param pos start position in x; must be non-negative and <= x.length(). + * @stable ICU 4.2 + */ + StringPiece(const StringPiece& x, int32_t pos); + /** + * Substring of another StringPiece. + * @param x the other StringPiece + * @param pos start position in x; must be non-negative and <= x.length(). + * @param len length of the substring; + * must be non-negative and will be pinned to at most x.length() - pos. + * @stable ICU 4.2 + */ + StringPiece(const StringPiece& x, int32_t pos, int32_t len); + + /** + * Returns the string pointer. May be NULL if it is empty. + * + * data() may return a pointer to a buffer with embedded NULs, and the + * returned buffer may or may not be null terminated. Therefore it is + * typically a mistake to pass data() to a routine that expects a NUL + * terminated string. + * @return the string pointer + * @stable ICU 4.2 + */ + const char* data() const { return ptr_; } + /** + * Returns the string length. Same as length(). + * @return the string length + * @stable ICU 4.2 + */ + int32_t size() const { return length_; } + /** + * Returns the string length. Same as size(). + * @return the string length + * @stable ICU 4.2 + */ + int32_t length() const { return length_; } + /** + * Returns whether the string is empty. + * @return TRUE if the string is empty + * @stable ICU 4.2 + */ + UBool empty() const { return length_ == 0; } + + /** + * Sets to an empty string. + * @stable ICU 4.2 + */ + void clear() { ptr_ = NULL; length_ = 0; } + + /** + * Reset the stringpiece to refer to new data. + * @param xdata pointer the new string data. Need not be nul terminated. + * @param len the length of the new data + * @stable ICU 4.8 + */ + void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } + + /** + * Reset the stringpiece to refer to new data. + * @param str a pointer to a NUL-terminated string. + * @stable ICU 4.8 + */ + void set(const char* str); + + /** + * Removes the first n string units. + * @param n prefix length, must be non-negative and <=length() + * @stable ICU 4.2 + */ + void remove_prefix(int32_t n) { + if (n >= 0) { + if (n > length_) { + n = length_; + } + ptr_ += n; + length_ -= n; + } + } + + /** + * Removes the last n string units. + * @param n suffix length, must be non-negative and <=length() + * @stable ICU 4.2 + */ + void remove_suffix(int32_t n) { + if (n >= 0) { + if (n <= length_) { + length_ -= n; + } else { + length_ = 0; + } + } + } + + /** + * Maximum integer, used as a default value for substring methods. + * @stable ICU 4.2 + */ + static const int32_t npos; // = 0x7fffffff; + + /** + * Returns a substring of this StringPiece. + * @param pos start position; must be non-negative and <= length(). + * @param len length of the substring; + * must be non-negative and will be pinned to at most length() - pos. + * @return the substring StringPiece + * @stable ICU 4.2 + */ + StringPiece substr(int32_t pos, int32_t len = npos) const { + return StringPiece(*this, pos, len); + } +}; + +/** + * Global operator == for StringPiece + * @param x The first StringPiece to compare. + * @param y The second StringPiece to compare. + * @return TRUE if the string data is equal + * @stable ICU 4.8 + */ +U_EXPORT UBool U_EXPORT2 +operator==(const StringPiece& x, const StringPiece& y); + +/** + * Global operator != for StringPiece + * @param x The first StringPiece to compare. + * @param y The second StringPiece to compare. + * @return TRUE if the string data is not equal + * @stable ICU 4.8 + */ +inline UBool operator!=(const StringPiece& x, const StringPiece& y) { + return !(x == y); +} + +U_NAMESPACE_END + +#endif // __STRINGPIECE_H__ diff --git a/Source/WTF/icu/unicode/uchar.h b/Source/WTF/icu/unicode/uchar.h new file mode 100644 index 000000000..1a5b71b46 --- /dev/null +++ b/Source/WTF/icu/unicode/uchar.h @@ -0,0 +1,3331 @@ +/* +********************************************************************** +* Copyright (C) 1997-2013, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File UCHAR.H +* +* Modification History: +* +* Date Name Description +* 04/02/97 aliu Creation. +* 03/29/99 helena Updated for C APIs. +* 4/15/99 Madhu Updated for C Implementation and Javadoc +* 5/20/99 Madhu Added the function u_getVersion() +* 8/19/1999 srl Upgraded scripts to Unicode 3.0 +* 8/27/1999 schererm UCharDirection constants: U_... +* 11/11/1999 weiv added u_isalnum(), cleaned comments +* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). +****************************************************************************** +*/ + +#ifndef UCHAR_H +#define UCHAR_H + +#include "unicode/utypes.h" + +U_CDECL_BEGIN + +/*==========================================================================*/ +/* Unicode version number */ +/*==========================================================================*/ +/** + * Unicode version number, default for the current ICU version. + * The actual Unicode Character Database (UCD) data is stored in uprops.dat + * and may be generated from UCD files from a different Unicode version. + * Call u_getUnicodeVersion to get the actual Unicode version of the data. + * + * @see u_getUnicodeVersion + * @stable ICU 2.0 + */ +#define U_UNICODE_VERSION "6.3" + +/** + * \file + * \brief C API: Unicode Properties + * + * This C API provides low-level access to the Unicode Character Database. + * In addition to raw property values, some convenience functions calculate + * derived properties, for example for Java-style programming. + * + * Unicode assigns each code point (not just assigned character) values for + * many properties. + * Most of them are simple boolean flags, or constants from a small enumerated list. + * For some properties, values are strings or other relatively more complex types. + * + * For more information see + * "About the Unicode Character Database" (http://www.unicode.org/ucd/) + * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html). + * + * Many functions are designed to match java.lang.Character functions. + * See the individual function documentation, + * and see the JDK 1.4 java.lang.Character documentation + * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html + * + * There are also functions that provide easy migration from C/POSIX functions + * like isblank(). Their use is generally discouraged because the C/POSIX + * standards do not define their semantics beyond the ASCII range, which means + * that different implementations exhibit very different behavior. + * Instead, Unicode properties should be used directly. + * + * There are also only a few, broad C/POSIX character classes, and they tend + * to be used for conflicting purposes. For example, the "isalpha()" class + * is sometimes used to determine word boundaries, while a more sophisticated + * approach would at least distinguish initial letters from continuation + * characters (the latter including combining marks). + * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) + * Another example: There is no "istitle()" class for titlecase characters. + * + * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. + * ICU implements them according to the Standard Recommendations in + * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions + * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). + * + * API access for C/POSIX character classes is as follows: + * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC) + * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE) + * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE) + * - punct: u_ispunct(c) + * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER + * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT) + * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM) + * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE) + * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK) + * - cntrl: u_charType(c)==U_CONTROL_CHAR + * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH) + * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT) + * + * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match, + * the Standard Recommendations in UTS #18. Instead, they match Java + * functions according to their API documentation. + * + * \htmlonly + * The C/POSIX character classes are also available in UnicodeSet patterns, + * using patterns like [:graph:] or \p{graph}. + * \endhtmlonly + * + * Note: There are several ICU whitespace functions. + * Comparison: + * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; + * most of general categories "Z" (separators) + most whitespace ISO controls + * (including no-break spaces, but excluding IS1..IS4 and ZWSP) + * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces + * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) + * - u_isspace: Z + whitespace ISO controls (including no-break spaces) + * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP + */ + +/** + * Constants. + */ + +/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */ +#define UCHAR_MIN_VALUE 0 + +/** + * The highest Unicode code point value (scalar value) according to + * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up). + * For a single character, UChar32 is a simple type that can hold any code point value. + * + * @see UChar32 + * @stable ICU 2.0 + */ +#define UCHAR_MAX_VALUE 0x10ffff + +/** + * Get a single-bit bit set (a flag) from a bit number 0..31. + * @stable ICU 2.1 + */ +#define U_MASK(x) ((uint32_t)1<<(x)) + +/** + * Selection constants for Unicode properties. + * These constants are used in functions like u_hasBinaryProperty to select + * one of the Unicode properties. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ucd/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, + * then properties marked with "new in Unicode 3.2" are not or not fully available. + * Check u_getUnicodeVersion to be sure. + * + * @see u_hasBinaryProperty + * @see u_getIntPropertyValue + * @see u_getUnicodeVersion + * @stable ICU 2.1 + */ +typedef enum UProperty { + /* + * Note: UProperty constants are parsed by preparseucd.py. + * It matches lines like + * UCHAR_<Unicode property name>=<integer>, + */ + + /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that + debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, + rather than UCHAR_BINARY_START. Likewise for other *_START + identifiers. */ + + /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. + Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ + UCHAR_ALPHABETIC=0, + /** First constant for binary Unicode properties. @stable ICU 2.1 */ + UCHAR_BINARY_START=UCHAR_ALPHABETIC, + /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */ + UCHAR_ASCII_HEX_DIGIT=1, + /** Binary property Bidi_Control. + Format controls which have specific functions + in the Bidi Algorithm. @stable ICU 2.1 */ + UCHAR_BIDI_CONTROL=2, + /** Binary property Bidi_Mirrored. + Characters that may change display in RTL text. + Same as u_isMirrored. + See Bidi Algorithm, UTR 9. @stable ICU 2.1 */ + UCHAR_BIDI_MIRRORED=3, + /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */ + UCHAR_DASH=4, + /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). + Ignorable in most processing. + <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ + UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5, + /** Binary property Deprecated (new in Unicode 3.2). + The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ + UCHAR_DEPRECATED=6, + /** Binary property Diacritic. Characters that linguistically modify + the meaning of another character to which they apply. @stable ICU 2.1 */ + UCHAR_DIACRITIC=7, + /** Binary property Extender. + Extend the value or shape of a preceding alphabetic character, + e.g., length and iteration marks. @stable ICU 2.1 */ + UCHAR_EXTENDER=8, + /** Binary property Full_Composition_Exclusion. + CompositionExclusions.txt+Singleton Decompositions+ + Non-Starter Decompositions. @stable ICU 2.1 */ + UCHAR_FULL_COMPOSITION_EXCLUSION=9, + /** Binary property Grapheme_Base (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ + UCHAR_GRAPHEME_BASE=10, + /** Binary property Grapheme_Extend (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ + UCHAR_GRAPHEME_EXTEND=11, + /** Binary property Grapheme_Link (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ + UCHAR_GRAPHEME_LINK=12, + /** Binary property Hex_Digit. + Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ + UCHAR_HEX_DIGIT=13, + /** Binary property Hyphen. Dashes used to mark connections + between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */ + UCHAR_HYPHEN=14, + /** Binary property ID_Continue. + Characters that can continue an identifier. + DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." + ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ + UCHAR_ID_CONTINUE=15, + /** Binary property ID_Start. + Characters that can start an identifier. + Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ + UCHAR_ID_START=16, + /** Binary property Ideographic. + CJKV ideographs. @stable ICU 2.1 */ + UCHAR_IDEOGRAPHIC=17, + /** Binary property IDS_Binary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_IDS_BINARY_OPERATOR=18, + /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_IDS_TRINARY_OPERATOR=19, + /** Binary property Join_Control. + Format controls for cursive joining and ligation. @stable ICU 2.1 */ + UCHAR_JOIN_CONTROL=20, + /** Binary property Logical_Order_Exception (new in Unicode 3.2). + Characters that do not use logical order and + require special handling in most processing. @stable ICU 2.1 */ + UCHAR_LOGICAL_ORDER_EXCEPTION=21, + /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. + Ll+Other_Lowercase @stable ICU 2.1 */ + UCHAR_LOWERCASE=22, + /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */ + UCHAR_MATH=23, + /** Binary property Noncharacter_Code_Point. + Code points that are explicitly defined as illegal + for the encoding of characters. @stable ICU 2.1 */ + UCHAR_NONCHARACTER_CODE_POINT=24, + /** Binary property Quotation_Mark. @stable ICU 2.1 */ + UCHAR_QUOTATION_MARK=25, + /** Binary property Radical (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_RADICAL=26, + /** Binary property Soft_Dotted (new in Unicode 3.2). + Characters with a "soft dot", like i or j. + An accent placed on these characters causes + the dot to disappear. @stable ICU 2.1 */ + UCHAR_SOFT_DOTTED=27, + /** Binary property Terminal_Punctuation. + Punctuation characters that generally mark + the end of textual units. @stable ICU 2.1 */ + UCHAR_TERMINAL_PUNCTUATION=28, + /** Binary property Unified_Ideograph (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_UNIFIED_IDEOGRAPH=29, + /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. + Lu+Other_Uppercase @stable ICU 2.1 */ + UCHAR_UPPERCASE=30, + /** Binary property White_Space. + Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. + Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ + UCHAR_WHITE_SPACE=31, + /** Binary property XID_Continue. + ID_Continue modified to allow closure under + normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCHAR_XID_CONTINUE=32, + /** Binary property XID_Start. ID_Start modified to allow + closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCHAR_XID_START=33, + /** Binary property Case_Sensitive. Either the source of a case + mapping or _in_ the target of a case mapping. Not the same as + the general category Cased_Letter. @stable ICU 2.6 */ + UCHAR_CASE_SENSITIVE=34, + /** Binary property STerm (new in Unicode 4.0.1). + Sentence Terminal. Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + @stable ICU 3.0 */ + UCHAR_S_TERM=35, + /** Binary property Variation_Selector (new in Unicode 4.0.1). + Indicates all those characters that qualify as Variation Selectors. + For details on the behavior of these characters, + see StandardizedVariants.html and 15.6 Variation Selectors. + @stable ICU 3.0 */ + UCHAR_VARIATION_SELECTOR=36, + /** Binary property NFD_Inert. + ICU-specific property for characters that are inert under NFD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFD_INERT=37, + /** Binary property NFKD_Inert. + ICU-specific property for characters that are inert under NFKD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFKD_INERT=38, + /** Binary property NFC_Inert. + ICU-specific property for characters that are inert under NFC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFC_INERT=39, + /** Binary property NFKC_Inert. + ICU-specific property for characters that are inert under NFKC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFKC_INERT=40, + /** Binary Property Segment_Starter. + ICU-specific property for characters that are starters in terms of + Unicode normalization and combining character sequences. + They have ccc=0 and do not occur in non-initial position of the + canonical decomposition of any character + (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). + ICU uses this property for segmenting a string for generating a set of + canonically equivalent strings, e.g. for canonical closure while + processing collation tailoring rules. + @stable ICU 3.0 */ + UCHAR_SEGMENT_STARTER=41, + /** Binary property Pattern_Syntax (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCHAR_PATTERN_SYNTAX=42, + /** Binary property Pattern_White_Space (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCHAR_PATTERN_WHITE_SPACE=43, + /** Binary property alnum (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_ALNUM=44, + /** Binary property blank (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_BLANK=45, + /** Binary property graph (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_GRAPH=46, + /** Binary property print (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_PRINT=47, + /** Binary property xdigit (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_XDIGIT=48, + /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */ + UCHAR_CASED=49, + /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */ + UCHAR_CASE_IGNORABLE=50, + /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_LOWERCASED=51, + /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_UPPERCASED=52, + /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_TITLECASED=53, + /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_CASEFOLDED=54, + /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_CASEMAPPED=55, + /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, + /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */ + UCHAR_BINARY_LIMIT=57, + + /** Enumerated property Bidi_Class. + Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ + UCHAR_BIDI_CLASS=0x1000, + /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ + UCHAR_INT_START=UCHAR_BIDI_CLASS, + /** Enumerated property Block. + Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */ + UCHAR_BLOCK=0x1001, + /** Enumerated property Canonical_Combining_Class. + Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ + UCHAR_CANONICAL_COMBINING_CLASS=0x1002, + /** Enumerated property Decomposition_Type. + Returns UDecompositionType values. @stable ICU 2.2 */ + UCHAR_DECOMPOSITION_TYPE=0x1003, + /** Enumerated property East_Asian_Width. + See http://www.unicode.org/reports/tr11/ + Returns UEastAsianWidth values. @stable ICU 2.2 */ + UCHAR_EAST_ASIAN_WIDTH=0x1004, + /** Enumerated property General_Category. + Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ + UCHAR_GENERAL_CATEGORY=0x1005, + /** Enumerated property Joining_Group. + Returns UJoiningGroup values. @stable ICU 2.2 */ + UCHAR_JOINING_GROUP=0x1006, + /** Enumerated property Joining_Type. + Returns UJoiningType values. @stable ICU 2.2 */ + UCHAR_JOINING_TYPE=0x1007, + /** Enumerated property Line_Break. + Returns ULineBreak values. @stable ICU 2.2 */ + UCHAR_LINE_BREAK=0x1008, + /** Enumerated property Numeric_Type. + Returns UNumericType values. @stable ICU 2.2 */ + UCHAR_NUMERIC_TYPE=0x1009, + /** Enumerated property Script. + Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ + UCHAR_SCRIPT=0x100A, + /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. + Returns UHangulSyllableType values. @stable ICU 2.6 */ + UCHAR_HANGUL_SYLLABLE_TYPE=0x100B, + /** Enumerated property NFD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFD_QUICK_CHECK=0x100C, + /** Enumerated property NFKD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFKD_QUICK_CHECK=0x100D, + /** Enumerated property NFC_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFC_QUICK_CHECK=0x100E, + /** Enumerated property NFKC_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFKC_QUICK_CHECK=0x100F, + /** Enumerated property Lead_Canonical_Combining_Class. + ICU-specific property for the ccc of the first code point + of the decomposition, or lccc(c)=ccc(NFD(c)[0]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010, + /** Enumerated property Trail_Canonical_Combining_Class. + ICU-specific property for the ccc of the last code point + of the decomposition, or tccc(c)=ccc(NFD(c)[last]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011, + /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UGraphemeClusterBreak values. @stable ICU 3.4 */ + UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012, + /** Enumerated property Sentence_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns USentenceBreak values. @stable ICU 3.4 */ + UCHAR_SENTENCE_BREAK=0x1013, + /** Enumerated property Word_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UWordBreakValues values. @stable ICU 3.4 */ + UCHAR_WORD_BREAK=0x1014, + /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). + Used in UAX #9: Unicode Bidirectional Algorithm + (http://www.unicode.org/reports/tr9/) + Returns UBidiPairedBracketType values. @stable ICU 52 */ + UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, + /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ + UCHAR_INT_LIMIT=0x1016, + + /** Bitmask property General_Category_Mask. + This is the General_Category property returned as a bit mask. + When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), + returns bit masks for UCharCategory values where exactly one bit is set. + When used with u_getPropertyValueName() and u_getPropertyValueEnum(), + a multi-bit mask is used for sets of categories like "Letters". + Mask values should be cast to uint32_t. + @stable ICU 2.4 */ + UCHAR_GENERAL_CATEGORY_MASK=0x2000, + /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */ + UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, + /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */ + UCHAR_MASK_LIMIT=0x2001, + + /** Double property Numeric_Value. + Corresponds to u_getNumericValue. @stable ICU 2.4 */ + UCHAR_NUMERIC_VALUE=0x3000, + /** First constant for double Unicode properties. @stable ICU 2.4 */ + UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, + /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */ + UCHAR_DOUBLE_LIMIT=0x3001, + + /** String property Age. + Corresponds to u_charAge. @stable ICU 2.4 */ + UCHAR_AGE=0x4000, + /** First constant for string Unicode properties. @stable ICU 2.4 */ + UCHAR_STRING_START=UCHAR_AGE, + /** String property Bidi_Mirroring_Glyph. + Corresponds to u_charMirror. @stable ICU 2.4 */ + UCHAR_BIDI_MIRRORING_GLYPH=0x4001, + /** String property Case_Folding. + Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ + UCHAR_CASE_FOLDING=0x4002, +#ifndef U_HIDE_DEPRECATED_API + /** Deprecated string property ISO_Comment. + Corresponds to u_getISOComment. @deprecated ICU 49 */ + UCHAR_ISO_COMMENT=0x4003, +#endif /* U_HIDE_DEPRECATED_API */ + /** String property Lowercase_Mapping. + Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ + UCHAR_LOWERCASE_MAPPING=0x4004, + /** String property Name. + Corresponds to u_charName. @stable ICU 2.4 */ + UCHAR_NAME=0x4005, + /** String property Simple_Case_Folding. + Corresponds to u_foldCase. @stable ICU 2.4 */ + UCHAR_SIMPLE_CASE_FOLDING=0x4006, + /** String property Simple_Lowercase_Mapping. + Corresponds to u_tolower. @stable ICU 2.4 */ + UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007, + /** String property Simple_Titlecase_Mapping. + Corresponds to u_totitle. @stable ICU 2.4 */ + UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008, + /** String property Simple_Uppercase_Mapping. + Corresponds to u_toupper. @stable ICU 2.4 */ + UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009, + /** String property Titlecase_Mapping. + Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ + UCHAR_TITLECASE_MAPPING=0x400A, +#ifndef U_HIDE_DEPRECATED_API + /** String property Unicode_1_Name. + This property is of little practical value. + Beginning with ICU 49, ICU APIs return an empty string for this property. + Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */ + UCHAR_UNICODE_1_NAME=0x400B, +#endif /* U_HIDE_DEPRECATED_API */ + /** String property Uppercase_Mapping. + Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ + UCHAR_UPPERCASE_MAPPING=0x400C, + /** String property Bidi_Paired_Bracket (new in Unicode 6.3). + Corresponds to u_getBidiPairedBracket. @stable ICU 52 */ + UCHAR_BIDI_PAIRED_BRACKET=0x400D, + /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */ + UCHAR_STRING_LIMIT=0x400E, + + /** Miscellaneous property Script_Extensions (new in Unicode 6.0). + Some characters are commonly used in multiple scripts. + For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. + @stable ICU 4.6 */ + UCHAR_SCRIPT_EXTENSIONS=0x7000, + /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ + UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, + /** One more than the last constant for Unicode properties with unusual value types. + * @stable ICU 4.6 */ + UCHAR_OTHER_PROPERTY_LIMIT=0x7001, + /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ + UCHAR_INVALID_CODE = -1 +} UProperty; + +/** + * Data for enumerated Unicode general category types. + * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html . + * @stable ICU 2.0 + */ +typedef enum UCharCategory +{ + /* + * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** <Unicode 2-letter General_Category value> comment... * / + * U_<[A-Z_]+> = <integer>, + */ + + /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */ + U_UNASSIGNED = 0, + /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */ + U_GENERAL_OTHER_TYPES = 0, + /** Lu @stable ICU 2.0 */ + U_UPPERCASE_LETTER = 1, + /** Ll @stable ICU 2.0 */ + U_LOWERCASE_LETTER = 2, + /** Lt @stable ICU 2.0 */ + U_TITLECASE_LETTER = 3, + /** Lm @stable ICU 2.0 */ + U_MODIFIER_LETTER = 4, + /** Lo @stable ICU 2.0 */ + U_OTHER_LETTER = 5, + /** Mn @stable ICU 2.0 */ + U_NON_SPACING_MARK = 6, + /** Me @stable ICU 2.0 */ + U_ENCLOSING_MARK = 7, + /** Mc @stable ICU 2.0 */ + U_COMBINING_SPACING_MARK = 8, + /** Nd @stable ICU 2.0 */ + U_DECIMAL_DIGIT_NUMBER = 9, + /** Nl @stable ICU 2.0 */ + U_LETTER_NUMBER = 10, + /** No @stable ICU 2.0 */ + U_OTHER_NUMBER = 11, + /** Zs @stable ICU 2.0 */ + U_SPACE_SEPARATOR = 12, + /** Zl @stable ICU 2.0 */ + U_LINE_SEPARATOR = 13, + /** Zp @stable ICU 2.0 */ + U_PARAGRAPH_SEPARATOR = 14, + /** Cc @stable ICU 2.0 */ + U_CONTROL_CHAR = 15, + /** Cf @stable ICU 2.0 */ + U_FORMAT_CHAR = 16, + /** Co @stable ICU 2.0 */ + U_PRIVATE_USE_CHAR = 17, + /** Cs @stable ICU 2.0 */ + U_SURROGATE = 18, + /** Pd @stable ICU 2.0 */ + U_DASH_PUNCTUATION = 19, + /** Ps @stable ICU 2.0 */ + U_START_PUNCTUATION = 20, + /** Pe @stable ICU 2.0 */ + U_END_PUNCTUATION = 21, + /** Pc @stable ICU 2.0 */ + U_CONNECTOR_PUNCTUATION = 22, + /** Po @stable ICU 2.0 */ + U_OTHER_PUNCTUATION = 23, + /** Sm @stable ICU 2.0 */ + U_MATH_SYMBOL = 24, + /** Sc @stable ICU 2.0 */ + U_CURRENCY_SYMBOL = 25, + /** Sk @stable ICU 2.0 */ + U_MODIFIER_SYMBOL = 26, + /** So @stable ICU 2.0 */ + U_OTHER_SYMBOL = 27, + /** Pi @stable ICU 2.0 */ + U_INITIAL_PUNCTUATION = 28, + /** Pf @stable ICU 2.0 */ + U_FINAL_PUNCTUATION = 29, + /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */ + U_CHAR_CATEGORY_COUNT +} UCharCategory; + +/** + * U_GC_XX_MASK constants are bit flags corresponding to Unicode + * general category values. + * For each category, the nth bit is set if the numeric value of the + * corresponding UCharCategory constant is n. + * + * There are also some U_GC_Y_MASK constants for groups of general categories + * like L for all letter categories. + * + * @see u_charType + * @see U_GET_GC_MASK + * @see UCharCategory + * @stable ICU 2.1 + */ +#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_CS_MASK U_MASK(U_SURROGATE) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) + +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) +/** Mask constant for a UCharCategory. @stable ICU 2.1 */ +#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) + + +/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ +#define U_GC_L_MASK \ + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) + +/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ +#define U_GC_LC_MASK \ + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) + +/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */ +#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) + +/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */ +#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) + +/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */ +#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) + +/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */ +#define U_GC_C_MASK \ + (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) + +/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */ +#define U_GC_P_MASK \ + (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ + U_GC_PI_MASK|U_GC_PF_MASK) + +/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */ +#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) + +/** + * This specifies the language directional property of a character set. + * @stable ICU 2.0 + */ +typedef enum UCharDirection { + /* + * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** <Unicode 1..3-letter Bidi_Class value> comment... * / + * U_<[A-Z_]+> = <integer>, + */ + + /** L @stable ICU 2.0 */ + U_LEFT_TO_RIGHT = 0, + /** R @stable ICU 2.0 */ + U_RIGHT_TO_LEFT = 1, + /** EN @stable ICU 2.0 */ + U_EUROPEAN_NUMBER = 2, + /** ES @stable ICU 2.0 */ + U_EUROPEAN_NUMBER_SEPARATOR = 3, + /** ET @stable ICU 2.0 */ + U_EUROPEAN_NUMBER_TERMINATOR = 4, + /** AN @stable ICU 2.0 */ + U_ARABIC_NUMBER = 5, + /** CS @stable ICU 2.0 */ + U_COMMON_NUMBER_SEPARATOR = 6, + /** B @stable ICU 2.0 */ + U_BLOCK_SEPARATOR = 7, + /** S @stable ICU 2.0 */ + U_SEGMENT_SEPARATOR = 8, + /** WS @stable ICU 2.0 */ + U_WHITE_SPACE_NEUTRAL = 9, + /** ON @stable ICU 2.0 */ + U_OTHER_NEUTRAL = 10, + /** LRE @stable ICU 2.0 */ + U_LEFT_TO_RIGHT_EMBEDDING = 11, + /** LRO @stable ICU 2.0 */ + U_LEFT_TO_RIGHT_OVERRIDE = 12, + /** AL @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_ARABIC = 13, + /** RLE @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_EMBEDDING = 14, + /** RLO @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_OVERRIDE = 15, + /** PDF @stable ICU 2.0 */ + U_POP_DIRECTIONAL_FORMAT = 16, + /** NSM @stable ICU 2.0 */ + U_DIR_NON_SPACING_MARK = 17, + /** BN @stable ICU 2.0 */ + U_BOUNDARY_NEUTRAL = 18, + /** FSI @stable ICU 52 */ + U_FIRST_STRONG_ISOLATE = 19, + /** LRI @stable ICU 52 */ + U_LEFT_TO_RIGHT_ISOLATE = 20, + /** RLI @stable ICU 52 */ + U_RIGHT_TO_LEFT_ISOLATE = 21, + /** PDI @stable ICU 52 */ + U_POP_DIRECTIONAL_ISOLATE = 22, + /** @stable ICU 2.0 */ + U_CHAR_DIRECTION_COUNT +} UCharDirection; + +/** + * Bidi Paired Bracket Type constants. + * + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @stable ICU 52 + */ +typedef enum UBidiPairedBracketType { + /* + * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. + * It matches lines like + * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name> + */ + + /** Not a paired bracket. @stable ICU 52 */ + U_BPT_NONE, + /** Open paired bracket. @stable ICU 52 */ + U_BPT_OPEN, + /** Close paired bracket. @stable ICU 52 */ + U_BPT_CLOSE, + /** @stable ICU 52 */ + U_BPT_COUNT /* 3 */ +} UBidiPairedBracketType; + +/** + * Constants for Unicode blocks, see the Unicode Data file Blocks.txt + * @stable ICU 2.0 + */ +enum UBlockCode { + /* + * Note: UBlockCode constants are parsed by preparseucd.py. + * It matches lines like + * UBLOCK_<Unicode Block value name> = <integer>, + */ + + /** New No_Block value in Unicode 4. @stable ICU 2.6 */ + UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ + + /** @stable ICU 2.0 */ + UBLOCK_BASIC_LATIN = 1, /*[0000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ + + /** @stable ICU 2.0 */ + UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ + + /** + * Unicode 3.2 renames this block to "Greek and Coptic". + * @stable ICU 2.0 + */ + UBLOCK_GREEK =8, /*[0370]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CYRILLIC =9, /*[0400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARMENIAN =10, /*[0530]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HEBREW =11, /*[0590]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC =12, /*[0600]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SYRIAC =13, /*[0700]*/ + + /** @stable ICU 2.0 */ + UBLOCK_THAANA =14, /*[0780]*/ + + /** @stable ICU 2.0 */ + UBLOCK_DEVANAGARI =15, /*[0900]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BENGALI =16, /*[0980]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GURMUKHI =17, /*[0A00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GUJARATI =18, /*[0A80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ORIYA =19, /*[0B00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TAMIL =20, /*[0B80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TELUGU =21, /*[0C00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANNADA =22, /*[0C80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MALAYALAM =23, /*[0D00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SINHALA =24, /*[0D80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_THAI =25, /*[0E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LAO =26, /*[0E80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TIBETAN =27, /*[0F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MYANMAR =28, /*[1000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GEORGIAN =29, /*[10A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_JAMO =30, /*[1100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ETHIOPIC =31, /*[1200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CHEROKEE =32, /*[13A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_OGHAM =34, /*[1680]*/ + + /** @stable ICU 2.0 */ + UBLOCK_RUNIC =35, /*[16A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KHMER =36, /*[1780]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MONGOLIAN =37, /*[1800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ + + /** + * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". + * @stable ICU 2.0 + */ + UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_NUMBER_FORMS =45, /*[2150]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARROWS =46, /*[2190]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOX_DRAWING =52, /*[2500]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ + + /** @stable ICU 2.0 */ + UBLOCK_DINGBATS =56, /*[2700]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIRAGANA =62, /*[3040]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KATAKANA =63, /*[30A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOPOMOFO =64, /*[3100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANBUN =66, /*[3190]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_YI_SYLLABLES =72, /*[A000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_YI_RADICALS =73, /*[A490]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ + + /** + * Same as UBLOCK_PRIVATE_USE. + * Until Unicode 3.1.1, the corresponding block name was "Private Use", + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and + * adds separate blocks for the supplementary PUAs. + * + * @stable ICU 2.0 + */ + UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/ + /** + * Same as UBLOCK_PRIVATE_USE_AREA. + * Until Unicode 3.1.1, the corresponding block name was "Private Use", + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and + * adds separate blocks for the supplementary PUAs. + * + * @stable ICU 2.0 + */ + UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SPECIALS =86, /*[FFF0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ + + /* New blocks in Unicode 3.1 */ + + /** @stable ICU 2.0 */ + UBLOCK_OLD_ITALIC = 88, /*[10300]*/ + /** @stable ICU 2.0 */ + UBLOCK_GOTHIC = 89, /*[10330]*/ + /** @stable ICU 2.0 */ + UBLOCK_DESERET = 90, /*[10400]*/ + /** @stable ICU 2.0 */ + UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ + /** @stable ICU 2.0 */ + UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ + /** @stable ICU 2.0 */ + UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/ + /** @stable ICU 2.0 */ + UBLOCK_TAGS = 96, /*[E0000]*/ + + /* New blocks in Unicode 3.2 */ + + /** @stable ICU 3.0 */ + UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/ + /** + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". + * @stable ICU 2.2 + */ + UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, + /** @stable ICU 2.2 */ + UBLOCK_TAGALOG = 98, /*[1700]*/ + /** @stable ICU 2.2 */ + UBLOCK_HANUNOO = 99, /*[1720]*/ + /** @stable ICU 2.2 */ + UBLOCK_BUHID = 100, /*[1740]*/ + /** @stable ICU 2.2 */ + UBLOCK_TAGBANWA = 101, /*[1760]*/ + /** @stable ICU 2.2 */ + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ + /** @stable ICU 2.2 */ + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ + /** @stable ICU 2.2 */ + UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ + /** @stable ICU 2.2 */ + UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ + + /* New blocks in Unicode 4 */ + + /** @stable ICU 2.6 */ + UBLOCK_LIMBU = 111, /*[1900]*/ + /** @stable ICU 2.6 */ + UBLOCK_TAI_LE = 112, /*[1950]*/ + /** @stable ICU 2.6 */ + UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ + /** @stable ICU 2.6 */ + UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ + /** @stable ICU 2.6 */ + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ + /** @stable ICU 2.6 */ + UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ + /** @stable ICU 2.6 */ + UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ + /** @stable ICU 2.6 */ + UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ + /** @stable ICU 2.6 */ + UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ + /** @stable ICU 2.6 */ + UBLOCK_UGARITIC = 120, /*[10380]*/ + /** @stable ICU 2.6 */ + UBLOCK_SHAVIAN = 121, /*[10450]*/ + /** @stable ICU 2.6 */ + UBLOCK_OSMANYA = 122, /*[10480]*/ + /** @stable ICU 2.6 */ + UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ + /** @stable ICU 2.6 */ + UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ + /** @stable ICU 2.6 */ + UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ + + /* New blocks in Unicode 4.1 */ + + /** @stable ICU 3.4 */ + UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/ + /** @stable ICU 3.4 */ + UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/ + /** @stable ICU 3.4 */ + UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/ + /** @stable ICU 3.4 */ + UBLOCK_BUGINESE = 129, /*[1A00]*/ + /** @stable ICU 3.4 */ + UBLOCK_CJK_STROKES = 130, /*[31C0]*/ + /** @stable ICU 3.4 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/ + /** @stable ICU 3.4 */ + UBLOCK_COPTIC = 132, /*[2C80]*/ + /** @stable ICU 3.4 */ + UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/ + /** @stable ICU 3.4 */ + UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/ + /** @stable ICU 3.4 */ + UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/ + /** @stable ICU 3.4 */ + UBLOCK_GLAGOLITIC = 136, /*[2C00]*/ + /** @stable ICU 3.4 */ + UBLOCK_KHAROSHTHI = 137, /*[10A00]*/ + /** @stable ICU 3.4 */ + UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/ + /** @stable ICU 3.4 */ + UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/ + /** @stable ICU 3.4 */ + UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/ + /** @stable ICU 3.4 */ + UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/ + /** @stable ICU 3.4 */ + UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/ + /** @stable ICU 3.4 */ + UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/ + /** @stable ICU 3.4 */ + UBLOCK_TIFINAGH = 144, /*[2D30]*/ + /** @stable ICU 3.4 */ + UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/ + + /* New blocks in Unicode 5.0 */ + + /** @stable ICU 3.6 */ + UBLOCK_NKO = 146, /*[07C0]*/ + /** @stable ICU 3.6 */ + UBLOCK_BALINESE = 147, /*[1B00]*/ + /** @stable ICU 3.6 */ + UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/ + /** @stable ICU 3.6 */ + UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/ + /** @stable ICU 3.6 */ + UBLOCK_PHAGS_PA = 150, /*[A840]*/ + /** @stable ICU 3.6 */ + UBLOCK_PHOENICIAN = 151, /*[10900]*/ + /** @stable ICU 3.6 */ + UBLOCK_CUNEIFORM = 152, /*[12000]*/ + /** @stable ICU 3.6 */ + UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/ + /** @stable ICU 3.6 */ + UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/ + + /* New blocks in Unicode 5.1 */ + + /** @stable ICU 4.0 */ + UBLOCK_SUNDANESE = 155, /*[1B80]*/ + /** @stable ICU 4.0 */ + UBLOCK_LEPCHA = 156, /*[1C00]*/ + /** @stable ICU 4.0 */ + UBLOCK_OL_CHIKI = 157, /*[1C50]*/ + /** @stable ICU 4.0 */ + UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/ + /** @stable ICU 4.0 */ + UBLOCK_VAI = 159, /*[A500]*/ + /** @stable ICU 4.0 */ + UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/ + /** @stable ICU 4.0 */ + UBLOCK_SAURASHTRA = 161, /*[A880]*/ + /** @stable ICU 4.0 */ + UBLOCK_KAYAH_LI = 162, /*[A900]*/ + /** @stable ICU 4.0 */ + UBLOCK_REJANG = 163, /*[A930]*/ + /** @stable ICU 4.0 */ + UBLOCK_CHAM = 164, /*[AA00]*/ + /** @stable ICU 4.0 */ + UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/ + /** @stable ICU 4.0 */ + UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/ + /** @stable ICU 4.0 */ + UBLOCK_LYCIAN = 167, /*[10280]*/ + /** @stable ICU 4.0 */ + UBLOCK_CARIAN = 168, /*[102A0]*/ + /** @stable ICU 4.0 */ + UBLOCK_LYDIAN = 169, /*[10920]*/ + /** @stable ICU 4.0 */ + UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/ + /** @stable ICU 4.0 */ + UBLOCK_DOMINO_TILES = 171, /*[1F030]*/ + + /* New blocks in Unicode 5.2 */ + + /** @stable ICU 4.4 */ + UBLOCK_SAMARITAN = 172, /*[0800]*/ + /** @stable ICU 4.4 */ + UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/ + /** @stable ICU 4.4 */ + UBLOCK_TAI_THAM = 174, /*[1A20]*/ + /** @stable ICU 4.4 */ + UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/ + /** @stable ICU 4.4 */ + UBLOCK_LISU = 176, /*[A4D0]*/ + /** @stable ICU 4.4 */ + UBLOCK_BAMUM = 177, /*[A6A0]*/ + /** @stable ICU 4.4 */ + UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/ + /** @stable ICU 4.4 */ + UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/ + /** @stable ICU 4.4 */ + UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/ + /** @stable ICU 4.4 */ + UBLOCK_JAVANESE = 181, /*[A980]*/ + /** @stable ICU 4.4 */ + UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/ + /** @stable ICU 4.4 */ + UBLOCK_TAI_VIET = 183, /*[AA80]*/ + /** @stable ICU 4.4 */ + UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/ + /** @stable ICU 4.4 */ + UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/ + /** @stable ICU 4.4 */ + UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/ + /** @stable ICU 4.4 */ + UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/ + /** @stable ICU 4.4 */ + UBLOCK_AVESTAN = 188, /*[10B00]*/ + /** @stable ICU 4.4 */ + UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/ + /** @stable ICU 4.4 */ + UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/ + /** @stable ICU 4.4 */ + UBLOCK_OLD_TURKIC = 191, /*[10C00]*/ + /** @stable ICU 4.4 */ + UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/ + /** @stable ICU 4.4 */ + UBLOCK_KAITHI = 193, /*[11080]*/ + /** @stable ICU 4.4 */ + UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/ + /** @stable ICU 4.4 */ + UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/ + /** @stable ICU 4.4 */ + UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/ + /** @stable ICU 4.4 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/ + + /* New blocks in Unicode 6.0 */ + + /** @stable ICU 4.6 */ + UBLOCK_MANDAIC = 198, /*[0840]*/ + /** @stable ICU 4.6 */ + UBLOCK_BATAK = 199, /*[1BC0]*/ + /** @stable ICU 4.6 */ + UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/ + /** @stable ICU 4.6 */ + UBLOCK_BRAHMI = 201, /*[11000]*/ + /** @stable ICU 4.6 */ + UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/ + /** @stable ICU 4.6 */ + UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/ + /** @stable ICU 4.6 */ + UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/ + /** @stable ICU 4.6 */ + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/ + /** @stable ICU 4.6 */ + UBLOCK_EMOTICONS = 206, /*[1F600]*/ + /** @stable ICU 4.6 */ + UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/ + /** @stable ICU 4.6 */ + UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/ + /** @stable ICU 4.6 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/ + + /* New blocks in Unicode 6.1 */ + + /** @stable ICU 49 */ + UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/ + /** @stable ICU 49 */ + UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/ + /** @stable ICU 49 */ + UBLOCK_CHAKMA = 212, /*[11100]*/ + /** @stable ICU 49 */ + UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/ + /** @stable ICU 49 */ + UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/ + /** @stable ICU 49 */ + UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/ + /** @stable ICU 49 */ + UBLOCK_MIAO = 216, /*[16F00]*/ + /** @stable ICU 49 */ + UBLOCK_SHARADA = 217, /*[11180]*/ + /** @stable ICU 49 */ + UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/ + /** @stable ICU 49 */ + UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/ + /** @stable ICU 49 */ + UBLOCK_TAKRI = 220, /*[11680]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COUNT = 221, + + /** @stable ICU 2.0 */ + UBLOCK_INVALID_CODE=-1 +}; + +/** @stable ICU 2.0 */ +typedef enum UBlockCode UBlockCode; + +/** + * East Asian Width constants. + * + * @see UCHAR_EAST_ASIAN_WIDTH + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +typedef enum UEastAsianWidth { + /* + * Note: UEastAsianWidth constants are parsed by preparseucd.py. + * It matches lines like + * U_EA_<Unicode East_Asian_Width value name> + */ + + U_EA_NEUTRAL, /*[N]*/ + U_EA_AMBIGUOUS, /*[A]*/ + U_EA_HALFWIDTH, /*[H]*/ + U_EA_FULLWIDTH, /*[F]*/ + U_EA_NARROW, /*[Na]*/ + U_EA_WIDE, /*[W]*/ + U_EA_COUNT +} UEastAsianWidth; + +/** + * Selector constants for u_charName(). + * u_charName() returns the "modern" name of a + * Unicode character; or the name that was defined in + * Unicode version 1.0, before the Unicode standard merged + * with ISO-10646; or an "extended" name that gives each + * Unicode code point a unique name. + * + * @see u_charName + * @stable ICU 2.0 + */ +typedef enum UCharNameChoice { + /** Unicode character name (Name property). @stable ICU 2.0 */ + U_UNICODE_CHAR_NAME, +#ifndef U_HIDE_DEPRECATED_API + /** + * The Unicode_1_Name property value which is of little practical value. + * Beginning with ICU 49, ICU APIs return an empty string for this name choice. + * @deprecated ICU 49 + */ + U_UNICODE_10_CHAR_NAME, +#endif /* U_HIDE_DEPRECATED_API */ + /** Standard or synthetic character name. @stable ICU 2.0 */ + U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2, + /** Corrected name from NameAliases.txt. @stable ICU 4.4 */ + U_CHAR_NAME_ALIAS, + /** @stable ICU 2.0 */ + U_CHAR_NAME_CHOICE_COUNT +} UCharNameChoice; + +/** + * Selector constants for u_getPropertyName() and + * u_getPropertyValueName(). These selectors are used to choose which + * name is returned for a given property or value. All properties and + * values have a long name. Most have a short name, but some do not. + * Unicode allows for additional names, beyond the long and short + * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where + * i=1, 2,... + * + * @see u_getPropertyName() + * @see u_getPropertyValueName() + * @stable ICU 2.4 + */ +typedef enum UPropertyNameChoice { + U_SHORT_PROPERTY_NAME, + U_LONG_PROPERTY_NAME, + U_PROPERTY_NAME_CHOICE_COUNT +} UPropertyNameChoice; + +/** + * Decomposition Type constants. + * + * @see UCHAR_DECOMPOSITION_TYPE + * @stable ICU 2.2 + */ +typedef enum UDecompositionType { + /* + * Note: UDecompositionType constants are parsed by preparseucd.py. + * It matches lines like + * U_DT_<Unicode Decomposition_Type value name> + */ + + U_DT_NONE, /*[none]*/ + U_DT_CANONICAL, /*[can]*/ + U_DT_COMPAT, /*[com]*/ + U_DT_CIRCLE, /*[enc]*/ + U_DT_FINAL, /*[fin]*/ + U_DT_FONT, /*[font]*/ + U_DT_FRACTION, /*[fra]*/ + U_DT_INITIAL, /*[init]*/ + U_DT_ISOLATED, /*[iso]*/ + U_DT_MEDIAL, /*[med]*/ + U_DT_NARROW, /*[nar]*/ + U_DT_NOBREAK, /*[nb]*/ + U_DT_SMALL, /*[sml]*/ + U_DT_SQUARE, /*[sqr]*/ + U_DT_SUB, /*[sub]*/ + U_DT_SUPER, /*[sup]*/ + U_DT_VERTICAL, /*[vert]*/ + U_DT_WIDE, /*[wide]*/ + U_DT_COUNT /* 18 */ +} UDecompositionType; + +/** + * Joining Type constants. + * + * @see UCHAR_JOINING_TYPE + * @stable ICU 2.2 + */ +typedef enum UJoiningType { + /* + * Note: UJoiningType constants are parsed by preparseucd.py. + * It matches lines like + * U_JT_<Unicode Joining_Type value name> + */ + + U_JT_NON_JOINING, /*[U]*/ + U_JT_JOIN_CAUSING, /*[C]*/ + U_JT_DUAL_JOINING, /*[D]*/ + U_JT_LEFT_JOINING, /*[L]*/ + U_JT_RIGHT_JOINING, /*[R]*/ + U_JT_TRANSPARENT, /*[T]*/ + U_JT_COUNT /* 6 */ +} UJoiningType; + +/** + * Joining Group constants. + * + * @see UCHAR_JOINING_GROUP + * @stable ICU 2.2 + */ +typedef enum UJoiningGroup { + /* + * Note: UJoiningGroup constants are parsed by preparseucd.py. + * It matches lines like + * U_JG_<Unicode Joining_Group value name> + */ + + U_JG_NO_JOINING_GROUP, + U_JG_AIN, + U_JG_ALAPH, + U_JG_ALEF, + U_JG_BEH, + U_JG_BETH, + U_JG_DAL, + U_JG_DALATH_RISH, + U_JG_E, + U_JG_FEH, + U_JG_FINAL_SEMKATH, + U_JG_GAF, + U_JG_GAMAL, + U_JG_HAH, + U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */ + U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL, + U_JG_HE, + U_JG_HEH, + U_JG_HEH_GOAL, + U_JG_HETH, + U_JG_KAF, + U_JG_KAPH, + U_JG_KNOTTED_HEH, + U_JG_LAM, + U_JG_LAMADH, + U_JG_MEEM, + U_JG_MIM, + U_JG_NOON, + U_JG_NUN, + U_JG_PE, + U_JG_QAF, + U_JG_QAPH, + U_JG_REH, + U_JG_REVERSED_PE, + U_JG_SAD, + U_JG_SADHE, + U_JG_SEEN, + U_JG_SEMKATH, + U_JG_SHIN, + U_JG_SWASH_KAF, + U_JG_SYRIAC_WAW, + U_JG_TAH, + U_JG_TAW, + U_JG_TEH_MARBUTA, + U_JG_TETH, + U_JG_WAW, + U_JG_YEH, + U_JG_YEH_BARREE, + U_JG_YEH_WITH_TAIL, + U_JG_YUDH, + U_JG_YUDH_HE, + U_JG_ZAIN, + U_JG_FE, /**< @stable ICU 2.6 */ + U_JG_KHAPH, /**< @stable ICU 2.6 */ + U_JG_ZHAIN, /**< @stable ICU 2.6 */ + U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */ + U_JG_FARSI_YEH, /**< @stable ICU 4.4 */ + U_JG_NYA, /**< @stable ICU 4.4 */ + U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */ + U_JG_COUNT +} UJoiningGroup; + +/** + * Grapheme Cluster Break constants. + * + * @see UCHAR_GRAPHEME_CLUSTER_BREAK + * @stable ICU 3.4 + */ +typedef enum UGraphemeClusterBreak { + /* + * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py. + * It matches lines like + * U_GCB_<Unicode Grapheme_Cluster_Break value name> + */ + + U_GCB_OTHER = 0, /*[XX]*/ + U_GCB_CONTROL = 1, /*[CN]*/ + U_GCB_CR = 2, /*[CR]*/ + U_GCB_EXTEND = 3, /*[EX]*/ + U_GCB_L = 4, /*[L]*/ + U_GCB_LF = 5, /*[LF]*/ + U_GCB_LV = 6, /*[LV]*/ + U_GCB_LVT = 7, /*[LVT]*/ + U_GCB_T = 8, /*[T]*/ + U_GCB_V = 9, /*[V]*/ + U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + U_GCB_PREPEND = 11, /*[PP]*/ + U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + U_GCB_COUNT = 13 +} UGraphemeClusterBreak; + +/** + * Word Break constants. + * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.) + * + * @see UCHAR_WORD_BREAK + * @stable ICU 3.4 + */ +typedef enum UWordBreakValues { + /* + * Note: UWordBreakValues constants are parsed by preparseucd.py. + * It matches lines like + * U_WB_<Unicode Word_Break value name> + */ + + U_WB_OTHER = 0, /*[XX]*/ + U_WB_ALETTER = 1, /*[LE]*/ + U_WB_FORMAT = 2, /*[FO]*/ + U_WB_KATAKANA = 3, /*[KA]*/ + U_WB_MIDLETTER = 4, /*[ML]*/ + U_WB_MIDNUM = 5, /*[MN]*/ + U_WB_NUMERIC = 6, /*[NU]*/ + U_WB_EXTENDNUMLET = 7, /*[EX]*/ + U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + U_WB_EXTEND = 9, /*[Extend]*/ + U_WB_LF = 10, /*[LF]*/ + U_WB_MIDNUMLET =11, /*[MB]*/ + U_WB_NEWLINE =12, /*[NL]*/ + U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ + U_WB_SINGLE_QUOTE = 15, /*[SQ]*/ + U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/ + U_WB_COUNT = 17 +} UWordBreakValues; + +/** + * Sentence Break constants. + * + * @see UCHAR_SENTENCE_BREAK + * @stable ICU 3.4 + */ +typedef enum USentenceBreak { + /* + * Note: USentenceBreak constants are parsed by preparseucd.py. + * It matches lines like + * U_SB_<Unicode Sentence_Break value name> + */ + + U_SB_OTHER = 0, /*[XX]*/ + U_SB_ATERM = 1, /*[AT]*/ + U_SB_CLOSE = 2, /*[CL]*/ + U_SB_FORMAT = 3, /*[FO]*/ + U_SB_LOWER = 4, /*[LO]*/ + U_SB_NUMERIC = 5, /*[NU]*/ + U_SB_OLETTER = 6, /*[LE]*/ + U_SB_SEP = 7, /*[SE]*/ + U_SB_SP = 8, /*[SP]*/ + U_SB_STERM = 9, /*[ST]*/ + U_SB_UPPER = 10, /*[UP]*/ + U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + U_SB_EXTEND = 12, /*[EX]*/ + U_SB_LF = 13, /*[LF]*/ + U_SB_SCONTINUE = 14, /*[SC]*/ + U_SB_COUNT = 15 +} USentenceBreak; + +/** + * Line Break constants. + * + * @see UCHAR_LINE_BREAK + * @stable ICU 2.2 + */ +typedef enum ULineBreak { + /* + * Note: ULineBreak constants are parsed by preparseucd.py. + * It matches lines like + * U_LB_<Unicode Line_Break value name> + */ + + U_LB_UNKNOWN = 0, /*[XX]*/ + U_LB_AMBIGUOUS = 1, /*[AI]*/ + U_LB_ALPHABETIC = 2, /*[AL]*/ + U_LB_BREAK_BOTH = 3, /*[B2]*/ + U_LB_BREAK_AFTER = 4, /*[BA]*/ + U_LB_BREAK_BEFORE = 5, /*[BB]*/ + U_LB_MANDATORY_BREAK = 6, /*[BK]*/ + U_LB_CONTINGENT_BREAK = 7, /*[CB]*/ + U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/ + U_LB_COMBINING_MARK = 9, /*[CM]*/ + U_LB_CARRIAGE_RETURN = 10, /*[CR]*/ + U_LB_EXCLAMATION = 11, /*[EX]*/ + U_LB_GLUE = 12, /*[GL]*/ + U_LB_HYPHEN = 13, /*[HY]*/ + U_LB_IDEOGRAPHIC = 14, /*[ID]*/ + /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */ + U_LB_INSEPARABLE = 15, /*[IN]*/ + U_LB_INSEPERABLE = U_LB_INSEPARABLE, + U_LB_INFIX_NUMERIC = 16, /*[IS]*/ + U_LB_LINE_FEED = 17, /*[LF]*/ + U_LB_NONSTARTER = 18, /*[NS]*/ + U_LB_NUMERIC = 19, /*[NU]*/ + U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/ + U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/ + U_LB_PREFIX_NUMERIC = 22, /*[PR]*/ + U_LB_QUOTATION = 23, /*[QU]*/ + U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/ + U_LB_SURROGATE = 25, /*[SG]*/ + U_LB_SPACE = 26, /*[SP]*/ + U_LB_BREAK_SYMBOLS = 27, /*[SY]*/ + U_LB_ZWSPACE = 28, /*[ZW]*/ + U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ + U_LB_WORD_JOINER = 30, /*[WJ]*/ + U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */ + U_LB_H3 = 32, /*[H3]*/ + U_LB_JL = 33, /*[JL]*/ + U_LB_JT = 34, /*[JT]*/ + U_LB_JV = 35, /*[JV]*/ + U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ + U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ + U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ + U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + U_LB_COUNT = 40 +} ULineBreak; + +/** + * Numeric Type constants. + * + * @see UCHAR_NUMERIC_TYPE + * @stable ICU 2.2 + */ +typedef enum UNumericType { + /* + * Note: UNumericType constants are parsed by preparseucd.py. + * It matches lines like + * U_NT_<Unicode Numeric_Type value name> + */ + + U_NT_NONE, /*[None]*/ + U_NT_DECIMAL, /*[de]*/ + U_NT_DIGIT, /*[di]*/ + U_NT_NUMERIC, /*[nu]*/ + U_NT_COUNT +} UNumericType; + +/** + * Hangul Syllable Type constants. + * + * @see UCHAR_HANGUL_SYLLABLE_TYPE + * @stable ICU 2.6 + */ +typedef enum UHangulSyllableType { + /* + * Note: UHangulSyllableType constants are parsed by preparseucd.py. + * It matches lines like + * U_HST_<Unicode Hangul_Syllable_Type value name> + */ + + U_HST_NOT_APPLICABLE, /*[NA]*/ + U_HST_LEADING_JAMO, /*[L]*/ + U_HST_VOWEL_JAMO, /*[V]*/ + U_HST_TRAILING_JAMO, /*[T]*/ + U_HST_LV_SYLLABLE, /*[LV]*/ + U_HST_LVT_SYLLABLE, /*[LVT]*/ + U_HST_COUNT +} UHangulSyllableType; + +/** + * Check a binary Unicode property for a code point. + * + * Unicode, especially in version 3.2, defines many more properties than the + * original set in UnicodeData.txt. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ucd/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Important: If ICU is built with UCD files from Unicode versions below 3.2, + * then properties marked with "new in Unicode 3.2" are not or not fully available. + * + * @param c Code point to test. + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT. + * @return TRUE or FALSE according to the binary Unicode property value for c. + * Also FALSE if 'which' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code point. + * + * @see UProperty + * @see u_getIntPropertyValue + * @see u_getUnicodeVersion + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_hasBinaryProperty(UChar32 c, UProperty which); + +/** + * Check if a code point has the Alphabetic Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). + * This is different from u_isalpha! + * @param c Code point to test + * @return true if the code point has the Alphabetic Unicode property, false otherwise + * + * @see UCHAR_ALPHABETIC + * @see u_isalpha + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUAlphabetic(UChar32 c); + +/** + * Check if a code point has the Lowercase Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). + * This is different from u_islower! + * @param c Code point to test + * @return true if the code point has the Lowercase Unicode property, false otherwise + * + * @see UCHAR_LOWERCASE + * @see u_islower + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isULowercase(UChar32 c); + +/** + * Check if a code point has the Uppercase Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). + * This is different from u_isupper! + * @param c Code point to test + * @return true if the code point has the Uppercase Unicode property, false otherwise + * + * @see UCHAR_UPPERCASE + * @see u_isupper + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUUppercase(UChar32 c); + +/** + * Check if a code point has the White_Space Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). + * This is different from both u_isspace and u_isWhitespace! + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c Code point to test + * @return true if the code point has the White_Space Unicode property, false otherwise. + * + * @see UCHAR_WHITE_SPACE + * @see u_isWhitespace + * @see u_isspace + * @see u_isJavaSpaceChar + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUWhiteSpace(UChar32 c); + +/** + * Get the property value for an enumerated or integer Unicode property for a code point. + * Also returns binary and mask property values. + * + * Unicode, especially in version 3.2, defines many more properties than the + * original set in UnicodeData.txt. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Sample usage: + * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); + * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC); + * + * @param c Code point to test. + * @param which UProperty selector constant, identifies which property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * @return Numeric value that is directly the property value or, + * for enumerated properties, corresponds to the numeric value of the enumerated + * constant of the respective property value enumeration type + * (cast to enum type if necessary). + * Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties. + * Returns a bit-mask for mask properties. + * Returns 0 if 'which' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code point. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getIntPropertyMinValue + * @see u_getIntPropertyMaxValue + * @see u_getUnicodeVersion + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyValue(UChar32 c, UProperty which); + +/** + * Get the minimum value for an enumerated/integer/binary Unicode property. + * Can be used together with u_getIntPropertyMaxValue + * to allocate arrays of UnicodeSet or similar. + * + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. + * @return Minimum value returned by u_getIntPropertyValue for a Unicode property. + * 0 if the property selector is out of range. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getUnicodeVersion + * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyMinValue(UProperty which); + +/** + * Get the maximum value for an enumerated/integer/binary Unicode property. + * Can be used together with u_getIntPropertyMinValue + * to allocate arrays of UnicodeSet or similar. + * + * Examples for min/max values (for Unicode 3.2): + * + * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL) + * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA) + * - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE) + * + * For undefined UProperty constant values, min/max values will be 0/-1. + * + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. + * @return Maximum value returned by u_getIntPropertyValue for a Unicode property. + * <=0 if the property selector is out of range. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getUnicodeVersion + * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyMaxValue(UProperty which); + +/** + * Get the numeric value for a Unicode code point as defined in the + * Unicode Character Database. + * + * A "double" return type is necessary because + * some numeric values are fractions, negative, or too large for int32_t. + * + * For characters without any numeric values in the Unicode Character Database, + * this function will return U_NO_NUMERIC_VALUE. + * Note: This is different from the Unicode Standard which specifies NaN as the default value. + * (NaN is not available on all platforms.) + * + * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() + * also supports negative values, large values, and fractions, + * while Java's getNumericValue() returns values 10..35 for ASCII letters. + * + * @param c Code point to get the numeric value for. + * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined. + * + * @see U_NO_NUMERIC_VALUE + * @stable ICU 2.2 + */ +U_STABLE double U_EXPORT2 +u_getNumericValue(UChar32 c); + +/** + * Special value that is returned by u_getNumericValue when + * no numeric value is defined for a code point. + * + * @see u_getNumericValue + * @stable ICU 2.2 + */ +#define U_NO_NUMERIC_VALUE ((double)-123456789.) + +/** + * Determines whether the specified code point has the general category "Ll" + * (lowercase letter). + * + * Same as java.lang.Character.isLowerCase(). + * + * This misses some characters that are also lowercase but + * have a different general category value. + * In order to include those, use UCHAR_LOWERCASE. + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an Ll lowercase letter + * + * @see UCHAR_LOWERCASE + * @see u_isupper + * @see u_istitle + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_islower(UChar32 c); + +/** + * Determines whether the specified code point has the general category "Lu" + * (uppercase letter). + * + * Same as java.lang.Character.isUpperCase(). + * + * This misses some characters that are also uppercase but + * have a different general category value. + * In order to include those, use UCHAR_UPPERCASE. + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an Lu uppercase letter + * + * @see UCHAR_UPPERCASE + * @see u_islower + * @see u_istitle + * @see u_tolower + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isupper(UChar32 c); + +/** + * Determines whether the specified code point is a titlecase letter. + * True for general category "Lt" (titlecase letter). + * + * Same as java.lang.Character.isTitleCase(). + * + * @param c the code point to be tested + * @return TRUE if the code point is an Lt titlecase letter + * + * @see u_isupper + * @see u_islower + * @see u_totitle + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_istitle(UChar32 c); + +/** + * Determines whether the specified code point is a digit character according to Java. + * True for characters with general category "Nd" (decimal digit numbers). + * Beginning with Unicode 4, this is the same as + * testing for the Numeric_Type of Decimal. + * + * Same as java.lang.Character.isDigit(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a digit character according to Character.isDigit() + * + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isdigit(UChar32 c); + +/** + * Determines whether the specified code point is a letter character. + * True for general categories "L" (letters). + * + * Same as java.lang.Character.isLetter(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a letter character + * + * @see u_isdigit + * @see u_isalnum + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isalpha(UChar32 c); + +/** + * Determines whether the specified code point is an alphanumeric character + * (letter or digit) according to Java. + * True for characters with general categories + * "L" (letters) and "Nd" (decimal digit numbers). + * + * Same as java.lang.Character.isLetterOrDigit(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit() + * + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isalnum(UChar32 c); + +/** + * Determines whether the specified code point is a hexadecimal digit. + * This is equivalent to u_digit(c, 16)>=0. + * True for characters with general category "Nd" (decimal digit numbers) + * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. + * (That is, for letters with code points + * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) + * + * In order to narrow the definition of hexadecimal digits to only ASCII + * characters, use (c<=0x7f && u_isxdigit(c)). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a hexadecimal digit + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isxdigit(UChar32 c); + +/** + * Determines whether the specified code point is a punctuation character. + * True for characters with general categories "P" (punctuation). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a punctuation character + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_ispunct(UChar32 c); + +/** + * Determines whether the specified code point is a "graphic" character + * (printable, excluding spaces). + * TRUE for all characters except those with general categories + * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), + * "Cn" (unassigned), and "Z" (separators). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a "graphic" character + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isgraph(UChar32 c); + +/** + * Determines whether the specified code point is a "blank" or "horizontal space", + * a character that visibly separates words on a line. + * The following are equivalent definitions: + * + * TRUE for Unicode White_Space characters except for "vertical space controls" + * where "vertical space controls" are the following characters: + * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) + * + * same as + * + * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators) + * except Zero Width Space (ZWSP, U+200B). + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a "blank" + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isblank(UChar32 c); + +/** + * Determines whether the specified code point is "defined", + * which usually means that it is assigned a character. + * True for general categories other than "Cn" (other, not assigned), + * i.e., true for all code points mentioned in UnicodeData.txt. + * + * Note that non-character code points (e.g., U+FDD0) are not "defined" + * (they are Cn), but surrogate code points are "defined" (Cs). + * + * Same as java.lang.Character.isDefined(). + * + * @param c the code point to be tested + * @return TRUE if the code point is assigned a character + * + * @see u_isdigit + * @see u_isalpha + * @see u_isalnum + * @see u_isupper + * @see u_islower + * @see u_istitle + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isdefined(UChar32 c); + +/** + * Determines if the specified character is a space character or not. + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the character to be tested + * @return true if the character is a space character; false otherwise. + * + * @see u_isJavaSpaceChar + * @see u_isWhitespace + * @see u_isUWhiteSpace + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isspace(UChar32 c); + +/** + * Determine if the specified code point is a space character according to Java. + * True for characters with general categories "Z" (separators), + * which does not include control codes (e.g., TAB or Line Feed). + * + * Same as java.lang.Character.isSpaceChar(). + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c the code point to be tested + * @return TRUE if the code point is a space character according to Character.isSpaceChar() + * + * @see u_isspace + * @see u_isWhitespace + * @see u_isUWhiteSpace + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaSpaceChar(UChar32 c); + +/** + * Determines if the specified code point is a whitespace character according to Java/ICU. + * A character is considered to be a Java whitespace character if and only + * if it satisfies one of the following criteria: + * + * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not + * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). + * - It is U+0009 HORIZONTAL TABULATION. + * - It is U+000A LINE FEED. + * - It is U+000B VERTICAL TABULATION. + * - It is U+000C FORM FEED. + * - It is U+000D CARRIAGE RETURN. + * - It is U+001C FILE SEPARATOR. + * - It is U+001D GROUP SEPARATOR. + * - It is U+001E RECORD SEPARATOR. + * - It is U+001F UNIT SEPARATOR. + * + * This API tries to sync with the semantics of Java's + * java.lang.Character.isWhitespace(), but it may not return + * the exact same results because of the Unicode version + * difference. + * + * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) + * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. + * See http://www.unicode.org/versions/Unicode4.0.1/ + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c the code point to be tested + * @return TRUE if the code point is a whitespace character according to Java/ICU + * + * @see u_isspace + * @see u_isJavaSpaceChar + * @see u_isUWhiteSpace + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isWhitespace(UChar32 c); + +/** + * Determines whether the specified code point is a control character + * (as defined by this function). + * A control character is one of the following: + * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) + * - U_CONTROL_CHAR (Cc) + * - U_FORMAT_CHAR (Cf) + * - U_LINE_SEPARATOR (Zl) + * - U_PARAGRAPH_SEPARATOR (Zp) + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a control character + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_isprint + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_iscntrl(UChar32 c); + +/** + * Determines whether the specified code point is an ISO control code. + * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). + * + * Same as java.lang.Character.isISOControl(). + * + * @param c the code point to be tested + * @return TRUE if the code point is an ISO control code + * + * @see u_iscntrl + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isISOControl(UChar32 c); + +/** + * Determines whether the specified code point is a printable character. + * True for general categories <em>other</em> than "C" (controls). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a printable character + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_iscntrl + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isprint(UChar32 c); + +/** + * Determines whether the specified code point is a base character. + * True for general categories "L" (letters), "N" (numbers), + * "Mc" (spacing combining marks), and "Me" (enclosing marks). + * + * Note that this is different from the Unicode definition in + * chapter 3.5, conformance clause D13, + * which defines base characters to be all characters (not Cn) + * that do not graphically combine with preceding characters (M) + * and that are neither control (Cc) or format (Cf) characters. + * + * @param c the code point to be tested + * @return TRUE if the code point is a base character according to this function + * + * @see u_isalpha + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isbase(UChar32 c); + +/** + * Returns the bidirectional category value for the code point, + * which is used in the Unicode bidirectional algorithm + * (UAX #9 http://www.unicode.org/reports/tr9/). + * Note that some <em>unassigned</em> code points have bidi values + * of R or AL because they are in blocks that are reserved + * for Right-To-Left scripts. + * + * Same as java.lang.Character.getDirectionality() + * + * @param c the code point to be tested + * @return the bidirectional category (UCharDirection) value + * + * @see UCharDirection + * @stable ICU 2.0 + */ +U_STABLE UCharDirection U_EXPORT2 +u_charDirection(UChar32 c); + +/** + * Determines whether the code point has the Bidi_Mirrored property. + * This property is set for characters that are commonly used in + * Right-To-Left contexts and need to be displayed with a "mirrored" + * glyph. + * + * Same as java.lang.Character.isMirrored(). + * Same as UCHAR_BIDI_MIRRORED + * + * @param c the code point to be tested + * @return TRUE if the character has the Bidi_Mirrored property + * + * @see UCHAR_BIDI_MIRRORED + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isMirrored(UChar32 c); + +/** + * Maps the specified character to a "mirror-image" character. + * For characters with the Bidi_Mirrored property, implementations + * sometimes need a "poor man's" mapping to another Unicode + * character (code point) such that the default glyph may serve + * as the mirror-image of the default glyph of the specified + * character. This is useful for text conversion to and from + * codepages with visual order, and for displays without glyph + * selection capabilities. + * + * @param c the code point to be mapped + * @return another Unicode code point that may serve as a mirror-image + * substitute, or c itself if there is no such mapping or c + * does not have the Bidi_Mirrored property + * + * @see UCHAR_BIDI_MIRRORED + * @see u_isMirrored + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_charMirror(UChar32 c); + +/** + * Maps the specified character to its paired bracket character. + * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). + * Otherwise c itself is returned. + * See http://www.unicode.org/reports/tr9/ + * + * @param c the code point to be mapped + * @return the paired bracket code point, + * or c itself if there is no such mapping + * (Bidi_Paired_Bracket_Type=None) + * + * @see UCHAR_BIDI_PAIRED_BRACKET + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @see u_charMirror + * @stable ICU 52 + */ +U_STABLE UChar32 U_EXPORT2 +u_getBidiPairedBracket(UChar32 c); + +/** + * Returns the general category value for the code point. + * + * Same as java.lang.Character.getType(). + * + * @param c the code point to be tested + * @return the general category (UCharCategory) value + * + * @see UCharCategory + * @stable ICU 2.0 + */ +U_STABLE int8_t U_EXPORT2 +u_charType(UChar32 c); + +/** + * Get a single-bit bit set for the general category of a character. + * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. + * Same as U_MASK(u_charType(c)). + * + * @param c the code point to be tested + * @return a single-bit mask corresponding to the general category (UCharCategory) value + * + * @see u_charType + * @see UCharCategory + * @see U_GC_CN_MASK + * @stable ICU 2.1 + */ +#define U_GET_GC_MASK(c) U_MASK(u_charType(c)) + +/** + * Callback from u_enumCharTypes(), is called for each contiguous range + * of code points c (where start<=c<limit) + * with the same Unicode general category ("character type"). + * + * The callback function can stop the enumeration by returning FALSE. + * + * @param context an opaque pointer, as passed into utrie_enum() + * @param start the first code point in a contiguous range with value + * @param limit one past the last code point in a contiguous range with value + * @param type the general category for all code points in [start..limit[ + * @return FALSE to stop the enumeration + * + * @stable ICU 2.1 + * @see UCharCategory + * @see u_enumCharTypes + */ +typedef UBool U_CALLCONV +UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); + +/** + * Enumerate efficiently all code points with their Unicode general categories. + * + * This is useful for building data structures (e.g., UnicodeSet's), + * for enumerating all assigned code points (type!=U_UNASSIGNED), etc. + * + * For each contiguous range of code points with a given general category ("character type"), + * the UCharEnumTypeRange function is called. + * Adjacent ranges have different types. + * The Unicode Standard guarantees that the numeric value of the type is 0..31. + * + * @param enumRange a pointer to a function that is called for each contiguous range + * of code points with the same general category + * @param context an opaque pointer that is passed on to the callback function + * + * @stable ICU 2.1 + * @see UCharCategory + * @see UCharEnumTypeRange + */ +U_STABLE void U_EXPORT2 +u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context); + +#if !UCONFIG_NO_NORMALIZATION + +/** + * Returns the combining class of the code point as specified in UnicodeData.txt. + * + * @param c the code point of the character + * @return the combining class of the character + * @stable ICU 2.0 + */ +U_STABLE uint8_t U_EXPORT2 +u_getCombiningClass(UChar32 c); + +#endif + +/** + * Returns the decimal digit value of a decimal digit character. + * Such characters have the general category "Nd" (decimal digit numbers) + * and a Numeric_Type of Decimal. + * + * Unlike ICU releases before 2.6, no digit values are returned for any + * Han characters because Han number characters are often used with a special + * Chinese-style number format (with characters for powers of 10 in between) + * instead of in decimal-positional notation. + * Unicode 4 explicitly assigns Han number characters the Numeric_Type + * Numeric instead of Decimal. + * See Jitterbug 1483 for more details. + * + * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() + * for complete numeric Unicode properties. + * + * @param c the code point for which to get the decimal digit value + * @return the decimal digit value of c, + * or -1 if c is not a decimal digit character + * + * @see u_getNumericValue + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_charDigitValue(UChar32 c); + +/** + * Returns the Unicode allocation block that contains the character. + * + * @param c the code point to be tested + * @return the block value (UBlockCode) for c + * + * @see UBlockCode + * @stable ICU 2.0 + */ +U_STABLE UBlockCode U_EXPORT2 +ublock_getCode(UChar32 c); + +/** + * Retrieve the name of a Unicode character. + * Depending on <code>nameChoice</code>, the character name written + * into the buffer is the "modern" name or the name that was defined + * in Unicode version 1.0. + * The name contains only "invariant" characters + * like A-Z, 0-9, space, and '-'. + * Unicode 1.0 names are only retrieved if they are different from the modern + * names and if the data file contains the data for them. gennames may or may + * not be called with a command line option to include 1.0 names in unames.dat. + * + * @param code The character (code point) for which to get the name. + * It must be <code>0<=code<=0x10ffff</code>. + * @param nameChoice Selector for which name to get. + * @param buffer Destination address for copying the name. + * The name will always be zero-terminated. + * If there is no name, then the buffer will be set to the empty string. + * @param bufferLength <code>==sizeof(buffer)</code> + * @param pErrorCode Pointer to a UErrorCode variable; + * check for <code>U_SUCCESS()</code> after <code>u_charName()</code> + * returns. + * @return The length of the name, or 0 if there is no name for this character. + * If the bufferLength is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @see UCharNameChoice + * @see u_charFromName + * @see u_enumCharNames + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_charName(UChar32 code, UCharNameChoice nameChoice, + char *buffer, int32_t bufferLength, + UErrorCode *pErrorCode); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Returns an empty string. + * Used to return the ISO 10646 comment for a character. + * The Unicode ISO_Comment property is deprecated and has no values. + * + * @param c The character (code point) for which to get the ISO comment. + * It must be <code>0<=c<=0x10ffff</code>. + * @param dest Destination address for copying the comment. + * The comment will be zero-terminated if possible. + * If there is no comment, then the buffer will be set to the empty string. + * @param destCapacity <code>==sizeof(dest)</code> + * @param pErrorCode Pointer to a UErrorCode variable; + * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code> + * returns. + * @return 0 + * + * @deprecated ICU 49 + */ +U_STABLE int32_t U_EXPORT2 +u_getISOComment(UChar32 c, + char *dest, int32_t destCapacity, + UErrorCode *pErrorCode); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Find a Unicode character by its name and return its code point value. + * The name is matched exactly and completely. + * If the name does not correspond to a code point, <i>pErrorCode</i> + * is set to <code>U_INVALID_CHAR_FOUND</code>. + * A Unicode 1.0 name is matched only if it differs from the modern name. + * Unicode names are all uppercase. Extended names are lowercase followed + * by an uppercase hexadecimal number, and within angle brackets. + * + * @param nameChoice Selector for which name to match. + * @param name The name to match. + * @param pErrorCode Pointer to a UErrorCode variable + * @return The Unicode value of the code point with the given name, + * or an undefined value if there is no such code point. + * + * @see UCharNameChoice + * @see u_charName + * @see u_enumCharNames + * @stable ICU 1.7 + */ +U_STABLE UChar32 U_EXPORT2 +u_charFromName(UCharNameChoice nameChoice, + const char *name, + UErrorCode *pErrorCode); + +/** + * Type of a callback function for u_enumCharNames() that gets called + * for each Unicode character with the code point value and + * the character name. + * If such a function returns FALSE, then the enumeration is stopped. + * + * @param context The context pointer that was passed to u_enumCharNames(). + * @param code The Unicode code point for the character with this name. + * @param nameChoice Selector for which kind of names is enumerated. + * @param name The character's name, zero-terminated. + * @param length The length of the name. + * @return TRUE if the enumeration should continue, FALSE to stop it. + * + * @see UCharNameChoice + * @see u_enumCharNames + * @stable ICU 1.7 + */ +typedef UBool U_CALLCONV UEnumCharNamesFn(void *context, + UChar32 code, + UCharNameChoice nameChoice, + const char *name, + int32_t length); + +/** + * Enumerate all assigned Unicode characters between the start and limit + * code points (start inclusive, limit exclusive) and call a function + * for each, passing the code point value and the character name. + * For Unicode 1.0 names, only those are enumerated that differ from the + * modern names. + * + * @param start The first code point in the enumeration range. + * @param limit One more than the last code point in the enumeration range + * (the first one after the range). + * @param fn The function that is to be called for each character name. + * @param context An arbitrary pointer that is passed to the function. + * @param nameChoice Selector for which kind of names to enumerate. + * @param pErrorCode Pointer to a UErrorCode variable + * + * @see UCharNameChoice + * @see UEnumCharNamesFn + * @see u_charName + * @see u_charFromName + * @stable ICU 1.7 + */ +U_STABLE void U_EXPORT2 +u_enumCharNames(UChar32 start, UChar32 limit, + UEnumCharNamesFn *fn, + void *context, + UCharNameChoice nameChoice, + UErrorCode *pErrorCode); + +/** + * Return the Unicode name for a given property, as given in the + * Unicode database file PropertyAliases.txt. + * + * In addition, this function maps the property + * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / + * "General_Category_Mask". These names are not in + * PropertyAliases.txt. + * + * @param property UProperty selector other than UCHAR_INVALID_CODE. + * If out of range, NULL is returned. + * + * @param nameChoice selector for which name to get. If out of range, + * NULL is returned. All properties have a long name. Most + * have a short name, but some do not. Unicode allows for + * additional names; if present these will be returned by + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... + * + * @return a pointer to the name, or NULL if either the + * property or the nameChoice is out of range. If a given + * nameChoice returns NULL, then all larger values of + * nameChoice will return NULL, with one exception: if NULL is + * returned for U_SHORT_PROPERTY_NAME, then + * U_LONG_PROPERTY_NAME (and higher) may still return a + * non-NULL value. The returned pointer is valid until + * u_cleanup() is called. + * + * @see UProperty + * @see UPropertyNameChoice + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +u_getPropertyName(UProperty property, + UPropertyNameChoice nameChoice); + +/** + * Return the UProperty enum for a given property name, as specified + * in the Unicode database file PropertyAliases.txt. Short, long, and + * any other variants are recognized. + * + * In addition, this function maps the synthetic names "gcm" / + * "General_Category_Mask" to the property + * UCHAR_GENERAL_CATEGORY_MASK. These names are not in + * PropertyAliases.txt. + * + * @param alias the property name to be matched. The name is compared + * using "loose matching" as described in PropertyAliases.txt. + * + * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name + * does not match any property. + * + * @see UProperty + * @stable ICU 2.4 + */ +U_STABLE UProperty U_EXPORT2 +u_getPropertyEnum(const char* alias); + +/** + * Return the Unicode name for a given property value, as given in the + * Unicode database file PropertyValueAliases.txt. + * + * Note: Some of the names in PropertyValueAliases.txt can only be + * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * If out of range, NULL is returned. + * + * @param value selector for a value for the given property. If out + * of range, NULL is returned. In general, valid values range + * from 0 up to some maximum. There are a few exceptions: + * (1.) UCHAR_BLOCK values begin at the non-zero value + * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS + * values are not contiguous and range from 0..240. (3.) + * UCHAR_GENERAL_CATEGORY_MASK values are not values of + * UCharCategory, but rather mask values produced by + * U_GET_GC_MASK(). This allows grouped categories such as + * [:L:] to be represented. Mask values range + * non-contiguously from 1..U_GC_P_MASK. + * + * @param nameChoice selector for which name to get. If out of range, + * NULL is returned. All values have a long name. Most have + * a short name, but some do not. Unicode allows for + * additional names; if present these will be returned by + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... + + * @return a pointer to the name, or NULL if either the + * property or the nameChoice is out of range. If a given + * nameChoice returns NULL, then all larger values of + * nameChoice will return NULL, with one exception: if NULL is + * returned for U_SHORT_PROPERTY_NAME, then + * U_LONG_PROPERTY_NAME (and higher) may still return a + * non-NULL value. The returned pointer is valid until + * u_cleanup() is called. + * + * @see UProperty + * @see UPropertyNameChoice + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +u_getPropertyValueName(UProperty property, + int32_t value, + UPropertyNameChoice nameChoice); + +/** + * Return the property value integer for a given value name, as + * specified in the Unicode database file PropertyValueAliases.txt. + * Short, long, and any other variants are recognized. + * + * Note: Some of the names in PropertyValueAliases.txt will only be + * recognized with UCHAR_GENERAL_CATEGORY_MASK, not + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * If out of range, UCHAR_INVALID_CODE is returned. + * + * @param alias the value name to be matched. The name is compared + * using "loose matching" as described in + * PropertyValueAliases.txt. + * + * @return a value integer or UCHAR_INVALID_CODE if the given name + * does not match any value of the given property, or if the + * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values + * are not values of UCharCategory, but rather mask values + * produced by U_GET_GC_MASK(). This allows grouped + * categories such as [:L:] to be represented. + * + * @see UProperty + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +u_getPropertyValueEnum(UProperty property, + const char* alias); + +/** + * Determines if the specified character is permissible as the + * first character in an identifier according to Unicode + * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers). + * True for characters with general categories "L" (letters) and "Nl" (letter numbers). + * + * Same as java.lang.Character.isUnicodeIdentifierStart(). + * Same as UCHAR_ID_START + * + * @param c the code point to be tested + * @return TRUE if the code point may start an identifier + * + * @see UCHAR_ID_START + * @see u_isalpha + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDStart(UChar32 c); + +/** + * Determines if the specified character is permissible + * in an identifier according to Java. + * True for characters with general categories "L" (letters), + * "Nl" (letter numbers), "Nd" (decimal digits), + * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and + * u_isIDIgnorable(c). + * + * Same as java.lang.Character.isUnicodeIdentifierPart(). + * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE) + * except that Unicode recommends to ignore Cf which is less than + * u_isIDIgnorable(c). + * + * @param c the code point to be tested + * @return TRUE if the code point may occur in an identifier according to Java + * + * @see UCHAR_ID_CONTINUE + * @see u_isIDStart + * @see u_isIDIgnorable + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDPart(UChar32 c); + +/** + * Determines if the specified character should be regarded + * as an ignorable character in an identifier, + * according to Java. + * True for characters with general category "Cf" (format controls) as well as + * non-whitespace ISO controls + * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F). + * + * Same as java.lang.Character.isIdentifierIgnorable(). + * + * Note that Unicode just recommends to ignore Cf (format controls). + * + * @param c the code point to be tested + * @return TRUE if the code point is ignorable in identifiers according to Java + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_isIDStart + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDIgnorable(UChar32 c); + +/** + * Determines if the specified character is permissible as the + * first character in a Java identifier. + * In addition to u_isIDStart(c), true for characters with + * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). + * + * Same as java.lang.Character.isJavaIdentifierStart(). + * + * @param c the code point to be tested + * @return TRUE if the code point may start a Java identifier + * + * @see u_isJavaIDPart + * @see u_isalpha + * @see u_isIDStart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaIDStart(UChar32 c); + +/** + * Determines if the specified character is permissible + * in a Java identifier. + * In addition to u_isIDPart(c), true for characters with + * general category "Sc" (currency symbols). + * + * Same as java.lang.Character.isJavaIdentifierPart(). + * + * @param c the code point to be tested + * @return TRUE if the code point may occur in a Java identifier + * + * @see u_isIDIgnorable + * @see u_isJavaIDStart + * @see u_isalpha + * @see u_isdigit + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaIDPart(UChar32 c); + +/** + * The given character is mapped to its lowercase equivalent according to + * UnicodeData.txt; if the character has no lowercase equivalent, the character + * itself is returned. + * + * Same as java.lang.Character.toLowerCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Lowercase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_tolower(UChar32 c); + +/** + * The given character is mapped to its uppercase equivalent according to UnicodeData.txt; + * if the character has no uppercase equivalent, the character itself is + * returned. + * + * Same as java.lang.Character.toUpperCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Uppercase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_toupper(UChar32 c); + +/** + * The given character is mapped to its titlecase equivalent + * according to UnicodeData.txt; + * if none is defined, the character itself is returned. + * + * Same as java.lang.Character.toTitleCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Titlecase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_totitle(UChar32 c); + +/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */ +#define U_FOLD_CASE_DEFAULT 0 + +/** + * Option value for case folding: + * + * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I + * and dotless i appropriately for Turkic languages (tr, az). + * + * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that + * are to be included for default mappings and + * excluded for the Turkic-specific mappings. + * + * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that + * are to be excluded for default mappings and + * included for the Turkic-specific mappings. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 + +/** + * The given character is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; + * if the character has no case folding equivalent, the character + * itself is returned. + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @return the Simple_Case_Folding of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_foldCase(UChar32 c, uint32_t options); + +/** + * Returns the decimal digit value of the code point in the + * specified radix. + * + * If the radix is not in the range <code>2<=radix<=36</code> or if the + * value of <code>c</code> is not a valid digit in the specified + * radix, <code>-1</code> is returned. A character is a valid digit + * if at least one of the following is true: + * <ul> + * <li>The character has a decimal digit value. + * Such characters have the general category "Nd" (decimal digit numbers) + * and a Numeric_Type of Decimal. + * In this case the value is the character's decimal digit value.</li> + * <li>The character is one of the uppercase Latin letters + * <code>'A'</code> through <code>'Z'</code>. + * In this case the value is <code>c-'A'+10</code>.</li> + * <li>The character is one of the lowercase Latin letters + * <code>'a'</code> through <code>'z'</code>. + * In this case the value is <code>ch-'a'+10</code>.</li> + * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A) + * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) + * are recognized.</li> + * </ul> + * + * Same as java.lang.Character.digit(). + * + * @param ch the code point to be tested. + * @param radix the radix. + * @return the numeric value represented by the character in the + * specified radix, + * or -1 if there is no value or if the value exceeds the radix. + * + * @see UCHAR_NUMERIC_TYPE + * @see u_forDigit + * @see u_charDigitValue + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_digit(UChar32 ch, int8_t radix); + +/** + * Determines the character representation for a specific digit in + * the specified radix. If the value of <code>radix</code> is not a + * valid radix, or the value of <code>digit</code> is not a valid + * digit in the specified radix, the null character + * (<code>U+0000</code>) is returned. + * <p> + * The <code>radix</code> argument is valid if it is greater than or + * equal to 2 and less than or equal to 36. + * The <code>digit</code> argument is valid if + * <code>0 <= digit < radix</code>. + * <p> + * If the digit is less than 10, then + * <code>'0' + digit</code> is returned. Otherwise, the value + * <code>'a' + digit - 10</code> is returned. + * + * Same as java.lang.Character.forDigit(). + * + * @param digit the number to convert to a character. + * @param radix the radix. + * @return the <code>char</code> representation of the specified digit + * in the specified radix. + * + * @see u_digit + * @see u_charDigitValue + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_forDigit(int32_t digit, int8_t radix); + +/** + * Get the "age" of the code point. + * The "age" is the Unicode version when the code point was first + * designated (as a non-character or for Private Use) + * or assigned a character. + * This can be useful to avoid emitting code points to receiving + * processes that do not accept newer characters. + * The data is from the UCD file DerivedAge.txt. + * + * @param c The code point. + * @param versionArray The Unicode version number array, to be filled in. + * + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +u_charAge(UChar32 c, UVersionInfo versionArray); + +/** + * Gets the Unicode version information. + * The version array is filled in with the version information + * for the Unicode standard that is currently used by ICU. + * For example, Unicode version 3.1.1 is represented as an array with + * the values { 3, 1, 1, 0 }. + * + * @param versionArray an output array that will be filled in with + * the Unicode version number + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_getUnicodeVersion(UVersionInfo versionArray); + +#if !UCONFIG_NO_NORMALIZATION +/** + * Get the FC_NFKC_Closure property string for a character. + * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" + * or for "FNC": http://www.unicode.org/reports/tr15/ + * + * @param c The character (code point) for which to get the FC_NFKC_Closure string. + * It must be <code>0<=c<=0x10ffff</code>. + * @param dest Destination address for copying the string. + * The string will be zero-terminated if possible. + * If there is no FC_NFKC_Closure string, + * then the buffer will be set to the empty string. + * @param destCapacity <code>==sizeof(dest)</code> + * @param pErrorCode Pointer to a UErrorCode variable. + * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. + * If the destCapacity is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); + +#endif + + +U_CDECL_END + +#endif /*_UCHAR*/ +/*eof*/ diff --git a/Source/WTF/icu/unicode/ucnv.h b/Source/WTF/icu/unicode/ucnv.h new file mode 100644 index 000000000..c5fc2dc78 --- /dev/null +++ b/Source/WTF/icu/unicode/ucnv.h @@ -0,0 +1,2034 @@ +/* +********************************************************************** +* Copyright (C) 1999-2013, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** + * ucnv.h: + * External APIs for the ICU's codeset conversion library + * Bertrand A. Damiba + * + * Modification History: + * + * Date Name Description + * 04/04/99 helena Fixed internal header inclusion. + * 05/11/00 helena Added setFallback and usesFallback APIs. + * 06/29/2000 helena Major rewrite of the callback APIs. + * 12/07/2000 srl Update of documentation + */ + +/** + * \file + * \brief C API: Character conversion + * + * <h2>Character Conversion C API</h2> + * + * <p>This API is used to convert codepage or character encoded data to and + * from UTF-16. You can open a converter with {@link ucnv_open() }. With that + * converter, you can get its properties, set options, convert your data and + * close the converter.</p> + * + * <p>Since many software programs recogize different converter names for + * different types of converters, there are other functions in this API to + * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() }, + * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the + * more frequently used alias functions to get this information.</p> + * + * <p>When a converter encounters an illegal, irregular, invalid or unmappable character + * its default behavior is to use a substitution character to replace the + * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() } + * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines + * many other callback actions that can be used instead of a character substitution.</p> + * + * <p>More information about this API can be found in our + * <a href="http://icu-project.org/userguide/conversion.html">User's + * Guide</a>.</p> + */ + +#ifndef UCNV_H +#define UCNV_H + +#include "unicode/ucnv_err.h" +#include "unicode/uenum.h" +#include "unicode/localpointer.h" + +#ifndef __USET_H__ + +/** + * USet is the C API type for Unicode sets. + * It is forward-declared here to avoid including the header file if related + * conversion APIs are not used. + * See unicode/uset.h + * + * @see ucnv_getUnicodeSet + * @stable ICU 2.6 + */ +struct USet; +/** @stable ICU 2.6 */ +typedef struct USet USet; + +#endif + +#if !UCONFIG_NO_CONVERSION + +U_CDECL_BEGIN + +/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */ +#define UCNV_MAX_CONVERTER_NAME_LENGTH 60 +/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */ +#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH) + +/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ +#define UCNV_SI 0x0F +/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ +#define UCNV_SO 0x0E + +/** + * Enum for specifying basic types of converters + * @see ucnv_getType + * @stable ICU 2.0 + */ +typedef enum { + /** @stable ICU 2.0 */ + UCNV_UNSUPPORTED_CONVERTER = -1, + /** @stable ICU 2.0 */ + UCNV_SBCS = 0, + /** @stable ICU 2.0 */ + UCNV_DBCS = 1, + /** @stable ICU 2.0 */ + UCNV_MBCS = 2, + /** @stable ICU 2.0 */ + UCNV_LATIN_1 = 3, + /** @stable ICU 2.0 */ + UCNV_UTF8 = 4, + /** @stable ICU 2.0 */ + UCNV_UTF16_BigEndian = 5, + /** @stable ICU 2.0 */ + UCNV_UTF16_LittleEndian = 6, + /** @stable ICU 2.0 */ + UCNV_UTF32_BigEndian = 7, + /** @stable ICU 2.0 */ + UCNV_UTF32_LittleEndian = 8, + /** @stable ICU 2.0 */ + UCNV_EBCDIC_STATEFUL = 9, + /** @stable ICU 2.0 */ + UCNV_ISO_2022 = 10, + + /** @stable ICU 2.0 */ + UCNV_LMBCS_1 = 11, + /** @stable ICU 2.0 */ + UCNV_LMBCS_2, + /** @stable ICU 2.0 */ + UCNV_LMBCS_3, + /** @stable ICU 2.0 */ + UCNV_LMBCS_4, + /** @stable ICU 2.0 */ + UCNV_LMBCS_5, + /** @stable ICU 2.0 */ + UCNV_LMBCS_6, + /** @stable ICU 2.0 */ + UCNV_LMBCS_8, + /** @stable ICU 2.0 */ + UCNV_LMBCS_11, + /** @stable ICU 2.0 */ + UCNV_LMBCS_16, + /** @stable ICU 2.0 */ + UCNV_LMBCS_17, + /** @stable ICU 2.0 */ + UCNV_LMBCS_18, + /** @stable ICU 2.0 */ + UCNV_LMBCS_19, + /** @stable ICU 2.0 */ + UCNV_LMBCS_LAST = UCNV_LMBCS_19, + /** @stable ICU 2.0 */ + UCNV_HZ, + /** @stable ICU 2.0 */ + UCNV_SCSU, + /** @stable ICU 2.0 */ + UCNV_ISCII, + /** @stable ICU 2.0 */ + UCNV_US_ASCII, + /** @stable ICU 2.0 */ + UCNV_UTF7, + /** @stable ICU 2.2 */ + UCNV_BOCU1, + /** @stable ICU 2.2 */ + UCNV_UTF16, + /** @stable ICU 2.2 */ + UCNV_UTF32, + /** @stable ICU 2.2 */ + UCNV_CESU8, + /** @stable ICU 2.4 */ + UCNV_IMAP_MAILBOX, + /** @stable ICU 4.8 */ + UCNV_COMPOUND_TEXT, + + /* Number of converter types for which we have conversion routines. */ + UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES +} UConverterType; + +/** + * Enum for specifying which platform a converter ID refers to. + * The use of platform/CCSID is not recommended. See ucnv_openCCSID(). + * + * @see ucnv_getPlatform + * @see ucnv_openCCSID + * @see ucnv_getCCSID + * @stable ICU 2.0 + */ +typedef enum { + UCNV_UNKNOWN = -1, + UCNV_IBM = 0 +} UConverterPlatform; + +/** + * Function pointer for error callback in the codepage to unicode direction. + * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason). + * @param context Pointer to the callback's private data + * @param args Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param pErrorCode ICU error code in/out parameter. + * For converter callback functions, set to a conversion error + * before the call, and the callback may reset it to U_ZERO_ERROR. + * @see ucnv_setToUCallBack + * @see UConverterToUnicodeArgs + * @stable ICU 2.0 + */ +typedef void (U_EXPORT2 *UConverterToUCallback) ( + const void* context, + UConverterToUnicodeArgs *args, + const char *codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode *pErrorCode); + +/** + * Function pointer for error callback in the unicode to codepage direction. + * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason). + * @param context Pointer to the callback's private data + * @param args Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param pErrorCode ICU error code in/out parameter. + * For converter callback functions, set to a conversion error + * before the call, and the callback may reset it to U_ZERO_ERROR. + * @see ucnv_setFromUCallBack + * @stable ICU 2.0 + */ +typedef void (U_EXPORT2 *UConverterFromUCallback) ( + const void* context, + UConverterFromUnicodeArgs *args, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode *pErrorCode); + +U_CDECL_END + +/** + * Character that separates converter names from options and options from each other. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_OPTION_SEP_CHAR ',' + +/** + * String version of UCNV_OPTION_SEP_CHAR. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_OPTION_SEP_STRING "," + +/** + * Character that separates a converter option from its value. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_VALUE_SEP_CHAR '=' + +/** + * String version of UCNV_VALUE_SEP_CHAR. + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_VALUE_SEP_STRING "=" + +/** + * Converter option for specifying a locale. + * For example, ucnv_open("SCSU,locale=ja", &errorCode); + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.0 + */ +#define UCNV_LOCALE_OPTION_STRING ",locale=" + +/** + * Converter option for specifying a version selector (0..9) for some converters. + * For example, + * \code + * ucnv_open("UTF-7,version=1", &errorCode); + * \endcode + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.4 + */ +#define UCNV_VERSION_OPTION_STRING ",version=" + +/** + * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages. + * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on + * S/390 (z/OS) Unix System Services (Open Edition). + * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode); + * See convrtrs.txt. + * + * @see ucnv_open + * @stable ICU 2.4 + */ +#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl" + +/** + * Do a fuzzy compare of two converter/alias names. + * The comparison is case-insensitive, ignores leading zeroes if they are not + * followed by further digits, and ignores all but letters and digits. + * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. + * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 + * at http://www.unicode.org/reports/tr22/ + * + * @param name1 a converter name or alias, zero-terminated + * @param name2 a converter name or alias, zero-terminated + * @return 0 if the names match, or a negative value if the name1 + * lexically precedes name2, or a positive value if the name1 + * lexically follows name2. + * @stable ICU 2.0 + */ +U_STABLE int U_EXPORT2 +ucnv_compareNames(const char *name1, const char *name2); + + +/** + * Creates a UConverter object with the name of a coded character set specified as a C string. + * The actual name will be resolved with the alias file + * using a case-insensitive string comparison that ignores + * leading zeroes and all non-alphanumeric characters. + * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent. + * (See also ucnv_compareNames().) + * If <code>NULL</code> is passed for the converter name, it will create one with the + * getDefaultName return value. + * + * <p>A converter name for ICU 1.5 and above may contain options + * like a locale specification to control the specific behavior of + * the newly instantiated converter. + * The meaning of the options depends on the particular converter. + * If an option is not defined for or recognized by a given converter, then it is ignored.</p> + * + * <p>Options are appended to the converter name string, with a + * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and + * also between adjacent options.</p> + * + * <p>If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p> + * + * <p>The conversion behavior and names can vary between platforms. ICU may + * convert some characters differently from other platforms. Details on this topic + * are in the <a href="http://icu-project.org/userguide/conversion.html">User's + * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning + * other than its an alias starting with the letters "cp". Please do not + * associate any meaning to these aliases.</p> + * + * \snippet samples/ucnv/convsamp.cpp ucnv_open + * + * @param converterName Name of the coded character set table. + * This may have options appended to the string. + * IANA alias character set names, IBM CCSIDs starting with "ibm-", + * Windows codepage numbers starting with "windows-" are frequently + * used for this parameter. See ucnv_getAvailableName and + * ucnv_getAlias for a complete list that is available. + * If this parameter is NULL, the default converter will be used. + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured + * @see ucnv_openU + * @see ucnv_openCCSID + * @see ucnv_getAvailableName + * @see ucnv_getAlias + * @see ucnv_getDefaultName + * @see ucnv_close + * @see ucnv_compareNames + * @stable ICU 2.0 + */ +U_STABLE UConverter* U_EXPORT2 +ucnv_open(const char *converterName, UErrorCode *err); + + +/** + * Creates a Unicode converter with the names specified as unicode string. + * The name should be limited to the ASCII-7 alphanumerics range. + * The actual name will be resolved with the alias file + * using a case-insensitive string comparison that ignores + * leading zeroes and all non-alphanumeric characters. + * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent. + * (See also ucnv_compareNames().) + * If <TT>NULL</TT> is passed for the converter name, it will create + * one with the ucnv_getDefaultName() return value. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * + * <p>See ucnv_open for the complete details</p> + * @param name Name of the UConverter table in a zero terminated + * Unicode string + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, + * U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an + * error occured + * @see ucnv_open + * @see ucnv_openCCSID + * @see ucnv_close + * @see ucnv_compareNames + * @stable ICU 2.0 + */ +U_STABLE UConverter* U_EXPORT2 +ucnv_openU(const UChar *name, + UErrorCode *err); + +/** + * Creates a UConverter object from a CCSID number and platform pair. + * Note that the usefulness of this function is limited to platforms with numeric + * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for + * encodings. + * + * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related. + * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and + * for some Unicode conversion tables there are multiple CCSIDs. + * Some "alternate" Unicode conversion tables are provided by the + * IBM CDRA conversion table registry. + * The most prominent example of a systematic modification of conversion tables that is + * not provided in the form of conversion table files in the repository is + * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all + * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well. + * + * Only IBM default conversion tables are accessible with ucnv_openCCSID(). + * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated + * with that CCSID. + * + * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM. + * + * In summary, the use of CCSIDs and the associated API functions is not recommended. + * + * In order to open a converter with the default IBM CDRA Unicode conversion table, + * you can use this function or use the prefix "ibm-": + * \code + * char name[20]; + * sprintf(name, "ibm-%hu", ccsid); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * In order to open a converter with the IBM S/390 Unix System Services variant + * of a Unicode/EBCDIC conversion table, + * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING: + * \code + * char name[20]; + * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * In order to open a converter from a Microsoft codepage number, use the prefix "cp": + * \code + * char name[20]; + * sprintf(name, "cp%hu", codepageID); + * cnv=ucnv_open(name, &errorCode); + * \endcode + * + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * + * @param codepage codepage number to create + * @param platform the platform in which the codepage number exists + * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error + * occured. + * @see ucnv_open + * @see ucnv_openU + * @see ucnv_close + * @see ucnv_getCCSID + * @see ucnv_getPlatform + * @see UConverterPlatform + * @stable ICU 2.0 + */ +U_STABLE UConverter* U_EXPORT2 +ucnv_openCCSID(int32_t codepage, + UConverterPlatform platform, + UErrorCode * err); + +/** + * <p>Creates a UConverter object specified from a packageName and a converterName.</p> + * + * <p>The packageName and converterName must point to an ICU udata object, as defined by + * <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent. + * Typically, packageName will refer to a (.dat) file, or to a package registered with + * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p> + * + * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be + * stored in the converter cache or the alias table. The only way to open further converters + * is call this function multiple times, or use the ucnv_safeClone() function to clone a + * 'master' converter.</p> + * + * <p>A future version of ICU may add alias table lookups and/or caching + * to this function.</p> + * + * <p>Example Use: + * <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code> + * </p> + * + * @param packageName name of the package (equivalent to 'path' in udata_open() call) + * @param converterName name of the data item to be used, without suffix. + * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> + * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured + * @see udata_open + * @see ucnv_open + * @see ucnv_safeClone + * @see ucnv_close + * @stable ICU 2.2 + */ +U_STABLE UConverter* U_EXPORT2 +ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err); + +/** + * Thread safe converter cloning operation. + * For most efficient operation, pass in a stackBuffer (and a *pBufferSize) + * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space. + * If the buffer size is sufficient, then the clone will use the stack buffer; + * otherwise, it will be allocated, and *pBufferSize will indicate + * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.) + * + * You must ucnv_close() the clone in any case. + * + * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not) + * then *pBufferSize will be changed to a sufficient size + * for cloning this converter, + * without actually cloning the converter ("pure pre-flighting"). + * + * If *pBufferSize is greater than zero but not large enough for a stack-based + * clone, then the converter is cloned using newly allocated memory + * and *pBufferSize is changed to the necessary size. + * + * If the converter clone fits into the stack buffer but the stack buffer is not + * sufficiently aligned for the clone, then the clone will use an + * adjusted pointer and use an accordingly smaller buffer size. + * + * @param cnv converter to be cloned + * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> + * user allocated space for the new clone. If NULL new memory will be allocated. + * If buffer is not large enough, new memory will be allocated. + * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations. + * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> + * pointer to size of allocated space. + * @param status to indicate whether the operation went on smoothly or there were errors + * An informational status value, U_SAFECLONE_ALLOCATED_WARNING, + * is used if any allocations were necessary. + * However, it is better to check if *pBufferSize grew for checking for + * allocations because warning codes can be overridden by subsequent + * function calls. + * @return pointer to the new clone + * @stable ICU 2.0 + */ +U_STABLE UConverter * U_EXPORT2 +ucnv_safeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API + +/** + * \def U_CNV_SAFECLONE_BUFFERSIZE + * Definition of a buffer size that is designed to be large enough for + * converters to be cloned with ucnv_safeClone(). + * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer. + */ +#define U_CNV_SAFECLONE_BUFFERSIZE 1024 + +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Deletes the unicode converter and releases resources associated + * with just this instance. + * Does not free up shared converter tables. + * + * @param converter the converter object to be deleted + * @see ucnv_open + * @see ucnv_openU + * @see ucnv_openCCSID + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_close(UConverter * converter); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUConverterPointer + * "Smart pointer" class, closes a UConverter via ucnv_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close); + +U_NAMESPACE_END + +#endif + +/** + * Fills in the output parameter, subChars, with the substitution characters + * as multiple bytes. + * If ucnv_setSubstString() set a Unicode string because the converter is + * stateful, then subChars will be an empty string. + * + * @param converter the Unicode converter + * @param subChars the subsitution characters + * @param len on input the capacity of subChars, on output the number + * of bytes copied to it + * @param err the outgoing error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @see ucnv_setSubstString + * @see ucnv_setSubstChars + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getSubstChars(const UConverter *converter, + char *subChars, + int8_t *len, + UErrorCode *err); + +/** + * Sets the substitution chars when converting from unicode to a codepage. The + * substitution is specified as a string of 1-4 bytes, and may contain + * <TT>NULL</TT> bytes. + * The subChars must represent a single character. The caller needs to know the + * byte sequence of a valid character in the converter's charset. + * For some converters, for example some ISO 2022 variants, only single-byte + * substitution characters may be supported. + * The newer ucnv_setSubstString() function relaxes these limitations. + * + * @param converter the Unicode converter + * @param subChars the substitution character byte sequence we want set + * @param len the number of bytes in subChars + * @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if + * len is bigger than the maximum number of bytes allowed in subchars + * @see ucnv_setSubstString + * @see ucnv_getSubstChars + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setSubstChars(UConverter *converter, + const char *subChars, + int8_t len, + UErrorCode *err); + +/** + * Set a substitution string for converting from Unicode to a charset. + * The caller need not know the charset byte sequence for each charset. + * + * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence + * for a single character, this function takes a Unicode string with + * zero, one or more characters, and immediately verifies that the string can be + * converted to the charset. + * If not, or if the result is too long (more than 32 bytes as of ICU 3.6), + * then the function returns with an error accordingly. + * + * Also unlike ucnv_setSubstChars(), this function works for stateful charsets + * by converting on the fly at the point of substitution rather than setting + * a fixed byte sequence. + * + * @param cnv The UConverter object. + * @param s The Unicode string. + * @param length The number of UChars in s, or -1 for a NUL-terminated string. + * @param err Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * + * @see ucnv_setSubstChars + * @see ucnv_getSubstChars + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ucnv_setSubstString(UConverter *cnv, + const UChar *s, + int32_t length, + UErrorCode *err); + +/** + * Fills in the output parameter, errBytes, with the error characters from the + * last failing conversion. + * + * @param converter the Unicode converter + * @param errBytes the codepage bytes which were in error + * @param len on input the capacity of errBytes, on output the number of + * bytes which were copied to it + * @param err the error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getInvalidChars(const UConverter *converter, + char *errBytes, + int8_t *len, + UErrorCode *err); + +/** + * Fills in the output parameter, errChars, with the error characters from the + * last failing conversion. + * + * @param converter the Unicode converter + * @param errUChars the UChars which were in error + * @param len on input the capacity of errUChars, on output the number of + * UChars which were copied to it + * @param err the error status code. + * If the substitution character array is too small, an + * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getInvalidUChars(const UConverter *converter, + UChar *errUChars, + int8_t *len, + UErrorCode *err); + +/** + * Resets the state of a converter to the default state. This is used + * in the case of an error, to restart a conversion from a known default state. + * It will also empty the internal output buffers. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_reset(UConverter *converter); + +/** + * Resets the to-Unicode part of a converter state to the default state. + * This is used in the case of an error to restart a conversion to + * Unicode to a known default state. It will also empty the internal + * output buffers used for the conversion to Unicode codepoints. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_resetToUnicode(UConverter *converter); + +/** + * Resets the from-Unicode part of a converter state to the default state. + * This is used in the case of an error to restart a conversion from + * Unicode to a known default state. It will also empty the internal output + * buffers used for the conversion from Unicode codepoints. + * @param converter the Unicode converter + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_resetFromUnicode(UConverter *converter); + +/** + * Returns the maximum number of bytes that are output per UChar in conversion + * from Unicode using this converter. + * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING + * to calculate the size of a target buffer for conversion from Unicode. + * + * Note: Before ICU 2.8, this function did not return reliable numbers for + * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS. + * + * This number may not be the same as the maximum number of bytes per + * "conversion unit". In other words, it may not be the intuitively expected + * number of bytes per character that would be published for a charset, + * and may not fulfill any other purpose than the allocation of an output + * buffer of guaranteed sufficient size for a given input length and converter. + * + * Examples for special cases that are taken into account: + * - Supplementary code points may convert to more bytes than BMP code points. + * This function returns bytes per UChar (UTF-16 code unit), not per + * Unicode code point, for efficient buffer allocation. + * - State-shifting output (SI/SO, escapes, etc.) from stateful converters. + * - When m input UChars are converted to n output bytes, then the maximum m/n + * is taken into account. + * + * The number returned here does not take into account + * (see UCNV_GET_MAX_BYTES_FOR_STRING): + * - callbacks which output more than one charset character sequence per call, + * like escape callbacks + * - initial and final non-character bytes that are output by some converters + * (automatic BOMs, initial escape sequence, final SI, etc.) + * + * Examples for returned values: + * - SBCS charsets: 1 + * - Shift-JIS: 2 + * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted) + * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_) + * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS) + * - ISO-2022: 3 (always outputs UTF-8) + * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS) + * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS) + * + * @param converter The Unicode converter. + * @return The maximum number of bytes per UChar that are output by ucnv_fromUnicode(), + * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING for buffer allocation. + * + * @see UCNV_GET_MAX_BYTES_FOR_STRING + * @see ucnv_getMinCharSize + * @stable ICU 2.0 + */ +U_STABLE int8_t U_EXPORT2 +ucnv_getMaxCharSize(const UConverter *converter); + +/** + * Calculates the size of a buffer for conversion from Unicode to a charset. + * The calculated size is guaranteed to be sufficient for this conversion. + * + * It takes into account initial and final non-character bytes that are output + * by some converters. + * It does not take into account callbacks which output more than one charset + * character sequence per call, like escape callbacks. + * The default (substitution) callback only outputs one charset character sequence. + * + * @param length Number of UChars to be converted. + * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter + * that will be used. + * @return Size of a buffer that will be large enough to hold the output bytes of + * converting length UChars with the converter that returned the maxCharSize. + * + * @see ucnv_getMaxCharSize + * @stable ICU 2.8 + */ +#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \ + (((int32_t)(length)+10)*(int32_t)(maxCharSize)) + +/** + * Returns the minimum byte length for characters in this codepage. + * This is usually either 1 or 2. + * @param converter the Unicode converter + * @return the minimum number of bytes allowed by this particular converter + * @see ucnv_getMaxCharSize + * @stable ICU 2.0 + */ +U_STABLE int8_t U_EXPORT2 +ucnv_getMinCharSize(const UConverter *converter); + +/** + * Returns the display name of the converter passed in based on the Locale + * passed in. If the locale contains no display name, the internal ASCII + * name will be filled in. + * + * @param converter the Unicode converter. + * @param displayLocale is the specific Locale we want to localised for + * @param displayName user provided buffer to be filled in + * @param displayNameCapacity size of displayName Buffer + * @param err error status code + * @return displayNameLength number of UChar needed in displayName + * @see ucnv_getName + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_getDisplayName(const UConverter *converter, + const char *displayLocale, + UChar *displayName, + int32_t displayNameCapacity, + UErrorCode *err); + +/** + * Gets the internal, canonical name of the converter (zero-terminated). + * The lifetime of the returned string will be that of the converter + * passed to this function. + * @param converter the Unicode converter + * @param err UErrorCode status + * @return the internal name of the converter + * @see ucnv_getDisplayName + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getName(const UConverter *converter, UErrorCode *err); + +/** + * Gets a codepage number associated with the converter. This is not guaranteed + * to be the one used to create the converter. Some converters do not represent + * platform registered codepages and return zero for the codepage number. + * The error code fill-in parameter indicates if the codepage number + * is available. + * Does not check if the converter is <TT>NULL</TT> or if converter's data + * table is <TT>NULL</TT>. + * + * Important: The use of CCSIDs is not recommended because it is limited + * to only two platforms in principle and only one (UCNV_IBM) in the current + * ICU converter API. + * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely. + * For more details see ucnv_openCCSID(). + * + * @param converter the Unicode converter + * @param err the error status code. + * @return If any error occurrs, -1 will be returned otherwise, the codepage number + * will be returned + * @see ucnv_openCCSID + * @see ucnv_getPlatform + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_getCCSID(const UConverter *converter, + UErrorCode *err); + +/** + * Gets a codepage platform associated with the converter. Currently, + * only <TT>UCNV_IBM</TT> will be returned. + * Does not test if the converter is <TT>NULL</TT> or if converter's data + * table is <TT>NULL</TT>. + * @param converter the Unicode converter + * @param err the error status code. + * @return The codepage platform + * @stable ICU 2.0 + */ +U_STABLE UConverterPlatform U_EXPORT2 +ucnv_getPlatform(const UConverter *converter, + UErrorCode *err); + +/** + * Gets the type of the converter + * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, + * EBCDIC_STATEFUL, LATIN_1 + * @param converter a valid, opened converter + * @return the type of the converter + * @stable ICU 2.0 + */ +U_STABLE UConverterType U_EXPORT2 +ucnv_getType(const UConverter * converter); + +/** + * Gets the "starter" (lead) bytes for converters of type MBCS. + * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in + * is not MBCS. Fills in an array of type UBool, with the value of the byte + * as offset to the array. For example, if (starters[0x20] == TRUE) at return, + * it means that the byte 0x20 is a starter byte in this converter. + * Context pointers are always owned by the caller. + * + * @param converter a valid, opened converter of type MBCS + * @param starters an array of size 256 to be filled in + * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the + * converter is not a type which can return starters. + * @see ucnv_getType + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getStarters(const UConverter* converter, + UBool starters[256], + UErrorCode* err); + + +/** + * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet(). + * @see ucnv_getUnicodeSet + * @stable ICU 2.6 + */ +typedef enum UConverterUnicodeSet { + /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */ + UCNV_ROUNDTRIP_SET, + /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */ + UCNV_ROUNDTRIP_AND_FALLBACK_SET, + /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */ + UCNV_SET_COUNT +} UConverterUnicodeSet; + + +/** + * Returns the set of Unicode code points that can be converted by an ICU converter. + * + * Returns one of several kinds of set: + * + * 1. UCNV_ROUNDTRIP_SET + * + * The set of all Unicode code points that can be roundtrip-converted + * (converted without any data loss) with the converter (ucnv_fromUnicode()). + * This set will not include code points that have fallback mappings + * or are only the result of reverse fallback mappings. + * This set will also not include PUA code points with fallbacks, although + * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback(). + * See UTR #22 "Character Mapping Markup Language" + * at http://www.unicode.org/reports/tr22/ + * + * This is useful for example for + * - checking that a string or document can be roundtrip-converted with a converter, + * without/before actually performing the conversion + * - testing if a converter can be used for text for typical text for a certain locale, + * by comparing its roundtrip set with the set of ExemplarCharacters from + * ICU's locale data or other sources + * + * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET + * + * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode()) + * when fallbacks are turned on (see ucnv_setFallback()). + * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks). + * + * In the future, there may be more UConverterUnicodeSet choices to select + * sets with different properties. + * + * @param cnv The converter for which a set is requested. + * @param setFillIn A valid USet *. It will be cleared by this function before + * the converter's specific set is filled into the USet. + * @param whichSet A UConverterUnicodeSet selector; + * currently UCNV_ROUNDTRIP_SET is the only supported value. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * + * @see UConverterUnicodeSet + * @see uset_open + * @see uset_close + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +ucnv_getUnicodeSet(const UConverter *cnv, + USet *setFillIn, + UConverterUnicodeSet whichSet, + UErrorCode *pErrorCode); + +/** + * Gets the current calback function used by the converter when an illegal + * or invalid codepage sequence is found. + * Context pointers are always owned by the caller. + * + * @param converter the unicode converter + * @param action fillin: returns the callback function pointer + * @param context fillin: returns the callback's private void* context + * @see ucnv_setToUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getToUCallBack (const UConverter * converter, + UConverterToUCallback *action, + const void **context); + +/** + * Gets the current callback function used by the converter when illegal + * or invalid Unicode sequence is found. + * Context pointers are always owned by the caller. + * + * @param converter the unicode converter + * @param action fillin: returns the callback function pointer + * @param context fillin: returns the callback's private void* context + * @see ucnv_setFromUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getFromUCallBack (const UConverter * converter, + UConverterFromUCallback *action, + const void **context); + +/** + * Changes the callback function used by the converter when + * an illegal or invalid sequence is found. + * Context pointers are always owned by the caller. + * Predefined actions and contexts can be found in the ucnv_err.h header. + * + * @param converter the unicode converter + * @param newAction the new callback function + * @param newContext the new toUnicode callback context pointer. This can be NULL. + * @param oldAction fillin: returns the old callback function pointer. This can be NULL. + * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. + * @param err The error code status + * @see ucnv_getToUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setToUCallBack (UConverter * converter, + UConverterToUCallback newAction, + const void* newContext, + UConverterToUCallback *oldAction, + const void** oldContext, + UErrorCode * err); + +/** + * Changes the current callback function used by the converter when + * an illegal or invalid sequence is found. + * Context pointers are always owned by the caller. + * Predefined actions and contexts can be found in the ucnv_err.h header. + * + * @param converter the unicode converter + * @param newAction the new callback function + * @param newContext the new fromUnicode callback context pointer. This can be NULL. + * @param oldAction fillin: returns the old callback function pointer. This can be NULL. + * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. + * @param err The error code status + * @see ucnv_getFromUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setFromUCallBack (UConverter * converter, + UConverterFromUCallback newAction, + const void *newContext, + UConverterFromUCallback *oldAction, + const void **oldContext, + UErrorCode * err); + +/** + * Converts an array of unicode characters to an array of codepage + * characters. This function is optimized for converting a continuous + * stream of data in buffer-sized chunks, where the entire source and + * target does not fit in available buffers. + * + * The source pointer is an in/out parameter. It starts out pointing where the + * conversion is to begin, and ends up pointing after the last UChar consumed. + * + * Target similarly starts out pointer at the first available byte in the output + * buffer, and ends up pointing after the last byte written to the output. + * + * The converter always attempts to consume the entire source buffer, unless + * (1.) the target buffer is full, or (2.) a failing error is returned from the + * current callback function. When a successful error status has been + * returned, it means that all of the source buffer has been + * consumed. At that point, the caller should reset the source and + * sourceLimit pointers to point to the next chunk. + * + * At the end of the stream (flush==TRUE), the input is completely consumed + * when *source==sourceLimit and no error code is set. + * The converter object is then automatically reset by this function. + * (This means that a converter need not be reset explicitly between data + * streams if it finishes the previous stream without errors.) + * + * This is a <I>stateful</I> conversion. Additionally, even when all source data has + * been consumed, some data may be in the converters' internal state. + * Call this function repeatedly, updating the target pointers with + * the next empty chunk of target in case of a + * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers + * with the next chunk of source when a successful error status is + * returned, until there are no more chunks of source data. + * @param converter the Unicode converter + * @param target I/O parameter. Input : Points to the beginning of the buffer to copy + * codepage characters to. Output : points to after the last codepage character copied + * to <TT>target</TT>. + * @param targetLimit the pointer just after last of the <TT>target</TT> buffer + * @param source I/O parameter, pointer to pointer to the source Unicode character buffer. + * @param sourceLimit the pointer just after the last of the source buffer + * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number + * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer + * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT> + * For output data carried across calls, and other data without a specific source character + * (such as from escape sequences or callbacks) -1 will be placed for offsets. + * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available + * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned, + * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until + * the source buffer is consumed. + * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the + * converter is <TT>NULL</TT>. + * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is + * still data to be written to the target. + * @see ucnv_fromUChars + * @see ucnv_convert + * @see ucnv_getMinCharSize + * @see ucnv_setToUCallBack + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_fromUnicode (UConverter * converter, + char **target, + const char *targetLimit, + const UChar ** source, + const UChar * sourceLimit, + int32_t* offsets, + UBool flush, + UErrorCode * err); + +/** + * Converts a buffer of codepage bytes into an array of unicode UChars + * characters. This function is optimized for converting a continuous + * stream of data in buffer-sized chunks, where the entire source and + * target does not fit in available buffers. + * + * The source pointer is an in/out parameter. It starts out pointing where the + * conversion is to begin, and ends up pointing after the last byte of source consumed. + * + * Target similarly starts out pointer at the first available UChar in the output + * buffer, and ends up pointing after the last UChar written to the output. + * It does NOT necessarily keep UChar sequences together. + * + * The converter always attempts to consume the entire source buffer, unless + * (1.) the target buffer is full, or (2.) a failing error is returned from the + * current callback function. When a successful error status has been + * returned, it means that all of the source buffer has been + * consumed. At that point, the caller should reset the source and + * sourceLimit pointers to point to the next chunk. + * + * At the end of the stream (flush==TRUE), the input is completely consumed + * when *source==sourceLimit and no error code is set + * The converter object is then automatically reset by this function. + * (This means that a converter need not be reset explicitly between data + * streams if it finishes the previous stream without errors.) + * + * This is a <I>stateful</I> conversion. Additionally, even when all source data has + * been consumed, some data may be in the converters' internal state. + * Call this function repeatedly, updating the target pointers with + * the next empty chunk of target in case of a + * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers + * with the next chunk of source when a successful error status is + * returned, until there are no more chunks of source data. + * @param converter the Unicode converter + * @param target I/O parameter. Input : Points to the beginning of the buffer to copy + * UChars into. Output : points to after the last UChar copied. + * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer + * @param source I/O parameter, pointer to pointer to the source codepage buffer. + * @param sourceLimit the pointer to the byte after the end of the source buffer + * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number + * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer + * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT> + * For output data carried across calls, and other data without a specific source character + * (such as from escape sequences or callbacks) -1 will be placed for offsets. + * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available + * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned, + * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until + * the source buffer is consumed. + * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the + * converter is <TT>NULL</TT>. + * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is + * still data to be written to the target. + * @see ucnv_fromUChars + * @see ucnv_convert + * @see ucnv_getMinCharSize + * @see ucnv_setFromUCallBack + * @see ucnv_getNextUChar + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_toUnicode(UConverter *converter, + UChar **target, + const UChar *targetLimit, + const char **source, + const char *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err); + +/** + * Convert the Unicode string into a codepage string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function is a more convenient but less powerful version of ucnv_fromUnicode(). + * It is only useful for whole strings, not for streaming conversion. + * + * The maximum output buffer capacity required (barring output from callbacks) will be + * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)). + * + * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called) + * @param src the input Unicode string + * @param srcLength the input string length, or -1 if NUL-terminated + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of chars available at dest + * @param pErrorCode normal ICU error code; + * common error codes that may be set by this function include + * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, + * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @see ucnv_fromUnicode + * @see ucnv_convert + * @see UCNV_GET_MAX_BYTES_FOR_STRING + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_fromUChars(UConverter *cnv, + char *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert the codepage string into a Unicode string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function is a more convenient but less powerful version of ucnv_toUnicode(). + * It is only useful for whole strings, not for streaming conversion. + * + * The maximum output buffer capacity required (barring output from callbacks) will be + * 2*srcLength (each char may be converted into a surrogate pair). + * + * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called) + * @param src the input codepage string + * @param srcLength the input string length, or -1 if NUL-terminated + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of UChars available at dest + * @param pErrorCode normal ICU error code; + * common error codes that may be set by this function include + * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, + * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @see ucnv_toUnicode + * @see ucnv_convert + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_toUChars(UConverter *cnv, + UChar *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a codepage buffer into Unicode one character at a time. + * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set. + * + * Advantage compared to ucnv_toUnicode() or ucnv_toUChars(): + * - Faster for small amounts of data, for most converters, e.g., + * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets. + * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants, + * it uses ucnv_toUnicode() internally.) + * - Convenient. + * + * Limitations compared to ucnv_toUnicode(): + * - Always assumes flush=TRUE. + * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion, + * that is, for where the input is supplied in multiple buffers, + * because ucnv_getNextUChar() will assume the end of the input at the end + * of the first buffer. + * - Does not provide offset output. + * + * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because + * ucnv_getNextUChar() uses the current state of the converter + * (unlike ucnv_toUChars() which always resets first). + * However, if ucnv_getNextUChar() is called after ucnv_toUnicode() + * stopped in the middle of a character sequence (with flush=FALSE), + * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode() + * internally until the next character boundary. + * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to + * start at a character boundary.) + * + * Instead of using ucnv_getNextUChar(), it is recommended + * to convert using ucnv_toUnicode() or ucnv_toUChars() + * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h) + * or a C++ CharacterIterator or similar. + * This allows streaming conversion and offset output, for example. + * + * <p>Handling of surrogate pairs and supplementary-plane code points:<br> + * There are two different kinds of codepages that provide mappings for surrogate characters: + * <ul> + * <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode + * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff. + * Each valid sequence will result in exactly one returned code point. + * If a sequence results in a single surrogate, then that will be returned + * by itself, even if a neighboring sequence encodes the matching surrogate.</li> + * <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points + * including surrogates. Code points in supplementary planes are represented with + * two sequences, each encoding a surrogate. + * For these codepages, matching pairs of surrogates will be combined into single + * code points for returning from this function. + * (Note that SCSU is actually a mix of these codepage types.)</li> + * </ul></p> + * + * @param converter an open UConverter + * @param source the address of a pointer to the codepage buffer, will be + * updated to point after the bytes consumed in the conversion call. + * @param sourceLimit points to the end of the input buffer + * @param err fills in error status (see ucnv_toUnicode) + * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input + * is empty or does not convert to any output (e.g.: pure state-change + * codes SI/SO, escape sequences for ISO 2022, + * or if the callback did not output anything, ...). + * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because + * the "buffer" is the return code. However, there might be subsequent output + * stored in the converter object + * that will be returned in following calls to this function. + * @return a UChar32 resulting from the partial conversion of source + * @see ucnv_toUnicode + * @see ucnv_toUChars + * @see ucnv_convert + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +ucnv_getNextUChar(UConverter * converter, + const char **source, + const char * sourceLimit, + UErrorCode * err); + +/** + * Convert from one external charset to another using two existing UConverters. + * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() - + * are used, "pivoting" through 16-bit Unicode. + * + * Important: For streaming conversion (multiple function calls for successive + * parts of a text stream), the caller must provide a pivot buffer explicitly, + * and must preserve the pivot buffer and associated pointers from one + * call to another. (The buffer may be moved if its contents and the relative + * pointer positions are preserved.) + * + * There is a similar function, ucnv_convert(), + * which has the following limitations: + * - it takes charset names, not converter objects, so that + * - two converters are opened for each call + * - only single-string conversion is possible, not streaming operation + * - it does not provide enough information to find out, + * in case of failure, whether the toUnicode or + * the fromUnicode conversion failed + * + * By contrast, ucnv_convertEx() + * - takes UConverter parameters instead of charset names + * - fully exposes the pivot buffer for streaming conversion and complete error handling + * + * ucnv_convertEx() also provides further convenience: + * - an option to reset the converters at the beginning + * (if reset==TRUE, see parameters; + * also sets *pivotTarget=*pivotSource=pivotStart) + * - allow NUL-terminated input + * (only a single NUL byte, will not work for charsets with multi-byte NULs) + * (if sourceLimit==NULL, see parameters) + * - terminate with a NUL on output + * (only a single NUL byte, not useful for charsets with multi-byte NULs), + * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills + * the target buffer + * - the pivot buffer can be provided internally; + * possible only for whole-string conversion, not streaming conversion; + * in this case, the caller will not be able to get details about where an + * error occurred + * (if pivotStart==NULL, see below) + * + * The function returns when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) + * - a conversion error occurred + * (other U_FAILURE(), see description of pErrorCode) + * + * Limitation compared to the direct use of + * ucnv_fromUnicode() and ucnv_toUnicode(): + * ucnv_convertEx() does not provide offset information. + * + * Limitation compared to ucnv_fromUChars() and ucnv_toUChars(): + * ucnv_convertEx() does not support preflighting directly. + * + * Sample code for converting a single string from + * one external charset to UTF-8, ignoring the location of errors: + * + * \code + * int32_t + * myToUTF8(UConverter *cnv, + * const char *s, int32_t length, + * char *u8, int32_t capacity, + * UErrorCode *pErrorCode) { + * UConverter *utf8Cnv; + * char *target; + * + * if(U_FAILURE(*pErrorCode)) { + * return 0; + * } + * + * utf8Cnv=myGetCachedUTF8Converter(pErrorCode); + * if(U_FAILURE(*pErrorCode)) { + * return 0; + * } + * + * if(length<0) { + * length=strlen(s); + * } + * target=u8; + * ucnv_convertEx(utf8Cnv, cnv, + * &target, u8+capacity, + * &s, s+length, + * NULL, NULL, NULL, NULL, + * TRUE, TRUE, + * pErrorCode); + * + * myReleaseCachedUTF8Converter(utf8Cnv); + * + * // return the output string length, but without preflighting + * return (int32_t)(target-u8); + * } + * \endcode + * + * @param targetCnv Output converter, used to convert from the UTF-16 pivot + * to the target using ucnv_fromUnicode(). + * @param sourceCnv Input converter, used to convert from the source to + * the UTF-16 pivot using ucnv_toUnicode(). + * @param target I/O parameter, same as for ucnv_fromUChars(). + * Input: *target points to the beginning of the target buffer. + * Output: *target points to the first unit after the last char written. + * @param targetLimit Pointer to the first unit after the target buffer. + * @param source I/O parameter, same as for ucnv_toUChars(). + * Input: *source points to the beginning of the source buffer. + * Output: *source points to the first unit after the last char read. + * @param sourceLimit Pointer to the first unit after the source buffer. + * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL, + * then an internal buffer is used and the other pivot + * arguments are ignored and can be NULL as well. + * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for + * conversion from the pivot buffer to the target buffer. + * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for + * conversion from the source buffer to the pivot buffer. + * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit + * and pivotStart<pivotLimit (unless pivotStart==NULL). + * @param pivotLimit Pointer to the first unit after the pivot buffer. + * @param reset If TRUE, then ucnv_resetToUnicode(sourceCnv) and + * ucnv_resetFromUnicode(targetCnv) are called, and the + * pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart). + * @param flush If true, indicates the end of the input. + * Passed directly to ucnv_toUnicode(), and carried over to + * ucnv_fromUnicode() when the source is empty as well. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * U_BUFFER_OVERFLOW_ERROR always refers to the target buffer + * because overflows into the pivot buffer are handled internally. + * Other conversion errors are from the source-to-pivot + * conversion if *pivotSource==pivotStart, otherwise from + * the pivot-to-target conversion. + * + * @see ucnv_convert + * @see ucnv_fromAlgorithmic + * @see ucnv_toAlgorithmic + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, + char **target, const char *targetLimit, + const char **source, const char *sourceLimit, + UChar *pivotStart, UChar **pivotSource, + UChar **pivotTarget, const UChar *pivotLimit, + UBool reset, UBool flush, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, two converters are opened according to the name arguments, + * then the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(), then the converters are closed again. + * + * This is a convenience function, not an efficient way to convert a lot of text: + * ucnv_convert() + * - takes charset names, not converter objects, so that + * - two converters are opened for each call + * - only single-string conversion is possible, not streaming operation + * - does not provide enough information to find out, + * in case of failure, whether the toUnicode or + * the fromUnicode conversion failed + * - allows NUL-terminated input + * (only a single NUL byte, will not work for charsets with multi-byte NULs) + * (if sourceLength==-1, see parameters) + * - terminate with a NUL on output + * (only a single NUL byte, not useful for charsets with multi-byte NULs), + * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills + * the target buffer + * - a pivot buffer is provided internally + * + * The function returns when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * and either the target buffer is terminated with a single NUL byte + * or the error code is set to U_STRING_NOT_TERMINATED_WARNING + * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) + * and the full output string length is returned ("preflighting") + * - a conversion error occurred + * (other U_FAILURE(), see description of pErrorCode) + * + * @param toConverterName The name of the converter that is used to convert + * from the UTF-16 pivot buffer to the target. + * @param fromConverterName The name of the converter that is used to convert + * from the source to the UTF-16 pivot buffer. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_convertEx + * @see ucnv_fromAlgorithmic + * @see ucnv_toAlgorithmic + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @see ucnv_getNextUChar + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_convert(const char *toConverterName, + const char *fromConverterName, + char *target, + int32_t targetCapacity, + const char *source, + int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert() + * except that the two converters need not be looked up and opened completely. + * + * The source-to-pivot conversion uses the cnv converter parameter. + * The pivot-to-target conversion uses a purely algorithmic converter + * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter. + * + * Internally, the algorithmic converter is opened and closed for each + * function call, which is more efficient than using the public ucnv_open() + * but somewhat less efficient than only resetting an existing converter + * and using ucnv_convertEx(). + * + * This function is more convenient than ucnv_convertEx() for single-string + * conversions, especially when "preflighting" is desired (returning the length + * of the complete output even if it does not fit into the target buffer; + * see the User Guide Strings chapter). See ucnv_convert() for details. + * + * @param algorithmicType UConverterType constant identifying the desired target + * charset as a purely algorithmic converter. + * Those are converters for Unicode charsets like + * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc., + * as well as US-ASCII and ISO-8859-1. + * @param cnv The converter that is used to convert + * from the source to the UTF-16 pivot buffer. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_fromAlgorithmic + * @see ucnv_convert + * @see ucnv_convertEx + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_toAlgorithmic(UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Convert from one external charset to another. + * Internally, the text is converted to and from the 16-bit Unicode "pivot" + * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert() + * except that the two converters need not be looked up and opened completely. + * + * The source-to-pivot conversion uses a purely algorithmic converter + * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter. + * The pivot-to-target conversion uses the cnv converter parameter. + * + * Internally, the algorithmic converter is opened and closed for each + * function call, which is more efficient than using the public ucnv_open() + * but somewhat less efficient than only resetting an existing converter + * and using ucnv_convertEx(). + * + * This function is more convenient than ucnv_convertEx() for single-string + * conversions, especially when "preflighting" is desired (returning the length + * of the complete output even if it does not fit into the target buffer; + * see the User Guide Strings chapter). See ucnv_convert() for details. + * + * @param cnv The converter that is used to convert + * from the UTF-16 pivot buffer to the target. + * @param algorithmicType UConverterType constant identifying the desired source + * charset as a purely algorithmic converter. + * Those are converters for Unicode charsets like + * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc., + * as well as US-ASCII and ISO-8859-1. + * @param target Pointer to the output buffer. + * @param targetCapacity Capacity of the target, in bytes. + * @param source Pointer to the input buffer. + * @param sourceLength Length of the input text, in bytes + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity + * and a U_BUFFER_OVERFLOW_ERROR is set. + * + * @see ucnv_fromAlgorithmic + * @see ucnv_convert + * @see ucnv_convertEx + * @see ucnv_fromUnicode + * @see ucnv_toUnicode + * @see ucnv_fromUChars + * @see ucnv_toUChars + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_fromAlgorithmic(UConverter *cnv, + UConverterType algorithmicType, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode); + +/** + * Frees up memory occupied by unused, cached converter shared data. + * + * @return the number of cached converters successfully deleted + * @see ucnv_close + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_flushCache(void); + +/** + * Returns the number of available converters, as per the alias file. + * + * @return the number of available converters + * @see ucnv_getAvailableName + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_countAvailable(void); + +/** + * Gets the canonical converter name of the specified converter from a list of + * all available converters contaied in the alias file. All converters + * in this list can be opened. + * + * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvaiable()]</TT>) + * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds. + * @see ucnv_countAvailable + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +ucnv_getAvailableName(int32_t n); + +/** + * Returns a UEnumeration to enumerate all of the canonical converter + * names, as per the alias file, regardless of the ability to open each + * converter. + * + * @return A UEnumeration object for getting all the recognized canonical + * converter names. + * @see ucnv_getAvailableName + * @see uenum_close + * @see uenum_next + * @stable ICU 2.4 + */ +U_STABLE UEnumeration * U_EXPORT2 +ucnv_openAllNames(UErrorCode *pErrorCode); + +/** + * Gives the number of aliases for a given converter or alias name. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * This method only enumerates the listed entries in the alias file. + * @param alias alias name + * @param pErrorCode error status + * @return number of names on alias list for given alias + * @stable ICU 2.0 + */ +U_STABLE uint16_t U_EXPORT2 +ucnv_countAliases(const char *alias, UErrorCode *pErrorCode); + +/** + * Gives the name of the alias at given index of alias list. + * This method only enumerates the listed entries in the alias file. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * @param alias alias name + * @param n index in alias list + * @param pErrorCode result of operation + * @return returns the name of the alias at given index + * @see ucnv_countAliases + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode); + +/** + * Fill-up the list of alias names for the given alias. + * This method only enumerates the listed entries in the alias file. + * If the alias is ambiguous, then the preferred converter is used + * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. + * @param alias alias name + * @param aliases fill-in list, aliases is a pointer to an array of + * <code>ucnv_countAliases()</code> string-pointers + * (<code>const char *</code>) that will be filled in. + * The strings themselves are owned by the library. + * @param pErrorCode result of operation + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode); + +/** + * Return a new UEnumeration object for enumerating all the + * alias names for a given converter that are recognized by a standard. + * This method only enumerates the listed entries in the alias file. + * The convrtrs.txt file can be modified to change the results of + * this function. + * The first result in this list is the same result given by + * <code>ucnv_getStandardName</code>, which is the default alias for + * the specified standard name. The returned object must be closed with + * <code>uenum_close</code> when you are done with the object. + * + * @param convName original converter name + * @param standard name of the standard governing the names; MIME and IANA + * are such standards + * @param pErrorCode The error code + * @return A UEnumeration object for getting all aliases that are recognized + * by a standard. If any of the parameters are invalid, NULL + * is returned. + * @see ucnv_getStandardName + * @see uenum_close + * @see uenum_next + * @stable ICU 2.2 + */ +U_STABLE UEnumeration * U_EXPORT2 +ucnv_openStandardNames(const char *convName, + const char *standard, + UErrorCode *pErrorCode); + +/** + * Gives the number of standards associated to converter names. + * @return number of standards + * @stable ICU 2.0 + */ +U_STABLE uint16_t U_EXPORT2 +ucnv_countStandards(void); + +/** + * Gives the name of the standard at given index of standard list. + * @param n index in standard list + * @param pErrorCode result of operation + * @return returns the name of the standard at given index. Owned by the library. + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode); + +/** + * Returns a standard name for a given converter name. + * <p> + * Example alias table:<br> + * conv alias1 { STANDARD1 } alias2 { STANDARD1* } + * <p> + * Result of ucnv_getStandardName("conv", "STANDARD1") from example + * alias table:<br> + * <b>"alias2"</b> + * + * @param name original converter name + * @param standard name of the standard governing the names; MIME and IANA + * are such standards + * @param pErrorCode result of operation + * @return returns the standard converter name; + * if a standard converter name cannot be determined, + * then <code>NULL</code> is returned. Owned by the library. + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode); + +/** + * This function will return the internal canonical converter name of the + * tagged alias. This is the opposite of ucnv_openStandardNames, which + * returns the tagged alias given the canonical name. + * <p> + * Example alias table:<br> + * conv alias1 { STANDARD1 } alias2 { STANDARD1* } + * <p> + * Result of ucnv_getStandardName("alias1", "STANDARD1") from example + * alias table:<br> + * <b>"conv"</b> + * + * @return returns the canonical converter name; + * if a standard or alias name cannot be determined, + * then <code>NULL</code> is returned. The returned string is + * owned by the library. + * @see ucnv_getStandardName + * @stable ICU 2.4 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode); + +/** + * Returns the current default converter name. If you want to open + * a default converter, you do not need to use this function. + * It is faster if you pass a NULL argument to ucnv_open the + * default converter. + * + * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function + * always returns "UTF-8". + * + * @return returns the current default converter name. + * Storage owned by the library + * @see ucnv_setDefaultName + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +ucnv_getDefaultName(void); + +#ifndef U_HIDE_SYSTEM_API +/** + * This function is not thread safe. DO NOT call this function when ANY ICU + * function is being used from more than one thread! This function sets the + * current default converter name. If this function needs to be called, it + * should be called during application initialization. Most of the time, the + * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument + * is sufficient for your application. + * + * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function + * does nothing. + * + * @param name the converter name to be the default (must be known by ICU). + * @see ucnv_getDefaultName + * @system + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_setDefaultName(const char *name); +#endif /* U_HIDE_SYSTEM_API */ + +/** + * Fixes the backslash character mismapping. For example, in SJIS, the backslash + * character in the ASCII portion is also used to represent the yen currency sign. + * When mapping from Unicode character 0x005C, it's unclear whether to map the + * character back to yen or backslash in SJIS. This function will take the input + * buffer and replace all the yen sign characters with backslash. This is necessary + * when the user tries to open a file with the input buffer on Windows. + * This function will test the converter to see whether such mapping is + * required. You can sometimes avoid using this function by using the correct version + * of Shift-JIS. + * + * @param cnv The converter representing the target codepage. + * @param source the input buffer to be fixed + * @param sourceLen the length of the input buffer + * @see ucnv_isAmbiguous + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen); + +/** + * Determines if the converter contains ambiguous mappings of the same + * character or not. + * @param cnv the converter to be tested + * @return TRUE if the converter contains ambiguous mapping of the same + * character, FALSE otherwise. + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucnv_isAmbiguous(const UConverter *cnv); + +/** + * Sets the converter to use fallback mappings or not. + * Regardless of this flag, the converter will always use + * fallbacks from Unicode Private Use code points, as well as + * reverse fallbacks (to Unicode). + * For details see ".ucm File Format" + * in the Conversion Data chapter of the ICU User Guide: + * http://www.icu-project.org/userguide/conversion-data.html#ucmformat + * + * @param cnv The converter to set the fallback mapping usage on. + * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback + * mapping, FALSE otherwise. + * @stable ICU 2.0 + * @see ucnv_usesFallback + */ +U_STABLE void U_EXPORT2 +ucnv_setFallback(UConverter *cnv, UBool usesFallback); + +/** + * Determines if the converter uses fallback mappings or not. + * This flag has restrictions, see ucnv_setFallback(). + * + * @param cnv The converter to be tested + * @return TRUE if the converter uses fallback, FALSE otherwise. + * @stable ICU 2.0 + * @see ucnv_setFallback + */ +U_STABLE UBool U_EXPORT2 +ucnv_usesFallback(const UConverter *cnv); + +/** + * Detects Unicode signature byte sequences at the start of the byte stream + * and returns the charset name of the indicated Unicode charset. + * NULL is returned when no Unicode signature is recognized. + * The number of bytes in the signature is output as well. + * + * The caller can ucnv_open() a converter using the charset name. + * The first code unit (UChar) from the start of the stream will be U+FEFF + * (the Unicode BOM/signature character) and can usually be ignored. + * + * For most Unicode charsets it is also possible to ignore the indicated + * number of initial stream bytes and start converting after them. + * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which + * this will not work. Therefore, it is best to ignore the first output UChar + * instead of the input signature bytes. + * <p> + * Usage: + * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature + * + * @param source The source string in which the signature should be detected. + * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte. + * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature + * of the detected UTF. 0 if not detected. + * Can be a NULL pointer. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return The name of the encoding detected. NULL if encoding is not detected. + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +ucnv_detectUnicodeSignature(const char* source, + int32_t sourceLength, + int32_t *signatureLength, + UErrorCode *pErrorCode); + +/** + * Returns the number of UChars held in the converter's internal state + * because more input is needed for completing the conversion. This function is + * useful for mapping semantics of ICU's converter interface to those of iconv, + * and this information is not needed for normal conversion. + * @param cnv The converter in which the input is held + * @param status ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return The number of UChars in the state. -1 if an error is encountered. + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status); + +/** + * Returns the number of chars held in the converter's internal state + * because more input is needed for completing the conversion. This function is + * useful for mapping semantics of ICU's converter interface to those of iconv, + * and this information is not needed for normal conversion. + * @param cnv The converter in which the input is held as internal state + * @param status ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return The number of chars in the state. -1 if an error is encountered. + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status); + +/** + * Returns whether or not the charset of the converter has a fixed number of bytes + * per charset character. + * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS. + * Another example is UTF-32 which is always 4 bytes per character. + * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit + * but a UTF-32 converter encodes each code point with 4 bytes. + * Note: This method is not intended to be used to determine whether the charset has a + * fixed ratio of bytes to Unicode codes <i>units</i> for any particular Unicode encoding form. + * FALSE is returned with the UErrorCode if error occurs or cnv is NULL. + * @param cnv The converter to be tested + * @param status ICU error code in/out paramter + * @return TRUE if the converter is fixed-width + * @stable ICU 4.8 + */ +U_STABLE UBool U_EXPORT2 +ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status); + +#endif + +#endif +/*_UCNV*/ diff --git a/Source/WTF/icu/unicode/ucnv_err.h b/Source/WTF/icu/unicode/ucnv_err.h new file mode 100644 index 000000000..e092e95f8 --- /dev/null +++ b/Source/WTF/icu/unicode/ucnv_err.h @@ -0,0 +1,463 @@ +/* +********************************************************************** +* Copyright (C) 1999-2009, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** + * + * + * ucnv_err.h: + */ + +/** + * \file + * \brief C UConverter predefined error callbacks + * + * <h2>Error Behaviour Functions</h2> + * Defines some error behaviour functions called by ucnv_{from,to}Unicode + * These are provided as part of ICU and many are stable, but they + * can also be considered only as an example of what can be done with + * callbacks. You may of course write your own. + * + * If you want to write your own, you may also find the functions from + * ucnv_cb.h useful when writing your own callbacks. + * + * These functions, although public, should NEVER be called directly. + * They should be used as parameters to the ucnv_setFromUCallback + * and ucnv_setToUCallback functions, to set the behaviour of a converter + * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. + * + * usage example: 'STOP' doesn't need any context, but newContext + * could be set to something other than 'NULL' if needed. The available + * contexts in this header can modify the default behavior of the callback. + * + * \code + * UErrorCode err = U_ZERO_ERROR; + * UConverter *myConverter = ucnv_open("ibm-949", &err); + * const void *oldContext; + * UConverterFromUCallback oldAction; + * + * + * if (U_SUCCESS(err)) + * { + * ucnv_setFromUCallBack(myConverter, + * UCNV_FROM_U_CALLBACK_STOP, + * NULL, + * &oldAction, + * &oldContext, + * &status); + * } + * \endcode + * + * The code above tells "myConverter" to stop when it encounters an + * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from + * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, + * and ucnv_setToUCallBack would need to be called in order to change + * that behavior too. + * + * Here is an example with a context: + * + * \code + * UErrorCode err = U_ZERO_ERROR; + * UConverter *myConverter = ucnv_open("ibm-949", &err); + * const void *oldContext; + * UConverterFromUCallback oldAction; + * + * + * if (U_SUCCESS(err)) + * { + * ucnv_setToUCallBack(myConverter, + * UCNV_TO_U_CALLBACK_SUBSTITUTE, + * UCNV_SUB_STOP_ON_ILLEGAL, + * &oldAction, + * &oldContext, + * &status); + * } + * \endcode + * + * The code above tells "myConverter" to stop when it encounters an + * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from + * Codepage -> Unicode. Any unmapped and legal characters will be + * substituted to be the default substitution character. + */ + +#ifndef UCNV_ERR_H +#define UCNV_ERR_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +/** Forward declaring the UConverter structure. @stable ICU 2.0 */ +struct UConverter; + +/** @stable ICU 2.0 */ +typedef struct UConverter UConverter; + +/** + * FROM_U, TO_U context options for sub callback + * @stable ICU 2.0 + */ +#define UCNV_SUB_STOP_ON_ILLEGAL "i" + +/** + * FROM_U, TO_U context options for skip callback + * @stable ICU 2.0 + */ +#define UCNV_SKIP_STOP_ON_ILLEGAL "i" + +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_ICU NULL +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_JAVA "J" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) + * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_C "C" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_XML_DEC "D" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly + * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_XML_HEX "X" +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) + * @stable ICU 2.0 + */ +#define UCNV_ESCAPE_UNICODE "U" + +/** + * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is, + * a backslash, 1..6 hex digits, and a space) + * @stable ICU 4.0 + */ +#define UCNV_ESCAPE_CSS2 "S" + +/** + * The process condition code to be used with the callbacks. + * Codes which are greater than UCNV_IRREGULAR should be + * passed on to any chained callbacks. + * @stable ICU 2.0 + */ +typedef enum { + UCNV_UNASSIGNED = 0, /**< The code point is unassigned. + The error code U_INVALID_CHAR_FOUND will be set. */ + UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, + \\x81\\x2E is illegal in SJIS because \\x2E + is not a valid trail byte for the \\x81 + lead byte. + Also, starting with Unicode 3.0.1, non-shortest byte sequences + in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) + are also illegal, not just irregular. + The error code U_ILLEGAL_CHAR_FOUND will be set. */ + UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in + the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF + are irregular UTF-8 byte sequences for single surrogate + code points. + The error code U_INVALID_CHAR_FOUND will be set. */ + UCNV_RESET = 3, /**< The callback is called with this reason when a + 'reset' has occured. Callback should reset all + state. */ + UCNV_CLOSE = 4, /**< Called when the converter is closed. The + callback should release any allocated memory.*/ + UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the + converter. the pointer available as the + 'context' is an alias to the original converters' + context pointer. If the context must be owned + by the new converter, the callback must clone + the data and call ucnv_setFromUCallback + (or setToUCallback) with the correct pointer. + @stable ICU 2.2 + */ +} UConverterCallbackReason; + + +/** + * The structure for the fromUnicode callback function parameter. + * @stable ICU 2.0 + */ +typedef struct { + uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ + UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ + UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ + const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ + const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ + char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ + const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ +} UConverterFromUnicodeArgs; + + +/** + * The structure for the toUnicode callback function parameter. + * @stable ICU 2.0 + */ +typedef struct { + uint16_t size; /**< The size of this struct @stable ICU 2.0 */ + UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ + UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ + const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ + const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ + UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ + const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ + int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ +} UConverterToUnicodeArgs; + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * + * @param context Pointer to the callback's private data + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err This should always be set to a failure status prior to calling. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * + * @param context Pointer to the callback's private data + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err This should always be set to a failure status prior to calling. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback skips any ILLEGAL_SEQUENCE, or + * skips only UNASSINGED_SEQUENCE depending on the context parameter + * simply ignoring those characters. + * + * @param context The function currently recognizes the callback options: + * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Skips any ILLEGAL_SEQUENCE + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or + * UNASSIGNED_SEQUENCE depending on context parameter, with the + * current substitution string for the converter. This is the default + * callback. + * + * @param context The function currently recognizes the callback options: + * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Substitutes any ILLEGAL_SEQUENCE + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @see ucnv_setSubstChars + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the + * hexadecimal representation of the illegal codepoints + * + * @param context The function currently recognizes the callback options: + * <ul> + * <li>UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). + * In the Event the converter doesn't support the characters {%,U}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * %UD84D%UDC56</li> + * <li>UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). + * In the Event the converter doesn't support the characters {\,u}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * \\uD84D\\uDC56</li> + * <li>UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal + * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). + * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * \\U00023456</li> + * <li>UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal + * representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly. + * In the Event the converter doesn't support the characters {&,#}[0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * &#144470; and Zero padding is ignored.</li> + * <li>UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal + * representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly. + * In the Event the converter doesn't support the characters {&,#,x}[0-9], + * it will substitute the illegal sequence with the substitution characters. + * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as + * \htmlonly&#x23456;\endhtmlonly</li> + * </ul> + * @param fromUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( + const void *context, + UConverterFromUnicodeArgs *fromUArgs, + const UChar* codeUnits, + int32_t length, + UChar32 codePoint, + UConverterCallbackReason reason, + UErrorCode * err); + + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback skips any ILLEGAL_SEQUENCE, or + * skips only UNASSINGED_SEQUENCE depending on the context parameter + * simply ignoring those characters. + * + * @param context The function currently recognizes the callback options: + * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Skips any ILLEGAL_SEQUENCE + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or + * UNASSIGNED_SEQUENCE depending on context parameter, with the + * Unicode substitution character, U+FFFD. + * + * @param context The function currently recognizes the callback options: + * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, + * returning the error code back to the caller immediately. + * NULL: Substitutes any ILLEGAL_SEQUENCE + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +/** + * DO NOT CALL THIS FUNCTION DIRECTLY! + * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the + * hexadecimal representation of the illegal bytes + * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). + * + * @param context This function currently recognizes the callback options: + * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, + * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. + * @param toUArgs Information about the conversion in progress + * @param codeUnits Points to 'length' bytes of the concerned codepage sequence + * @param length Size (in bytes) of the concerned codepage sequence + * @param reason Defines the reason the callback was invoked + * @param err Return value will be set to success if the callback was handled, + * otherwise this value will be set to a failure status. + * @stable ICU 2.0 + */ + +U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( + const void *context, + UConverterToUnicodeArgs *toUArgs, + const char* codeUnits, + int32_t length, + UConverterCallbackReason reason, + UErrorCode * err); + +#endif + +#endif + +/*UCNV_ERR_H*/ diff --git a/Source/WTF/icu/unicode/ucol.h b/Source/WTF/icu/unicode/ucol.h new file mode 100644 index 000000000..5a459b52a --- /dev/null +++ b/Source/WTF/icu/unicode/ucol.h @@ -0,0 +1,1433 @@ +/* +******************************************************************************* +* Copyright (c) 1996-2013, International Business Machines Corporation and others. +* All Rights Reserved. +******************************************************************************* +*/ + +#ifndef UCOL_H +#define UCOL_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "unicode/unorm.h" +#include "unicode/localpointer.h" +#include "unicode/parseerr.h" +#include "unicode/uloc.h" +#include "unicode/uset.h" +#include "unicode/uscript.h" + +/** + * \file + * \brief C API: Collator + * + * <h2> Collator C API </h2> + * + * The C API for Collator performs locale-sensitive + * string comparison. You use this service to build + * searching and sorting routines for natural language text. + * <em>Important: </em>The ICU collation service has been reimplemented + * in order to achieve better performance and UCA compliance. + * For details, see the + * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> + * collation design document</a>. + * <p> + * For more information about the collation service see + * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. + * <p> + * Collation service provides correct sorting orders for most locales supported in ICU. + * If specific data for a locale is not available, the orders eventually falls back + * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. + * <p> + * Sort ordering may be customized by providing your own set of rules. For more on + * this subject see the + * <a href="http://icu-project.org/userguide/Collate_Customization.html"> + * Collation customization</a> section of the users guide. + * <p> + * @see UCollationResult + * @see UNormalizationMode + * @see UCollationStrength + * @see UCollationElements + */ + +/** A collator. +* For usage in C programs. +*/ +struct UCollator; +/** structure representing a collator object instance + * @stable ICU 2.0 + */ +typedef struct UCollator UCollator; + + +/** + * UCOL_LESS is returned if source string is compared to be less than target + * string in the ucol_strcoll() method. + * UCOL_EQUAL is returned if source string is compared to be equal to target + * string in the ucol_strcoll() method. + * UCOL_GREATER is returned if source string is compared to be greater than + * target string in the ucol_strcoll() method. + * @see ucol_strcoll() + * <p> + * Possible values for a comparison result + * @stable ICU 2.0 + */ +typedef enum { + /** string a == string b */ + UCOL_EQUAL = 0, + /** string a > string b */ + UCOL_GREATER = 1, + /** string a < string b */ + UCOL_LESS = -1 +} UCollationResult ; + + +/** Enum containing attribute values for controling collation behavior. + * Here are all the allowable values. Not every attribute can take every value. The only + * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined + * value for that locale + * @stable ICU 2.0 + */ +typedef enum { + /** accepted by most attributes */ + UCOL_DEFAULT = -1, + + /** Primary collation strength */ + UCOL_PRIMARY = 0, + /** Secondary collation strength */ + UCOL_SECONDARY = 1, + /** Tertiary collation strength */ + UCOL_TERTIARY = 2, + /** Default collation strength */ + UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, + UCOL_CE_STRENGTH_LIMIT, + /** Quaternary collation strength */ + UCOL_QUATERNARY=3, + /** Identical collation strength */ + UCOL_IDENTICAL=15, + UCOL_STRENGTH_LIMIT, + + /** Turn the feature off - works for UCOL_FRENCH_COLLATION, + UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE + & UCOL_DECOMPOSITION_MODE*/ + UCOL_OFF = 16, + /** Turn the feature on - works for UCOL_FRENCH_COLLATION, + UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE + & UCOL_DECOMPOSITION_MODE*/ + UCOL_ON = 17, + + /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ + UCOL_SHIFTED = 20, + /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */ + UCOL_NON_IGNORABLE = 21, + + /** Valid for UCOL_CASE_FIRST - + lower case sorts before upper case */ + UCOL_LOWER_FIRST = 24, + /** upper case sorts before lower case */ + UCOL_UPPER_FIRST = 25, + + UCOL_ATTRIBUTE_VALUE_COUNT + +} UColAttributeValue; + +/** + * Enum containing the codes for reordering segments of the collation table that are not script + * codes. These reordering codes are to be used in conjunction with the script codes. + * @see ucol_getReorderCodes + * @see ucol_setReorderCodes + * @see ucol_getEquivalentReorderCodes + * @see UScriptCode + * @stable ICU 4.8 + */ + typedef enum { + /** + * A special reordering code that is used to specify the default + * reordering codes for a locale. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_DEFAULT = -1, + /** + * A special reordering code that is used to specify no reordering codes. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, + /** + * A special reordering code that is used to specify all other codes used for + * reordering except for the codes lised as UColReorderCode values and those + * listed explicitly in a reordering. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, + /** + * Characters with the space property. + * This is equivalent to the rule value "space". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_SPACE = 0x1000, + /** + * The first entry in the enumeration of reordering groups. This is intended for use in + * range checking and enumeration of the reorder codes. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, + /** + * Characters with the punctuation property. + * This is equivalent to the rule value "punct". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_PUNCTUATION = 0x1001, + /** + * Characters with the symbol property. + * This is equivalent to the rule value "symbol". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_SYMBOL = 0x1002, + /** + * Characters with the currency property. + * This is equivalent to the rule value "currency". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_CURRENCY = 0x1003, + /** + * Characters with the digit property. + * This is equivalent to the rule value "digit". + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_DIGIT = 0x1004, + /** + * The limit of the reorder codes. This is intended for use in range checking + * and enumeration of the reorder codes. + * @stable ICU 4.8 + */ + UCOL_REORDER_CODE_LIMIT = 0x1005 +} UColReorderCode; + +/** + * Base letter represents a primary difference. Set comparison + * level to UCOL_PRIMARY to ignore secondary and tertiary differences. + * Use this to set the strength of a Collator object. + * Example of primary difference, "abc" < "abd" + * + * Diacritical differences on the same base letter represent a secondary + * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary + * differences. Use this to set the strength of a Collator object. + * Example of secondary difference, "ä" >> "a". + * + * Uppercase and lowercase versions of the same character represents a + * tertiary difference. Set comparison level to UCOL_TERTIARY to include + * all comparison differences. Use this to set the strength of a Collator + * object. + * Example of tertiary difference, "abc" <<< "ABC". + * + * Two characters are considered "identical" when they have the same + * unicode spellings. UCOL_IDENTICAL. + * For example, "ä" == "ä". + * + * UCollationStrength is also used to determine the strength of sort keys + * generated from UCollator objects + * These values can be now found in the UColAttributeValue enum. + * @stable ICU 2.0 + **/ +typedef UColAttributeValue UCollationStrength; + +/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT + * value, as well as the values specific to each one. + * @stable ICU 2.0 + */ +typedef enum { + /** Attribute for direction of secondary weights - used in Canadian French. + * Acceptable values are UCOL_ON, which results in secondary weights + * being considered backwards and UCOL_OFF which treats secondary + * weights in the order they appear. + * @stable ICU 2.0 + */ + UCOL_FRENCH_COLLATION, + /** Attribute for handling variable elements. + * Acceptable values are UCOL_NON_IGNORABLE (default) + * which treats all the codepoints with non-ignorable + * primary weights in the same way, + * and UCOL_SHIFTED which causes codepoints with primary + * weights that are equal or below the variable top value + * to be ignored on primary level and moved to the quaternary + * level. + * @stable ICU 2.0 + */ + UCOL_ALTERNATE_HANDLING, + /** Controls the ordering of upper and lower case letters. + * Acceptable values are UCOL_OFF (default), which orders + * upper and lower case letters in accordance to their tertiary + * weights, UCOL_UPPER_FIRST which forces upper case letters to + * sort before lower case letters, and UCOL_LOWER_FIRST which does + * the opposite. + * @stable ICU 2.0 + */ + UCOL_CASE_FIRST, + /** Controls whether an extra case level (positioned before the third + * level) is generated or not. Acceptable values are UCOL_OFF (default), + * when case level is not generated, and UCOL_ON which causes the case + * level to be generated. Contents of the case level are affected by + * the value of UCOL_CASE_FIRST attribute. A simple way to ignore + * accent differences in a string is to set the strength to UCOL_PRIMARY + * and enable case level. + * @stable ICU 2.0 + */ + UCOL_CASE_LEVEL, + /** Controls whether the normalization check and necessary normalizations + * are performed. When set to UCOL_OFF (default) no normalization check + * is performed. The correctness of the result is guaranteed only if the + * input data is in so-called FCD form (see users manual for more info). + * When set to UCOL_ON, an incremental check is performed to see whether + * the input data is in the FCD form. If the data is not in the FCD form, + * incremental NFD normalization is performed. + * @stable ICU 2.0 + */ + UCOL_NORMALIZATION_MODE, + /** An alias for UCOL_NORMALIZATION_MODE attribute. + * @stable ICU 2.0 + */ + UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, + /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, + * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength + * for most locales (except Japanese) is tertiary. Quaternary strength + * is useful when combined with shifted setting for alternate handling + * attribute and for JIS x 4061 collation, when it is used to distinguish + * between Katakana and Hiragana (this is achieved by setting the + * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level + * is affected only by the number of non ignorable code points in + * the string. Identical strength is rarely useful, as it amounts + * to codepoints of the NFD form of the string. + * @stable ICU 2.0 + */ + UCOL_STRENGTH, +#ifndef U_HIDE_DEPRECATED_API + /** When turned on, this attribute positions Hiragana before all + * non-ignorables on quaternary level This is a sneaky way to produce JIS + * sort order. + * + * This attribute is an implementation detail of the CLDR Japanese tailoring. + * The implementation might change to use a different mechanism + * to achieve the same Japanese sort order. + * Since ICU 50, this attribute is not settable any more via API functions. + * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation. + */ + UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, +#endif /* U_HIDE_DEPRECATED_API */ + /** When turned on, this attribute generates a collation key + * for the numeric value of substrings of digits. + * This is a way to get '100' to sort AFTER '2'. Note that the longest + * digit substring that can be treated as a single collation element is + * 254 digits (not counting leading zeros). If a digit substring is + * longer than that, the digits beyond the limit will be treated as a + * separate digit substring associated with a separate collation element. + * @stable ICU 2.8 + */ + UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, + /** + * The number of UColAttribute constants. + * @stable ICU 2.0 + */ + UCOL_ATTRIBUTE_COUNT +} UColAttribute; + +/** Options for retrieving the rule string + * @stable ICU 2.0 + */ +typedef enum { + /** + * Retrieves the tailoring rules only. + * Same as calling the version of getRules() without UColRuleOption. + * @stable ICU 2.0 + */ + UCOL_TAILORING_ONLY, + /** + * Retrieves the "UCA rules" concatenated with the tailoring rules. + * The "UCA rules" are an <i>approximation</i> of the root collator's sort order. + * They are almost never used or useful at runtime and can be removed from the data. + * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales + * @stable ICU 2.0 + */ + UCOL_FULL_RULES +} UColRuleOption ; + +/** + * Open a UCollator for comparing strings. + * The UCollator pointer is used in all the calls to the Collation + * service. After finished, collator must be disposed of by calling + * {@link #ucol_close }. + * @param loc The locale containing the required collation rules. + * Special values for locales can be passed in - + * if NULL is passed for the locale, the default locale + * collation rules will be used. If empty string ("") or + * "root" are passed, UCA rules will be used. + * @param status A pointer to an UErrorCode to receive any errors + * @return A pointer to a UCollator, or 0 if an error occurred. + * @see ucol_openRules + * @see ucol_safeClone + * @see ucol_close + * @stable ICU 2.0 + */ +U_STABLE UCollator* U_EXPORT2 +ucol_open(const char *loc, UErrorCode *status); + +/** + * Produce an UCollator instance according to the rules supplied. + * The rules are used to change the default ordering, defined in the + * UCA in a process called tailoring. The resulting UCollator pointer + * can be used in the same way as the one obtained by {@link #ucol_strcoll }. + * @param rules A string describing the collation rules. For the syntax + * of the rules please see users guide. + * @param rulesLength The length of rules, or -1 if null-terminated. + * @param normalizationMode The normalization mode: One of + * UCOL_OFF (expect the text to not need normalization), + * UCOL_ON (normalize), or + * UCOL_DEFAULT (set the mode according to the rules) + * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, + * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules. + * @param parseError A pointer to UParseError to recieve information about errors + * occurred during parsing. This argument can currently be set + * to NULL, but at users own risk. Please provide a real structure. + * @param status A pointer to an UErrorCode to receive any errors + * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case + * of error - please use status argument to check for errors. + * @see ucol_open + * @see ucol_safeClone + * @see ucol_close + * @stable ICU 2.0 + */ +U_STABLE UCollator* U_EXPORT2 +ucol_openRules( const UChar *rules, + int32_t rulesLength, + UColAttributeValue normalizationMode, + UCollationStrength strength, + UParseError *parseError, + UErrorCode *status); + +/** + * Open a collator defined by a short form string. + * The structure and the syntax of the string is defined in the "Naming collators" + * section of the users guide: + * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators + * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final + * strength will be 3. 3066bis locale overrides individual locale parts. + * The call to this function is equivalent to a call to ucol_open, followed by a + * series of calls to ucol_setAttribute and ucol_setVariableTop. + * @param definition A short string containing a locale and a set of attributes. + * Attributes not explicitly mentioned are left at the default + * state for a locale. + * @param parseError if not NULL, structure that will get filled with error's pre + * and post context in case of error. + * @param forceDefaults if FALSE, the settings that are the same as the collator + * default settings will not be applied (for example, setting + * French secondary on a French collator would not be executed). + * If TRUE, all the settings will be applied regardless of the + * collator default value. If the definition + * strings are to be cached, should be set to FALSE. + * @param status Error code. Apart from regular error conditions connected to + * instantiating collators (like out of memory or similar), this + * API will return an error if an invalid attribute or attribute/value + * combination is specified. + * @return A pointer to a UCollator or 0 if an error occured (including an + * invalid attribute). + * @see ucol_open + * @see ucol_setAttribute + * @see ucol_setVariableTop + * @see ucol_getShortDefinitionString + * @see ucol_normalizeShortDefinitionString + * @stable ICU 3.0 + * + */ +U_STABLE UCollator* U_EXPORT2 +ucol_openFromShortString( const char *definition, + UBool forceDefaults, + UParseError *parseError, + UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Get a set containing the contractions defined by the collator. The set includes + * both the UCA contractions and the contractions defined by the collator. This set + * will contain only strings. If a tailoring explicitly suppresses contractions from + * the UCA (like Russian), removed contractions will not be in the resulting set. + * @param coll collator + * @param conts the set to hold the result. It gets emptied before + * contractions are added. + * @param status to hold the error code + * @return the size of the contraction set + * + * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead + */ +U_DEPRECATED int32_t U_EXPORT2 +ucol_getContractions( const UCollator *coll, + USet *conts, + UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Get a set containing the expansions defined by the collator. The set includes + * both the UCA expansions and the expansions defined by the tailoring + * @param coll collator + * @param contractions if not NULL, the set to hold the contractions + * @param expansions if not NULL, the set to hold the expansions + * @param addPrefixes add the prefix contextual elements to contractions + * @param status to hold the error code + * + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +ucol_getContractionsAndExpansions( const UCollator *coll, + USet *contractions, USet *expansions, + UBool addPrefixes, UErrorCode *status); + +/** + * Close a UCollator. + * Once closed, a UCollator should not be used. Every open collator should + * be closed. Otherwise, a memory leak will result. + * @param coll The UCollator to close. + * @see ucol_open + * @see ucol_openRules + * @see ucol_safeClone + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_close(UCollator *coll); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCollatorPointer + * "Smart pointer" class, closes a UCollator via ucol_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); + +U_NAMESPACE_END + +#endif + +/** + * Compare two strings. + * The strings will be compared using the options already specified. + * @param coll The UCollator containing the comparison rules. + * @param source The source string. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param target The target string. + * @param targetLength The length of target, or -1 if null-terminated. + * @return The result of comparing the strings; one of UCOL_EQUAL, + * UCOL_GREATER, UCOL_LESS + * @see ucol_greater + * @see ucol_greaterOrEqual + * @see ucol_equal + * @stable ICU 2.0 + */ +U_STABLE UCollationResult U_EXPORT2 +ucol_strcoll( const UCollator *coll, + const UChar *source, + int32_t sourceLength, + const UChar *target, + int32_t targetLength); + +/** +* Compare two strings in UTF-8. +* The strings will be compared using the options already specified. +* Note: When input string contains malformed a UTF-8 byte sequence, +* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). +* @param coll The UCollator containing the comparison rules. +* @param source The source UTF-8 string. +* @param sourceLength The length of source, or -1 if null-terminated. +* @param target The target UTF-8 string. +* @param targetLength The length of target, or -1 if null-terminated. +* @param status A pointer to an UErrorCode to receive any errors +* @return The result of comparing the strings; one of UCOL_EQUAL, +* UCOL_GREATER, UCOL_LESS +* @see ucol_greater +* @see ucol_greaterOrEqual +* @see ucol_equal +* @stable ICU 50 +*/ +U_STABLE UCollationResult U_EXPORT2 +ucol_strcollUTF8( + const UCollator *coll, + const char *source, + int32_t sourceLength, + const char *target, + int32_t targetLength, + UErrorCode *status); + +/** + * Determine if one string is greater than another. + * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER + * @param coll The UCollator containing the comparison rules. + * @param source The source string. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param target The target string. + * @param targetLength The length of target, or -1 if null-terminated. + * @return TRUE if source is greater than target, FALSE otherwise. + * @see ucol_strcoll + * @see ucol_greaterOrEqual + * @see ucol_equal + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucol_greater(const UCollator *coll, + const UChar *source, int32_t sourceLength, + const UChar *target, int32_t targetLength); + +/** + * Determine if one string is greater than or equal to another. + * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS + * @param coll The UCollator containing the comparison rules. + * @param source The source string. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param target The target string. + * @param targetLength The length of target, or -1 if null-terminated. + * @return TRUE if source is greater than or equal to target, FALSE otherwise. + * @see ucol_strcoll + * @see ucol_greater + * @see ucol_equal + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucol_greaterOrEqual(const UCollator *coll, + const UChar *source, int32_t sourceLength, + const UChar *target, int32_t targetLength); + +/** + * Compare two strings for equality. + * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL + * @param coll The UCollator containing the comparison rules. + * @param source The source string. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param target The target string. + * @param targetLength The length of target, or -1 if null-terminated. + * @return TRUE if source is equal to target, FALSE otherwise + * @see ucol_strcoll + * @see ucol_greater + * @see ucol_greaterOrEqual + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +ucol_equal(const UCollator *coll, + const UChar *source, int32_t sourceLength, + const UChar *target, int32_t targetLength); + +/** + * Compare two UTF-8 encoded trings. + * The strings will be compared using the options already specified. + * @param coll The UCollator containing the comparison rules. + * @param sIter The source string iterator. + * @param tIter The target string iterator. + * @return The result of comparing the strings; one of UCOL_EQUAL, + * UCOL_GREATER, UCOL_LESS + * @param status A pointer to an UErrorCode to receive any errors + * @see ucol_strcoll + * @stable ICU 2.6 + */ +U_STABLE UCollationResult U_EXPORT2 +ucol_strcollIter( const UCollator *coll, + UCharIterator *sIter, + UCharIterator *tIter, + UErrorCode *status); + +/** + * Get the collation strength used in a UCollator. + * The strength influences how strings are compared. + * @param coll The UCollator to query. + * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, + * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL + * @see ucol_setStrength + * @stable ICU 2.0 + */ +U_STABLE UCollationStrength U_EXPORT2 +ucol_getStrength(const UCollator *coll); + +/** + * Set the collation strength used in a UCollator. + * The strength influences how strings are compared. + * @param coll The UCollator to set. + * @param strength The desired collation strength; one of UCOL_PRIMARY, + * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT + * @see ucol_getStrength + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_setStrength(UCollator *coll, + UCollationStrength strength); + +/** + * Retrieves the reordering codes for this collator. + * These reordering codes are a combination of UScript codes and UColReorderCode entries. + * @param coll The UCollator to query. + * @param dest The array to fill with the script ordering. + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function + * will only return the length of the result without writing any of the result string (pre-flighting). + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a + * failure before the function call. + * @return The number of reordering codes written to the dest array. + * @see ucol_setReorderCodes + * @see ucol_getEquivalentReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getReorderCodes(const UCollator* coll, + int32_t* dest, + int32_t destCapacity, + UErrorCode *pErrorCode); +/** + * Sets the reordering codes for this collator. + * Collation reordering allows scripts and some other defined blocks of characters + * to be moved relative to each other as a block. This reordering is done on top of + * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed + * at the start and/or the end of the collation order. These groups are specified using + * UScript codes and UColReorderCode entries. + * <p>By default, reordering codes specified for the start of the order are placed in the + * order given after a group of "special" non-script blocks. These special groups of characters + * are space, punctuation, symbol, currency, and digit. These special groups are represented with + * UColReorderCode entries. Script groups can be intermingled with + * these special non-script blocks if those special blocks are explicitly specified in the reordering. + * <p>The special code OTHERS stands for any script that is not explicitly + * mentioned in the list of reordering codes given. Anything that is after OTHERS + * will go at the very end of the reordering in the order given. + * <p>The special reorder code DEFAULT will reset the reordering for this collator + * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that + * was specified when this collator was created from resource data or from rules. The + * DEFAULT code <b>must</b> be the sole code supplied when it used. If not + * that will result in an U_ILLEGAL_ARGUMENT_ERROR being set. + * <p>The special reorder code NONE will remove any reordering for this collator. + * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The + * NONE code <b>must</b> be the sole code supplied when it used. + * @param coll The UCollator to set. + * @param reorderCodes An array of script codes in the new order. This can be NULL if the + * length is also set to 0. An empty array will clear any reordering codes on the collator. + * @param reorderCodesLength The length of reorderCodes. + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a + * failure before the function call. + * @see ucol_getReorderCodes + * @see ucol_getEquivalentReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ +U_STABLE void U_EXPORT2 +ucol_setReorderCodes(UCollator* coll, + const int32_t* reorderCodes, + int32_t reorderCodesLength, + UErrorCode *pErrorCode); + +/** + * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder + * codes will be grouped and must reorder together. + * @param reorderCode The reorder code to determine equivalence for. + * @param dest The array to fill with the script ordering. + * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function + * will only return the length of the result without writing any of the result string (pre-flighting). + * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate + * a failure before the function call. + * @return The number of reordering codes written to the dest array. + * @see ucol_setReorderCodes + * @see ucol_getReorderCodes + * @see UScriptCode + * @see UColReorderCode + * @stable ICU 4.8 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getEquivalentReorderCodes(int32_t reorderCode, + int32_t* dest, + int32_t destCapacity, + UErrorCode *pErrorCode); + +/** + * Get the display name for a UCollator. + * The display name is suitable for presentation to a user. + * @param objLoc The locale of the collator in question. + * @param dispLoc The locale for display. + * @param result A pointer to a buffer to receive the attribute. + * @param resultLength The maximum size of result. + * @param status A pointer to an UErrorCode to receive any errors + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getDisplayName( const char *objLoc, + const char *dispLoc, + UChar *result, + int32_t resultLength, + UErrorCode *status); + +/** + * Get a locale for which collation rules are available. + * A UCollator in a locale returned by this function will perform the correct + * collation for the locale. + * @param localeIndex The index of the desired locale. + * @return A locale for which collation rules are available, or 0 if none. + * @see ucol_countAvailable + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +ucol_getAvailable(int32_t localeIndex); + +/** + * Determine how many locales have collation rules available. + * This function is most useful as determining the loop ending condition for + * calls to {@link #ucol_getAvailable }. + * @return The number of locales for which collation rules are available. + * @see ucol_getAvailable + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_countAvailable(void); + +#if !UCONFIG_NO_SERVICE +/** + * Create a string enumerator of all locales for which a valid + * collator may be opened. + * @param status input-output error code + * @return a string enumeration over locale strings. The caller is + * responsible for closing the result. + * @stable ICU 3.0 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucol_openAvailableLocales(UErrorCode *status); +#endif + +/** + * Create a string enumerator of all possible keywords that are relevant to + * collation. At this point, the only recognized keyword for this + * service is "collation". + * @param status input-output error code + * @return a string enumeration over locale strings. The caller is + * responsible for closing the result. + * @stable ICU 3.0 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucol_getKeywords(UErrorCode *status); + +/** + * Given a keyword, create a string enumeration of all values + * for that keyword that are currently in use. + * @param keyword a particular keyword as enumerated by + * ucol_getKeywords. If any other keyword is passed in, *status is set + * to U_ILLEGAL_ARGUMENT_ERROR. + * @param status input-output error code + * @return a string enumeration over collation keyword values, or NULL + * upon error. The caller is responsible for closing the result. + * @stable ICU 3.0 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucol_getKeywordValues(const char *keyword, UErrorCode *status); + +/** + * Given a key and a locale, returns an array of string values in a preferred + * order that would make a difference. These are all and only those values where + * the open (creation) of the service with the locale formed from the input locale + * plus input keyword and that value has different behavior than creation with the + * input locale alone. + * @param key one of the keys supported by this service. For now, only + * "collation" is supported. + * @param locale the locale + * @param commonlyUsed if set to true it will return only commonly used values + * with the given locale in preferred order. Otherwise, + * it will return all the available values for the locale. + * @param status error status + * @return a string enumeration over keyword values for the given key and the locale. + * @stable ICU 4.2 + */ +U_STABLE UEnumeration* U_EXPORT2 +ucol_getKeywordValuesForLocale(const char* key, + const char* locale, + UBool commonlyUsed, + UErrorCode* status); + +/** + * Return the functionally equivalent locale for the given + * requested locale, with respect to given keyword, for the + * collation service. If two locales return the same result, then + * collators instantiated for these locales will behave + * equivalently. The converse is not always true; two collators + * may in fact be equivalent, but return different results, due to + * internal details. The return result has no other meaning than + * that stated above, and implies nothing as to the relationship + * between the two locales. This is intended for use by + * applications who wish to cache collators, or otherwise reuse + * collators when possible. The functional equivalent may change + * over time. For more information, please see the <a + * href="http://icu-project.org/userguide/locale.html#services"> + * Locales and Services</a> section of the ICU User Guide. + * @param result fillin for the functionally equivalent locale + * @param resultCapacity capacity of the fillin buffer + * @param keyword a particular keyword as enumerated by + * ucol_getKeywords. + * @param locale the requested locale + * @param isAvailable if non-NULL, pointer to a fillin parameter that + * indicates whether the requested locale was 'available' to the + * collation service. A locale is defined as 'available' if it + * physically exists within the collation locale data. + * @param status pointer to input-output error code + * @return the actual buffer size needed for the locale. If greater + * than resultCapacity, the returned full name will be truncated and + * an error code will be returned. + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, + const char* keyword, const char* locale, + UBool* isAvailable, UErrorCode* status); + +/** + * Get the collation tailoring rules from a UCollator. + * The rules will follow the rule syntax. + * @param coll The UCollator to query. + * @param length + * @return The collation tailoring rules. + * @stable ICU 2.0 + */ +U_STABLE const UChar* U_EXPORT2 +ucol_getRules( const UCollator *coll, + int32_t *length); + +/** Get the short definition string for a collator. This API harvests the collator's + * locale and the attribute set and produces a string that can be used for opening + * a collator with the same properties using the ucol_openFromShortString API. + * This string will be normalized. + * The structure and the syntax of the string is defined in the "Naming collators" + * section of the users guide: + * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators + * This API supports preflighting. + * @param coll a collator + * @param locale a locale that will appear as a collators locale in the resulting + * short string definition. If NULL, the locale will be harvested + * from the collator. + * @param buffer space to hold the resulting string + * @param capacity capacity of the buffer + * @param status for returning errors. All the preflighting errors are featured + * @return length of the resulting string + * @see ucol_openFromShortString + * @see ucol_normalizeShortDefinitionString + * @stable ICU 3.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getShortDefinitionString(const UCollator *coll, + const char *locale, + char *buffer, + int32_t capacity, + UErrorCode *status); + +/** Verifies and normalizes short definition string. + * Normalized short definition string has all the option sorted by the argument name, + * so that equivalent definition strings are the same. + * This API supports preflighting. + * @param source definition string + * @param destination space to hold the resulting string + * @param capacity capacity of the buffer + * @param parseError if not NULL, structure that will get filled with error's pre + * and post context in case of error. + * @param status Error code. This API will return an error if an invalid attribute + * or attribute/value combination is specified. All the preflighting + * errors are also featured + * @return length of the resulting normalized string. + * + * @see ucol_openFromShortString + * @see ucol_getShortDefinitionString + * + * @stable ICU 3.0 + */ + +U_STABLE int32_t U_EXPORT2 +ucol_normalizeShortDefinitionString(const char *source, + char *destination, + int32_t capacity, + UParseError *parseError, + UErrorCode *status); + + +/** + * Get a sort key for a string from a UCollator. + * Sort keys may be compared using <TT>strcmp</TT>. + * + * Like ICU functions that write to an output buffer, the buffer contents + * is undefined if the buffer capacity (resultLength parameter) is too small. + * Unlike ICU functions that write a string to an output buffer, + * the terminating zero byte is counted in the sort key length. + * @param coll The UCollator containing the collation rules. + * @param source The string to transform. + * @param sourceLength The length of source, or -1 if null-terminated. + * @param result A pointer to a buffer to receive the attribute. + * @param resultLength The maximum size of result. + * @return The size needed to fully store the sort key. + * If there was an internal error generating the sort key, + * a zero value is returned. + * @see ucol_keyHashCode + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getSortKey(const UCollator *coll, + const UChar *source, + int32_t sourceLength, + uint8_t *result, + int32_t resultLength); + + +/** Gets the next count bytes of a sort key. Caller needs + * to preserve state array between calls and to provide + * the same type of UCharIterator set with the same string. + * The destination buffer provided must be big enough to store + * the number of requested bytes. + * + * The generated sort key may or may not be compatible with + * sort keys generated using ucol_getSortKey(). + * @param coll The UCollator containing the collation rules. + * @param iter UCharIterator containing the string we need + * the sort key to be calculated for. + * @param state Opaque state of sortkey iteration. + * @param dest Buffer to hold the resulting sortkey part + * @param count number of sort key bytes required. + * @param status error code indicator. + * @return the actual number of bytes of a sortkey. It can be + * smaller than count if we have reached the end of + * the sort key. + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +ucol_nextSortKeyPart(const UCollator *coll, + UCharIterator *iter, + uint32_t state[2], + uint8_t *dest, int32_t count, + UErrorCode *status); + +/** enum that is taken by ucol_getBound API + * See below for explanation + * do not change the values assigned to the + * members of this enum. Underlying code + * depends on them having these numbers + * @stable ICU 2.0 + */ +typedef enum { + /** lower bound */ + UCOL_BOUND_LOWER = 0, + /** upper bound that will match strings of exact size */ + UCOL_BOUND_UPPER = 1, + /** upper bound that will match all the strings that have the same initial substring as the given string */ + UCOL_BOUND_UPPER_LONG = 2, + UCOL_BOUND_VALUE_COUNT +} UColBoundMode; + +/** + * Produce a bound for a given sortkey and a number of levels. + * Return value is always the number of bytes needed, regardless of + * whether the result buffer was big enough or even valid.<br> + * Resulting bounds can be used to produce a range of strings that are + * between upper and lower bounds. For example, if bounds are produced + * for a sortkey of string "smith", strings between upper and lower + * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> + * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER + * is produced, strings matched would be as above. However, if bound + * produced using UCOL_BOUND_UPPER_LONG is used, the above example will + * also match "Smithsonian" and similar.<br> + * For more on usage, see example in cintltst/capitst.c in procedure + * TestBounds. + * Sort keys may be compared using <TT>strcmp</TT>. + * @param source The source sortkey. + * @param sourceLength The length of source, or -1 if null-terminated. + * (If an unmodified sortkey is passed, it is always null + * terminated). + * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which + * produces a lower inclusive bound, UCOL_BOUND_UPPER, that + * produces upper bound that matches strings of the same length + * or UCOL_BOUND_UPPER_LONG that matches strings that have the + * same starting substring as the source string. + * @param noOfLevels Number of levels required in the resulting bound (for most + * uses, the recommended value is 1). See users guide for + * explanation on number of levels a sortkey can have. + * @param result A pointer to a buffer to receive the resulting sortkey. + * @param resultLength The maximum size of result. + * @param status Used for returning error code if something went wrong. If the + * number of levels requested is higher than the number of levels + * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is + * issued. + * @return The size needed to fully store the bound. + * @see ucol_keyHashCode + * @stable ICU 2.1 + */ +U_STABLE int32_t U_EXPORT2 +ucol_getBound(const uint8_t *source, + int32_t sourceLength, + UColBoundMode boundType, + uint32_t noOfLevels, + uint8_t *result, + int32_t resultLength, + UErrorCode *status); + +/** + * Gets the version information for a Collator. Version is currently + * an opaque 32-bit number which depends, among other things, on major + * versions of the collator tailoring and UCA. + * @param coll The UCollator to query. + * @param info the version # information, the result will be filled in + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_getVersion(const UCollator* coll, UVersionInfo info); + +/** + * Gets the UCA version information for a Collator. Version is the + * UCA version number (3.1.1, 4.0). + * @param coll The UCollator to query. + * @param info the version # information, the result will be filled in + * @stable ICU 2.8 + */ +U_STABLE void U_EXPORT2 +ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); + +/** + * Merges two sort keys. The levels are merged with their corresponding counterparts + * (primaries with primaries, secondaries with secondaries etc.). Between the values + * from the same level a separator is inserted. + * + * This is useful, for example, for combining sort keys from first and last names + * to sort such pairs. + * It is possible to merge multiple sort keys by consecutively merging + * another one with the intermediate result. + * + * The length of the merge result is the sum of the lengths of the input sort keys. + * + * Example (uncompressed): + * <pre>191B1D 01 050505 01 910505 00 + * 1F2123 01 050505 01 910505 00</pre> + * will be merged as + * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> + * + * If the destination buffer is not big enough, then its contents are undefined. + * If any of source lengths are zero or any of the source pointers are NULL/undefined, + * the result is of size zero. + * + * @param src1 the first sort key + * @param src1Length the length of the first sort key, including the zero byte at the end; + * can be -1 if the function is to find the length + * @param src2 the second sort key + * @param src2Length the length of the second sort key, including the zero byte at the end; + * can be -1 if the function is to find the length + * @param dest the buffer where the merged sort key is written, + * can be NULL if destCapacity==0 + * @param destCapacity the number of bytes in the dest buffer + * @return the length of the merged sort key, src1Length+src2Length; + * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), + * in which cases the contents of dest is undefined + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, + const uint8_t *src2, int32_t src2Length, + uint8_t *dest, int32_t destCapacity); + +/** + * Universal attribute setter + * @param coll collator which attributes are to be changed + * @param attr attribute type + * @param value attribute value + * @param status to indicate whether the operation went on smoothly or there were errors + * @see UColAttribute + * @see UColAttributeValue + * @see ucol_getAttribute + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); + +/** + * Universal attribute getter + * @param coll collator which attributes are to be changed + * @param attr attribute type + * @return attribute value + * @param status to indicate whether the operation went on smoothly or there were errors + * @see UColAttribute + * @see UColAttributeValue + * @see ucol_setAttribute + * @stable ICU 2.0 + */ +U_STABLE UColAttributeValue U_EXPORT2 +ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); + +/** Variable top + * is a two byte primary value which causes all the codepoints with primary values that + * are less or equal than the variable top to be shifted when alternate handling is set + * to UCOL_SHIFTED. + * Sets the variable top to a collation element value of a string supplied. + * @param coll collator which variable top needs to be changed + * @param varTop one or more (if contraction) UChars to which the variable top should be set + * @param len length of variable top string. If -1 it is considered to be zero terminated. + * @param status error code. If error code is set, the return value is undefined. + * Errors set by this function are: <br> + * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such + * a contraction<br> + * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes + * @return a 32 bit value containing the value of the variable top in upper 16 bits. + * Lower 16 bits are undefined + * @see ucol_getVariableTop + * @see ucol_restoreVariableTop + * @stable ICU 2.0 + */ +U_STABLE uint32_t U_EXPORT2 +ucol_setVariableTop(UCollator *coll, + const UChar *varTop, int32_t len, + UErrorCode *status); + +/** + * Gets the variable top value of a Collator. + * Lower 16 bits are undefined and should be ignored. + * @param coll collator which variable top needs to be retrieved + * @param status error code (not changed by function). If error code is set, + * the return value is undefined. + * @return the variable top value of a Collator. + * @see ucol_setVariableTop + * @see ucol_restoreVariableTop + * @stable ICU 2.0 + */ +U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); + +/** + * Sets the variable top to a collation element value supplied. Variable top is + * set to the upper 16 bits. + * Lower 16 bits are ignored. + * @param coll collator which variable top needs to be changed + * @param varTop CE value, as returned by ucol_setVariableTop or ucol)getVariableTop + * @param status error code (not changed by function) + * @see ucol_getVariableTop + * @see ucol_setVariableTop + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); + +/** + * Thread safe cloning operation. The result is a clone of a given collator. + * @param coll collator to be cloned + * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> + * user allocated space for the new clone. + * If NULL new memory will be allocated. + * If buffer is not large enough, new memory will be allocated. + * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. + * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> + * pointer to size of allocated space. + * If *pBufferSize == 0, a sufficient size for use in cloning will + * be returned ('pre-flighting') + * If *pBufferSize is not enough for a stack-based safe clone, + * new memory will be allocated. + * @param status to indicate whether the operation went on smoothly or there were errors + * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any + * allocations were necessary. + * @return pointer to the new clone + * @see ucol_open + * @see ucol_openRules + * @see ucol_close + * @stable ICU 2.0 + */ +U_STABLE UCollator* U_EXPORT2 +ucol_safeClone(const UCollator *coll, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status); + +#ifndef U_HIDE_DEPRECATED_API + +/** default memory size for the new clone. + * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. + */ +#define U_COL_SAFECLONE_BUFFERSIZE 1 + +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Returns current rules. Delta defines whether full rules are returned or just the tailoring. + * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough + * to store rules, will store up to available space. + * + * ucol_getRules() should normally be used instead. + * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales + * @param coll collator to get the rules from + * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. + * @param buffer buffer to store the result in. If NULL, you'll get no rules. + * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in. + * @return current rules + * @stable ICU 2.0 + * @see UCOL_FULL_RULES + */ +U_STABLE int32_t U_EXPORT2 +ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); + +#ifndef U_HIDE_DEPRECATED_API +/** + * gets the locale name of the collator. If the collator + * is instantiated from the rules, then this function returns + * NULL. + * @param coll The UCollator for which the locale is needed + * @param type You can choose between requested, valid and actual + * locale. For description see the definition of + * ULocDataLocaleType in uloc.h + * @param status error code of the operation + * @return real locale name from which the collation data comes. + * If the collator was instantiated from rules, returns + * NULL. + * @deprecated ICU 2.8 Use ucol_getLocaleByType instead + */ +U_DEPRECATED const char * U_EXPORT2 +ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * gets the locale name of the collator. If the collator + * is instantiated from the rules, then this function returns + * NULL. + * @param coll The UCollator for which the locale is needed + * @param type You can choose between requested, valid and actual + * locale. For description see the definition of + * ULocDataLocaleType in uloc.h + * @param status error code of the operation + * @return real locale name from which the collation data comes. + * If the collator was instantiated from rules, returns + * NULL. + * @stable ICU 2.8 + */ +U_STABLE const char * U_EXPORT2 +ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); + +/** + * Get an Unicode set that contains all the characters and sequences tailored in + * this collator. The result must be disposed of by using uset_close. + * @param coll The UCollator for which we want to get tailored chars + * @param status error code of the operation + * @return a pointer to newly created USet. Must be be disposed by using uset_close + * @see ucol_openRules + * @see uset_close + * @stable ICU 2.4 + */ +U_STABLE USet * U_EXPORT2 +ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); + +#ifndef U_HIDE_INTERNAL_API +/** + * Universal attribute getter that returns UCOL_DEFAULT if the value is default + * @param coll collator which attributes are to be changed + * @param attr attribute type + * @return attribute value or UCOL_DEFAULT if the value is default + * @param status to indicate whether the operation went on smoothly or there were errors + * @see UColAttribute + * @see UColAttributeValue + * @see ucol_setAttribute + * @internal ICU 3.0 + */ +U_INTERNAL UColAttributeValue U_EXPORT2 +ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status); + +/** Check whether two collators are equal. Collators are considered equal if they + * will sort strings the same. This means that both the current attributes and the + * rules must be equivalent. Currently used for RuleBasedCollator::operator==. + * @param source first collator + * @param target second collator + * @return TRUE or FALSE + * @internal ICU 3.0 + */ +U_INTERNAL UBool U_EXPORT2 +ucol_equals(const UCollator *source, const UCollator *target); + +/** Calculates the set of unsafe code points, given a collator. + * A character is unsafe if you could append any character and cause the ordering to alter significantly. + * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. + * Thus if you have a character like a_umlaut, and you add a lower_dot to it, + * then it normalizes to a_lower_dot + umlaut, and sorts differently. + * @param coll Collator + * @param unsafe a fill-in set to receive the unsafe points + * @param status for catching errors + * @return number of elements in the set + * @internal ICU 3.0 + */ +U_INTERNAL int32_t U_EXPORT2 +ucol_getUnsafeSet( const UCollator *coll, + USet *unsafe, + UErrorCode *status); + +/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away. + * @internal ICU 3.2.1 + */ +U_INTERNAL void U_EXPORT2 +ucol_forgetUCA(void); + +/** Touches all resources needed for instantiating a collator from a short string definition, + * thus filling up the cache. + * @param definition A short string containing a locale and a set of attributes. + * Attributes not explicitly mentioned are left at the default + * state for a locale. + * @param parseError if not NULL, structure that will get filled with error's pre + * and post context in case of error. + * @param forceDefaults if FALSE, the settings that are the same as the collator + * default settings will not be applied (for example, setting + * French secondary on a French collator would not be executed). + * If TRUE, all the settings will be applied regardless of the + * collator default value. If the definition + * strings are to be cached, should be set to FALSE. + * @param status Error code. Apart from regular error conditions connected to + * instantiating collators (like out of memory or similar), this + * API will return an error if an invalid attribute or attribute/value + * combination is specified. + * @see ucol_openFromShortString + * @internal ICU 3.2.1 + */ +U_INTERNAL void U_EXPORT2 +ucol_prepareShortStringOpen( const char *definition, + UBool forceDefaults, + UParseError *parseError, + UErrorCode *status); +#endif /* U_HIDE_INTERNAL_API */ + +/** Creates a binary image of a collator. This binary image can be stored and + * later used to instantiate a collator using ucol_openBinary. + * This API supports preflighting. + * @param coll Collator + * @param buffer a fill-in buffer to receive the binary image + * @param capacity capacity of the destination buffer + * @param status for catching errors + * @return size of the image + * @see ucol_openBinary + * @stable ICU 3.2 + */ +U_STABLE int32_t U_EXPORT2 +ucol_cloneBinary(const UCollator *coll, + uint8_t *buffer, int32_t capacity, + UErrorCode *status); + +/** Opens a collator from a collator binary image created using + * ucol_cloneBinary. Binary image used in instantiation of the + * collator remains owned by the user and should stay around for + * the lifetime of the collator. The API also takes a base collator + * which usualy should be UCA. + * @param bin binary image owned by the user and required through the + * lifetime of the collator + * @param length size of the image. If negative, the API will try to + * figure out the length of the image + * @param base fallback collator, usually UCA. Base is required to be + * present through the lifetime of the collator. Currently + * it cannot be NULL. + * @param status for catching errors + * @return newly created collator + * @see ucol_cloneBinary + * @stable ICU 3.2 + */ +U_STABLE UCollator* U_EXPORT2 +ucol_openBinary(const uint8_t *bin, int32_t length, + const UCollator *base, + UErrorCode *status); + + +#endif /* #if !UCONFIG_NO_COLLATION */ + +#endif diff --git a/Source/WTF/icu/unicode/uconfig.h b/Source/WTF/icu/unicode/uconfig.h new file mode 100644 index 000000000..bfa8e77b0 --- /dev/null +++ b/Source/WTF/icu/unicode/uconfig.h @@ -0,0 +1,412 @@ +/* +********************************************************************** +* Copyright (C) 2002-2013, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: uconfig.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002sep19 +* created by: Markus W. Scherer +*/ + +#ifndef __UCONFIG_H__ +#define __UCONFIG_H__ + + +/*! + * \file + * \brief User-configurable settings + * + * Miscellaneous switches: + * + * A number of macros affect a variety of minor aspects of ICU. + * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h) + * and moved here to make them easier to find. + * + * Switches for excluding parts of ICU library code modules: + * + * Changing these macros allows building partial, smaller libraries for special purposes. + * By default, all modules are built. + * The switches are fairly coarse, controlling large modules. + * Basic services cannot be turned off. + * + * Building with any of these options does not guarantee that the + * ICU build process will completely work. It is recommended that + * the ICU libraries and data be built using the normal build. + * At that time you should remove the data used by those services. + * After building the ICU data library, you should rebuild the ICU + * libraries with these switches customized to your needs. + * + * @stable ICU 2.4 + */ + +/** + * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h" + * prior to determining default settings for uconfig variables. + * + * @internal ICU 4.0 + */ +#if defined(UCONFIG_USE_LOCAL) +#include "uconfig_local.h" +#endif + +/** + * \def U_DEBUG + * Determines whether to include debugging code. + * Automatically set on Windows, but most compilers do not have + * related predefined macros. + * @internal + */ +#ifdef U_DEBUG + /* Use the predefined value. */ +#elif defined(_DEBUG) + /* + * _DEBUG is defined by Visual Studio debug compilation. + * Do *not* test for its NDEBUG macro: It is an orthogonal macro + * which disables assert(). + */ +# define U_DEBUG 1 +# else +# define U_DEBUG 0 +#endif + +/** + * Determines wheter to enable auto cleanup of libraries. + * @internal + */ +#ifndef UCLN_NO_AUTO_CLEANUP +#define UCLN_NO_AUTO_CLEANUP 1 +#endif + +/** + * \def U_DISABLE_RENAMING + * Determines whether to disable renaming or not. + * @internal + */ +#ifndef U_DISABLE_RENAMING +#define U_DISABLE_RENAMING 1 +#endif + +/** + * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS + * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h. + * utypes.h includes those headers if this macro is defined to 0. + * Otherwise, each those headers must be included explicitly when using one of their macros. + * Defaults to 0 for backward compatibility, except inside ICU. + * @stable ICU 49 + */ +#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS + /* Use the predefined value. */ +#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \ + defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \ + defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1 +#else +# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0 +#endif + +/** + * \def U_OVERRIDE_CXX_ALLOCATION + * Determines whether to override new and delete. + * ICU is normally built such that all of its C++ classes, via their UMemory base, + * override operators new and delete to use its internal, customizable, + * non-exception-throwing memory allocation functions. (Default value 1 for this macro.) + * + * This is especially important when the application and its libraries use multiple heaps. + * For example, on Windows, this allows the ICU DLL to be used by + * applications that statically link the C Runtime library. + * + * @stable ICU 2.2 + */ +#ifndef U_OVERRIDE_CXX_ALLOCATION +#define U_OVERRIDE_CXX_ALLOCATION 1 +#endif + +/** + * \def U_ENABLE_TRACING + * Determines whether to enable tracing. + * @internal + */ +#ifndef U_ENABLE_TRACING +#define U_ENABLE_TRACING 0 +#endif + +/** + * \def U_ENABLE_DYLOAD + * Whether to enable Dynamic loading in ICU. + * @internal + */ +#ifndef U_ENABLE_DYLOAD +#define U_ENABLE_DYLOAD 1 +#endif + +/** + * \def U_CHECK_DYLOAD + * Whether to test Dynamic loading as an OS capability. + * @internal + */ +#ifndef U_CHECK_DYLOAD +#define U_CHECK_DYLOAD 1 +#endif + + +/** + * \def U_DEFAULT_SHOW_DRAFT + * Do we allow ICU users to use the draft APIs by default? + * @internal + */ +#ifndef U_DEFAULT_SHOW_DRAFT +#define U_DEFAULT_SHOW_DRAFT 1 +#endif + +/*===========================================================================*/ +/* Custom icu entry point renaming */ +/*===========================================================================*/ + +/** + * \def U_HAVE_LIB_SUFFIX + * 1 if a custom library suffix is set. + * @internal + */ +#ifdef U_HAVE_LIB_SUFFIX + /* Use the predefined value. */ +#elif defined(U_LIB_SUFFIX_C_NAME) +# define U_HAVE_LIB_SUFFIX 1 +#endif + +/** + * \def U_LIB_SUFFIX_C_NAME_STRING + * Defines the library suffix as a string with C syntax. + * @internal + */ +#ifdef U_LIB_SUFFIX_C_NAME_STRING + /* Use the predefined value. */ +#elif defined(U_LIB_SUFFIX_C_NAME) +# define U_LIB_SUFFIX_C_NAME_STRING #U_LIB_SUFFIX_C_NAME +#else +# define U_LIB_SUFFIX_C_NAME_STRING "" +#endif + +/* common/i18n library switches --------------------------------------------- */ + +/** + * \def UCONFIG_ONLY_COLLATION + * This switch turns off modules that are not needed for collation. + * + * It does not turn off legacy conversion because that is necessary + * for ICU to work on EBCDIC platforms (for the default converter). + * If you want "only collation" and do not build for EBCDIC, + * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_ONLY_COLLATION +# define UCONFIG_ONLY_COLLATION 0 +#endif + +#if UCONFIG_ONLY_COLLATION + /* common library */ +# define UCONFIG_NO_BREAK_ITERATION 1 +# define UCONFIG_NO_IDNA 1 + + /* i18n library */ +# if UCONFIG_NO_COLLATION +# error Contradictory collation switches in uconfig.h. +# endif +# define UCONFIG_NO_FORMATTING 1 +# define UCONFIG_NO_TRANSLITERATION 1 +# define UCONFIG_NO_REGULAR_EXPRESSIONS 1 +#endif + +/* common library switches -------------------------------------------------- */ + +/** + * \def UCONFIG_NO_FILE_IO + * This switch turns off all file access in the common library + * where file access is only used for data loading. + * ICU data must then be provided in the form of a data DLL (or with an + * equivalent way to link to the data residing in an executable, + * as in building a combined library with both the common library's code and + * the data), or via udata_setCommonData(). + * Application data must be provided via udata_setAppData() or by using + * "open" functions that take pointers to data, for example ucol_openBinary(). + * + * File access is not used at all in the i18n library. + * + * File access cannot be turned off for the icuio library or for the ICU + * test suites and ICU tools. + * + * @stable ICU 3.6 + */ +#ifndef UCONFIG_NO_FILE_IO +# define UCONFIG_NO_FILE_IO 0 +#endif + +/** + * \def UCONFIG_NO_CONVERSION + * ICU will not completely build with this switch turned on. + * This switch turns off all converters. + * + * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 + * in utypes.h if char* strings in your environment are always in UTF-8. + * + * @stable ICU 3.2 + * @see U_CHARSET_IS_UTF8 + */ +#ifndef UCONFIG_NO_CONVERSION +# define UCONFIG_NO_CONVERSION 0 +#endif + +#if UCONFIG_NO_CONVERSION +# define UCONFIG_NO_LEGACY_CONVERSION 1 +#endif + +/** + * \def UCONFIG_NO_LEGACY_CONVERSION + * This switch turns off all converters except for + * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) + * - US-ASCII + * - ISO-8859-1 + * + * Turning off legacy conversion is not possible on EBCDIC platforms + * because they need ibm-37 or ibm-1047 default converters. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_LEGACY_CONVERSION +# define UCONFIG_NO_LEGACY_CONVERSION 0 +#endif + +/** + * \def UCONFIG_NO_NORMALIZATION + * This switch turns off normalization. + * It implies turning off several other services as well, for example + * collation and IDNA. + * + * @stable ICU 2.6 + */ +#ifndef UCONFIG_NO_NORMALIZATION +# define UCONFIG_NO_NORMALIZATION 0 +#elif UCONFIG_NO_NORMALIZATION + /* common library */ + /* ICU 50 CJK dictionary BreakIterator uses normalization */ +# define UCONFIG_NO_BREAK_ITERATION 1 + /* IDNA (UTS #46) is implemented via normalization */ +# define UCONFIG_NO_IDNA 1 + + /* i18n library */ +# if UCONFIG_ONLY_COLLATION +# error Contradictory collation switches in uconfig.h. +# endif +# define UCONFIG_NO_COLLATION 1 +# define UCONFIG_NO_TRANSLITERATION 1 +#endif + +/** + * \def UCONFIG_NO_BREAK_ITERATION + * This switch turns off break iteration. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_BREAK_ITERATION +# define UCONFIG_NO_BREAK_ITERATION 0 +#endif + +/** + * \def UCONFIG_NO_IDNA + * This switch turns off IDNA. + * + * @stable ICU 2.6 + */ +#ifndef UCONFIG_NO_IDNA +# define UCONFIG_NO_IDNA 0 +#endif + +/** + * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE + * Determines the default UMessagePatternApostropheMode. + * See the documentation for that enum. + * + * @stable ICU 4.8 + */ +#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE +# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL +#endif + +/* i18n library switches ---------------------------------------------------- */ + +/** + * \def UCONFIG_NO_COLLATION + * This switch turns off collation and collation-based string search. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_COLLATION +# define UCONFIG_NO_COLLATION 0 +#endif + +/** + * \def UCONFIG_NO_FORMATTING + * This switch turns off formatting and calendar/timezone services. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_FORMATTING +# define UCONFIG_NO_FORMATTING 0 +#endif + +/** + * \def UCONFIG_NO_TRANSLITERATION + * This switch turns off transliteration. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_TRANSLITERATION +# define UCONFIG_NO_TRANSLITERATION 0 +#endif + +/** + * \def UCONFIG_NO_REGULAR_EXPRESSIONS + * This switch turns off regular expressions. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS +# define UCONFIG_NO_REGULAR_EXPRESSIONS 0 +#endif + +/** + * \def UCONFIG_NO_SERVICE + * This switch turns off service registration. + * + * @stable ICU 3.2 + */ +#ifndef UCONFIG_NO_SERVICE +# define UCONFIG_NO_SERVICE 1 +#endif + +/** + * \def UCONFIG_HAVE_PARSEALLINPUT + * This switch turns on the "parse all input" attribute. Binary incompatible. + * + * @internal + */ +#ifndef UCONFIG_HAVE_PARSEALLINPUT +# define UCONFIG_HAVE_PARSEALLINPUT 1 +#endif + + +/** + * \def UCONFIG_FORMAT_FASTPATHS_49 + * This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols + * + * @internal + */ +#ifndef UCONFIG_FORMAT_FASTPATHS_49 +# define UCONFIG_FORMAT_FASTPATHS_49 1 +#endif + +#endif diff --git a/Source/WTF/icu/unicode/uenum.h b/Source/WTF/icu/unicode/uenum.h new file mode 100644 index 000000000..5408ec5a6 --- /dev/null +++ b/Source/WTF/icu/unicode/uenum.h @@ -0,0 +1,206 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uenum.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:2 +* +* created on: 2002jul08 +* created by: Vladimir Weinstein +*/ + +#ifndef __UENUM_H +#define __UENUM_H + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" + +#if U_SHOW_CPLUSPLUS_API +#include "unicode/strenum.h" +#endif + +/** + * \file + * \brief C API: String Enumeration + */ + +/** + * An enumeration object. + * For usage in C programs. + * @stable ICU 2.2 + */ +struct UEnumeration; +/** structure representing an enumeration object instance @stable ICU 2.2 */ +typedef struct UEnumeration UEnumeration; + +/** + * Disposes of resources in use by the iterator. If en is NULL, + * does nothing. After this call, any char* or UChar* pointer + * returned by uenum_unext() or uenum_next() is invalid. + * @param en UEnumeration structure pointer + * @stable ICU 2.2 + */ +U_STABLE void U_EXPORT2 +uenum_close(UEnumeration* en); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUEnumerationPointer + * "Smart pointer" class, closes a UEnumeration via uenum_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close); + +U_NAMESPACE_END + +#endif + +/** + * Returns the number of elements that the iterator traverses. If + * the iterator is out-of-sync with its service, status is set to + * U_ENUM_OUT_OF_SYNC_ERROR. + * This is a convenience function. It can end up being very + * expensive as all the items might have to be pre-fetched (depending + * on the type of data being traversed). Use with caution and only + * when necessary. + * @param en UEnumeration structure pointer + * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the + * iterator is out of sync. + * @return number of elements in the iterator + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +uenum_count(UEnumeration* en, UErrorCode* status); + +/** + * Returns the next element in the iterator's list. If there are + * no more elements, returns NULL. If the iterator is out-of-sync + * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and + * NULL is returned. If the native service string is a char* string, + * it is converted to UChar* with the invariant converter. + * The result is terminated by (UChar)0. + * @param en the iterator object + * @param resultLength pointer to receive the length of the result + * (not including the terminating \\0). + * If the pointer is NULL it is ignored. + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. + * @return a pointer to the string. The string will be + * zero-terminated. The return pointer is owned by this iterator + * and must not be deleted by the caller. The pointer is valid + * until the next call to any uenum_... method, including + * uenum_next() or uenum_unext(). When all strings have been + * traversed, returns NULL. + * @stable ICU 2.2 + */ +U_STABLE const UChar* U_EXPORT2 +uenum_unext(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/** + * Returns the next element in the iterator's list. If there are + * no more elements, returns NULL. If the iterator is out-of-sync + * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and + * NULL is returned. If the native service string is a UChar* + * string, it is converted to char* with the invariant converter. + * The result is terminated by (char)0. If the conversion fails + * (because a character cannot be converted) then status is set to + * U_INVARIANT_CONVERSION_ERROR and the return value is undefined + * (but non-NULL). + * @param en the iterator object + * @param resultLength pointer to receive the length of the result + * (not including the terminating \\0). + * If the pointer is NULL it is ignored. + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. Set to + * U_INVARIANT_CONVERSION_ERROR if the underlying native string is + * UChar* and conversion to char* with the invariant converter + * fails. This error pertains only to current string, so iteration + * might be able to continue successfully. + * @return a pointer to the string. The string will be + * zero-terminated. The return pointer is owned by this iterator + * and must not be deleted by the caller. The pointer is valid + * until the next call to any uenum_... method, including + * uenum_next() or uenum_unext(). When all strings have been + * traversed, returns NULL. + * @stable ICU 2.2 + */ +U_STABLE const char* U_EXPORT2 +uenum_next(UEnumeration* en, + int32_t* resultLength, + UErrorCode* status); + +/** + * Resets the iterator to the current list of service IDs. This + * re-establishes sync with the service and rewinds the iterator + * to start at the first element. + * @param en the iterator object + * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if + * the iterator is out of sync with its service. + * @stable ICU 2.2 + */ +U_STABLE void U_EXPORT2 +uenum_reset(UEnumeration* en, UErrorCode* status); + +#if U_SHOW_CPLUSPLUS_API + +/** + * Given a StringEnumeration, wrap it in a UEnumeration. The + * StringEnumeration is adopted; after this call, the caller must not + * delete it (regardless of error status). + * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration. + * @param ec the error code. + * @return a UEnumeration wrapping the adopted StringEnumeration. + * @stable ICU 4.2 + */ +U_STABLE UEnumeration* U_EXPORT2 +uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec); + +#endif + +/** + * Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null. + * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. + * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration + * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller. + * @param count length of the array + * @param ec error code + * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory. + * @see uenum_close + * @stable ICU 50 + */ +U_STABLE UEnumeration* U_EXPORT2 +uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count, + UErrorCode* ec); + +/* Note: next function is not hidden as draft, as it is used internally (it was formerly an internal function). */ + +/** + * Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null. + * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. + * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration + * @param strings array of char* strings (each null terminated). All storage is owned by the caller. + * @param count length of the array + * @param ec error code + * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory + * @see uenum_close + * @stable ICU 50 + */ +U_STABLE UEnumeration* U_EXPORT2 +uenum_openCharStringsEnumeration(const char* const strings[], int32_t count, + UErrorCode* ec); + +#endif diff --git a/Source/WTF/icu/unicode/uiter.h b/Source/WTF/icu/unicode/uiter.h new file mode 100644 index 000000000..0cdb8ffbe --- /dev/null +++ b/Source/WTF/icu/unicode/uiter.h @@ -0,0 +1,707 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002-2011 International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uiter.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jan18 +* created by: Markus W. Scherer +*/ + +#ifndef __UITER_H__ +#define __UITER_H__ + +/** + * \file + * \brief C API: Unicode Character Iteration + * + * @see UCharIterator + */ + +#include "unicode/utypes.h" + +#if U_SHOW_CPLUSPLUS_API + U_NAMESPACE_BEGIN + + class CharacterIterator; + class Replaceable; + + U_NAMESPACE_END +#endif + +U_CDECL_BEGIN + +struct UCharIterator; +typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ + +/** + * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). + * @see UCharIteratorMove + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef enum UCharIteratorOrigin { + UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH +} UCharIteratorOrigin; + +/** Constants for UCharIterator. @stable ICU 2.6 */ +enum { + /** + * Constant value that may be returned by UCharIteratorMove + * indicating that the final UTF-16 index is not known, but that the move succeeded. + * This can occur when moving relative to limit or length, or + * when moving relative to the current index after a setState() + * when the current UTF-16 index is not known. + * + * It would be very inefficient to have to count from the beginning of the text + * just to get the current/limit/length index after moving relative to it. + * The actual index can be determined with getIndex(UITER_CURRENT) + * which will count the UChars if necessary. + * + * @stable ICU 2.6 + */ + UITER_UNKNOWN_INDEX=-2 +}; + + +/** + * Constant for UCharIterator getState() indicating an error or + * an unknown state. + * Returned by uiter_getState()/UCharIteratorGetState + * when an error occurs. + * Also, some UCharIterator implementations may not be able to return + * a valid state for each position. This will be clearly documented + * for each such iterator (none of the public ones here). + * + * @stable ICU 2.6 + */ +#define UITER_NO_STATE ((uint32_t)0xffffffff) + +/** + * Function type declaration for UCharIterator.getIndex(). + * + * Gets the current position, or the start or limit of the + * iteration range. + * + * This function may perform slowly for UITER_CURRENT after setState() was called, + * or for UITER_LENGTH, because an iterator implementation may have to count + * UChars if the underlying storage is not UTF-16. + * + * @param iter the UCharIterator structure ("this pointer") + * @param origin get the 0, start, limit, length, or current index + * @return the requested index, or U_SENTINEL in an error condition + * + * @see UCharIteratorOrigin + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); + +/** + * Function type declaration for UCharIterator.move(). + * + * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). + * + * Moves the current position relative to the start or limit of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * Out of bounds movement will be pinned to the start or limit. + * + * This function may perform slowly for moving relative to UITER_LENGTH + * because an iterator implementation may have to count the rest of the + * UChars if the native storage is not UTF-16. + * + * When moving relative to the limit or length, or + * relative to the current position after setState() was called, + * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient + * determination of the actual UTF-16 index. + * The actual index can be determined with getIndex(UITER_CURRENT) + * which will count the UChars if necessary. + * See UITER_UNKNOWN_INDEX for details. + * + * @param iter the UCharIterator structure ("this pointer") + * @param delta can be positive, zero, or negative + * @param origin move relative to the 0, start, limit, length, or current index + * @return the new index, or U_SENTINEL on an error condition, + * or UITER_UNKNOWN_INDEX when the index is not known. + * + * @see UCharIteratorOrigin + * @see UCharIterator + * @see UITER_UNKNOWN_INDEX + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); + +/** + * Function type declaration for UCharIterator.hasNext(). + * + * Check if current() and next() can still + * return another code unit. + * + * @param iter the UCharIterator structure ("this pointer") + * @return boolean value for whether current() and next() can still return another code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UBool U_CALLCONV +UCharIteratorHasNext(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.hasPrevious(). + * + * Check if previous() can still return another code unit. + * + * @param iter the UCharIterator structure ("this pointer") + * @return boolean value for whether previous() can still return another code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UBool U_CALLCONV +UCharIteratorHasPrevious(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.current(). + * + * Return the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorCurrent(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.next(). + * + * Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code unit (and post-increment the current index) + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorNext(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.previous(). + * + * Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code unit (after pre-decrementing the current index) + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorPrevious(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.reservedFn(). + * Reserved for future use. + * + * @param iter the UCharIterator structure ("this pointer") + * @param something some integer argument + * @return some integer + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorReserved(UCharIterator *iter, int32_t something); + +/** + * Function type declaration for UCharIterator.getState(). + * + * Get the "state" of the iterator in the form of a single 32-bit word. + * It is recommended that the state value be calculated to be as small as + * is feasible. For strings with limited lengths, fewer than 32 bits may + * be sufficient. + * + * This is used together with setState()/UCharIteratorSetState + * to save and restore the iterator position more efficiently than with + * getIndex()/move(). + * + * The iterator state is defined as a uint32_t value because it is designed + * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state + * of the character iterator. + * + * With some UCharIterator implementations (e.g., UTF-8), + * getting and setting the UTF-16 index with existing functions + * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but + * relatively slow because the iterator has to "walk" from a known index + * to the requested one. + * This takes more time the farther it needs to go. + * + * An opaque state value allows an iterator implementation to provide + * an internal index (UTF-8: the source byte array index) for + * fast, constant-time restoration. + * + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because + * the UTF-16 index may not be restored as well, but the iterator can deliver + * the correct text contents and move relative to the current position + * without performance degradation. + * + * Some UCharIterator implementations may not be able to return + * a valid state for each position, in which case they return UITER_NO_STATE instead. + * This will be clearly documented for each such iterator (none of the public ones here). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the state word + * + * @see UCharIterator + * @see UCharIteratorSetState + * @see UITER_NO_STATE + * @stable ICU 2.6 + */ +typedef uint32_t U_CALLCONV +UCharIteratorGetState(const UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.setState(). + * + * Restore the "state" of the iterator using a state word from a getState() call. + * The iterator object need not be the same one as for which getState() was called, + * but it must be of the same type (set up using the same uiter_setXYZ function) + * and it must iterate over the same string + * (binary identical regardless of memory address). + * For more about the state word see UCharIteratorGetState. + * + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because + * the UTF-16 index may not be restored as well, but the iterator can deliver + * the correct text contents and move relative to the current position + * without performance degradation. + * + * @param iter the UCharIterator structure ("this pointer") + * @param state the state word from a getState() call + * on a same-type, same-string iterator + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see UCharIterator + * @see UCharIteratorGetState + * @stable ICU 2.6 + */ +typedef void U_CALLCONV +UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); + + +/** + * C API for code unit iteration. + * This can be used as a C wrapper around + * CharacterIterator, Replaceable, or implemented using simple strings, etc. + * + * There are two roles for using UCharIterator: + * + * A "provider" sets the necessary function pointers and controls the "protected" + * fields of the UCharIterator structure. A "provider" passes a UCharIterator + * into C APIs that need a UCharIterator as an abstract, flexible string interface. + * + * Implementations of such C APIs are "callers" of UCharIterator functions; + * they only use the "public" function pointers and never access the "protected" + * fields directly. + * + * The current() and next() functions only check the current index against the + * limit, and previous() only checks the current index against the start, + * to see if the iterator already reached the end of the iteration range. + * + * The assumption - in all iterators - is that the index is moved via the API, + * which means it won't go out of bounds, or the index is modified by + * user code that knows enough about the iterator implementation to set valid + * index values. + * + * UCharIterator functions return code unit values 0..0xffff, + * or U_SENTINEL if the iteration bounds are reached. + * + * @stable ICU 2.1 + */ +struct UCharIterator { + /** + * (protected) Pointer to string or wrapped object or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + const void *context; + + /** + * (protected) Length of string or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t length; + + /** + * (protected) Start index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t start; + + /** + * (protected) Current index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t index; + + /** + * (protected) Limit index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t limit; + + /** + * (protected) Used by UTF-8 iterators and possibly others. + * @stable ICU 2.1 + */ + int32_t reservedField; + + /** + * (public) Returns the current position or the + * start or limit index of the iteration range. + * + * @see UCharIteratorGetIndex + * @stable ICU 2.1 + */ + UCharIteratorGetIndex *getIndex; + + /** + * (public) Moves the current position relative to the start or limit of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * + * @see UCharIteratorMove + * @stable ICU 2.1 + */ + UCharIteratorMove *move; + + /** + * (public) Check if current() and next() can still + * return another code unit. + * + * @see UCharIteratorHasNext + * @stable ICU 2.1 + */ + UCharIteratorHasNext *hasNext; + + /** + * (public) Check if previous() can still return another code unit. + * + * @see UCharIteratorHasPrevious + * @stable ICU 2.1 + */ + UCharIteratorHasPrevious *hasPrevious; + + /** + * (public) Return the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * + * @see UCharIteratorCurrent + * @stable ICU 2.1 + */ + UCharIteratorCurrent *current; + + /** + * (public) Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @see UCharIteratorNext + * @stable ICU 2.1 + */ + UCharIteratorNext *next; + + /** + * (public) Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @see UCharIteratorPrevious + * @stable ICU 2.1 + */ + UCharIteratorPrevious *previous; + + /** + * (public) Reserved for future use. Currently NULL. + * + * @see UCharIteratorReserved + * @stable ICU 2.1 + */ + UCharIteratorReserved *reservedFn; + + /** + * (public) Return the state of the iterator, to be restored later with setState(). + * This function pointer is NULL if the iterator does not implement it. + * + * @see UCharIteratorGet + * @stable ICU 2.6 + */ + UCharIteratorGetState *getState; + + /** + * (public) Restore the iterator state from the state word from a call + * to getState(). + * This function pointer is NULL if the iterator does not implement it. + * + * @see UCharIteratorSet + * @stable ICU 2.6 + */ + UCharIteratorSetState *setState; +}; + +/** + * Helper function for UCharIterator to get the code point + * at the current index. + * + * Return the code point that includes the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * If the current code unit is a lead or trail surrogate, + * then the following or preceding surrogate is used to form + * the code point value. + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point + * + * @see UCharIterator + * @see U16_GET + * @see UnicodeString::char32At() + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_current32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the next code point. + * + * Return the code point at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point (and post-increment the current index) + * + * @see UCharIterator + * @see U16_NEXT + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_next32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the previous code point. + * + * Decrement the index and return the code point from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code point (after pre-decrementing the current index) + * + * @see UCharIterator + * @see U16_PREV + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_previous32(UCharIterator *iter); + +/** + * Get the "state" of the iterator in the form of a single 32-bit word. + * This is a convenience function that calls iter->getState(iter) + * if iter->getState is not NULL; + * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. + * + * Some UCharIterator implementations may not be able to return + * a valid state for each position, in which case they return UITER_NO_STATE instead. + * This will be clearly documented for each such iterator (none of the public ones here). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the state word + * + * @see UCharIterator + * @see UCharIteratorGetState + * @see UITER_NO_STATE + * @stable ICU 2.6 + */ +U_STABLE uint32_t U_EXPORT2 +uiter_getState(const UCharIterator *iter); + +/** + * Restore the "state" of the iterator using a state word from a getState() call. + * This is a convenience function that calls iter->setState(iter, state, pErrorCode) + * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. + * + * @param iter the UCharIterator structure ("this pointer") + * @param state the state word from a getState() call + * on a same-type, same-string iterator + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see UCharIterator + * @see UCharIteratorSetState + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); + +/** + * Set up a UCharIterator to iterate over a string. + * + * Sets the UCharIterator function pointers for iteration over the string s + * with iteration boundaries start=index=0 and length=limit=string length. + * The "provider" may set the start, index, and limit values at any time + * within the range 0..length. + * The length field will be ignored. + * + * The string pointer s is set into UCharIterator.context without copying + * or reallocating the string contents. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param s String to iterate over + * @param length Length of s, or -1 if NUL-terminated + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); + +/** + * Set up a UCharIterator to iterate over a UTF-16BE string + * (byte vector with a big-endian pair of bytes per UChar). + * + * Everything works just like with a normal UChar iterator (uiter_setString), + * except that UChars are assembled from byte pairs, + * and that the length argument here indicates an even number of bytes. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param s UTF-16BE string to iterate over + * @param length Length of s as an even number of bytes, or -1 if NUL-terminated + * (NUL means pair of 0 bytes at even index from s) + * + * @see UCharIterator + * @see uiter_setString + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); + +/** + * Set up a UCharIterator to iterate over a UTF-8 string. + * + * Sets the UCharIterator function pointers for iteration over the UTF-8 string s + * with UTF-8 iteration boundaries 0 and length. + * The implementation counts the UTF-16 index on the fly and + * lazily evaluates the UTF-16 length of the text. + * + * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. + * When the reservedField is not 0, then it contains a supplementary code point + * and the UTF-16 index is between the two corresponding surrogates. + * At that point, the UTF-8 index is behind that code point. + * + * The UTF-8 string pointer s is set into UCharIterator.context without copying + * or reallocating the string contents. + * + * getState() returns a state value consisting of + * - the current UTF-8 source byte index (bits 31..1) + * - a flag (bit 0) that indicates whether the UChar position is in the middle + * of a surrogate pair + * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) + * + * getState() cannot also encode the UTF-16 index in the state value. + * move(relative to limit or length), or + * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. + * + * @param iter UCharIterator structure to be set for iteration + * @param s UTF-8 string to iterate over + * @param length Length of s in bytes, or -1 if NUL-terminated + * + * @see UCharIterator + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); + +#if U_SHOW_CPLUSPLUS_API + +/** + * Set up a UCharIterator to wrap around a C++ CharacterIterator. + * + * Sets the UCharIterator function pointers for iteration using the + * CharacterIterator charIter. + * + * The CharacterIterator pointer charIter is set into UCharIterator.context + * without copying or cloning the CharacterIterator object. + * The other "protected" UCharIterator fields are set to 0 and will be ignored. + * The iteration index and boundaries are controlled by the CharacterIterator. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param charIter CharacterIterator to wrap + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); + +/** + * Set up a UCharIterator to iterate over a C++ Replaceable. + * + * Sets the UCharIterator function pointers for iteration over the + * Replaceable rep with iteration boundaries start=index=0 and + * length=limit=rep->length(). + * The "provider" may set the start, index, and limit values at any time + * within the range 0..length=rep->length(). + * The length field will be ignored. + * + * The Replaceable pointer rep is set into UCharIterator.context without copying + * or cloning/reallocating the Replaceable object. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param rep Replaceable to iterate over + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); + +#endif + +U_CDECL_END + +#endif diff --git a/Source/WTF/icu/unicode/uloc.h b/Source/WTF/icu/unicode/uloc.h new file mode 100644 index 000000000..28ab902b5 --- /dev/null +++ b/Source/WTF/icu/unicode/uloc.h @@ -0,0 +1,1135 @@ +/* +********************************************************************** +* Copyright (C) 1997-2013, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File ULOC.H +* +* Modification History: +* +* Date Name Description +* 04/01/97 aliu Creation. +* 08/22/98 stephen JDK 1.2 sync. +* 12/08/98 rtg New C API for Locale +* 03/30/99 damiba overhaul +* 03/31/99 helena Javadoc for uloc functions. +* 04/15/99 Madhu Updated Javadoc +******************************************************************************** +*/ + +#ifndef ULOC_H +#define ULOC_H + +#include "unicode/utypes.h" +#include "unicode/uenum.h" + +/** + * \file + * \brief C API: Locale + * + * <h2> ULoc C API for Locale </h2> + * A <code>Locale</code> represents a specific geographical, political, + * or cultural region. An operation that requires a <code>Locale</code> to perform + * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code> + * to tailor information for the user. For example, displaying a number + * is a locale-sensitive operation--the number should be formatted + * according to the customs/conventions of the user's native country, + * region, or culture. In the C APIs, a locales is simply a const char string. + * + * <P> + * You create a <code>Locale</code> with one of the three options listed below. + * Each of the component is separated by '_' in the locale string. + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * newLanguage + * + * newLanguage + newCountry + * + * newLanguage + newCountry + newVariant + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * The first option is a valid <STRONG>ISO + * Language Code.</STRONG> These codes are the lower-case two-letter + * codes as defined by ISO-639. + * You can find a full list of these codes at a number of sites, such as: + * <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt"> + * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</a> + * + * <P> + * The second option includes an additonal <STRONG>ISO Country + * Code.</STRONG> These codes are the upper-case two-letter codes + * as defined by ISO-3166. + * You can find a full list of these codes at a number of sites, such as: + * <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html"> + * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</a> + * + * <P> + * The third option requires another additonal information--the + * <STRONG>Variant.</STRONG> + * The Variant codes are vendor and browser-specific. + * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX. + * Where there are two variants, separate them with an underscore, and + * put the most important one first. For + * example, a Traditional Spanish collation might be referenced, with + * "ES", "ES", "Traditional_WIN". + * + * <P> + * Because a <code>Locale</code> is just an identifier for a region, + * no validity check is performed when you specify a <code>Locale</code>. + * If you want to see whether particular resources are available for the + * <code>Locale</code> you asked for, you must query those resources. For + * example, ask the <code>UNumberFormat</code> for the locales it supports + * using its <code>getAvailable</code> method. + * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular + * locale, you get back the best available match, not necessarily + * precisely what you asked for. For more information, look at + * <code>UResourceBundle</code>. + * + * <P> + * The <code>Locale</code> provides a number of convenient constants + * that you can use to specify the commonly used + * locales. For example, the following refers to a locale + * for the United States: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * ULOC_US + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * + * <P> + * Once you've specified a locale you can query it for information about + * itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and + * <code>uloc_getLanguage</code> to get the ISO Language Code. You can + * use <code>uloc_getDisplayCountry</code> to get the + * name of the country suitable for displaying to the user. Similarly, + * you can use <code>uloc_getDisplayLanguage</code> to get the name of + * the language suitable for displaying to the user. Interestingly, + * the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive + * and have two versions: one that uses the default locale and one + * that takes a locale as an argument and displays the name or country in + * a language appropriate to that locale. + * + * <P> + * The ICU provides a number of services that perform locale-sensitive + * operations. For example, the <code>unum_xxx</code> functions format + * numbers, currency, or percentages in a locale-sensitive manner. + * </P> + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * UErrorCode success = U_ZERO_ERROR; + * UNumberFormat *nf; + * const char* myLocale = "fr_FR"; + * + * nf = unum_open( UNUM_DEFAULT, NULL, success ); + * unum_close(nf); + * nf = unum_open( UNUM_CURRENCY, NULL, success ); + * unum_close(nf); + * nf = unum_open( UNUM_PERCENT, NULL, success ); + * unum_close(nf); + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * Each of these methods has two variants; one with an explicit locale + * and one without; the latter using the default locale. + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * + * nf = unum_open( UNUM_DEFAULT, myLocale, success ); + * unum_close(nf); + * nf = unum_open( UNUM_CURRENCY, myLocale, success ); + * unum_close(nf); + * nf = unum_open( UNUM_PERCENT, myLocale, success ); + * unum_close(nf); + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * A <code>Locale</code> is the mechanism for identifying the kind of services + * (<code>UNumberFormat</code>) that you would like to get. The locale is + * <STRONG>just</STRONG> a mechanism for identifying these services. + * + * <P> + * Each international serivce that performs locale-sensitive operations + * allows you + * to get all the available objects of that type. You can sift + * through these objects by language, country, or variant, + * and use the display names to present a menu to the user. + * For example, you can create a menu of all the collation objects + * suitable for a given language. Such classes implement these + * three class methods: + * \htmlonly<blockquote>\endhtmlonly + * <pre> + * \code + * const char* uloc_getAvailable(int32_t index); + * int32_t uloc_countAvailable(); + * int32_t + * uloc_getDisplayName(const char* localeID, + * const char* inLocaleID, + * UChar* result, + * int32_t maxResultSize, + * UErrorCode* err); + * + * \endcode + * </pre> + * \htmlonly</blockquote>\endhtmlonly + * <P> + * Concerning POSIX/RFC1766 Locale IDs, + * the getLanguage/getCountry/getVariant/getName functions do understand + * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT + * and if there is not an ICU-stype variant, uloc_getVariant() for example + * will return the one listed after the \@at sign. As well, the hyphen + * "-" is recognized as a country/variant separator similarly to RFC1766. + * So for example, "en-us" will be interpreted as en_US. + * As a result, uloc_getName() is far from a no-op, and will have the + * effect of converting POSIX/RFC1766 IDs into ICU form, although it does + * NOT map any of the actual codes (i.e. russian->ru) in any way. + * Applications should call uloc_getName() at the point where a locale ID + * is coming from an external source (user entry, OS, web browser) + * and pass the resulting string to other ICU functions. For example, + * don't use de-de\@EURO as an argument to resourcebundle. + * + * @see UResourceBundle + */ + +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_CHINESE "zh" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_ENGLISH "en" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_FRENCH "fr" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_GERMAN "de" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_ITALIAN "it" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_JAPANESE "ja" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_KOREAN "ko" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_SIMPLIFIED_CHINESE "zh_CN" +/** Useful constant for this language. @stable ICU 2.0 */ +#define ULOC_TRADITIONAL_CHINESE "zh_TW" + +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_CANADA "en_CA" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_CANADA_FRENCH "fr_CA" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_CHINA "zh_CN" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_PRC "zh_CN" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_FRANCE "fr_FR" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_GERMANY "de_DE" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_ITALY "it_IT" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_JAPAN "ja_JP" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_KOREA "ko_KR" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_TAIWAN "zh_TW" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_UK "en_GB" +/** Useful constant for this country/region. @stable ICU 2.0 */ +#define ULOC_US "en_US" + +/** + * Useful constant for the maximum size of the language part of a locale ID. + * (including the terminating NULL). + * @stable ICU 2.0 + */ +#define ULOC_LANG_CAPACITY 12 + +/** + * Useful constant for the maximum size of the country part of a locale ID + * (including the terminating NULL). + * @stable ICU 2.0 + */ +#define ULOC_COUNTRY_CAPACITY 4 +/** + * Useful constant for the maximum size of the whole locale ID + * (including the terminating NULL and all keywords). + * @stable ICU 2.0 + */ +#define ULOC_FULLNAME_CAPACITY 157 + +/** + * Useful constant for the maximum size of the script part of a locale ID + * (including the terminating NULL). + * @stable ICU 2.8 + */ +#define ULOC_SCRIPT_CAPACITY 6 + +/** + * Useful constant for the maximum size of keywords in a locale + * @stable ICU 2.8 + */ +#define ULOC_KEYWORDS_CAPACITY 50 + +/** + * Useful constant for the maximum total size of keywords and their values in a locale + * @stable ICU 2.8 + */ +#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100 + +/** + * Invariant character separating keywords from the locale string + * @stable ICU 2.8 + */ +#define ULOC_KEYWORD_SEPARATOR '@' + +/** + * Unicode code point for '@' separating keywords from the locale string. + * @see ULOC_KEYWORD_SEPARATOR + * @stable ICU 4.6 + */ +#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40 + +/** + * Invariant character for assigning value to a keyword + * @stable ICU 2.8 + */ +#define ULOC_KEYWORD_ASSIGN '=' + +/** + * Unicode code point for '=' for assigning value to a keyword. + * @see ULOC_KEYWORD_ASSIGN + * @stable ICU 4.6 + */ +#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D + +/** + * Invariant character separating keywords + * @stable ICU 2.8 + */ +#define ULOC_KEYWORD_ITEM_SEPARATOR ';' + +/** + * Unicode code point for ';' separating keywords + * @see ULOC_KEYWORD_ITEM_SEPARATOR + * @stable ICU 4.6 + */ +#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B + +/** + * Constants for *_getLocale() + * Allow user to select whether she wants information on + * requested, valid or actual locale. + * For example, a collator for "en_US_CALIFORNIA" was + * requested. In the current state of ICU (2.0), + * the requested locale is "en_US_CALIFORNIA", + * the valid locale is "en_US" (most specific locale supported by ICU) + * and the actual locale is "root" (the collation data comes unmodified + * from the UCA) + * The locale is considered supported by ICU if there is a core ICU bundle + * for that locale (although it may be empty). + * @stable ICU 2.1 + */ +typedef enum { + /** This is locale the data actually comes from + * @stable ICU 2.1 + */ + ULOC_ACTUAL_LOCALE = 0, + /** This is the most specific locale supported by ICU + * @stable ICU 2.1 + */ + ULOC_VALID_LOCALE = 1, + +#ifndef U_HIDE_DEPRECATED_API + /** This is the requested locale + * @deprecated ICU 2.8 + */ + ULOC_REQUESTED_LOCALE = 2, +#endif /* U_HIDE_DEPRECATED_API */ + + ULOC_DATA_LOCALE_TYPE_LIMIT = 3 +} ULocDataLocaleType ; + +#ifndef U_HIDE_SYSTEM_API +/** + * Gets ICU's default locale. + * The returned string is a snapshot in time, and will remain valid + * and unchanged even when uloc_setDefault() is called. + * The returned storage is owned by ICU, and must not be altered or deleted + * by the caller. + * + * @return the ICU default locale + * @system + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +uloc_getDefault(void); + +/** + * Sets ICU's default locale. + * By default (without calling this function), ICU's default locale will be based + * on information obtained from the underlying system environment. + * <p> + * Changes to ICU's default locale do not propagate back to the + * system environment. + * <p> + * Changes to ICU's default locale to not affect any ICU services that + * may already be open based on the previous default locale value. + * + * @param localeID the new ICU default locale. A value of NULL will try to get + * the system's default locale. + * @param status the error information if the setting of default locale fails + * @system + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +uloc_setDefault(const char* localeID, + UErrorCode* status); +#endif /* U_HIDE_SYSTEM_API */ + +/** + * Gets the language code for the specified locale. + * + * @param localeID the locale to get the ISO language code with + * @param language the language code for localeID + * @param languageCapacity the size of the language buffer to store the + * language code with + * @param err error information if retrieving the language code failed + * @return the actual buffer size needed for the language code. If it's greater + * than languageCapacity, the returned language code will be truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getLanguage(const char* localeID, + char* language, + int32_t languageCapacity, + UErrorCode* err); + +/** + * Gets the script code for the specified locale. + * + * @param localeID the locale to get the ISO language code with + * @param script the language code for localeID + * @param scriptCapacity the size of the language buffer to store the + * language code with + * @param err error information if retrieving the language code failed + * @return the actual buffer size needed for the language code. If it's greater + * than scriptCapacity, the returned language code will be truncated. + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getScript(const char* localeID, + char* script, + int32_t scriptCapacity, + UErrorCode* err); + +/** + * Gets the country code for the specified locale. + * + * @param localeID the locale to get the country code with + * @param country the country code for localeID + * @param countryCapacity the size of the country buffer to store the + * country code with + * @param err error information if retrieving the country code failed + * @return the actual buffer size needed for the country code. If it's greater + * than countryCapacity, the returned country code will be truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getCountry(const char* localeID, + char* country, + int32_t countryCapacity, + UErrorCode* err); + +/** + * Gets the variant code for the specified locale. + * + * @param localeID the locale to get the variant code with + * @param variant the variant code for localeID + * @param variantCapacity the size of the variant buffer to store the + * variant code with + * @param err error information if retrieving the variant code failed + * @return the actual buffer size needed for the variant code. If it's greater + * than variantCapacity, the returned variant code will be truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getVariant(const char* localeID, + char* variant, + int32_t variantCapacity, + UErrorCode* err); + + +/** + * Gets the full name for the specified locale. + * Note: This has the effect of 'canonicalizing' the ICU locale ID to + * a certain extent. Upper and lower case are set as needed. + * It does NOT map aliased names in any way. + * See the top of this header file. + * This API supports preflighting. + * + * @param localeID the locale to get the full name with + * @param name fill in buffer for the name without keywords. + * @param nameCapacity capacity of the fill in buffer. + * @param err error information if retrieving the full name failed + * @return the actual buffer size needed for the full name. If it's greater + * than nameCapacity, the returned full name will be truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getName(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err); + +/** + * Gets the full name for the specified locale. + * Note: This has the effect of 'canonicalizing' the string to + * a certain extent. Upper and lower case are set as needed, + * and if the components were in 'POSIX' format they are changed to + * ICU format. It does NOT map aliased names in any way. + * See the top of this header file. + * + * @param localeID the locale to get the full name with + * @param name the full name for localeID + * @param nameCapacity the size of the name buffer to store the + * full name with + * @param err error information if retrieving the full name failed + * @return the actual buffer size needed for the full name. If it's greater + * than nameCapacity, the returned full name will be truncated. + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uloc_canonicalize(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err); + +/** + * Gets the ISO language code for the specified locale. + * + * @param localeID the locale to get the ISO language code with + * @return language the ISO language code for localeID + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +uloc_getISO3Language(const char* localeID); + + +/** + * Gets the ISO country code for the specified locale. + * + * @param localeID the locale to get the ISO country code with + * @return country the ISO country code for localeID + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +uloc_getISO3Country(const char* localeID); + +/** + * Gets the Win32 LCID value for the specified locale. + * If the ICU locale is not recognized by Windows, 0 will be returned. + * + * @param localeID the locale to get the Win32 LCID value with + * @return country the Win32 LCID for localeID + * @stable ICU 2.0 + */ +U_STABLE uint32_t U_EXPORT2 +uloc_getLCID(const char* localeID); + +/** + * Gets the language name suitable for display for the specified locale. + * + * @param locale the locale to get the ISO language code with + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". + * @param language the displayable language code for localeID + * @param languageCapacity the size of the language buffer to store the + * displayable language code with + * @param status error information if retrieving the displayable language code failed + * @return the actual buffer size needed for the displayable language code. If it's greater + * than languageCapacity, the returned language code will be truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getDisplayLanguage(const char* locale, + const char* displayLocale, + UChar* language, + int32_t languageCapacity, + UErrorCode* status); + +/** + * Gets the script name suitable for display for the specified locale. + * + * @param locale the locale to get the displayable script code with. NULL may be used to specify the default. + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "", while passing Locale::getGerman() + * for inLocale would result in "". NULL may be used to specify the default. + * @param script the displayable country code for localeID + * @param scriptCapacity the size of the script buffer to store the + * displayable script code with + * @param status error information if retrieving the displayable script code failed + * @return the actual buffer size needed for the displayable script code. If it's greater + * than scriptCapacity, the returned displayable script code will be truncated. + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getDisplayScript(const char* locale, + const char* displayLocale, + UChar* script, + int32_t scriptCapacity, + UErrorCode* status); + +/** + * Gets the country name suitable for display for the specified locale. + * + * @param locale the locale to get the displayable country code with. NULL may be used to specify the default. + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". NULL may be used to specify the default. + * @param country the displayable country code for localeID + * @param countryCapacity the size of the country buffer to store the + * displayable country code with + * @param status error information if retrieving the displayable country code failed + * @return the actual buffer size needed for the displayable country code. If it's greater + * than countryCapacity, the returned displayable country code will be truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getDisplayCountry(const char* locale, + const char* displayLocale, + UChar* country, + int32_t countryCapacity, + UErrorCode* status); + + +/** + * Gets the variant name suitable for display for the specified locale. + * + * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default. + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". NULL may be used to specify the default. + * @param variant the displayable variant code for localeID + * @param variantCapacity the size of the variant buffer to store the + * displayable variant code with + * @param status error information if retrieving the displayable variant code failed + * @return the actual buffer size needed for the displayable variant code. If it's greater + * than variantCapacity, the returned displayable variant code will be truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getDisplayVariant(const char* locale, + const char* displayLocale, + UChar* variant, + int32_t variantCapacity, + UErrorCode* status); + +/** + * Gets the keyword name suitable for display for the specified locale. + * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display + * string for the keyword collation. + * Usage: + * <code> + * UErrorCode status = U_ZERO_ERROR; + * const char* keyword =NULL; + * int32_t keywordLen = 0; + * int32_t keywordCount = 0; + * UChar displayKeyword[256]; + * int32_t displayKeywordLen = 0; + * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status); + * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){ + * if(U_FAILURE(status)){ + * ...something went wrong so handle the error... + * break; + * } + * // the uenum_next returns NUL terminated string + * keyword = uenum_next(keywordEnum, &keywordLen, &status); + * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256); + * ... do something interesting ..... + * } + * uenum_close(keywordEnum); + * </code> + * @param keyword The keyword whose display string needs to be returned. + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". NULL may be used to specify the default. + * @param dest the buffer to which the displayable keyword should be written. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param status error information if retrieving the displayable string failed. + * Should not be NULL and should not indicate failure on entry. + * @return the actual buffer size needed for the displayable variant code. + * @see #uloc_openKeywords + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getDisplayKeyword(const char* keyword, + const char* displayLocale, + UChar* dest, + int32_t destCapacity, + UErrorCode* status); +/** + * Gets the value of the keyword suitable for display for the specified locale. + * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display + * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword. + * + * @param locale The locale to get the displayable variant code with. NULL may be used to specify the default. + * @param keyword The keyword for whose value should be used. + * @param displayLocale Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". NULL may be used to specify the default. + * @param dest the buffer to which the displayable keyword should be written. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param status error information if retrieving the displayable string failed. + * Should not be NULL and must not indicate failure on entry. + * @return the actual buffer size needed for the displayable variant code. + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getDisplayKeywordValue( const char* locale, + const char* keyword, + const char* displayLocale, + UChar* dest, + int32_t destCapacity, + UErrorCode* status); +/** + * Gets the full name suitable for display for the specified locale. + * + * @param localeID the locale to get the displayable name with. NULL may be used to specify the default. + * @param inLocaleID Specifies the locale to be used to display the name. In other words, + * if the locale's language code is "en", passing Locale::getFrench() for + * inLocale would result in "Anglais", while passing Locale::getGerman() + * for inLocale would result in "Englisch". NULL may be used to specify the default. + * @param result the displayable name for localeID + * @param maxResultSize the size of the name buffer to store the + * displayable full name with + * @param err error information if retrieving the displayable name failed + * @return the actual buffer size needed for the displayable name. If it's greater + * than maxResultSize, the returned displayable name will be truncated. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getDisplayName(const char* localeID, + const char* inLocaleID, + UChar* result, + int32_t maxResultSize, + UErrorCode* err); + + +/** + * Gets the specified locale from a list of all available locales. + * The return value is a pointer to an item of + * a locale name array. Both this array and the pointers + * it contains are owned by ICU and should not be deleted or written through + * by the caller. The locale name is terminated by a null pointer. + * @param n the specific locale name index of the available locale list + * @return a specified locale name of all available locales + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 +uloc_getAvailable(int32_t n); + +/** + * Gets the size of the all available locale list. + * + * @return the size of the locale list + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void); + +/** + * + * Gets a list of all available 2-letter language codes defined in ISO 639, + * plus additional 3-letter codes determined to be useful for locale generation as + * defined by Unicode CLDR. This is a pointer + * to an array of pointers to arrays of char. All of these pointers are owned + * by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + * @return a list of all available language codes + * @stable ICU 2.0 + */ +U_STABLE const char* const* U_EXPORT2 +uloc_getISOLanguages(void); + +/** + * + * Gets a list of all available 2-letter country codes defined in ISO 639. This is a + * pointer to an array of pointers to arrays of char. All of these pointers are + * owned by ICU-- do not delete them, and do not write through them. The array is + * terminated with a null pointer. + * @return a list of all available country codes + * @stable ICU 2.0 + */ +U_STABLE const char* const* U_EXPORT2 +uloc_getISOCountries(void); + +/** + * Truncate the locale ID string to get the parent locale ID. + * Copies the part of the string before the last underscore. + * The parent locale ID will be an empty string if there is no + * underscore, or if there is only one underscore at localeID[0]. + * + * @param localeID Input locale ID string. + * @param parent Output string buffer for the parent locale ID. + * @param parentCapacity Size of the output buffer. + * @param err A UErrorCode value. + * @return The length of the parent locale ID. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getParent(const char* localeID, + char* parent, + int32_t parentCapacity, + UErrorCode* err); + + + + +/** + * Gets the full name for the specified locale, like uloc_getName(), + * but without keywords. + * + * Note: This has the effect of 'canonicalizing' the string to + * a certain extent. Upper and lower case are set as needed, + * and if the components were in 'POSIX' format they are changed to + * ICU format. It does NOT map aliased names in any way. + * See the top of this header file. + * + * This API strips off the keyword part, so "de_DE\@collation=phonebook" + * will become "de_DE". + * This API supports preflighting. + * + * @param localeID the locale to get the full name with + * @param name fill in buffer for the name without keywords. + * @param nameCapacity capacity of the fill in buffer. + * @param err error information if retrieving the full name failed + * @return the actual buffer size needed for the full name. If it's greater + * than nameCapacity, the returned full name will be truncated. + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getBaseName(const char* localeID, + char* name, + int32_t nameCapacity, + UErrorCode* err); + +/** + * Gets an enumeration of keywords for the specified locale. Enumeration + * must get disposed of by the client using uenum_close function. + * + * @param localeID the locale to get the variant code with + * @param status error information if retrieving the keywords failed + * @return enumeration of keywords or NULL if there are no keywords. + * @stable ICU 2.8 + */ +U_STABLE UEnumeration* U_EXPORT2 +uloc_openKeywords(const char* localeID, + UErrorCode* status); + +/** + * Get the value for a keyword. Locale name does not need to be normalized. + * + * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK") + * @param keywordName name of the keyword for which we want the value. Case insensitive. + * @param buffer receiving buffer + * @param bufferCapacity capacity of receiving buffer + * @param status containing error code - buffer not big enough. + * @return the length of keyword value + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getKeywordValue(const char* localeID, + const char* keywordName, + char* buffer, int32_t bufferCapacity, + UErrorCode* status); + + +/** + * Sets or removes the value of the specified keyword. + * + * For removing all keywords, use uloc_getBaseName(). + * + * NOTE: Unlike almost every other ICU function which takes a + * buffer, this function will NOT truncate the output text. If a + * BUFFER_OVERFLOW_ERROR is received, it means that the original + * buffer is untouched. This is done to prevent incorrect or possibly + * even malformed locales from being generated and used. + * + * @param keywordName name of the keyword to be set. Case insensitive. + * @param keywordValue value of the keyword to be set. If 0-length or + * NULL, will result in the keyword being removed. No error is given if + * that keyword does not exist. + * @param buffer input buffer containing locale to be modified. + * @param bufferCapacity capacity of receiving buffer + * @param status containing error code - buffer not big enough. + * @return the length needed for the buffer + * @see uloc_getKeywordValue + * @stable ICU 3.2 + */ +U_STABLE int32_t U_EXPORT2 +uloc_setKeywordValue(const char* keywordName, + const char* keywordValue, + char* buffer, int32_t bufferCapacity, + UErrorCode* status); + +/** + * enums for the return value for the character and line orientation + * functions. + * @stable ICU 4.0 + */ +typedef enum { + ULOC_LAYOUT_LTR = 0, /* left-to-right. */ + ULOC_LAYOUT_RTL = 1, /* right-to-left. */ + ULOC_LAYOUT_TTB = 2, /* top-to-bottom. */ + ULOC_LAYOUT_BTT = 3, /* bottom-to-top. */ + ULOC_LAYOUT_UNKNOWN +} ULayoutType; + +/** + * Get the layout character orientation for the specified locale. + * + * @param localeId locale name + * @param status Error status + * @return an enum indicating the layout orientation for characters. + * @stable ICU 4.0 + */ +U_STABLE ULayoutType U_EXPORT2 +uloc_getCharacterOrientation(const char* localeId, + UErrorCode *status); + +/** + * Get the layout line orientation for the specified locale. + * + * @param localeId locale name + * @param status Error status + * @return an enum indicating the layout orientation for lines. + * @stable ICU 4.0 + */ +U_STABLE ULayoutType U_EXPORT2 +uloc_getLineOrientation(const char* localeId, + UErrorCode *status); + +/** + * enums for the 'outResult' parameter return value + * @see uloc_acceptLanguageFromHTTP + * @see uloc_acceptLanguage + * @stable ICU 3.2 + */ +typedef enum { + ULOC_ACCEPT_FAILED = 0, /* No exact match was found. */ + ULOC_ACCEPT_VALID = 1, /* An exact match was found. */ + ULOC_ACCEPT_FALLBACK = 2 /* A fallback was found, for example, + Accept list contained 'ja_JP' + which matched available locale 'ja'. */ +} UAcceptResult; + + +/** + * Based on a HTTP header from a web browser and a list of available locales, + * determine an acceptable locale for the user. + * @param result - buffer to accept the result locale + * @param resultAvailable the size of the result buffer. + * @param outResult - An out parameter that contains the fallback status + * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP. + * @param availableLocales - list of available locales to match + * @param status Error status, may be BUFFER_OVERFLOW_ERROR + * @return length needed for the locale. + * @stable ICU 3.2 + */ +U_STABLE int32_t U_EXPORT2 +uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, + UAcceptResult *outResult, + const char *httpAcceptLanguage, + UEnumeration* availableLocales, + UErrorCode *status); + +/** + * Based on a list of available locales, + * determine an acceptable locale for the user. + * @param result - buffer to accept the result locale + * @param resultAvailable the size of the result buffer. + * @param outResult - An out parameter that contains the fallback status + * @param acceptList - list of acceptable languages + * @param acceptListCount - count of acceptList items + * @param availableLocales - list of available locales to match + * @param status Error status, may be BUFFER_OVERFLOW_ERROR + * @return length needed for the locale. + * @stable ICU 3.2 + */ +U_STABLE int32_t U_EXPORT2 +uloc_acceptLanguage(char *result, int32_t resultAvailable, + UAcceptResult *outResult, const char **acceptList, + int32_t acceptListCount, + UEnumeration* availableLocales, + UErrorCode *status); + + +/** + * Gets the ICU locale ID for the specified Win32 LCID value. + * + * @param hostID the Win32 LCID to translate + * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated + * if there is room. + * @param localeCapacity the size of the output buffer + * @param status an error is returned if the LCID is unrecognized or the output buffer + * is too small + * @return actual the actual size of the locale ID, not including NUL-termination + * @stable ICU 3.8 + */ +U_STABLE int32_t U_EXPORT2 +uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity, + UErrorCode *status); + + +/** + * Add the likely subtags for a provided locale ID, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If localeID is already in the maximal form, or there is no data available + * for maximization, it will be copied to the output buffer. For example, + * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. + * + * Examples: + * + * "en" maximizes to "en_Latn_US" + * + * "de" maximizes to "de_Latn_US" + * + * "sr" maximizes to "sr_Cyrl_RS" + * + * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) + * + * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) + * + * @param localeID The locale to maximize + * @param maximizedLocaleID The maximized locale + * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer + * @param err Error information if maximizing the locale failed. If the length + * of the localeID and the null-terminator is greater than the maximum allowed size, + * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. + * @return The actual buffer size needed for the maximized locale. If it's + * greater than maximizedLocaleIDCapacity, the returned ID will be truncated. + * On error, the return value is -1. + * @stable ICU 4.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_addLikelySubtags(const char* localeID, + char* maximizedLocaleID, + int32_t maximizedLocaleIDCapacity, + UErrorCode* err); + + +/** + * Minimize the subtags for a provided locale ID, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If localeID is already in the minimal form, or there is no data available + * for minimization, it will be copied to the output buffer. Since the + * minimization algorithm relies on proper maximization, see the comments + * for uloc_addLikelySubtags for reasons why there might not be any data. + * + * Examples: + * + * "en_Latn_US" minimizes to "en" + * + * "de_Latn_US" minimizes to "de" + * + * "sr_Cyrl_RS" minimizes to "sr" + * + * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the + * script, and minimizing to "zh" would imply "zh_Hans_CN".) + * + * @param localeID The locale to minimize + * @param minimizedLocaleID The minimized locale + * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer + * @param err Error information if minimizing the locale failed. If the length + * of the localeID and the null-terminator is greater than the maximum allowed size, + * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. + * @return The actual buffer size needed for the minimized locale. If it's + * greater than minimizedLocaleIDCapacity, the returned ID will be truncated. + * On error, the return value is -1. + * @stable ICU 4.0 + */ +U_STABLE int32_t U_EXPORT2 +uloc_minimizeSubtags(const char* localeID, + char* minimizedLocaleID, + int32_t minimizedLocaleIDCapacity, + UErrorCode* err); + +/** + * Returns a locale ID for the specified BCP47 language tag string. + * If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. + * <p> + * This implements the 'Language-Tag' production of BCP47, and so + * supports grandfathered (regular and irregular) as well as private + * use language tags. Private use tags are represented as 'x-whatever', + * and grandfathered tags are converted to their canonical replacements + * where they exist. Note that a few grandfathered tags have no modern + * replacement, these will be converted using the fallback described in + * the first paragraph, so some information might be lost. + * @param langtag the input BCP47 language tag. + * @param localeID the output buffer receiving a locale ID for the + * specified BCP47 language tag. + * @param localeIDCapacity the size of the locale ID output buffer. + * @param parsedLength if not NULL, successfully parsed length + * for the input language tag is set. + * @param err error information if receiving the locald ID + * failed. + * @return the length of the locale ID. + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +uloc_forLanguageTag(const char* langtag, + char* localeID, + int32_t localeIDCapacity, + int32_t* parsedLength, + UErrorCode* err); + +/** + * Returns a well-formed language tag for this locale ID. + * <p> + * <b>Note</b>: When <code>strict</code> is FALSE, any locale + * fields which do not satisfy the BCP47 syntax requirement will + * be omitted from the result. When <code>strict</code> is + * TRUE, this function sets U_ILLEGAL_ARGUMENT_ERROR to the + * <code>err</code> if any locale fields do not satisfy the + * BCP47 syntax requirement. + * @param localeID the input locale ID + * @param langtag the output buffer receiving BCP47 language + * tag for the locale ID. + * @param langtagCapacity the size of the BCP47 language tag + * output buffer. + * @param strict boolean value indicating if the function returns + * an error for an ill-formed input locale ID. + * @param err error information if receiving the language + * tag failed. + * @return The length of the BCP47 language tag. + * @stable ICU 4.2 + */ +U_STABLE int32_t U_EXPORT2 +uloc_toLanguageTag(const char* localeID, + char* langtag, + int32_t langtagCapacity, + UBool strict, + UErrorCode* err); + +#endif /*_ULOC*/ diff --git a/Source/WTF/icu/unicode/umachine.h b/Source/WTF/icu/unicode/umachine.h new file mode 100644 index 000000000..d1102f493 --- /dev/null +++ b/Source/WTF/icu/unicode/umachine.h @@ -0,0 +1,322 @@ +/* +****************************************************************************** +* +* Copyright (C) 1999-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: umachine.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +* +* This file defines basic types and constants for utf.h to be +* platform-independent. umachine.h and utf.h are included into +* utypes.h to provide all the general definitions for ICU. +* All of these definitions used to be in utypes.h before +* the UTF-handling macros made this unmaintainable. +*/ + +#ifndef __UMACHINE_H__ +#define __UMACHINE_H__ + + +/** + * \file + * \brief Basic types and constants for UTF + * + * <h2> Basic types and constants for UTF </h2> + * This file defines basic types and constants for utf.h to be + * platform-independent. umachine.h and utf.h are included into + * utypes.h to provide all the general definitions for ICU. + * All of these definitions used to be in utypes.h before + * the UTF-handling macros made this unmaintainable. + * + */ +/*==========================================================================*/ +/* Include platform-dependent definitions */ +/* which are contained in the platform-specific file platform.h */ +/*==========================================================================*/ + +#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */ + +/* + * ANSI C headers: + * stddef.h defines wchar_t + */ +#include <stddef.h> + +/*==========================================================================*/ +/* For C wrappers, we use the symbol U_STABLE. */ +/* This works properly if the includer is C or C++. */ +/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */ +/*==========================================================================*/ + +/** + * \def U_CFUNC + * This is used in a declaration of a library private ICU C function. + * @stable ICU 2.4 + */ + +/** + * \def U_CDECL_BEGIN + * This is used to begin a declaration of a library private ICU C API. + * @stable ICU 2.4 + */ + +/** + * \def U_CDECL_END + * This is used to end a declaration of a library private ICU C API + * @stable ICU 2.4 + */ + +#ifdef __cplusplus +# define U_CFUNC extern "C" +# define U_CDECL_BEGIN extern "C" { +# define U_CDECL_END } +#else +# define U_CFUNC extern +# define U_CDECL_BEGIN +# define U_CDECL_END +#endif + +#ifndef U_ATTRIBUTE_DEPRECATED +/** + * \def U_ATTRIBUTE_DEPRECATED + * This is used for GCC specific attributes + * @internal + */ +#if U_GCC_MAJOR_MINOR >= 302 +# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated)) +/** + * \def U_ATTRIBUTE_DEPRECATED + * This is used for Visual C++ specific attributes + * @internal + */ +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated) +#else +# define U_ATTRIBUTE_DEPRECATED +#endif +#endif + +/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ +#define U_CAPI U_CFUNC U_EXPORT +/** This is used to declare a function as a stable public ICU C API*/ +#define U_STABLE U_CAPI +/** This is used to declare a function as a draft public ICU C API */ +#define U_DRAFT U_CAPI +/** This is used to declare a function as a deprecated public ICU C API */ +#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED +/** This is used to declare a function as an obsolete public ICU C API */ +#define U_OBSOLETE U_CAPI +/** This is used to declare a function as an internal ICU C API */ +#define U_INTERNAL U_CAPI + +/*==========================================================================*/ +/* limits for int32_t etc., like in POSIX inttypes.h */ +/*==========================================================================*/ + +#ifndef INT8_MIN +/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ +# define INT8_MIN ((int8_t)(-128)) +#endif +#ifndef INT16_MIN +/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ +# define INT16_MIN ((int16_t)(-32767-1)) +#endif +#ifndef INT32_MIN +/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ +# define INT32_MIN ((int32_t)(-2147483647-1)) +#endif + +#ifndef INT8_MAX +/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ +# define INT8_MAX ((int8_t)(127)) +#endif +#ifndef INT16_MAX +/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ +# define INT16_MAX ((int16_t)(32767)) +#endif +#ifndef INT32_MAX +/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ +# define INT32_MAX ((int32_t)(2147483647)) +#endif + +#ifndef UINT8_MAX +/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT8_MAX ((uint8_t)(255U)) +#endif +#ifndef UINT16_MAX +/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT16_MAX ((uint16_t)(65535U)) +#endif +#ifndef UINT32_MAX +/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT32_MAX ((uint32_t)(4294967295U)) +#endif + +#if defined(U_INT64_T_UNAVAILABLE) +# error int64_t is required for decimal format and rule-based number format. +#else +# ifndef INT64_C +/** + * Provides a platform independent way to specify a signed 64-bit integer constant. + * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C + * @stable ICU 2.8 + */ +# define INT64_C(c) c ## LL +# endif +# ifndef UINT64_C +/** + * Provides a platform independent way to specify an unsigned 64-bit integer constant. + * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C + * @stable ICU 2.8 + */ +# define UINT64_C(c) c ## ULL +# endif +# ifndef U_INT64_MIN +/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ +# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) +# endif +# ifndef U_INT64_MAX +/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ +# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) +# endif +# ifndef U_UINT64_MAX +/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ +# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) +# endif +#endif + +/*==========================================================================*/ +/* Boolean data type */ +/*==========================================================================*/ + +/** The ICU boolean type @stable ICU 2.0 */ +typedef int8_t UBool; + +#ifndef TRUE +/** The TRUE value of a UBool @stable ICU 2.0 */ +# define TRUE 1 +#endif +#ifndef FALSE +/** The FALSE value of a UBool @stable ICU 2.0 */ +# define FALSE 0 +#endif + + +/*==========================================================================*/ +/* Unicode data types */ +/*==========================================================================*/ + +/* wchar_t-related definitions -------------------------------------------- */ + +/* + * \def U_WCHAR_IS_UTF16 + * Defined if wchar_t uses UTF-16. + * + * @stable ICU 2.0 + */ +/* + * \def U_WCHAR_IS_UTF32 + * Defined if wchar_t uses UTF-32. + * + * @stable ICU 2.0 + */ +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) +# ifdef __STDC_ISO_10646__ +# if (U_SIZEOF_WCHAR_T==2) +# define U_WCHAR_IS_UTF16 +# elif (U_SIZEOF_WCHAR_T==4) +# define U_WCHAR_IS_UTF32 +# endif +# elif defined __UCS2__ +# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2) +# define U_WCHAR_IS_UTF16 +# endif +# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__)) +# if (U_SIZEOF_WCHAR_T==4) +# define U_WCHAR_IS_UTF32 +# endif +# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED) +# define U_WCHAR_IS_UTF32 +# elif U_PLATFORM_HAS_WIN32_API +# define U_WCHAR_IS_UTF16 +# endif +#endif + +/* UChar and UChar32 definitions -------------------------------------------- */ + +/** Number of bytes in a UChar. @stable ICU 2.0 */ +#define U_SIZEOF_UCHAR 2 + +/** + * \var UChar + * Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), + * or wchar_t if that is 16 bits wide; always assumed to be unsigned. + * If neither is available, then define UChar to be uint16_t. + * + * This makes the definition of UChar platform-dependent + * but allows direct string type compatibility with platforms with + * 16-bit wchar_t types. + * + * @stable ICU 4.4 + */ +#if defined(UCHAR_TYPE) + typedef UCHAR_TYPE UChar; +/* Not #elif U_HAVE_CHAR16_T -- because that is type-incompatible with pre-C++11 callers + typedef char16_t UChar; */ +#elif U_SIZEOF_WCHAR_T==2 + typedef wchar_t UChar; +#elif defined(__CHAR16_TYPE__) + typedef __CHAR16_TYPE__ UChar; +#else + typedef uint16_t UChar; +#endif + +/** + * Define UChar32 as a type for single Unicode code points. + * UChar32 is a signed 32-bit integer (same as int32_t). + * + * The Unicode code point range is 0..0x10ffff. + * All other values (negative or >=0x110000) are illegal as Unicode code points. + * They may be used as sentinel values to indicate "done", "error" + * or similar non-code point conditions. + * + * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined + * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) + * or else to be uint32_t. + * That is, the definition of UChar32 was platform-dependent. + * + * @see U_SENTINEL + * @stable ICU 2.4 + */ +typedef int32_t UChar32; + +/** + * This value is intended for sentinel values for APIs that + * (take or) return single code points (UChar32). + * It is outside of the Unicode code point range 0..0x10ffff. + * + * For example, a "done" or "error" value in a new API + * could be indicated with U_SENTINEL. + * + * ICU APIs designed before ICU 2.4 usually define service-specific "done" + * values, mostly 0xffff. + * Those may need to be distinguished from + * actual U+ffff text contents by calling functions like + * CharacterIterator::hasNext() or UnicodeString::length(). + * + * @return -1 + * @see UChar32 + * @stable ICU 2.4 + */ +#define U_SENTINEL (-1) + +#include "unicode/urename.h" + +#endif diff --git a/Source/WTF/icu/unicode/unistr.h b/Source/WTF/icu/unicode/unistr.h new file mode 100644 index 000000000..c6e8b4466 --- /dev/null +++ b/Source/WTF/icu/unicode/unistr.h @@ -0,0 +1,4470 @@ +/* +********************************************************************** +* Copyright (C) 1998-2013, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File unistr.h +* +* Modification History: +* +* Date Name Description +* 09/25/98 stephen Creation. +* 11/11/98 stephen Changed per 11/9 code review. +* 04/20/99 stephen Overhauled per 4/16 code review. +* 11/18/99 aliu Made to inherit from Replaceable. Added method +* handleReplaceBetween(); other methods unchanged. +* 06/25/01 grhoten Remove dependency on iostream. +****************************************************************************** +*/ + +#ifndef UNISTR_H +#define UNISTR_H + +/** + * \file + * \brief C++ API: Unicode String + */ + +#include "unicode/utypes.h" +#include "unicode/rep.h" +#include "unicode/std_string.h" +#include "unicode/stringpiece.h" +#include "unicode/bytestream.h" +#include "unicode/ucasemap.h" + +struct UConverter; // unicode/ucnv.h +class StringThreadTest; + +#ifndef U_COMPARE_CODE_POINT_ORDER +/* see also ustring.h and unorm.h */ +/** + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: + * Compare strings in code point order instead of code unit order. + * @stable ICU 2.2 + */ +#define U_COMPARE_CODE_POINT_ORDER 0x8000 +#endif + +#ifndef USTRING_H +/** + * \ingroup ustring_ustrlen + */ +U_STABLE int32_t U_EXPORT2 +u_strlen(const UChar *s); +#endif + +/** + * \def U_STRING_CASE_MAPPER_DEFINED + * @internal + */ +#ifndef U_STRING_CASE_MAPPER_DEFINED +#define U_STRING_CASE_MAPPER_DEFINED + +/** + * Internal string case mapping function type. + * @internal + */ +typedef int32_t U_CALLCONV +UStringCaseMapper(const UCaseMap *csm, + UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode); + +#endif + +U_NAMESPACE_BEGIN + +class BreakIterator; // unicode/brkiter.h +class Locale; // unicode/locid.h +class StringCharacterIterator; +class UnicodeStringAppendable; // unicode/appendable.h + +/* The <iostream> include has been moved to unicode/ustream.h */ + +/** + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor + * which constructs a Unicode string from an invariant-character char * string. + * About invariant characters see utypes.h. + * This constructor has no runtime dependency on conversion code and is + * therefore recommended over ones taking a charset name string + * (where the empty string "" indicates invariant-character conversion). + * + * @stable ICU 3.2 + */ +#define US_INV icu::UnicodeString::kInvariant + +/** + * Unicode String literals in C++. + * Dependent on the platform properties, different UnicodeString + * constructors should be used to create a UnicodeString object from + * a string literal. + * The macros are defined for maximum performance. + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * The string parameter must be a C string literal. + * The length of the string, not including the terminating + * <code>NUL</code>, must be specified as a constant. + * The U_STRING_DECL macro should be invoked exactly once for one + * such string variable before it is used. + * @stable ICU 2.0 + */ +#if defined(U_DECLARE_UTF16) +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) +#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) +#else +# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) +#endif + +/** + * Unicode String literals in C++. + * Dependent on the platform properties, different UnicodeString + * constructors should be used to create a UnicodeString object from + * a string literal. + * The macros are defined for improved performance. + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * The string parameter must be a C string literal. + * @stable ICU 2.0 + */ +#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) + +/** + * \def UNISTR_FROM_CHAR_EXPLICIT + * This can be defined to be empty or "explicit". + * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) + * constructors are marked as explicit, preventing their inadvertent use. + * @stable ICU 49 + */ +#ifndef UNISTR_FROM_CHAR_EXPLICIT +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Auto-"explicit" in ICU library code. +# define UNISTR_FROM_CHAR_EXPLICIT explicit +# else + // Empty by default for source code compatibility. +# define UNISTR_FROM_CHAR_EXPLICIT +# endif +#endif + +/** + * \def UNISTR_FROM_STRING_EXPLICIT + * This can be defined to be empty or "explicit". + * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) + * constructors are marked as explicit, preventing their inadvertent use. + * + * In particular, this helps prevent accidentally depending on ICU conversion code + * by passing a string literal into an API with a const UnicodeString & parameter. + * @stable ICU 49 + */ +#ifndef UNISTR_FROM_STRING_EXPLICIT +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Auto-"explicit" in ICU library code. +# define UNISTR_FROM_STRING_EXPLICIT explicit +# else + // Empty by default for source code compatibility. +# define UNISTR_FROM_STRING_EXPLICIT +# endif +#endif + +/** + * UnicodeString is a string class that stores Unicode characters directly and provides + * similar functionality as the Java String and StringBuffer classes. + * It is a concrete implementation of the abstract class Replaceable (for transliteration). + * + * The UnicodeString class is not suitable for subclassing. + * + * <p>For an overview of Unicode strings in C and C++ see the + * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> + * + * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. + * A Unicode character may be stored with either one code unit + * (the most common case) or with a matched pair of special code units + * ("surrogates"). The data type for code units is UChar. + * For single-character handling, a Unicode character code <em>point</em> is a value + * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p> + * + * <p>Indexes and offsets into and lengths of strings always count code units, not code points. + * This is the same as with multi-byte char* strings in traditional string handling. + * Operations on partial strings typically do not test for code point boundaries. + * If necessary, the user needs to take care of such boundaries by testing for the code unit + * values or by using functions like + * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() + * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> + * + * UnicodeString methods are more lenient with regard to input parameter values + * than other ICU APIs. In particular: + * - If indexes are out of bounds for a UnicodeString object + * (<0 or >length()) then they are "pinned" to the nearest boundary. + * - If primitive string pointer values (e.g., const UChar * or char *) + * for input strings are NULL, then those input string parameters are treated + * as if they pointed to an empty string. + * However, this is <em>not</em> the case for char * parameters for charset names + * or other IDs. + * - Most UnicodeString methods do not take a UErrorCode parameter because + * there are usually very few opportunities for failure other than a shortage + * of memory, error codes in low-level C++ string methods would be inconvenient, + * and the error code as the last parameter (ICU convention) would prevent + * the use of default parameter values. + * Instead, such methods set the UnicodeString into a "bogus" state + * (see isBogus()) if an error occurs. + * + * In string comparisons, two UnicodeString objects that are both "bogus" + * compare equal (to be transitive and prevent endless loops in sorting), + * and a "bogus" string compares less than any non-"bogus" one. + * + * Const UnicodeString methods are thread-safe. Multiple threads can use + * const methods on the same UnicodeString object simultaneously, + * but non-const methods must not be called concurrently (in multiple threads) + * with any other (const or non-const) methods. + * + * Similarly, const UnicodeString & parameters are thread-safe. + * One object may be passed in as such a parameter concurrently in multiple threads. + * This includes the const UnicodeString & parameters for + * copy construction, assignment, and cloning. + * + * <p>UnicodeString uses several storage methods. + * String contents can be stored inside the UnicodeString object itself, + * in an allocated and shared buffer, or in an outside buffer that is "aliased". + * Most of this is done transparently, but careful aliasing in particular provides + * significant performance improvements. + * Also, the internal buffer is accessible via special functions. + * For details see the + * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> + * + * @see utf.h + * @see CharacterIterator + * @stable ICU 2.0 + */ +class U_COMMON_API UnicodeString : public Replaceable +{ +public: + + /** + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor + * which constructs a Unicode string from an invariant-character char * string. + * Use the macro US_INV instead of the full qualification for this value. + * + * @see US_INV + * @stable ICU 3.2 + */ + enum EInvariant { + /** + * @see EInvariant + * @stable ICU 3.2 + */ + kInvariant + }; + + //======================================== + // Read-only operations + //======================================== + + /* Comparison - bitwise only - for international comparison use collation */ + + /** + * Equality operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if <TT>text</TT> contains the same characters as this one, + * FALSE otherwise. + * @stable ICU 2.0 + */ + inline UBool operator== (const UnicodeString& text) const; + + /** + * Inequality operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return FALSE if <TT>text</TT> contains the same characters as this one, + * TRUE otherwise. + * @stable ICU 2.0 + */ + inline UBool operator!= (const UnicodeString& text) const; + + /** + * Greater than operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if the characters in this are bitwise + * greater than the characters in <code>text</code>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool operator> (const UnicodeString& text) const; + + /** + * Less than operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if the characters in this are bitwise + * less than the characters in <code>text</code>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool operator< (const UnicodeString& text) const; + + /** + * Greater than or equal operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if the characters in this are bitwise + * greater than or equal to the characters in <code>text</code>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool operator>= (const UnicodeString& text) const; + + /** + * Less than or equal operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if the characters in this are bitwise + * less than or equal to the characters in <code>text</code>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool operator<= (const UnicodeString& text) const; + + /** + * Compare the characters bitwise in this UnicodeString to + * the characters in <code>text</code>. + * @param text The UnicodeString to compare to this one. + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>text</code>, -1 if the characters in + * this are bitwise less than the characters in <code>text</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>text</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(const UnicodeString& text) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters + * in the <b>entire string</b> <TT>text</TT>. + * (The parameters "start" and "length" are not applied to the other text "text".) + * @param start the offset at which the compare operation begins + * @param length the number of characters of text to compare. + * @param text the other text to be compared against this string. + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>text</code>, -1 if the characters in + * this are bitwise less than the characters in <code>text</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>text</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const UnicodeString& text) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param start the offset at which the compare operation begins + * @param length the number of characters in this to compare. + * @param srcText the text to be compared + * @param srcStart the offset into <TT>srcText</TT> to start comparison + * @param srcLength the number of characters in <TT>src</TT> to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcText</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcText</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcText</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in this UnicodeString with the first + * <TT>srcLength</TT> characters in <TT>srcChars</TT>. + * @param srcChars The characters to compare to this UnicodeString. + * @param srcLength the number of characters in <TT>srcChars</TT> to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcChars</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcChars</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(const UChar *srcChars, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the first + * <TT>length</TT> characters in <TT>srcChars</TT> + * @param start the offset at which the compare operation begins + * @param length the number of characters to compare. + * @param srcChars the characters to be compared + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcChars</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcChars</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const UChar *srcChars) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters + * in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param start the offset at which the compare operation begins + * @param length the number of characters in this to compare + * @param srcChars the characters to be compared + * @param srcStart the offset into <TT>srcChars</TT> to start comparison + * @param srcLength the number of characters in <TT>srcChars</TT> to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcChars</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcChars</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>limit</TT>) with the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcLimit</TT>). + * @param start the offset at which the compare operation begins + * @param limit the offset immediately following the compare operation + * @param srcText the text to be compared + * @param srcStart the offset into <TT>srcText</TT> to start comparison + * @param srcLimit the offset into <TT>srcText</TT> to limit comparison + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcText</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcText</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcText</code>. + * @stable ICU 2.0 + */ + inline int8_t compareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param text Another string to compare this one to. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(const UnicodeString& text) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param srcChars A pointer to another string to compare this one to. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(const UChar *srcChars, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const UChar *srcChars) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param limit The offset after the last code unit from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLimit The offset after the last code unit from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrderBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). + * + * @param text Another string to compare this one to. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param srcChars A pointer to another string to compare this one to. + * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(const UChar *srcChars, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const UChar *srcChars, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param limit The offset after the last code unit from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLimit The offset after the last code unit from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit, + uint32_t options) const; + + /** + * Determine if this starts with the characters in <TT>text</TT> + * @param text The text to match. + * @return TRUE if this starts with the characters in <TT>text</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const UnicodeString& text) const; + + /** + * Determine if this starts with the characters in <TT>srcText</TT> + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param srcText The text to match. + * @param srcStart the offset into <TT>srcText</TT> to start matching + * @param srcLength the number of characters in <TT>srcText</TT> to match + * @return TRUE if this starts with the characters in <TT>text</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this starts with the characters in <TT>srcChars</TT> + * @param srcChars The characters to match. + * @param srcLength the number of characters in <TT>srcChars</TT> + * @return TRUE if this starts with the characters in <TT>srcChars</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const UChar *srcChars, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in <TT>srcChars</TT> + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param srcChars The characters to match. + * @param srcStart the offset into <TT>srcText</TT> to start matching + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @return TRUE if this ends with the characters in <TT>srcChars</TT>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in <TT>text</TT> + * @param text The text to match. + * @return TRUE if this ends with the characters in <TT>text</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const UnicodeString& text) const; + + /** + * Determine if this ends with the characters in <TT>srcText</TT> + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param srcText The text to match. + * @param srcStart the offset into <TT>srcText</TT> to start matching + * @param srcLength the number of characters in <TT>srcText</TT> to match + * @return TRUE if this ends with the characters in <TT>text</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in <TT>srcChars</TT> + * @param srcChars The characters to match. + * @param srcLength the number of characters in <TT>srcChars</TT> + * @return TRUE if this ends with the characters in <TT>srcChars</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const UChar *srcChars, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in <TT>srcChars</TT> + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param srcChars The characters to match. + * @param srcStart the offset into <TT>srcText</TT> to start matching + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @return TRUE if this ends with the characters in <TT>srcChars</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + + /* Searching - bitwise only */ + + /** + * Locate in this the first occurrence of the characters in <TT>text</TT>, + * using bitwise comparison. + * @param text The text to search for. + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text) const; + + /** + * Locate in this the first occurrence of the characters in <TT>text</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text, + int32_t start) const; + + /** + * Locate in this the first occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>text</TT>, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), + * using bitwise comparison. + * @param srcText The text to search for. + * @param srcStart the offset into <TT>srcText</TT> at which + * to start matching + * @param srcLength the number of characters in <TT>srcText</TT> to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the characters in + * <TT>srcChars</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @param start the offset into this at which to start matching + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UChar *srcChars, + int32_t srcLength, + int32_t start) const; + + /** + * Locate in this the first occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcChars</TT>, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in <TT>srcChars</TT> + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of <TT>srcChars</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UChar *srcChars, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), + * using bitwise comparison. + * @param srcChars The text to search for. + * @param srcStart the offset into <TT>srcChars</TT> at which + * to start matching + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + int32_t indexOf(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the BMP code point <code>c</code>, + * using bitwise comparison. + * @param c The code unit to search for. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar c) const; + + /** + * Locate in this the first occurrence of the code point <TT>c</TT>, + * using bitwise comparison. + * + * @param c The code point to search for. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c) const; + + /** + * Locate in this the first occurrence of the BMP code point <code>c</code>, + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param c The code unit to search for. + * @param start The offset at which searching will start. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar c, + int32_t start) const; + + /** + * Locate in this the first occurrence of the code point <TT>c</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * + * @param c The code point to search for. + * @param start The offset at which searching will start. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c, + int32_t start) const; + + /** + * Locate in this the first occurrence of the BMP code point <code>c</code> + * in the range [<TT>start</TT>, <TT>start + length</TT>), + * using bitwise comparison. + * @param c The code unit to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the code point <TT>c</TT> + * in the range [<TT>start</TT>, <TT>start + length</TT>), + * using bitwise comparison. + * + * @param c The code point to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the characters in <TT>text</TT>, + * using bitwise comparison. + * @param text The text to search for. + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text) const; + + /** + * Locate in this the last occurrence of the characters in <TT>text</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text, + int32_t start) const; + + /** + * Locate in this the last occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>text</TT>, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), + * using bitwise comparison. + * @param srcText The text to search for. + * @param srcStart the offset into <TT>srcText</TT> at which + * to start matching + * @param srcLength the number of characters in <TT>srcText</TT> to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the characters in <TT>srcChars</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @param start the offset into this at which to start matching + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UChar *srcChars, + int32_t srcLength, + int32_t start) const; + + /** + * Locate in this the last occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcChars</TT>, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in <TT>srcChars</TT> + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of <TT>srcChars</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UChar *srcChars, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), + * using bitwise comparison. + * @param srcChars The text to search for. + * @param srcStart the offset into <TT>srcChars</TT> at which + * to start matching + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + int32_t lastIndexOf(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the BMP code point <code>c</code>, + * using bitwise comparison. + * @param c The code unit to search for. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar c) const; + + /** + * Locate in this the last occurrence of the code point <TT>c</TT>, + * using bitwise comparison. + * + * @param c The code point to search for. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c) const; + + /** + * Locate in this the last occurrence of the BMP code point <code>c</code> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param c The code unit to search for. + * @param start The offset at which searching will start. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar c, + int32_t start) const; + + /** + * Locate in this the last occurrence of the code point <TT>c</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * + * @param c The code point to search for. + * @param start The offset at which searching will start. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c, + int32_t start) const; + + /** + * Locate in this the last occurrence of the BMP code point <code>c</code> + * in the range [<TT>start</TT>, <TT>start + length</TT>), + * using bitwise comparison. + * @param c The code unit to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the code point <TT>c</TT> + * in the range [<TT>start</TT>, <TT>start + length</TT>), + * using bitwise comparison. + * + * @param c The code point to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + + /* Character access */ + + /** + * Return the code unit at offset <tt>offset</tt>. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * @return the code unit at offset <tt>offset</tt> + * or 0xffff if the offset is not valid for this string + * @stable ICU 2.0 + */ + inline UChar charAt(int32_t offset) const; + + /** + * Return the code unit at offset <tt>offset</tt>. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * @return the code unit at offset <tt>offset</tt> + * @stable ICU 2.0 + */ + inline UChar operator[] (int32_t offset) const; + + /** + * Return the code point that contains the code unit + * at offset <tt>offset</tt>. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * that indicates the text offset of any of the code units + * that will be assembled into a code point (21-bit value) and returned + * @return the code point of text at <tt>offset</tt> + * or 0xffff if the offset is not valid for this string + * @stable ICU 2.0 + */ + UChar32 char32At(int32_t offset) const; + + /** + * Adjust a random-access offset so that + * it points to the beginning of a Unicode character. + * The offset that is passed in points to + * any code unit of a code point, + * while the returned offset will point to the first code unit + * of the same code point. + * In UTF-16, if the input offset points to a second surrogate + * of a surrogate pair, then the returned offset will point + * to the first surrogate. + * @param offset a valid offset into one code point of the text + * @return offset of the first code unit of the same code point + * @see U16_SET_CP_START + * @stable ICU 2.0 + */ + int32_t getChar32Start(int32_t offset) const; + + /** + * Adjust a random-access offset so that + * it points behind a Unicode character. + * The offset that is passed in points behind + * any code unit of a code point, + * while the returned offset will point behind the last code unit + * of the same code point. + * In UTF-16, if the input offset points behind the first surrogate + * (i.e., to the second surrogate) + * of a surrogate pair, then the returned offset will point + * behind the second surrogate (i.e., to the first surrogate). + * @param offset a valid offset after any code unit of a code point of the text + * @return offset of the first code unit after the same code point + * @see U16_SET_CP_LIMIT + * @stable ICU 2.0 + */ + int32_t getChar32Limit(int32_t offset) const; + + /** + * Move the code unit index along the string by delta code points. + * Interpret the input index as a code unit-based offset into the string, + * move the index forward or backward by delta code points, and + * return the resulting index. + * The input index should point to the first code unit of a code point, + * if there is more than one. + * + * Both input and output indexes are code unit-based as for all + * string indexes/offsets in ICU (and other libraries, like MBCS char*). + * If delta<0 then the index is moved backward (toward the start of the string). + * If delta>0 then the index is moved forward (toward the end of the string). + * + * This behaves like CharacterIterator::move32(delta, kCurrent). + * + * Behavior for out-of-bounds indexes: + * <code>moveIndex32</code> pins the input index to 0..length(), i.e., + * if the input index<0 then it is pinned to 0; + * if it is index>length() then it is pinned to length(). + * Afterwards, the index is moved by <code>delta</code> code points + * forward or backward, + * but no further backward than to 0 and no further forward than to length(). + * The resulting index return value will be in between 0 and length(), inclusively. + * + * Examples: + * <pre> + * // s has code points 'a' U+10000 'b' U+10ffff U+2029 + * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); + * + * // initial index: position of U+10000 + * int32_t index=1; + * + * // the following examples will all result in index==4, position of U+10ffff + * + * // skip 2 code points from some position in the string + * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' + * + * // go to the 3rd code point from the start of s (0-based) + * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' + * + * // go to the next-to-last code point of s + * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff + * </pre> + * + * @param index input code unit index + * @param delta (signed) code point count to move the index forward or backward + * in the string + * @return the resulting code unit index + * @stable ICU 2.0 + */ + int32_t moveIndex32(int32_t index, int32_t delta) const; + + /* Substring extraction */ + + /** + * Copy the characters in the range + * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, + * beginning at <tt>dstStart</tt>. + * If the string aliases to <code>dst</code> itself as an external buffer, + * then extract() will not copy the contents. + * + * @param start offset of first character which will be copied into the array + * @param length the number of characters to extract + * @param dst array in which to copy characters. The length of <tt>dst</tt> + * must be at least (<tt>dstStart + length</tt>). + * @param dstStart the offset in <TT>dst</TT> where the first character + * will be extracted + * @stable ICU 2.0 + */ + inline void extract(int32_t start, + int32_t length, + UChar *dst, + int32_t dstStart = 0) const; + + /** + * Copy the contents of the string into dest. + * This is a convenience function that + * checks if there is enough space in dest, + * extracts the entire string if possible, + * and NUL-terminates dest if possible. + * + * If the string fits into dest but cannot be NUL-terminated + * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. + * If the string itself does not fit into dest + * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. + * + * If the string aliases to <code>dest</code> itself as an external buffer, + * then extract() will not copy the contents. + * + * @param dest Destination string buffer. + * @param destCapacity Number of UChars available at dest. + * @param errorCode ICU error code. + * @return length() + * @stable ICU 2.0 + */ + int32_t + extract(UChar *dest, int32_t destCapacity, + UErrorCode &errorCode) const; + + /** + * Copy the characters in the range + * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString + * <tt>target</tt>. + * @param start offset of first character which will be copied + * @param length the number of characters to extract + * @param target UnicodeString into which to copy characters. + * @return A reference to <TT>target</TT> + * @stable ICU 2.0 + */ + inline void extract(int32_t start, + int32_t length, + UnicodeString& target) const; + + /** + * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) + * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. + * @param start offset of first character which will be copied into the array + * @param limit offset immediately following the last character to be copied + * @param dst array in which to copy characters. The length of <tt>dst</tt> + * must be at least (<tt>dstStart + (limit - start)</tt>). + * @param dstStart the offset in <TT>dst</TT> where the first character + * will be extracted + * @stable ICU 2.0 + */ + inline void extractBetween(int32_t start, + int32_t limit, + UChar *dst, + int32_t dstStart = 0) const; + + /** + * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) + * into the UnicodeString <tt>target</tt>. Replaceable API. + * @param start offset of first character which will be copied + * @param limit offset immediately following the last character to be copied + * @param target UnicodeString into which to copy characters. + * @return A reference to <TT>target</TT> + * @stable ICU 2.0 + */ + virtual void extractBetween(int32_t start, + int32_t limit, + UnicodeString& target) const; + + /** + * Copy the characters in the range + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters. + * All characters must be invariant (see utypes.h). + * Use US_INV as the last, signature-distinguishing parameter. + * + * This function does not write any more than <code>targetLength</code> + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction, can be NULL + * if targetLength is 0 + * @param targetCapacity the length of the target buffer + * @param inv Signature-distinguishing paramater, use US_INV. + * @return the output string length, not including the terminating NUL + * @stable ICU 3.2 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + int32_t targetCapacity, + enum EInvariant inv) const; + +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION + + /** + * Copy the characters in the range + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters + * in the platform's default codepage. + * This function does not write any more than <code>targetLength</code> + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param targetLength the length of the target buffer + * If <TT>target</TT> is NULL, then the number of bytes required for + * <TT>target</TT> is returned. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + uint32_t targetLength) const; + +#endif + +#if !UCONFIG_NO_CONVERSION + + /** + * Copy the characters in the range + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters + * in a specified codepage. + * The output string is NUL-terminated. + * + * Recommendation: For invariant-character strings use + * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param codepage the desired codepage for the characters. 0 has + * the special meaning of the default codepage + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * If <TT>target</TT> is NULL, then the number of bytes required for + * <TT>target</TT> is returned. It is assumed that the target is big enough + * to fit all of the characters. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + inline int32_t extract(int32_t start, + int32_t startLength, + char *target, + const char *codepage = 0) const; + + /** + * Copy the characters in the range + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters + * in a specified codepage. + * This function does not write any more than <code>targetLength</code> + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * Recommendation: For invariant-character strings use + * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param targetLength the length of the target buffer + * @param codepage the desired codepage for the characters. 0 has + * the special meaning of the default codepage + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * If <TT>target</TT> is NULL, then the number of bytes required for + * <TT>target</TT> is returned. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + uint32_t targetLength, + const char *codepage) const; + + /** + * Convert the UnicodeString into a codepage string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function avoids the overhead of opening and closing a converter if + * multiple strings are extracted. + * + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of chars available at dest + * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), + * or NULL for the default converter + * @param errorCode normal ICU error code + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @stable ICU 2.0 + */ + int32_t extract(char *dest, int32_t destCapacity, + UConverter *cnv, + UErrorCode &errorCode) const; + +#endif + + /** + * Create a temporary substring for the specified range. + * Unlike the substring constructor and setTo() functions, + * the object returned here will be a read-only alias (using getBuffer()) + * rather than copying the text. + * As a result, this substring operation is much faster but requires + * that the original string not be modified or deleted during the lifetime + * of the returned substring object. + * @param start offset of the first character visible in the substring + * @param length length of the substring + * @return a read-only alias UnicodeString object for the substring + * @stable ICU 4.4 + */ + UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; + + /** + * Create a temporary substring for the specified range. + * Same as tempSubString(start, length) except that the substring range + * is specified as a (start, limit) pair (with an exclusive limit index) + * rather than a (start, length) pair. + * @param start offset of the first character visible in the substring + * @param limit offset immediately following the last character visible in the substring + * @return a read-only alias UnicodeString object for the substring + * @stable ICU 4.4 + */ + inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; + + /** + * Convert the UnicodeString to UTF-8 and write the result + * to a ByteSink. This is called by toUTF8String(). + * Unpaired surrogates are replaced with U+FFFD. + * Calls u_strToUTF8WithSub(). + * + * @param sink A ByteSink to which the UTF-8 version of the string is written. + * sink.Flush() is called at the end. + * @stable ICU 4.2 + * @see toUTF8String + */ + void toUTF8(ByteSink &sink) const; + +#if U_HAVE_STD_STRING + + /** + * Convert the UnicodeString to UTF-8 and append the result + * to a standard string. + * Unpaired surrogates are replaced with U+FFFD. + * Calls toUTF8(). + * + * @param result A standard string (or a compatible object) + * to which the UTF-8 version of the string is appended. + * @return The string object. + * @stable ICU 4.2 + * @see toUTF8 + */ + template<typename StringClass> + StringClass &toUTF8String(StringClass &result) const { + StringByteSink<StringClass> sbs(&result); + toUTF8(sbs); + return result; + } + +#endif + + /** + * Convert the UnicodeString to UTF-32. + * Unpaired surrogates are replaced with U+FFFD. + * Calls u_strToUTF32WithSub(). + * + * @param utf32 destination string buffer, can be NULL if capacity==0 + * @param capacity the number of UChar32s available at utf32 + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The length of the UTF-32 string. + * @see fromUTF32 + * @stable ICU 4.2 + */ + int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; + + /* Length operations */ + + /** + * Return the length of the UnicodeString object. + * The length is the number of UChar code units are in the UnicodeString. + * If you want the number of code points, please use countChar32(). + * @return the length of the UnicodeString object + * @see countChar32 + * @stable ICU 2.0 + */ + inline int32_t length(void) const; + + /** + * Count Unicode code points in the length UChar code units of the string. + * A code point may occupy either one or two UChar code units. + * Counting code points involves reading all code units. + * + * This functions is basically the inverse of moveIndex32(). + * + * @param start the index of the first code unit to check + * @param length the number of UChar code units to check + * @return the number of code points in the specified code units + * @see length + * @stable ICU 2.0 + */ + int32_t + countChar32(int32_t start=0, int32_t length=INT32_MAX) const; + + /** + * Check if the length UChar code units of the string + * contain more Unicode code points than a certain number. + * This is more efficient than counting all code points in this part of the string + * and comparing that number with a threshold. + * This function may not need to scan the string at all if the length + * falls within a certain range, and + * never needs to count more than 'number+1' code points. + * Logically equivalent to (countChar32(start, length)>number). + * A Unicode code point may occupy either one or two UChar code units. + * + * @param start the index of the first code unit to check (0 for the entire string) + * @param length the number of UChar code units to check + * (use INT32_MAX for the entire string; remember that start/length + * values are pinned) + * @param number The number of code points in the (sub)string is compared against + * the 'number' parameter. + * @return Boolean value for whether the string contains more Unicode code points + * than 'number'. Same as (u_countChar32(s, length)>number). + * @see countChar32 + * @see u_strHasMoreChar32Than + * @stable ICU 2.4 + */ + UBool + hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; + + /** + * Determine if this string is empty. + * @return TRUE if this string contains 0 characters, FALSE otherwise. + * @stable ICU 2.0 + */ + inline UBool isEmpty(void) const; + + /** + * Return the capacity of the internal buffer of the UnicodeString object. + * This is useful together with the getBuffer functions. + * See there for details. + * + * @return the number of UChars available in the internal buffer + * @see getBuffer + * @stable ICU 2.0 + */ + inline int32_t getCapacity(void) const; + + /* Other operations */ + + /** + * Generate a hash code for this object. + * @return The hash code of this UnicodeString. + * @stable ICU 2.0 + */ + inline int32_t hashCode(void) const; + + /** + * Determine if this object contains a valid string. + * A bogus string has no value. It is different from an empty string, + * although in both cases isEmpty() returns TRUE and length() returns 0. + * setToBogus() and isBogus() can be used to indicate that no string value is available. + * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and + * length() returns 0. + * + * @return TRUE if the string is bogus/invalid, FALSE otherwise + * @see setToBogus() + * @stable ICU 2.0 + */ + inline UBool isBogus(void) const; + + + //======================================== + // Write operations + //======================================== + + /* Assignment operations */ + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the characters from <TT>srcText</TT>. + * @param srcText The text containing the characters to replace + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString &operator=(const UnicodeString &srcText); + + /** + * Almost the same as the assignment operator. + * Replace the characters in this UnicodeString + * with the characters from <code>srcText</code>. + * + * This function works the same as the assignment operator + * for all strings except for ones that are readonly aliases. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * This function implements the old, more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. + * + * The fastCopyFrom function must be used only if it is known that the lifetime of + * this UnicodeString does not exceed the lifetime of the aliased buffer + * including its contents, for example for strings from resource bundles + * or aliases to string constants. + * + * @param src The text containing the characters to replace. + * @return a reference to this + * @stable ICU 2.4 + */ + UnicodeString &fastCopyFrom(const UnicodeString &src); + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the code unit <TT>ch</TT>. + * @param ch the code unit to replace + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator= (UChar ch); + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the code point <TT>ch</TT>. + * @param ch the code point to replace + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator= (UChar32 ch); + + /** + * Set the text in the UnicodeString object to the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). + * <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @return a reference to this + * @stable ICU 2.2 + */ + inline UnicodeString& setTo(const UnicodeString& srcText, + int32_t srcStart); + + /** + * Set the text in the UnicodeString object to the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcText</TT> in the + * replace string. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Set the text in the UnicodeString object to the characters in + * <TT>srcText</TT>. + * <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const UnicodeString& srcText); + + /** + * Set the characters in the UnicodeString object to the characters + * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const UChar *srcChars, + int32_t srcLength); + + /** + * Set the characters in the UnicodeString object to the code unit + * <TT>srcChar</TT>. + * @param srcChar the code unit which becomes the UnicodeString's character + * content + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& setTo(UChar srcChar); + + /** + * Set the characters in the UnicodeString object to the code point + * <TT>srcChar</TT>. + * @param srcChar the code point which becomes the UnicodeString's character + * content + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& setTo(UChar32 srcChar); + + /** + * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has copy-on-write semantics: + * When the string is modified, then the buffer is first copied into + * newly allocated memory. + * The aliased buffer is never modified. + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, + * so that both strings then alias the same readonly-text. + * + * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. + * This must be true if <code>textLength==-1</code>. + * @param text The characters to alias for the UnicodeString. + * @param textLength The number of Unicode characters in <code>text</code> to alias. + * If -1, then this constructor will determine the length + * by calling <code>u_strlen()</code>. + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString &setTo(UBool isTerminated, + const UChar *text, + int32_t textLength); + + /** + * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has write-through semantics: + * For as long as the capacity of the buffer is sufficient, write operations + * will directly affect the buffer. When more capacity is necessary, then + * a new buffer will be allocated and the contents copied as with regularly + * constructed strings. + * In an assignment to another UnicodeString, the buffer will be copied. + * The extract(UChar *dst) function detects whether the dst pointer is the same + * as the string buffer itself and will in this case not copy the contents. + * + * @param buffer The characters to alias for the UnicodeString. + * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. + * @param buffCapacity The size of <code>buffer</code> in UChars. + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString &setTo(UChar *buffer, + int32_t buffLength, + int32_t buffCapacity); + + /** + * Make this UnicodeString object invalid. + * The string will test TRUE with isBogus(). + * + * A bogus string has no value. It is different from an empty string. + * It can be used to indicate that no string value is available. + * getBuffer() and getTerminatedBuffer() return NULL, and + * length() returns 0. + * + * This utility function is used throughout the UnicodeString + * implementation to indicate that a UnicodeString operation failed, + * and may be used in other functions, + * especially but not exclusively when such functions do not + * take a UErrorCode for simplicity. + * + * The following methods, and no others, will clear a string object's bogus flag: + * - remove() + * - remove(0, INT32_MAX) + * - truncate(0) + * - operator=() (assignment operator) + * - setTo(...) + * + * The simplest ways to turn a bogus string into an empty one + * is to use the remove() function. + * Examples for other functions that are equivalent to "set to empty string": + * \code + * if(s.isBogus()) { + * s.remove(); // set to an empty string (remove all), or + * s.remove(0, INT32_MAX); // set to an empty string (remove all), or + * s.truncate(0); // set to an empty string (complete truncation), or + * s=UnicodeString(); // assign an empty string, or + * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or + * static const UChar nul=0; + * s.setTo(&nul, 0); // set to an empty C Unicode string + * } + * \endcode + * + * @see isBogus() + * @stable ICU 2.0 + */ + void setToBogus(); + + /** + * Set the character at the specified offset to the specified character. + * @param offset A valid offset into the text of the character to set + * @param ch The new character + * @return A reference to this + * @stable ICU 2.0 + */ + UnicodeString& setCharAt(int32_t offset, + UChar ch); + + + /* Append operations */ + + /** + * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString + * object. + * @param ch the code unit to be appended + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (UChar ch); + + /** + * Append operator. Append the code point <TT>ch</TT> to the UnicodeString + * object. + * @param ch the code point to be appended + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (UChar32 ch); + + /** + * Append operator. Append the characters in <TT>srcText</TT> to the + * UnicodeString object. <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (const UnicodeString& srcText); + + /** + * Append the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the + * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT> + * is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcText</TT> in + * the append string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Append the characters in <TT>srcText</TT> to the UnicodeString object. + * <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const UnicodeString& srcText); + + /** + * Append the characters in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString + * object at offset + * <TT>start</TT>. <TT>srcChars</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcStart the offset into <TT>srcChars</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcChars</TT> in + * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Append the characters in <TT>srcChars</TT> to the UnicodeString object + * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; + * can be -1 if <TT>srcChars</TT> is NUL-terminated + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const UChar *srcChars, + int32_t srcLength); + + /** + * Append the code unit <TT>srcChar</TT> to the UnicodeString object. + * @param srcChar the code unit to append + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(UChar srcChar); + + /** + * Append the code point <TT>srcChar</TT> to the UnicodeString object. + * @param srcChar the code point to append + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& append(UChar32 srcChar); + + + /* Insert operations */ + + /** + * Insert the characters in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString + * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. + * @param start the offset where the insertion begins + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcText</TT> in + * the insert string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Insert the characters in <TT>srcText</TT> into the UnicodeString object + * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. + * @param start the offset where the insertion begins + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const UnicodeString& srcText); + + /** + * Insert the characters in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString + * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. + * @param start the offset at which the insertion begins + * @param srcChars the source for the new characters + * @param srcStart the offset into <TT>srcChars</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcChars</TT> + * in the insert string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Insert the characters in <TT>srcChars</TT> into the UnicodeString object + * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. + * @param start the offset where the insertion begins + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const UChar *srcChars, + int32_t srcLength); + + /** + * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at + * offset <TT>start</TT>. + * @param start the offset at which the insertion occurs + * @param srcChar the code unit to insert + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + UChar srcChar); + + /** + * Insert the code point <TT>srcChar</TT> into the UnicodeString object at + * offset <TT>start</TT>. + * @param start the offset at which the insertion occurs + * @param srcChar the code point to insert + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + UChar32 srcChar); + + + /* Replace operations */ + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in + * <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * <TT>srcText</TT> is not modified. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcText</TT> in + * the replace string + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) + * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is + * not modified. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, + int32_t length, + const UnicodeString& srcText); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in + * <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> + * is not modified. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcStart the offset into <TT>srcChars</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcChars</TT> + * in the replace string + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in + * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. + * @param start the offset at which the replace operation begins + * @param length number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcLength); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the code unit + * <TT>srcChar</TT>. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcChar the new code unit + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + UChar srcChar); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the code point + * <TT>srcChar</TT>. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcChar the new code point + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); + + /** + * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) + * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. + * @param start the offset at which the replace operation begins + * @param limit the offset immediately following the replace range + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText); + + /** + * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) + * with the characters in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified. + * @param start the offset at which the replace operation begins + * @param limit the offset immediately following the replace range + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcChars</TT> where new characters + * will be obtained + * @param srcLimit the offset immediately following the range to copy + * in <TT>srcText</TT> + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit); + + /** + * Replace a substring of this object with the given text. + * @param start the beginning index, inclusive; <code>0 <= start + * <= limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit + * <= length()</code>. + * @param text the text to replace characters <code>start</code> + * to <code>limit - 1</code> + * @stable ICU 2.0 + */ + virtual void handleReplaceBetween(int32_t start, + int32_t limit, + const UnicodeString& text); + + /** + * Replaceable API + * @return TRUE if it has MetaData + * @stable ICU 2.4 + */ + virtual UBool hasMetaData() const; + + /** + * Copy a substring of this object, retaining attribute (out-of-band) + * information. This method is used to duplicate or reorder substrings. + * The destination index must not overlap the source range. + * + * @param start the beginning index, inclusive; <code>0 <= start <= + * limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit <= + * length()</code>. + * @param dest the destination index. The characters from + * <code>start..limit-1</code> will be copied to <code>dest</code>. + * Implementations of this method may assume that <code>dest <= start || + * dest >= limit</code>. + * @stable ICU 2.0 + */ + virtual void copy(int32_t start, int32_t limit, int32_t dest); + + /* Search and replace operations */ + + /** + * Replace all occurrences of characters in oldText with the characters + * in newText + * @param oldText the text containing the search text + * @param newText the text containing the replacement text + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& findAndReplace(const UnicodeString& oldText, + const UnicodeString& newText); + + /** + * Replace all occurrences of characters in oldText with characters + * in newText + * in the range [<TT>start</TT>, <TT>start + length</TT>). + * @param start the start of the range in which replace will performed + * @param length the length of the range in which replace will be performed + * @param oldText the text containing the search text + * @param newText the text containing the replacement text + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& findAndReplace(int32_t start, + int32_t length, + const UnicodeString& oldText, + const UnicodeString& newText); + + /** + * Replace all occurrences of characters in oldText in the range + * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters + * in newText in the range + * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) + * in the range [<TT>start</TT>, <TT>start + length</TT>). + * @param start the start of the range in which replace will performed + * @param length the length of the range in which replace will be performed + * @param oldText the text containing the search text + * @param oldStart the start of the search range in <TT>oldText</TT> + * @param oldLength the length of the search range in <TT>oldText</TT> + * @param newText the text containing the replacement text + * @param newStart the start of the replacement range in <TT>newText</TT> + * @param newLength the length of the replacement range in <TT>newText</TT> + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& findAndReplace(int32_t start, + int32_t length, + const UnicodeString& oldText, + int32_t oldStart, + int32_t oldLength, + const UnicodeString& newText, + int32_t newStart, + int32_t newLength); + + + /* Remove operations */ + + /** + * Remove all characters from the UnicodeString object. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& remove(void); + + /** + * Remove the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. + * @param start the offset of the first character to remove + * @param length the number of characters to remove + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& remove(int32_t start, + int32_t length = (int32_t)INT32_MAX); + + /** + * Remove the characters in the range + * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object. + * @param start the offset of the first character to remove + * @param limit the offset immediately following the range to remove + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& removeBetween(int32_t start, + int32_t limit = (int32_t)INT32_MAX); + + /** + * Retain only the characters in the range + * [<code>start</code>, <code>limit</code>) from the UnicodeString object. + * Removes characters before <code>start</code> and at and after <code>limit</code>. + * @param start the offset of the first character to retain + * @param limit the offset immediately following the range to retain + * @return a reference to this + * @stable ICU 4.4 + */ + inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); + + /* Length operations */ + + /** + * Pad the start of this UnicodeString with the character <TT>padChar</TT>. + * If the length of this UnicodeString is less than targetLength, + * length() - targetLength copies of padChar will be added to the + * beginning of this UnicodeString. + * @param targetLength the desired length of the string + * @param padChar the character to use for padding. Defaults to + * space (U+0020) + * @return TRUE if the text was padded, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool padLeading(int32_t targetLength, + UChar padChar = 0x0020); + + /** + * Pad the end of this UnicodeString with the character <TT>padChar</TT>. + * If the length of this UnicodeString is less than targetLength, + * length() - targetLength copies of padChar will be added to the + * end of this UnicodeString. + * @param targetLength the desired length of the string + * @param padChar the character to use for padding. Defaults to + * space (U+0020) + * @return TRUE if the text was padded, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool padTrailing(int32_t targetLength, + UChar padChar = 0x0020); + + /** + * Truncate this UnicodeString to the <TT>targetLength</TT>. + * @param targetLength the desired length of this UnicodeString. + * @return TRUE if the text was truncated, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool truncate(int32_t targetLength); + + /** + * Trims leading and trailing whitespace from this UnicodeString. + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& trim(void); + + + /* Miscellaneous operations */ + + /** + * Reverse this UnicodeString in place. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& reverse(void); + + /** + * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in + * this UnicodeString. + * @param start the start of the range to reverse + * @param length the number of characters to to reverse + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& reverse(int32_t start, + int32_t length); + + /** + * Convert the characters in this to UPPER CASE following the conventions of + * the default locale. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toUpper(void); + + /** + * Convert the characters in this to UPPER CASE following the conventions of + * a specific locale. + * @param locale The locale containing the conventions to use. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toUpper(const Locale& locale); + + /** + * Convert the characters in this to lower case following the conventions of + * the default locale. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toLower(void); + + /** + * Convert the characters in this to lower case following the conventions of + * a specific locale. + * @param locale The locale containing the conventions to use. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toLower(const Locale& locale); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecase this string, convenience function using the default locale. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @return A reference to this. + * @stable ICU 2.1 + */ + UnicodeString &toTitle(BreakIterator *titleIter); + + /** + * Titlecase this string. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @param locale The locale to consider. + * @return A reference to this. + * @stable ICU 2.1 + */ + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); + + /** + * Titlecase this string, with options. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options.) + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @param locale The locale to consider. + * @param options Options bit set, see ucasemap_open(). + * @return A reference to this. + * @see U_TITLECASE_NO_LOWERCASE + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT + * @see ucasemap_open + * @stable ICU 3.8 + */ + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); + +#endif + + /** + * Case-folds the characters in this string. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); + + //======================================== + // Access to the internal buffer + //======================================== + + /** + * Get a read/write pointer to the internal buffer. + * The buffer is guaranteed to be large enough for at least minCapacity UChars, + * writable, and is still owned by the UnicodeString object. + * Calls to getBuffer(minCapacity) must not be nested, and + * must be matched with calls to releaseBuffer(newLength). + * If the string buffer was read-only or shared, + * then it will be reallocated and copied. + * + * An attempted nested call will return 0, and will not further modify the + * state of the UnicodeString object. + * It also returns 0 if the string is bogus. + * + * The actual capacity of the string buffer may be larger than minCapacity. + * getCapacity() returns the actual capacity. + * For many operations, the full capacity should be used to avoid reallocations. + * + * While the buffer is "open" between getBuffer(minCapacity) + * and releaseBuffer(newLength), the following applies: + * - The string length is set to 0. + * - Any read API call on the UnicodeString object will behave like on a 0-length string. + * - Any write API call on the UnicodeString object is disallowed and will have no effect. + * - You can read from and write to the returned buffer. + * - The previous string contents will still be in the buffer; + * if you want to use it, then you need to call length() before getBuffer(minCapacity). + * If the length() was greater than minCapacity, then any contents after minCapacity + * may be lost. + * The buffer contents is not NUL-terminated by getBuffer(). + * If length()<getCapacity() then you can terminate it by writing a NUL + * at index length(). + * - You must call releaseBuffer(newLength) before and in order to + * return to normal UnicodeString operation. + * + * @param minCapacity the minimum number of UChars that are to be available + * in the buffer, starting at the returned pointer; + * default to the current string capacity if minCapacity==-1 + * @return a writable pointer to the internal string buffer, + * or 0 if an error occurs (nested calls, out of memory) + * + * @see releaseBuffer + * @see getTerminatedBuffer() + * @stable ICU 2.0 + */ + UChar *getBuffer(int32_t minCapacity); + + /** + * Release a read/write buffer on a UnicodeString object with an + * "open" getBuffer(minCapacity). + * This function must be called in a matched pair with getBuffer(minCapacity). + * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". + * + * It will set the string length to newLength, at most to the current capacity. + * If newLength==-1 then it will set the length according to the + * first NUL in the buffer, or to the capacity if there is no NUL. + * + * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. + * + * @param newLength the new length of the UnicodeString object; + * defaults to the current capacity if newLength is greater than that; + * if newLength==-1, it defaults to u_strlen(buffer) but not more than + * the current capacity of the string + * + * @see getBuffer(int32_t minCapacity) + * @stable ICU 2.0 + */ + void releaseBuffer(int32_t newLength=-1); + + /** + * Get a read-only pointer to the internal buffer. + * This can be called at any time on a valid UnicodeString. + * + * It returns 0 if the string is bogus, or + * during an "open" getBuffer(minCapacity). + * + * It can be called as many times as desired. + * The pointer that it returns will remain valid until the UnicodeString object is modified, + * at which time the pointer is semantically invalidated and must not be used any more. + * + * The capacity of the buffer can be determined with getCapacity(). + * The part after length() may or may not be initialized and valid, + * depending on the history of the UnicodeString object. + * + * The buffer contents is (probably) not NUL-terminated. + * You can check if it is with + * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. + * (See getTerminatedBuffer().) + * + * The buffer may reside in read-only memory. Its contents must not + * be modified. + * + * @return a read-only pointer to the internal string buffer, + * or 0 if the string is empty or bogus + * + * @see getBuffer(int32_t minCapacity) + * @see getTerminatedBuffer() + * @stable ICU 2.0 + */ + inline const UChar *getBuffer() const; + + /** + * Get a read-only pointer to the internal buffer, + * making sure that it is NUL-terminated. + * This can be called at any time on a valid UnicodeString. + * + * It returns 0 if the string is bogus, or + * during an "open" getBuffer(minCapacity), or if the buffer cannot + * be NUL-terminated (because memory allocation failed). + * + * It can be called as many times as desired. + * The pointer that it returns will remain valid until the UnicodeString object is modified, + * at which time the pointer is semantically invalidated and must not be used any more. + * + * The capacity of the buffer can be determined with getCapacity(). + * The part after length()+1 may or may not be initialized and valid, + * depending on the history of the UnicodeString object. + * + * The buffer contents is guaranteed to be NUL-terminated. + * getTerminatedBuffer() may reallocate the buffer if a terminating NUL + * is written. + * For this reason, this function is not const, unlike getBuffer(). + * Note that a UnicodeString may also contain NUL characters as part of its contents. + * + * The buffer may reside in read-only memory. Its contents must not + * be modified. + * + * @return a read-only pointer to the internal string buffer, + * or 0 if the string is empty or bogus + * + * @see getBuffer(int32_t minCapacity) + * @see getBuffer() + * @stable ICU 2.2 + */ + const UChar *getTerminatedBuffer(); + + //======================================== + // Constructors + //======================================== + + /** Construct an empty UnicodeString. + * @stable ICU 2.0 + */ + inline UnicodeString(); + + /** + * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars + * @param capacity the number of UChars this UnicodeString should hold + * before a resize is necessary; if count is greater than 0 and count + * code points c take up more space than capacity, then capacity is adjusted + * accordingly. + * @param c is used to initially fill the string + * @param count specifies how many code points c are to be written in the + * string + * @stable ICU 2.0 + */ + UnicodeString(int32_t capacity, UChar32 c, int32_t count); + + /** + * Single UChar (code unit) constructor. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param ch the character to place in the UnicodeString + * @stable ICU 2.0 + */ + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); + + /** + * Single UChar32 (code point) constructor. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param ch the character to place in the UnicodeString + * @stable ICU 2.0 + */ + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); + + /** + * UChar* constructor. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param text The characters to place in the UnicodeString. <TT>text</TT> + * must be NULL (U+0000) terminated. + * @stable ICU 2.0 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); + + /** + * UChar* constructor. + * @param text The characters to place in the UnicodeString. + * @param textLength The number of Unicode characters in <TT>text</TT> + * to copy. + * @stable ICU 2.0 + */ + UnicodeString(const UChar *text, + int32_t textLength); + + /** + * Readonly-aliasing UChar* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has copy-on-write semantics: + * When the string is modified, then the buffer is first copied into + * newly allocated memory. + * The aliased buffer is never modified. + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, + * so that both strings then alias the same readonly-text. + * + * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. + * This must be true if <code>textLength==-1</code>. + * @param text The characters to alias for the UnicodeString. + * @param textLength The number of Unicode characters in <code>text</code> to alias. + * If -1, then this constructor will determine the length + * by calling <code>u_strlen()</code>. + * @stable ICU 2.0 + */ + UnicodeString(UBool isTerminated, + const UChar *text, + int32_t textLength); + + /** + * Writable-aliasing UChar* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has write-through semantics: + * For as long as the capacity of the buffer is sufficient, write operations + * will directly affect the buffer. When more capacity is necessary, then + * a new buffer will be allocated and the contents copied as with regularly + * constructed strings. + * In an assignment to another UnicodeString, the buffer will be copied. + * The extract(UChar *dst) function detects whether the dst pointer is the same + * as the string buffer itself and will in this case not copy the contents. + * + * @param buffer The characters to alias for the UnicodeString. + * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. + * @param buffCapacity The size of <code>buffer</code> in UChars. + * @stable ICU 2.0 + */ + UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); + +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION + + /** + * char* constructor. + * Uses the default converter (and thus depends on the ICU conversion code) + * unless U_CHARSET_IS_UTF8 is set to 1. + * + * For ASCII (really "invariant character") strings it is more efficient to use + * the constructor that takes a US_INV (for its enum EInvariant). + * For ASCII (invariant-character) string literals, see UNICODE_STRING and + * UNICODE_STRING_SIMPLE. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param codepageData an array of bytes, null-terminated, + * in the platform's default codepage. + * @stable ICU 2.0 + * @see UNICODE_STRING + * @see UNICODE_STRING_SIMPLE + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); + + /** + * char* constructor. + * Uses the default converter (and thus depends on the ICU conversion code) + * unless U_CHARSET_IS_UTF8 is set to 1. + * @param codepageData an array of bytes in the platform's default codepage. + * @param dataLength The number of bytes in <TT>codepageData</TT>. + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, int32_t dataLength); + +#endif + +#if !UCONFIG_NO_CONVERSION + + /** + * char* constructor. + * @param codepageData an array of bytes, null-terminated + * @param codepage the encoding of <TT>codepageData</TT>. The special + * value 0 for <TT>codepage</TT> indicates that the text is in the + * platform's default codepage. + * + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * Recommendation: For invariant-character strings use the constructor + * UnicodeString(const char *src, int32_t length, enum EInvariant inv) + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, const char *codepage); + + /** + * char* constructor. + * @param codepageData an array of bytes. + * @param dataLength The number of bytes in <TT>codepageData</TT>. + * @param codepage the encoding of <TT>codepageData</TT>. The special + * value 0 for <TT>codepage</TT> indicates that the text is in the + * platform's default codepage. + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * Recommendation: For invariant-character strings use the constructor + * UnicodeString(const char *src, int32_t length, enum EInvariant inv) + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); + + /** + * char * / UConverter constructor. + * This constructor uses an existing UConverter object to + * convert the codepage string to Unicode and construct a UnicodeString + * from that. + * + * The converter is reset at first. + * If the error code indicates a failure before this constructor is called, + * or if an error occurs during conversion or construction, + * then the string will be bogus. + * + * This function avoids the overhead of opening and closing a converter if + * multiple strings are constructed. + * + * @param src input codepage string + * @param srcLength length of the input string, can be -1 for NUL-terminated strings + * @param cnv converter object (ucnv_resetToUnicode() will be called), + * can be NULL for the default converter + * @param errorCode normal ICU error code + * @stable ICU 2.0 + */ + UnicodeString( + const char *src, int32_t srcLength, + UConverter *cnv, + UErrorCode &errorCode); + +#endif + + /** + * Constructs a Unicode string from an invariant-character char * string. + * About invariant characters see utypes.h. + * This constructor has no runtime dependency on conversion code and is + * therefore recommended over ones taking a charset name string + * (where the empty string "" indicates invariant-character conversion). + * + * Use the macro US_INV as the third, signature-distinguishing parameter. + * + * For example: + * \code + * void fn(const char *s) { + * UnicodeString ustr(s, -1, US_INV); + * // use ustr ... + * } + * \endcode + * + * @param src String using only invariant characters. + * @param length Length of src, or -1 if NUL-terminated. + * @param inv Signature-distinguishing paramater, use US_INV. + * + * @see US_INV + * @stable ICU 3.2 + */ + UnicodeString(const char *src, int32_t length, enum EInvariant inv); + + + /** + * Copy constructor. + * @param that The UnicodeString object to copy. + * @stable ICU 2.0 + */ + UnicodeString(const UnicodeString& that); + + /** + * 'Substring' constructor from tail of source string. + * @param src The UnicodeString object to copy. + * @param srcStart The offset into <tt>src</tt> at which to start copying. + * @stable ICU 2.2 + */ + UnicodeString(const UnicodeString& src, int32_t srcStart); + + /** + * 'Substring' constructor from subrange of source string. + * @param src The UnicodeString object to copy. + * @param srcStart The offset into <tt>src</tt> at which to start copying. + * @param srcLength The number of characters from <tt>src</tt> to copy. + * @stable ICU 2.2 + */ + UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); + + /** + * Clone this object, an instance of a subclass of Replaceable. + * Clones can be used concurrently in multiple threads. + * If a subclass does not implement clone(), or if an error occurs, + * then NULL is returned. + * The clone functions in all subclasses return a pointer to a Replaceable + * because some compilers do not support covariant (same-as-this) + * return types; cast to the appropriate subclass if necessary. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see Replaceable::clone + * @see getDynamicClassID + * @stable ICU 2.6 + */ + virtual Replaceable *clone() const; + + /** Destructor. + * @stable ICU 2.0 + */ + virtual ~UnicodeString(); + + /** + * Create a UnicodeString from a UTF-8 string. + * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. + * Calls u_strFromUTF8WithSub(). + * + * @param utf8 UTF-8 input string. + * Note that a StringPiece can be implicitly constructed + * from a std::string or a NUL-terminated const char * string. + * @return A UnicodeString with equivalent UTF-16 contents. + * @see toUTF8 + * @see toUTF8String + * @stable ICU 4.2 + */ + static UnicodeString fromUTF8(const StringPiece &utf8); + + /** + * Create a UnicodeString from a UTF-32 string. + * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. + * Calls u_strFromUTF32WithSub(). + * + * @param utf32 UTF-32 input string. Must not be NULL. + * @param length Length of the input string, or -1 if NUL-terminated. + * @return A UnicodeString with equivalent UTF-16 contents. + * @see toUTF32 + * @stable ICU 4.2 + */ + static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); + + /* Miscellaneous operations */ + + /** + * Unescape a string of characters and return a string containing + * the result. The following escape sequences are recognized: + * + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] + * \\Uhhhhhhhh 8 hex digits + * \\xhh 1-2 hex digits + * \\ooo 1-3 octal digits; o in [0-7] + * \\cX control-X; X is masked with 0x1F + * + * as well as the standard ANSI C escapes: + * + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + * + * Anything else following a backslash is generically escaped. For + * example, "[a\\-z]" returns "[a-z]". + * + * If an escape sequence is ill-formed, this method returns an empty + * string. An example of an ill-formed sequence is "\\u" followed by + * fewer than 4 hex digits. + * + * This function is similar to u_unescape() but not identical to it. + * The latter takes a source char*, so it does escape recognition + * and also invariant conversion. + * + * @return a string with backslash escapes interpreted, or an + * empty string on error. + * @see UnicodeString#unescapeAt() + * @see u_unescape() + * @see u_unescapeAt() + * @stable ICU 2.0 + */ + UnicodeString unescape() const; + + /** + * Unescape a single escape sequence and return the represented + * character. See unescape() for a listing of the recognized escape + * sequences. The character at offset-1 is assumed (without + * checking) to be a backslash. If the escape sequence is + * ill-formed, or the offset is out of range, U_SENTINEL=-1 is + * returned. + * + * @param offset an input output parameter. On input, it is the + * offset into this string where the escape sequence is located, + * after the initial backslash. On output, it is advanced after the + * last character parsed. On error, it is not advanced at all. + * @return the character represented by the escape sequence at + * offset, or U_SENTINEL=-1 on error. + * @see UnicodeString#unescape() + * @see u_unescape() + * @see u_unescapeAt() + * @stable ICU 2.0 + */ + UChar32 unescapeAt(int32_t &offset) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + //======================================== + // Implementation methods + //======================================== + +protected: + /** + * Implement Replaceable::getLength() (see jitterbug 1027). + * @stable ICU 2.4 + */ + virtual int32_t getLength() const; + + /** + * The change in Replaceable to use virtual getCharAt() allows + * UnicodeString::charAt() to be inline again (see jitterbug 709). + * @stable ICU 2.4 + */ + virtual UChar getCharAt(int32_t offset) const; + + /** + * The change in Replaceable to use virtual getChar32At() allows + * UnicodeString::char32At() to be inline again (see jitterbug 709). + * @stable ICU 2.4 + */ + virtual UChar32 getChar32At(int32_t offset) const; + +private: + // For char* constructors. Could be made public. + UnicodeString &setToUTF8(const StringPiece &utf8); + // For extract(char*). + // We could make a toUTF8(target, capacity, errorCode) public but not + // this version: New API will be cleaner if we make callers create substrings + // rather than having start+length on every method, + // and it should take a UErrorCode&. + int32_t + toUTF8(int32_t start, int32_t len, + char *target, int32_t capacity) const; + + /** + * Internal string contents comparison, called by operator==. + * Requires: this & text not bogus and have same lengths. + */ + UBool doEquals(const UnicodeString &text, int32_t len) const; + + inline int8_t + doCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + int8_t doCompare(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + inline int8_t + doCompareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + int8_t doCompareCodePointOrder(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + inline int8_t + doCaseCompare(int32_t start, + int32_t length, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + int8_t + doCaseCompare(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + int32_t doIndexOf(UChar c, + int32_t start, + int32_t length) const; + + int32_t doIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + int32_t doLastIndexOf(UChar c, + int32_t start, + int32_t length) const; + + int32_t doLastIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + void doExtract(int32_t start, + int32_t length, + UChar *dst, + int32_t dstStart) const; + + inline void doExtract(int32_t start, + int32_t length, + UnicodeString& target) const; + + inline UChar doCharAt(int32_t offset) const; + + UnicodeString& doReplace(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + UnicodeString& doReplace(int32_t start, + int32_t length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength); + + UnicodeString& doReverse(int32_t start, + int32_t length); + + // calculate hash code + int32_t doHashCode(void) const; + + // get pointer to start of array + // these do not check for kOpenGetBuffer, unlike the public getBuffer() function + inline UChar* getArrayStart(void); + inline const UChar* getArrayStart(void) const; + + // A UnicodeString object (not necessarily its current buffer) + // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). + inline UBool isWritable() const; + + // Is the current buffer writable? + inline UBool isBufferWritable() const; + + // None of the following does releaseArray(). + inline void setLength(int32_t len); // sets only fShortLength and fLength + inline void setToEmpty(); // sets fFlags=kShortString + inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags + + // allocate the array; result may be fStackBuffer + // sets refCount to 1 if appropriate + // sets fArray, fCapacity, and fFlags + // returns boolean for success or failure + UBool allocate(int32_t capacity); + + // release the array if owned + void releaseArray(void); + + // turn a bogus string into an empty one + void unBogus(); + + // implements assigment operator, copy constructor, and fastCopyFrom() + UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE); + + // Pin start and limit to acceptable values. + inline void pinIndex(int32_t& start) const; + inline void pinIndices(int32_t& start, + int32_t& length) const; + +#if !UCONFIG_NO_CONVERSION + + /* Internal extract() using UConverter. */ + int32_t doExtract(int32_t start, int32_t length, + char *dest, int32_t destCapacity, + UConverter *cnv, + UErrorCode &errorCode) const; + + /* + * Real constructor for converting from codepage data. + * It assumes that it is called with !fRefCounted. + * + * If <code>codepage==0</code>, then the default converter + * is used for the platform encoding. + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + */ + void doCodepageCreate(const char *codepageData, + int32_t dataLength, + const char *codepage); + + /* + * Worker function for creating a UnicodeString from + * a codepage string using a UConverter. + */ + void + doCodepageCreate(const char *codepageData, + int32_t dataLength, + UConverter *converter, + UErrorCode &status); + +#endif + + /* + * This function is called when write access to the array + * is necessary. + * + * We need to make a copy of the array if + * the buffer is read-only, or + * the buffer is refCounted (shared), and refCount>1, or + * the buffer is too small. + * + * Return FALSE if memory could not be allocated. + */ + UBool cloneArrayIfNeeded(int32_t newCapacity = -1, + int32_t growCapacity = -1, + UBool doCopyArray = TRUE, + int32_t **pBufferToDelete = 0, + UBool forceClone = FALSE); + + /** + * Common function for UnicodeString case mappings. + * The stringCaseMapper has the same type UStringCaseMapper + * as in ustr_imp.h for ustrcase_map(). + */ + UnicodeString & + caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); + + // ref counting + void addRef(void); + int32_t removeRef(void); + int32_t refCount(void) const; + + // constants + enum { + // Set the stack buffer size so that sizeof(UnicodeString) is, + // naturally (without padding), a multiple of sizeof(pointer). + US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings + kInvalidUChar=0xffff, // invalid UChar index + kGrowSize=128, // grow size for this buffer + kInvalidHashCode=0, // invalid hash code + kEmptyHashCode=1, // hash code for empty string + + // bit flag values for fFlags + kIsBogus=1, // this string is bogus, i.e., not valid or NULL + kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields + kRefCounted=4, // there is a refCount field before the characters in fArray + kBufferIsReadonly=8,// do not write to this buffer + kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), + // and releaseBuffer(newLength) must be called + + // combined values for convenience + kShortString=kUsingStackBuffer, + kLongString=kRefCounted, + kReadonlyAlias=kBufferIsReadonly, + kWritableAlias=0 + }; + + friend class StringThreadTest; + friend class UnicodeStringAppendable; + + union StackBufferOrFields; // forward declaration necessary before friend declaration + friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion + + /* + * The following are all the class fields that are stored + * in each UnicodeString object. + * Note that UnicodeString has virtual functions, + * therefore there is an implicit vtable pointer + * as the first real field. + * The fields should be aligned such that no padding is necessary. + * On 32-bit machines, the size should be 32 bytes, + * on 64-bit machines (8-byte pointers), it should be 40 bytes. + * + * We use a hack to achieve this. + * + * With at least some compilers, each of the following is forced to + * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], + * rounded up with additional padding if the fields do not already fit that requirement: + * - sizeof(class UnicodeString) + * - offsetof(UnicodeString, fUnion) + * - sizeof(fUnion) + * - sizeof(fFields) + * + * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) + * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. + * (Padding at the end of fFields is ok: + * As long as there is no padding after fStackBuffer, it is not wasted space.) + * + * We further assume that the compiler does not reorder the fields, + * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, + * with at most some padding (but no other field) in between. + * (Padding there would be wasted space, but functionally harmless.) + * + * We use a few more sizeof(pointer)'s chunks of space with + * fRestOfStackBuffer, fShortLength and fFlags, + * to get up exactly to the intended sizeof(UnicodeString). + */ + // (implicit) *vtable; + union StackBufferOrFields { + // fStackBuffer is used iff (fFlags&kUsingStackBuffer) + // else fFields is used + UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer + struct { + UChar *fArray; // the Unicode data + int32_t fCapacity; // capacity of fArray (in UChars) + int32_t fLength; // number of characters in fArray if >127; else undefined + } fFields; + } fUnion; + UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; + int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength + uint8_t fFlags; // bit flags: see constants above +}; + +/** + * Create a new UnicodeString with the concatenation of two others. + * + * @param s1 The first string to be copied to the new one. + * @param s2 The second string to be copied to the new one, after s1. + * @return UnicodeString(s1).append(s2) + * @stable ICU 2.8 + */ +U_COMMON_API UnicodeString U_EXPORT2 +operator+ (const UnicodeString &s1, const UnicodeString &s2); + +//======================================== +// Inline members +//======================================== + +//======================================== +// Privates +//======================================== + +inline void +UnicodeString::pinIndex(int32_t& start) const +{ + // pin index + if(start < 0) { + start = 0; + } else if(start > length()) { + start = length(); + } +} + +inline void +UnicodeString::pinIndices(int32_t& start, + int32_t& _length) const +{ + // pin indices + int32_t len = length(); + if(start < 0) { + start = 0; + } else if(start > len) { + start = len; + } + if(_length < 0) { + _length = 0; + } else if(_length > (len - start)) { + _length = (len - start); + } +} + +inline UChar* +UnicodeString::getArrayStart() +{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } + +inline const UChar* +UnicodeString::getArrayStart() const +{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } + +//======================================== +// Default constructor +//======================================== + +inline +UnicodeString::UnicodeString() + : fShortLength(0), + fFlags(kShortString) +{} + +//======================================== +// Read-only implementation methods +//======================================== +inline int32_t +UnicodeString::length() const +{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } + +inline int32_t +UnicodeString::getCapacity() const +{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } + +inline int32_t +UnicodeString::hashCode() const +{ return doHashCode(); } + +inline UBool +UnicodeString::isBogus() const +{ return (UBool)(fFlags & kIsBogus); } + +inline UBool +UnicodeString::isWritable() const +{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } + +inline UBool +UnicodeString::isBufferWritable() const +{ + return (UBool)( + !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && + (!(fFlags&kRefCounted) || refCount()==1)); +} + +inline const UChar * +UnicodeString::getBuffer() const { + if(fFlags&(kIsBogus|kOpenGetBuffer)) { + return 0; + } else if(fFlags&kUsingStackBuffer) { + return fUnion.fStackBuffer; + } else { + return fUnion.fFields.fArray; + } +} + +//======================================== +// Read-only alias methods +//======================================== +inline int8_t +UnicodeString::doCompare(int32_t start, + int32_t thisLength, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + srcText.pinIndices(srcStart, srcLength); + return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); + } +} + +inline UBool +UnicodeString::operator== (const UnicodeString& text) const +{ + if(isBogus()) { + return text.isBogus(); + } else { + int32_t len = length(), textLength = text.length(); + return !text.isBogus() && len == textLength && doEquals(text, len); + } +} + +inline UBool +UnicodeString::operator!= (const UnicodeString& text) const +{ return (! operator==(text)); } + +inline UBool +UnicodeString::operator> (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) == 1; } + +inline UBool +UnicodeString::operator< (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) == -1; } + +inline UBool +UnicodeString::operator>= (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) != -1; } + +inline UBool +UnicodeString::operator<= (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) != 1; } + +inline int8_t +UnicodeString::compare(const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const UnicodeString& srcText) const +{ return doCompare(start, _length, srcText, 0, srcText.length()); } + +inline int8_t +UnicodeString::compare(const UChar *srcChars, + int32_t srcLength) const +{ return doCompare(0, length(), srcChars, 0, srcLength); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(start, _length, srcText, srcStart, srcLength); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const UChar *srcChars) const +{ return doCompare(start, _length, srcChars, 0, _length); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(start, _length, srcChars, srcStart, srcLength); } + +inline int8_t +UnicodeString::compareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const +{ return doCompare(start, limit - start, + srcText, srcStart, srcLimit - srcStart); } + +inline int8_t +UnicodeString::doCompareCodePointOrder(int32_t start, + int32_t thisLength, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + srcText.pinIndices(srcStart, srcLength); + return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); + } +} + +inline int8_t +UnicodeString::compareCodePointOrder(const UnicodeString& text) const +{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const UnicodeString& srcText) const +{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } + +inline int8_t +UnicodeString::compareCodePointOrder(const UChar *srcChars, + int32_t srcLength) const +{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const UChar *srcChars) const +{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const +{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } + +inline int8_t +UnicodeString::compareCodePointOrderBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const +{ return doCompareCodePointOrder(start, limit - start, + srcText, srcStart, srcLimit - srcStart); } + +inline int8_t +UnicodeString::doCaseCompare(int32_t start, + int32_t thisLength, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + srcText.pinIndices(srcStart, srcLength); + return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); + } +} + +inline int8_t +UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { + return doCaseCompare(0, length(), text, 0, text.length(), options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const UnicodeString &srcText, + uint32_t options) const { + return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); +} + +inline int8_t +UnicodeString::caseCompare(const UChar *srcChars, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(0, length(), srcChars, 0, srcLength, options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const UChar *srcChars, + uint32_t options) const { + return doCaseCompare(start, _length, srcChars, 0, _length, options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); +} + +inline int8_t +UnicodeString::caseCompareBetween(int32_t start, + int32_t limit, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLimit, + uint32_t options) const { + return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); +} + +inline int32_t +UnicodeString::indexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ + if(!srcText.isBogus()) { + srcText.pinIndices(srcStart, srcLength); + if(srcLength > 0) { + return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); + } + } + return -1; +} + +inline int32_t +UnicodeString::indexOf(const UnicodeString& text) const +{ return indexOf(text, 0, text.length(), 0, length()); } + +inline int32_t +UnicodeString::indexOf(const UnicodeString& text, + int32_t start) const { + pinIndex(start); + return indexOf(text, 0, text.length(), start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(const UnicodeString& text, + int32_t start, + int32_t _length) const +{ return indexOf(text, 0, text.length(), start, _length); } + +inline int32_t +UnicodeString::indexOf(const UChar *srcChars, + int32_t srcLength, + int32_t start) const { + pinIndex(start); + return indexOf(srcChars, 0, srcLength, start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(const UChar *srcChars, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ return indexOf(srcChars, 0, srcLength, start, _length); } + +inline int32_t +UnicodeString::indexOf(UChar c, + int32_t start, + int32_t _length) const +{ return doIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::indexOf(UChar32 c, + int32_t start, + int32_t _length) const +{ return doIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::indexOf(UChar c) const +{ return doIndexOf(c, 0, length()); } + +inline int32_t +UnicodeString::indexOf(UChar32 c) const +{ return indexOf(c, 0, length()); } + +inline int32_t +UnicodeString::indexOf(UChar c, + int32_t start) const { + pinIndex(start); + return doIndexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(UChar32 c, + int32_t start) const { + pinIndex(start); + return indexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(const UChar *srcChars, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ return lastIndexOf(srcChars, 0, srcLength, start, _length); } + +inline int32_t +UnicodeString::lastIndexOf(const UChar *srcChars, + int32_t srcLength, + int32_t start) const { + pinIndex(start); + return lastIndexOf(srcChars, 0, srcLength, start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ + if(!srcText.isBogus()) { + srcText.pinIndices(srcStart, srcLength); + if(srcLength > 0) { + return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); + } + } + return -1; +} + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text, + int32_t start, + int32_t _length) const +{ return lastIndexOf(text, 0, text.length(), start, _length); } + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text, + int32_t start) const { + pinIndex(start); + return lastIndexOf(text, 0, text.length(), start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text) const +{ return lastIndexOf(text, 0, text.length(), 0, length()); } + +inline int32_t +UnicodeString::lastIndexOf(UChar c, + int32_t start, + int32_t _length) const +{ return doLastIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c, + int32_t start, + int32_t _length) const { + return doLastIndexOf(c, start, _length); +} + +inline int32_t +UnicodeString::lastIndexOf(UChar c) const +{ return doLastIndexOf(c, 0, length()); } + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c) const { + return lastIndexOf(c, 0, length()); +} + +inline int32_t +UnicodeString::lastIndexOf(UChar c, + int32_t start) const { + pinIndex(start); + return doLastIndexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c, + int32_t start) const { + pinIndex(start); + return lastIndexOf(c, start, length() - start); +} + +inline UBool +UnicodeString::startsWith(const UnicodeString& text) const +{ return compare(0, text.length(), text, 0, text.length()) == 0; } + +inline UBool +UnicodeString::startsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } + +inline UBool +UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(srcChars); + } + return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; +} + +inline UBool +UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(srcChars); + } + return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; +} + +inline UBool +UnicodeString::endsWith(const UnicodeString& text) const +{ return doCompare(length() - text.length(), text.length(), + text, 0, text.length()) == 0; } + +inline UBool +UnicodeString::endsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const { + srcText.pinIndices(srcStart, srcLength); + return doCompare(length() - srcLength, srcLength, + srcText, srcStart, srcLength) == 0; +} + +inline UBool +UnicodeString::endsWith(const UChar *srcChars, + int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(srcChars); + } + return doCompare(length() - srcLength, srcLength, + srcChars, 0, srcLength) == 0; +} + +inline UBool +UnicodeString::endsWith(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(srcChars + srcStart); + } + return doCompare(length() - srcLength, srcLength, + srcChars, srcStart, srcLength) == 0; +} + +//======================================== +// replace +//======================================== +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const UnicodeString& srcText) +{ return doReplace(start, _length, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, _length, srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const UChar *srcChars, + int32_t srcLength) +{ return doReplace(start, _length, srcChars, 0, srcLength); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, _length, srcChars, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + UChar srcChar) +{ return doReplace(start, _length, &srcChar, 0, 1); } + +inline UnicodeString& +UnicodeString::replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText) +{ return doReplace(start, limit - start, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) +{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } + +inline UnicodeString& +UnicodeString::findAndReplace(const UnicodeString& oldText, + const UnicodeString& newText) +{ return findAndReplace(0, length(), oldText, 0, oldText.length(), + newText, 0, newText.length()); } + +inline UnicodeString& +UnicodeString::findAndReplace(int32_t start, + int32_t _length, + const UnicodeString& oldText, + const UnicodeString& newText) +{ return findAndReplace(start, _length, oldText, 0, oldText.length(), + newText, 0, newText.length()); } + +// ============================ +// extract +// ============================ +inline void +UnicodeString::doExtract(int32_t start, + int32_t _length, + UnicodeString& target) const +{ target.replace(0, target.length(), *this, start, _length); } + +inline void +UnicodeString::extract(int32_t start, + int32_t _length, + UChar *target, + int32_t targetStart) const +{ doExtract(start, _length, target, targetStart); } + +inline void +UnicodeString::extract(int32_t start, + int32_t _length, + UnicodeString& target) const +{ doExtract(start, _length, target); } + +#if !UCONFIG_NO_CONVERSION + +inline int32_t +UnicodeString::extract(int32_t start, + int32_t _length, + char *dst, + const char *codepage) const + +{ + // This dstSize value will be checked explicitly + return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); +} + +#endif + +inline void +UnicodeString::extractBetween(int32_t start, + int32_t limit, + UChar *dst, + int32_t dstStart) const { + pinIndex(start); + pinIndex(limit); + doExtract(start, limit - start, dst, dstStart); +} + +inline UnicodeString +UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { + return tempSubString(start, limit - start); +} + +inline UChar +UnicodeString::doCharAt(int32_t offset) const +{ + if((uint32_t)offset < (uint32_t)length()) { + return getArrayStart()[offset]; + } else { + return kInvalidUChar; + } +} + +inline UChar +UnicodeString::charAt(int32_t offset) const +{ return doCharAt(offset); } + +inline UChar +UnicodeString::operator[] (int32_t offset) const +{ return doCharAt(offset); } + +inline UBool +UnicodeString::isEmpty() const { + return fShortLength == 0; +} + +//======================================== +// Write implementation methods +//======================================== +inline void +UnicodeString::setLength(int32_t len) { + if(len <= 127) { + fShortLength = (int8_t)len; + } else { + fShortLength = (int8_t)-1; + fUnion.fFields.fLength = len; + } +} + +inline void +UnicodeString::setToEmpty() { + fShortLength = 0; + fFlags = kShortString; +} + +inline void +UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { + setLength(len); + fUnion.fFields.fArray = array; + fUnion.fFields.fCapacity = capacity; +} + +inline UnicodeString& +UnicodeString::operator= (UChar ch) +{ return doReplace(0, length(), &ch, 0, 1); } + +inline UnicodeString& +UnicodeString::operator= (UChar32 ch) +{ return replace(0, length(), ch); } + +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ + unBogus(); + return doReplace(0, length(), srcText, srcStart, srcLength); +} + +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText, + int32_t srcStart) +{ + unBogus(); + srcText.pinIndex(srcStart); + return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); +} + +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText) +{ + return copyFrom(srcText); +} + +inline UnicodeString& +UnicodeString::setTo(const UChar *srcChars, + int32_t srcLength) +{ + unBogus(); + return doReplace(0, length(), srcChars, 0, srcLength); +} + +inline UnicodeString& +UnicodeString::setTo(UChar srcChar) +{ + unBogus(); + return doReplace(0, length(), &srcChar, 0, 1); +} + +inline UnicodeString& +UnicodeString::setTo(UChar32 srcChar) +{ + unBogus(); + return replace(0, length(), srcChar); +} + +inline UnicodeString& +UnicodeString::append(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(length(), 0, srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::append(const UnicodeString& srcText) +{ return doReplace(length(), 0, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::append(const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(length(), 0, srcChars, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::append(const UChar *srcChars, + int32_t srcLength) +{ return doReplace(length(), 0, srcChars, 0, srcLength); } + +inline UnicodeString& +UnicodeString::append(UChar srcChar) +{ return doReplace(length(), 0, &srcChar, 0, 1); } + +inline UnicodeString& +UnicodeString::operator+= (UChar ch) +{ return doReplace(length(), 0, &ch, 0, 1); } + +inline UnicodeString& +UnicodeString::operator+= (UChar32 ch) { + return append(ch); +} + +inline UnicodeString& +UnicodeString::operator+= (const UnicodeString& srcText) +{ return doReplace(length(), 0, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, 0, srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const UnicodeString& srcText) +{ return doReplace(start, 0, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const UChar *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, 0, srcChars, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const UChar *srcChars, + int32_t srcLength) +{ return doReplace(start, 0, srcChars, 0, srcLength); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + UChar srcChar) +{ return doReplace(start, 0, &srcChar, 0, 1); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + UChar32 srcChar) +{ return replace(start, 0, srcChar); } + + +inline UnicodeString& +UnicodeString::remove() +{ + // remove() of a bogus string makes the string empty and non-bogus + if(isBogus()) { + setToEmpty(); + } else { + fShortLength = 0; + } + return *this; +} + +inline UnicodeString& +UnicodeString::remove(int32_t start, + int32_t _length) +{ + if(start <= 0 && _length == INT32_MAX) { + // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus + return remove(); + } + return doReplace(start, _length, NULL, 0, 0); +} + +inline UnicodeString& +UnicodeString::removeBetween(int32_t start, + int32_t limit) +{ return doReplace(start, limit - start, NULL, 0, 0); } + +inline UnicodeString & +UnicodeString::retainBetween(int32_t start, int32_t limit) { + truncate(limit); + return doReplace(0, start, NULL, 0, 0); +} + +inline UBool +UnicodeString::truncate(int32_t targetLength) +{ + if(isBogus() && targetLength == 0) { + // truncate(0) of a bogus string makes the string empty and non-bogus + unBogus(); + return FALSE; + } else if((uint32_t)targetLength < (uint32_t)length()) { + setLength(targetLength); + return TRUE; + } else { + return FALSE; + } +} + +inline UnicodeString& +UnicodeString::reverse() +{ return doReverse(0, length()); } + +inline UnicodeString& +UnicodeString::reverse(int32_t start, + int32_t _length) +{ return doReverse(start, _length); } + +U_NAMESPACE_END + +#endif diff --git a/Source/WTF/icu/unicode/unorm.h b/Source/WTF/icu/unicode/unorm.h new file mode 100644 index 000000000..fbb7b49b3 --- /dev/null +++ b/Source/WTF/icu/unicode/unorm.h @@ -0,0 +1,561 @@ +/* +******************************************************************************* +* Copyright (c) 1996-2010, International Business Machines Corporation +* and others. All Rights Reserved. +******************************************************************************* +* File unorm.h +* +* Created by: Vladimir Weinstein 12052000 +* +* Modification history : +* +* Date Name Description +* 02/01/01 synwee Added normalization quickcheck enum and method. +*/ +#ifndef UNORM_H +#define UNORM_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/uiter.h" +#include "unicode/unorm2.h" + +/** + * \file + * \brief C API: Unicode Normalization + * + * <h2>Unicode normalization API</h2> + * + * Note: This API has been replaced by the unorm2.h API and is only available + * for backward compatibility. The functions here simply delegate to the + * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize(). + * There is one exception: The new API does not provide a replacement for unorm_compare(). + * + * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or + * decomposed form, allowing for easier sorting and searching of text. + * <code>unorm_normalize</code> supports the standard normalization forms described in + * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> + * Unicode Standard Annex #15: Unicode Normalization Forms</a>. + * + * Characters with accents or other adornments can be encoded in + * several different ways in Unicode. For example, take the character A-acute. + * In Unicode, this can be encoded as a single character (the + * "composed" form): + * + * \code + * 00C1 LATIN CAPITAL LETTER A WITH ACUTE + * \endcode + * + * or as two separate characters (the "decomposed" form): + * + * \code + * 0041 LATIN CAPITAL LETTER A + * 0301 COMBINING ACUTE ACCENT + * \endcode + * + * To a user of your program, however, both of these sequences should be + * treated as the same "user-level" character "A with acute accent". When you are searching or + * comparing text, you must ensure that these two sequences are treated + * equivalently. In addition, you must handle characters with more than one + * accent. Sometimes the order of a character's combining accents is + * significant, while in other cases accent sequences in different orders are + * really equivalent. + * + * Similarly, the string "ffi" can be encoded as three separate letters: + * + * \code + * 0066 LATIN SMALL LETTER F + * 0066 LATIN SMALL LETTER F + * 0069 LATIN SMALL LETTER I + * \endcode + * + * or as the single character + * + * \code + * FB03 LATIN SMALL LIGATURE FFI + * \endcode + * + * The ffi ligature is not a distinct semantic character, and strictly speaking + * it shouldn't be in Unicode at all, but it was included for compatibility + * with existing character sets that already provided it. The Unicode standard + * identifies such characters by giving them "compatibility" decompositions + * into the corresponding semantic characters. When sorting and searching, you + * will often want to use these mappings. + * + * <code>unorm_normalize</code> helps solve these problems by transforming text into the + * canonical composed and decomposed forms as shown in the first example above. + * In addition, you can have it perform compatibility decompositions so that + * you can treat compatibility characters the same as their equivalents. + * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical + * order, so that you do not have to worry about accent rearrangement on your + * own. + * + * Form FCD, "Fast C or D", is also designed for collation. + * It allows to work on strings that are not necessarily normalized + * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed + * characters and their decomposed equivalents the same. + * + * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings + * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical + * themselves. + * + * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character, + * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long + * as their decompositions do not need canonical reordering. + * + * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts - + * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will + * return UNORM_YES for most strings in practice. + * + * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD. + * + * For more details on FCD see the collation design document: + * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm + * + * ICU collation performs either NFD or FCD normalization automatically if normalization + * is turned on for the collator object. + * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons, + * transliteration/transcription, unique representations, etc. + * + * The W3C generally recommends to exchange texts in NFC. + * Note also that most legacy character encodings use only precomposed forms and often do not + * encode any combining marks by themselves. For conversion to such character encodings the + * Unicode text needs to be normalized to NFC. + * For more usage examples, see the Unicode Standard Annex. + */ + +/** + * Constants for normalization modes. + * @stable ICU 2.0 + */ +typedef enum { + /** No decomposition/composition. @stable ICU 2.0 */ + UNORM_NONE = 1, + /** Canonical decomposition. @stable ICU 2.0 */ + UNORM_NFD = 2, + /** Compatibility decomposition. @stable ICU 2.0 */ + UNORM_NFKD = 3, + /** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */ + UNORM_NFC = 4, + /** Default normalization. @stable ICU 2.0 */ + UNORM_DEFAULT = UNORM_NFC, + /** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */ + UNORM_NFKC =5, + /** "Fast C or D" form. @stable ICU 2.0 */ + UNORM_FCD = 6, + + /** One more than the highest normalization mode constant. @stable ICU 2.0 */ + UNORM_MODE_COUNT +} UNormalizationMode; + +/** + * Constants for options flags for normalization. + * Use 0 for default options, + * including normalization according to the Unicode version + * that is currently supported by ICU (see u_getUnicodeVersion). + * @stable ICU 2.6 + */ +enum { + /** + * Options bit set value to select Unicode 3.2 normalization + * (except NormalizationCorrections). + * At most one Unicode version can be selected at a time. + * @stable ICU 2.6 + */ + UNORM_UNICODE_3_2=0x20 +}; + +/** + * Lowest-order bit number of unorm_compare() options bits corresponding to + * normalization options bits. + * + * The options parameter for unorm_compare() uses most bits for + * itself and for various comparison and folding flags. + * The most significant bits, however, are shifted down and passed on + * to the normalization implementation. + * (That is, from unorm_compare(..., options, ...), + * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the + * internal normalization functions.) + * + * @see unorm_compare + * @stable ICU 2.6 + */ +#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 + +/** + * Normalize a string. + * The string will be normalized according the specified normalization mode + * and options. + * The source and result buffers must not be the same, nor overlap. + * + * @param source The string to normalize. + * @param sourceLength The length of source, or -1 if NUL-terminated. + * @param mode The normalization mode; one of UNORM_NONE, + * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT. + * @param options The normalization options, ORed together (0 for no options). + * @param result A pointer to a buffer to receive the result string. + * The result string is NUL-terminated if possible. + * @param resultLength The maximum size of result. + * @param status A pointer to a UErrorCode to receive any errors. + * @return The total buffer size needed; if greater than resultLength, + * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +unorm_normalize(const UChar *source, int32_t sourceLength, + UNormalizationMode mode, int32_t options, + UChar *result, int32_t resultLength, + UErrorCode *status); + +/** + * Performing quick check on a string, to quickly determine if the string is + * in a particular normalization format. + * Three types of result can be returned UNORM_YES, UNORM_NO or + * UNORM_MAYBE. Result UNORM_YES indicates that the argument + * string is in the desired normalized format, UNORM_NO determines that + * argument string is not in the desired normalized format. A + * UNORM_MAYBE result indicates that a more thorough check is required, + * the user may have to put the string in its normalized form and compare the + * results. + * + * @param source string for determining if it is in a normalized format + * @param sourcelength length of source to test, or -1 if NUL-terminated + * @param mode which normalization form to test for + * @param status a pointer to a UErrorCode to receive any errors + * @return UNORM_YES, UNORM_NO or UNORM_MAYBE + * + * @see unorm_isNormalized + * @stable ICU 2.0 + */ +U_STABLE UNormalizationCheckResult U_EXPORT2 +unorm_quickCheck(const UChar *source, int32_t sourcelength, + UNormalizationMode mode, + UErrorCode *status); + +/** + * Performing quick check on a string; same as unorm_quickCheck but + * takes an extra options parameter like most normalization functions. + * + * @param src String that is to be tested if it is in a normalization format. + * @param srcLength Length of source to test, or -1 if NUL-terminated. + * @param mode Which normalization form to test for. + * @param options The normalization options, ORed together (0 for no options). + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return UNORM_YES, UNORM_NO or UNORM_MAYBE + * + * @see unorm_quickCheck + * @see unorm_isNormalized + * @stable ICU 2.6 + */ +U_STABLE UNormalizationCheckResult U_EXPORT2 +unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode); + +/** + * Test if a string is in a given normalization form. + * This is semantically equivalent to source.equals(normalize(source, mode)) . + * + * Unlike unorm_quickCheck(), this function returns a definitive result, + * never a "maybe". + * For NFD, NFKD, and FCD, both functions work exactly the same. + * For NFC and NFKC where quickCheck may return "maybe", this function will + * perform further tests to arrive at a TRUE/FALSE result. + * + * @param src String that is to be tested if it is in a normalization format. + * @param srcLength Length of source to test, or -1 if NUL-terminated. + * @param mode Which normalization form to test for. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Boolean value indicating whether the source string is in the + * "mode" normalization form. + * + * @see unorm_quickCheck + * @stable ICU 2.2 + */ +U_STABLE UBool U_EXPORT2 +unorm_isNormalized(const UChar *src, int32_t srcLength, + UNormalizationMode mode, + UErrorCode *pErrorCode); + +/** + * Test if a string is in a given normalization form; same as unorm_isNormalized but + * takes an extra options parameter like most normalization functions. + * + * @param src String that is to be tested if it is in a normalization format. + * @param srcLength Length of source to test, or -1 if NUL-terminated. + * @param mode Which normalization form to test for. + * @param options The normalization options, ORed together (0 for no options). + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Boolean value indicating whether the source string is in the + * "mode/options" normalization form. + * + * @see unorm_quickCheck + * @see unorm_isNormalized + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode); + +/** + * Iterative normalization forward. + * This function (together with unorm_previous) is somewhat + * similar to the C++ Normalizer class (see its non-static functions). + * + * Iterative normalization is useful when only a small portion of a longer + * string/text needs to be processed. + * + * For example, the likelihood may be high that processing the first 10% of some + * text will be sufficient to find certain data. + * Another example: When one wants to concatenate two normalized strings and get a + * normalized result, it is much more efficient to normalize just a small part of + * the result around the concatenation place instead of re-normalizing everything. + * + * The input text is an instance of the C character iteration API UCharIterator. + * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any + * other kind of text object. + * + * If a buffer overflow occurs, then the caller needs to reset the iterator to the + * old index and call the function again with a larger buffer - if the caller cares + * for the actual output. + * Regardless of the output buffer, the iterator will always be moved to the next + * normalization boundary. + * + * This function (like unorm_previous) serves two purposes: + * + * 1) To find the next boundary so that the normalization of the part of the text + * from the current position to that boundary does not affect and is not affected + * by the part of the text beyond that boundary. + * + * 2) To normalize the text up to the boundary. + * + * The second step is optional, per the doNormalize parameter. + * It is omitted for operations like string concatenation, where the two adjacent + * string ends need to be normalized together. + * In such a case, the output buffer will just contain a copy of the text up to the + * boundary. + * + * pNeededToNormalize is an output-only parameter. Its output value is only defined + * if normalization was requested (doNormalize) and successful (especially, no + * buffer overflow). + * It is useful for operations like a normalizing transliterator, where one would + * not want to replace a piece of text if it is not modified. + * + * If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeeded... is set TRUE + * if the normalization was necessary. + * + * If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE. + * + * If the buffer overflows, then *pNeededToNormalize will be undefined; + * essentially, whenever U_FAILURE is true (like in buffer overflows), this result + * will be undefined. + * + * @param src The input text in the form of a C character iterator. + * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. + * @param destCapacity The number of UChars that fit into dest. + * @param mode The normalization mode. + * @param options The normalization options, ORed together (0 for no options). + * @param doNormalize Indicates if the source text up to the next boundary + * is to be normalized (TRUE) or just copied (FALSE). + * @param pNeededToNormalize Output flag indicating if the normalization resulted in + * different text from the input. + * Not defined if an error occurs including buffer overflow. + * Always FALSE if !doNormalize. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of output (number of UChars) when successful or buffer overflow. + * + * @see unorm_previous + * @see unorm_normalize + * + * @stable ICU 2.1 + */ +U_STABLE int32_t U_EXPORT2 +unorm_next(UCharIterator *src, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode); + +/** + * Iterative normalization backward. + * This function (together with unorm_next) is somewhat + * similar to the C++ Normalizer class (see its non-static functions). + * For all details see unorm_next. + * + * @param src The input text in the form of a C character iterator. + * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. + * @param destCapacity The number of UChars that fit into dest. + * @param mode The normalization mode. + * @param options The normalization options, ORed together (0 for no options). + * @param doNormalize Indicates if the source text up to the next boundary + * is to be normalized (TRUE) or just copied (FALSE). + * @param pNeededToNormalize Output flag indicating if the normalization resulted in + * different text from the input. + * Not defined if an error occurs including buffer overflow. + * Always FALSE if !doNormalize. + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of output (number of UChars) when successful or buffer overflow. + * + * @see unorm_next + * @see unorm_normalize + * + * @stable ICU 2.1 + */ +U_STABLE int32_t U_EXPORT2 +unorm_previous(UCharIterator *src, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UBool doNormalize, UBool *pNeededToNormalize, + UErrorCode *pErrorCode); + +/** + * Concatenate normalized strings, making sure that the result is normalized as well. + * + * If both the left and the right strings are in + * the normalization form according to "mode/options", + * then the result will be + * + * \code + * dest=normalize(left+right, mode, options) + * \endcode + * + * With the input strings already being normalized, + * this function will use unorm_next() and unorm_previous() + * to find the adjacent end pieces of the input strings. + * Only the concatenation of these end pieces will be normalized and + * then concatenated with the remaining parts of the input strings. + * + * It is allowed to have dest==left to avoid copying the entire left string. + * + * @param left Left source string, may be same as dest. + * @param leftLength Length of left source string, or -1 if NUL-terminated. + * @param right Right source string. Must not be the same as dest, nor overlap. + * @param rightLength Length of right source string, or -1 if NUL-terminated. + * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. + * @param destCapacity The number of UChars that fit into dest. + * @param mode The normalization mode. + * @param options The normalization options, ORed together (0 for no options). + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return Length of output (number of UChars) when successful or buffer overflow. + * + * @see unorm_normalize + * @see unorm_next + * @see unorm_previous + * + * @stable ICU 2.1 + */ +U_STABLE int32_t U_EXPORT2 +unorm_concatenate(const UChar *left, int32_t leftLength, + const UChar *right, int32_t rightLength, + UChar *dest, int32_t destCapacity, + UNormalizationMode mode, int32_t options, + UErrorCode *pErrorCode); + +/** + * Option bit for unorm_compare: + * Both input strings are assumed to fulfill FCD conditions. + * @stable ICU 2.2 + */ +#define UNORM_INPUT_IS_FCD 0x20000 + +/** + * Option bit for unorm_compare: + * Perform case-insensitive comparison. + * @stable ICU 2.2 + */ +#define U_COMPARE_IGNORE_CASE 0x10000 + +#ifndef U_COMPARE_CODE_POINT_ORDER +/* see also unistr.h and ustring.h */ +/** + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: + * Compare strings in code point order instead of code unit order. + * @stable ICU 2.2 + */ +#define U_COMPARE_CODE_POINT_ORDER 0x8000 +#endif + +/** + * Compare two strings for canonical equivalence. + * Further options include case-insensitive comparison and + * code point order (as opposed to code unit order). + * + * Canonical equivalence between two strings is defined as their normalized + * forms (NFD or NFC) being identical. + * This function compares strings incrementally instead of normalizing + * (and optionally case-folding) both strings entirely, + * improving performance significantly. + * + * Bulk normalization is only necessary if the strings do not fulfill the FCD + * conditions. Only in this case, and only if the strings are relatively long, + * is memory allocated temporarily. + * For FCD strings and short non-FCD strings there is no memory allocation. + * + * Semantically, this is equivalent to + * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) + * where code point order and foldCase are all optional. + * + * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match + * the case folding must be performed first, then the normalization. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Case-sensitive comparison in code unit order, and the input strings + * are quick-checked for FCD. + * + * - UNORM_INPUT_IS_FCD + * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. + * If not set, the function will quickCheck for FCD + * and normalize if necessary. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_COMPARE_IGNORE_CASE + * Set to compare strings case-insensitively using case folding, + * instead of case-sensitively. + * If set, then the following case folding options are used. + * + * - Options as used with case-insensitive comparisons, currently: + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * (see u_strCaseCompare for details) + * + * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT + * + * @param pErrorCode ICU error code in/out parameter. + * Must fulfill U_SUCCESS before the function call. + * @return <0 or 0 or >0 as usual for string comparisons + * + * @see unorm_normalize + * @see UNORM_FCD + * @see u_strCompare + * @see u_strCaseCompare + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +unorm_compare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode); + +#endif /* #if !UCONFIG_NO_NORMALIZATION */ + +#endif diff --git a/Source/WTF/icu/unicode/unorm2.h b/Source/WTF/icu/unicode/unorm2.h new file mode 100644 index 000000000..7152fc109 --- /dev/null +++ b/Source/WTF/icu/unicode/unorm2.h @@ -0,0 +1,528 @@ +/* +******************************************************************************* +* +* Copyright (C) 2009-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: unorm2.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009dec15 +* created by: Markus W. Scherer +*/ + +#ifndef __UNORM2_H__ +#define __UNORM2_H__ + +/** + * \file + * \brief C API: New API for Unicode Normalization. + * + * Unicode normalization functionality for standard Unicode normalization or + * for using custom mapping tables. + * All instances of UNormalizer2 are unmodifiable/immutable. + * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. + * For more details see the Normalizer2 C++ class. + */ + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/uset.h" + +/** + * Constants for normalization modes. + * For details about standard Unicode normalization forms + * and about the algorithms which are also used with custom mapping tables + * see http://www.unicode.org/unicode/reports/tr15/ + * @stable ICU 4.4 + */ +typedef enum { + /** + * Decomposition followed by composition. + * Same as standard NFC when using an "nfc" instance. + * Same as standard NFKC when using an "nfkc" instance. + * For details about standard Unicode normalization forms + * see http://www.unicode.org/unicode/reports/tr15/ + * @stable ICU 4.4 + */ + UNORM2_COMPOSE, + /** + * Map, and reorder canonically. + * Same as standard NFD when using an "nfc" instance. + * Same as standard NFKD when using an "nfkc" instance. + * For details about standard Unicode normalization forms + * see http://www.unicode.org/unicode/reports/tr15/ + * @stable ICU 4.4 + */ + UNORM2_DECOMPOSE, + /** + * "Fast C or D" form. + * If a string is in this form, then further decomposition <i>without reordering</i> + * would yield the same form as DECOMPOSE. + * Text in "Fast C or D" form can be processed efficiently with data tables + * that are "canonically closed", that is, that provide equivalent data for + * equivalent text, without having to be fully normalized. + * Not a standard Unicode normalization form. + * Not a unique form: Different FCD strings can be canonically equivalent. + * For details see http://www.unicode.org/notes/tn5/#FCD + * @stable ICU 4.4 + */ + UNORM2_FCD, + /** + * Compose only contiguously. + * Also known as "FCC" or "Fast C Contiguous". + * The result will often but not always be in NFC. + * The result will conform to FCD which is useful for processing. + * Not a standard Unicode normalization form. + * For details see http://www.unicode.org/notes/tn5/#FCC + * @stable ICU 4.4 + */ + UNORM2_COMPOSE_CONTIGUOUS +} UNormalization2Mode; + +/** + * Result values for normalization quick check functions. + * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms + * @stable ICU 2.0 + */ +typedef enum UNormalizationCheckResult { + /** + * The input string is not in the normalization form. + * @stable ICU 2.0 + */ + UNORM_NO, + /** + * The input string is in the normalization form. + * @stable ICU 2.0 + */ + UNORM_YES, + /** + * The input string may or may not be in the normalization form. + * This value is only returned for composition forms like NFC and FCC, + * when a backward-combining character is found for which the surrounding text + * would have to be analyzed further. + * @stable ICU 2.0 + */ + UNORM_MAYBE +} UNormalizationCheckResult; + +/** + * Opaque C service object type for the new normalization API. + * @stable ICU 4.4 + */ +struct UNormalizer2; +typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */ + +#if !UCONFIG_NO_NORMALIZATION + +/** + * Returns a UNormalizer2 instance for Unicode NFC normalization. + * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_STABLE const UNormalizer2 * U_EXPORT2 +unorm2_getNFCInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFD normalization. + * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_STABLE const UNormalizer2 * U_EXPORT2 +unorm2_getNFDInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKC normalization. + * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_STABLE const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKD normalization. + * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_STABLE const UNormalizer2 * U_EXPORT2 +unorm2_getNFKDInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. + * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). + * Returns an unmodifiable singleton instance. Do not delete it. + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested Normalizer2, if successful + * @stable ICU 49 + */ +U_STABLE const UNormalizer2 * U_EXPORT2 +unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); + +/** + * Returns a UNormalizer2 instance which uses the specified data file + * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) + * and which composes or decomposes text according to the specified mode. + * Returns an unmodifiable singleton instance. Do not delete it. + * + * Use packageName=NULL for data files that are part of ICU's own data. + * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. + * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. + * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. + * + * @param packageName NULL for ICU built-in data, otherwise application data package name + * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file + * @param mode normalization mode (compose or decompose etc.) + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested UNormalizer2, if successful + * @stable ICU 4.4 + */ +U_STABLE const UNormalizer2 * U_EXPORT2 +unorm2_getInstance(const char *packageName, + const char *name, + UNormalization2Mode mode, + UErrorCode *pErrorCode); + +/** + * Constructs a filtered normalizer wrapping any UNormalizer2 instance + * and a filter set. + * Both are aliased and must not be modified or deleted while this object + * is used. + * The filter set should be frozen; otherwise the performance will suffer greatly. + * @param norm2 wrapped UNormalizer2 instance + * @param filterSet USet which determines the characters to be normalized + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the requested UNormalizer2, if successful + * @stable ICU 4.4 + */ +U_STABLE UNormalizer2 * U_EXPORT2 +unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); + +/** + * Closes a UNormalizer2 instance from unorm2_openFiltered(). + * Do not close instances from unorm2_getInstance()! + * @param norm2 UNormalizer2 instance to be closed + * @stable ICU 4.4 + */ +U_STABLE void U_EXPORT2 +unorm2_close(UNormalizer2 *norm2); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUNormalizer2Pointer + * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); + +U_NAMESPACE_END + +#endif + +/** + * Writes the normalized form of the source string to the destination string + * (replacing its contents) and returns the length of the destination string. + * The source and destination strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param src source string + * @param length length of the source string, or -1 if NUL-terminated + * @param dest destination string; its contents is replaced with normalized src + * @param capacity number of UChars that can be written to dest + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return dest + * @stable ICU 4.4 + */ +U_STABLE int32_t U_EXPORT2 +unorm2_normalize(const UNormalizer2 *norm2, + const UChar *src, int32_t length, + UChar *dest, int32_t capacity, + UErrorCode *pErrorCode); +/** + * Appends the normalized form of the second string to the first string + * (merging them at the boundary) and returns the length of the first string. + * The result is normalized if the first string was normalized. + * The first and second strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param first string, should be normalized + * @param firstLength length of the first string, or -1 if NUL-terminated + * @param firstCapacity number of UChars that can be written to first + * @param second string, will be normalized + * @param secondLength length of the source string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @stable ICU 4.4 + */ +U_STABLE int32_t U_EXPORT2 +unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode); +/** + * Appends the second string to the first string + * (merging them at the boundary) and returns the length of the first string. + * The result is normalized if both the strings were normalized. + * The first and second strings must be different buffers. + * @param norm2 UNormalizer2 instance + * @param first string, should be normalized + * @param firstLength length of the first string, or -1 if NUL-terminated + * @param firstCapacity number of UChars that can be written to first + * @param second string, should be normalized + * @param secondLength length of the source string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return first + * @stable ICU 4.4 + */ +U_STABLE int32_t U_EXPORT2 +unorm2_append(const UNormalizer2 *norm2, + UChar *first, int32_t firstLength, int32_t firstCapacity, + const UChar *second, int32_t secondLength, + UErrorCode *pErrorCode); + +/** + * Gets the decomposition mapping of c. + * Roughly equivalent to normalizing the String form of c + * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function + * returns a negative value and does not write a string + * if c does not have a decomposition mapping in this instance's data. + * This function is independent of the mode of the UNormalizer2. + * @param norm2 UNormalizer2 instance + * @param c code point + * @param decomposition String buffer which will be set to c's + * decomposition mapping, if there is one. + * @param capacity number of UChars that can be written to decomposition + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value + * @stable ICU 4.6 + */ +U_STABLE int32_t U_EXPORT2 +unorm2_getDecomposition(const UNormalizer2 *norm2, + UChar32 c, UChar *decomposition, int32_t capacity, + UErrorCode *pErrorCode); + +/** + * Gets the raw decomposition mapping of c. + * + * This is similar to the unorm2_getDecomposition() function but returns the + * raw decomposition mapping as specified in UnicodeData.txt or + * (for custom data) in the mapping files processed by the gennorm2 tool. + * By contrast, unorm2_getDecomposition() returns the processed, + * recursively-decomposed version of this mapping. + * + * When used on a standard NFKC Normalizer2 instance, + * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. + * + * When used on a standard NFC Normalizer2 instance, + * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); + * in this case, the result contains either one or two code points (=1..4 UChars). + * + * This function is independent of the mode of the UNormalizer2. + * @param norm2 UNormalizer2 instance + * @param c code point + * @param decomposition String buffer which will be set to c's + * raw decomposition mapping, if there is one. + * @param capacity number of UChars that can be written to decomposition + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value + * @stable ICU 49 + */ +U_STABLE int32_t U_EXPORT2 +unorm2_getRawDecomposition(const UNormalizer2 *norm2, + UChar32 c, UChar *decomposition, int32_t capacity, + UErrorCode *pErrorCode); + +/** + * Performs pairwise composition of a & b and returns the composite if there is one. + * + * Returns a composite code point c only if c has a two-way mapping to a+b. + * In standard Unicode normalization, this means that + * c has a canonical decomposition to a+b + * and c does not have the Full_Composition_Exclusion property. + * + * This function is independent of the mode of the UNormalizer2. + * @param norm2 UNormalizer2 instance + * @param a A (normalization starter) code point. + * @param b Another code point. + * @return The non-negative composite code point if there is one; otherwise a negative value. + * @stable ICU 49 + */ +U_STABLE UChar32 U_EXPORT2 +unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); + +/** + * Gets the combining class of c. + * The default implementation returns 0 + * but all standard implementations return the Unicode Canonical_Combining_Class value. + * @param norm2 UNormalizer2 instance + * @param c code point + * @return c's combining class + * @stable ICU 49 + */ +U_STABLE uint8_t U_EXPORT2 +unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); + +/** + * Tests if the string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return TRUE if s is normalized + * @stable ICU 4.4 + */ +U_STABLE UBool U_EXPORT2 +unorm2_isNormalized(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Tests if the string is normalized. + * For the two COMPOSE modes, the result could be "maybe" in cases that + * would take a little more work to resolve definitively. + * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster + * combination of quick check + normalization, to avoid + * re-checking the "yes" prefix. + * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return UNormalizationCheckResult + * @stable ICU 4.4 + */ +U_STABLE UNormalizationCheckResult U_EXPORT2 +unorm2_quickCheck(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Returns the end of the normalized substring of the input string. + * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> + * the substring <code>UnicodeString(s, 0, end)</code> + * will pass the quick check with a "yes" result. + * + * The returned end index is usually one or more characters before the + * "no" or "maybe" character: The end index is at a normalization boundary. + * (See the class documentation for more about normalization boundaries.) + * + * When the goal is a normalized string and most input strings are expected + * to be normalized already, then call this method, + * and if it returns a prefix shorter than the input string, + * copy that prefix and use normalizeSecondAndAppend() for the remainder. + * @param norm2 UNormalizer2 instance + * @param s input string + * @param length length of the string, or -1 if NUL-terminated + * @param pErrorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return "yes" span end index + * @stable ICU 4.4 + */ +U_STABLE int32_t U_EXPORT2 +unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, + const UChar *s, int32_t length, + UErrorCode *pErrorCode); + +/** + * Tests if the character always has a normalization boundary before it, + * regardless of context. + * For details see the Normalizer2 base class documentation. + * @param norm2 UNormalizer2 instance + * @param c character to test + * @return TRUE if c has a normalization boundary before it + * @stable ICU 4.4 + */ +U_STABLE UBool U_EXPORT2 +unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); + +/** + * Tests if the character always has a normalization boundary after it, + * regardless of context. + * For details see the Normalizer2 base class documentation. + * @param norm2 UNormalizer2 instance + * @param c character to test + * @return TRUE if c has a normalization boundary after it + * @stable ICU 4.4 + */ +U_STABLE UBool U_EXPORT2 +unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); + +/** + * Tests if the character is normalization-inert. + * For details see the Normalizer2 base class documentation. + * @param norm2 UNormalizer2 instance + * @param c character to test + * @return TRUE if c is normalization-inert + * @stable ICU 4.4 + */ +U_STABLE UBool U_EXPORT2 +unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); + +#endif /* !UCONFIG_NO_NORMALIZATION */ +#endif /* __UNORM2_H__ */ diff --git a/Source/WTF/icu/unicode/uobject.h b/Source/WTF/icu/unicode/uobject.h new file mode 100644 index 000000000..54ceace62 --- /dev/null +++ b/Source/WTF/icu/unicode/uobject.h @@ -0,0 +1,320 @@ +/* +****************************************************************************** +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: uobject.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jun26 +* created by: Markus W. Scherer +*/ + +#ifndef __UOBJECT_H__ +#define __UOBJECT_H__ + +#include "unicode/utypes.h" + +/** + * \file + * \brief C++ API: Common ICU base class UObject. + */ + +/** + * @{ + * \def U_NO_THROW + * Define this to define the throw() specification so + * certain functions do not throw any exceptions + * + * UMemory operator new methods should have the throw() specification + * appended to them, so that the compiler adds the additional NULL check + * before calling constructors. Without, if <code>operator new</code> returns NULL the + * constructor is still called, and if the constructor references member + * data, (which it typically does), the result is a segmentation violation. + * + * @stable ICU 4.2 + */ +#ifndef U_NO_THROW +#define U_NO_THROW throw() +#endif + +/** @} */ + +/*===========================================================================*/ +/* UClassID-based RTTI */ +/*===========================================================================*/ + +/** + * UClassID is used to identify classes without using the compiler's RTTI. + * This was used before C++ compilers consistently supported RTTI. + * ICU 4.6 requires compiler RTTI to be turned on. + * + * Each class hierarchy which needs + * to implement polymorphic clone() or operator==() defines two methods, + * described in detail below. UClassID values can be compared using + * operator==(). Nothing else should be done with them. + * + * \par + * In class hierarchies that implement "poor man's RTTI", + * each concrete subclass implements getDynamicClassID() in the same way: + * + * \code + * class Derived { + * public: + * virtual UClassID getDynamicClassID() const + * { return Derived::getStaticClassID(); } + * } + * \endcode + * + * Each concrete class implements getStaticClassID() as well, which allows + * clients to test for a specific type. + * + * \code + * class Derived { + * public: + * static UClassID U_EXPORT2 getStaticClassID(); + * private: + * static char fgClassID; + * } + * + * // In Derived.cpp: + * UClassID Derived::getStaticClassID() + * { return (UClassID)&Derived::fgClassID; } + * char Derived::fgClassID = 0; // Value is irrelevant + * \endcode + * @stable ICU 2.0 + */ +typedef void* UClassID; + +U_NAMESPACE_BEGIN + +/** + * UMemory is the common ICU base class. + * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4). + * + * This is primarily to make it possible and simple to override the + * C++ memory management by adding new/delete operators to this base class. + * + * To override ALL ICU memory management, including that from plain C code, + * replace the allocation functions declared in cmemory.h + * + * UMemory does not contain any virtual functions. + * Common "boilerplate" functions are defined in UObject. + * + * @stable ICU 2.4 + */ +class U_COMMON_API UMemory { +public: + +/* test versions for debugging shaper heap memory problems */ +#ifdef SHAPER_MEMORY_DEBUG + static void * NewArray(int size, int count); + static void * GrowArray(void * array, int newSize ); + static void FreeArray(void * array ); +#endif + +#if U_OVERRIDE_CXX_ALLOCATION + /** + * Override for ICU4C C++ memory management. + * simple, non-class types are allocated using the macros in common/cmemory.h + * (uprv_malloc(), uprv_free(), uprv_realloc()); + * they or something else could be used here to implement C++ new/delete + * for ICU4C C++ classes + * @stable ICU 2.4 + */ + static void * U_EXPORT2 operator new(size_t size) U_NO_THROW; + + /** + * Override for ICU4C C++ memory management. + * See new(). + * @stable ICU 2.4 + */ + static void * U_EXPORT2 operator new[](size_t size) U_NO_THROW; + + /** + * Override for ICU4C C++ memory management. + * simple, non-class types are allocated using the macros in common/cmemory.h + * (uprv_malloc(), uprv_free(), uprv_realloc()); + * they or something else could be used here to implement C++ new/delete + * for ICU4C C++ classes + * @stable ICU 2.4 + */ + static void U_EXPORT2 operator delete(void *p) U_NO_THROW; + + /** + * Override for ICU4C C++ memory management. + * See delete(). + * @stable ICU 2.4 + */ + static void U_EXPORT2 operator delete[](void *p) U_NO_THROW; + +#if U_HAVE_PLACEMENT_NEW + /** + * Override for ICU4C C++ memory management for STL. + * See new(). + * @stable ICU 2.6 + */ + static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NO_THROW { return ptr; } + + /** + * Override for ICU4C C++ memory management for STL. + * See delete(). + * @stable ICU 2.6 + */ + static inline void U_EXPORT2 operator delete(void *, void *) U_NO_THROW {} +#endif /* U_HAVE_PLACEMENT_NEW */ +#if U_HAVE_DEBUG_LOCATION_NEW + /** + * This method overrides the MFC debug version of the operator new + * + * @param size The requested memory size + * @param file The file where the allocation was requested + * @param line The line where the allocation was requested + */ + static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NO_THROW; + /** + * This method provides a matching delete for the MFC debug new + * + * @param p The pointer to the allocated memory + * @param file The file where the allocation was requested + * @param line The line where the allocation was requested + */ + static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NO_THROW; +#endif /* U_HAVE_DEBUG_LOCATION_NEW */ +#endif /* U_OVERRIDE_CXX_ALLOCATION */ + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + UMemory &UMemory::operator=(const UMemory &); + */ +}; + +/** + * UObject is the common ICU "boilerplate" class. + * UObject inherits UMemory (starting with ICU 2.4), + * and all other public ICU C++ classes + * are derived from UObject (starting with ICU 2.2). + * + * UObject contains common virtual functions, in particular a virtual destructor. + * + * The clone() function is not available in UObject because it is not + * implemented by all ICU classes. + * Many ICU services provide a clone() function for their class trees, + * defined on the service's C++ base class, and all subclasses within that + * service class tree return a pointer to the service base class + * (which itself is a subclass of UObject). + * This is because some compilers do not support covariant (same-as-this) + * return types; cast to the appropriate subclass if necessary. + * + * @stable ICU 2.2 + */ +class U_COMMON_API UObject : public UMemory { +public: + /** + * Destructor. + * + * @stable ICU 2.2 + */ + virtual ~UObject(); + + /** + * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class. + * The base class implementation returns a dummy value. + * + * Use compiler RTTI rather than ICU's "poor man's RTTI". + * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI". + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + +protected: + // the following functions are protected to prevent instantiation and + // direct use of UObject itself + + // default constructor + // inline UObject() {} + + // copy constructor + // inline UObject(const UObject &other) {} + +#if 0 + // TODO Sometime in the future. Implement operator==(). + // (This comment inserted in 2.2) + // some or all of the following "boilerplate" functions may be made public + // in a future ICU4C release when all subclasses implement them + + // assignment operator + // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74) + // commented out because the implementation is the same as a compiler's default + // UObject &operator=(const UObject &other) { return *this; } + + // comparison operators + virtual inline UBool operator==(const UObject &other) const { return this==&other; } + inline UBool operator!=(const UObject &other) const { return !operator==(other); } + + // clone() commented out from the base class: + // some compilers do not support co-variant return types + // (i.e., subclasses would have to return UObject * as well, instead of SubClass *) + // see also UObject class documentation. + // virtual UObject *clone() const; +#endif + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + UObject &UObject::operator=(const UObject &); + */ +}; + +#ifndef U_HIDE_INTERNAL_API +/** + * This is a simple macro to add ICU RTTI to an ICU object implementation. + * This does not go into the header. This should only be used in *.cpp files. + * + * @param myClass The name of the class that needs RTTI defined. + * @internal + */ +#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \ + UClassID U_EXPORT2 myClass::getStaticClassID() { \ + static char classID = 0; \ + return (UClassID)&classID; \ + } \ + UClassID myClass::getDynamicClassID() const \ + { return myClass::getStaticClassID(); } + + +/** + * This macro adds ICU RTTI to an ICU abstract class implementation. + * This macro should be invoked in *.cpp files. The corresponding + * header should declare getStaticClassID. + * + * @param myClass The name of the class that needs RTTI defined. + * @internal + */ +#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \ + UClassID U_EXPORT2 myClass::getStaticClassID() { \ + static char classID = 0; \ + return (UClassID)&classID; \ + } + +#endif /* U_HIDE_INTERNAL_API */ + +U_NAMESPACE_END + +#endif diff --git a/Source/WTF/icu/unicode/urename.h b/Source/WTF/icu/unicode/urename.h new file mode 100644 index 000000000..6b1f49098 --- /dev/null +++ b/Source/WTF/icu/unicode/urename.h @@ -0,0 +1,1825 @@ +/* +******************************************************************************* +* Copyright (C) 2002-2013, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: urename.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Perl script tools/genren.pl written by Vladimir Weinstein +* +* Contains data for renaming ICU exports. +* Gets included by umachine.h +* +* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT +* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN! +*/ + +#ifndef URENAME_H +#define URENAME_H + +/* U_DISABLE_RENAMING can be defined in the following ways: + * - when running configure, e.g. + * runConfigureICU Linux --disable-renaming + * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h + */ + +#include "unicode/uconfig.h" + +#if !U_DISABLE_RENAMING + +/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give + the platform a chance to define it first. + Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined. + */ + +#ifndef U_ICU_ENTRY_POINT_RENAME +#include "unicode/umachine.h" +#endif + +/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */ +#ifndef U_ICU_ENTRY_POINT_RENAME +#include "unicode/uvernum.h" +#endif + +/* Error out before the following defines cause very strange and unexpected code breakage */ +#ifndef U_ICU_ENTRY_POINT_RENAME +#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used. +#endif + + +/* C exports renaming data */ + +#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString) +#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString) +#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger) +#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase) +#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase) +#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE) +#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP) +#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP) +#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE) +#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE) +#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP) +#define UCNV_TO_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP) +#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE) +#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance) +#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init) +#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded) +#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer) +#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) +#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) +#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) +#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) +#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) +#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) +#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) +#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) +#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) +#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) +#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) +#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) +#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16) +#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17) +#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18) +#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19) +#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2) +#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3) +#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4) +#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5) +#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6) +#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) +#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) +#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) +#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) +#define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) +#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) +#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) +#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) +#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) +#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) +#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) +#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) +#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) +#define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) +#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone) +#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close) +#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals) +#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings) +#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID) +#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart) +#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart) +#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName) +#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart) +#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart) +#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset) +#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID) +#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo) +#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open) +#define le_close U_ICU_ENTRY_POINT_RENAME(le_close) +#define le_create U_ICU_ENTRY_POINT_RENAME(le_create) +#define le_getCharIndices U_ICU_ENTRY_POINT_RENAME(le_getCharIndices) +#define le_getCharIndicesWithBase U_ICU_ENTRY_POINT_RENAME(le_getCharIndicesWithBase) +#define le_getGlyphCount U_ICU_ENTRY_POINT_RENAME(le_getGlyphCount) +#define le_getGlyphPosition U_ICU_ENTRY_POINT_RENAME(le_getGlyphPosition) +#define le_getGlyphPositions U_ICU_ENTRY_POINT_RENAME(le_getGlyphPositions) +#define le_getGlyphs U_ICU_ENTRY_POINT_RENAME(le_getGlyphs) +#define le_layoutChars U_ICU_ENTRY_POINT_RENAME(le_layoutChars) +#define le_reset U_ICU_ENTRY_POINT_RENAME(le_reset) +#define locale_getKeywords U_ICU_ENTRY_POINT_RENAME(locale_getKeywords) +#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart) +#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default) +#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default) +#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun) +#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun) +#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun) +#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close) +#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns) +#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine) +#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns) +#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns) +#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns) +#define pl_create U_ICU_ENTRY_POINT_RENAME(pl_create) +#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent) +#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent) +#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount) +#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont) +#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit) +#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit) +#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading) +#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent) +#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent) +#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading) +#define pl_getLineVisualRun U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun) +#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth) +#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount) +#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit) +#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit) +#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale) +#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel) +#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection) +#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount) +#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit) +#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit) +#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue) +#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent) +#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent) +#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection) +#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont) +#define pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount) +#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap) +#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs) +#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading) +#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions) +#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex) +#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine) +#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns) +#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns) +#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns) +#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns) +#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns) +#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns) +#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow) +#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns) +#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns) +#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns) +#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems) +#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource) +#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias) +#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem) +#define res_getBinary U_ICU_ENTRY_POINT_RENAME(res_getBinary) +#define res_getIntVector U_ICU_ENTRY_POINT_RENAME(res_getIntVector) +#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType) +#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource) +#define res_getString U_ICU_ENTRY_POINT_RENAME(res_getString) +#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex) +#define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey) +#define res_load U_ICU_ENTRY_POINT_RENAME(res_load) +#define res_read U_ICU_ENTRY_POINT_RENAME(res_read) +#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload) +#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars) +#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy) +#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy) +#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose) +#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets) +#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen) +#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge) +#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue) +#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection) +#define u_charFromName U_ICU_ENTRY_POINT_RENAME(u_charFromName) +#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror) +#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName) +#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType) +#define u_charsToUChars U_ICU_ENTRY_POINT_RENAME(u_charsToUChars) +#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup) +#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32) +#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit) +#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames) +#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes) +#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName) +#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt) +#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose) +#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof) +#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush) +#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter) +#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat) +#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc) +#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage) +#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx) +#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile) +#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale) +#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets) +#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read) +#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write) +#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush) +#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit) +#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter) +#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase) +#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen) +#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit) +#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage) +#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError) +#define u_fprintf U_ICU_ENTRY_POINT_RENAME(u_fprintf) +#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u) +#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc) +#define u_fputs U_ICU_ENTRY_POINT_RENAME(u_fputs) +#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind) +#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf) +#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u) +#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage) +#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale) +#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator) +#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen) +#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc) +#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket) +#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass) +#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory) +#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion) +#define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter) +#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure) +#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment) +#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue) +#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue) +#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue) +#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties) +#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue) +#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum) +#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName) +#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum) +#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName) +#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties) +#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion) +#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion) +#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout) +#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty) +#define u_init U_ICU_ENTRY_POINT_RENAME(u_init) +#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable) +#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart) +#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart) +#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl) +#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart) +#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart) +#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar) +#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored) +#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic) +#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase) +#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase) +#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace) +#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace) +#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum) +#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX) +#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha) +#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase) +#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank) +#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl) +#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined) +#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit) +#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph) +#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX) +#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower) +#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint) +#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX) +#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct) +#define u_isspace U_ICU_ENTRY_POINT_RENAME(u_isspace) +#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle) +#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper) +#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit) +#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close) +#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat) +#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init) +#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp) +#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr) +#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32) +#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp) +#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder) +#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy) +#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove) +#define u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr) +#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32) +#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset) +#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage) +#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError) +#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf) +#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse) +#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u) +#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter) +#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse) +#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions) +#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory) +#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions) +#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions) +#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic) +#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf) +#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u) +#define u_sprintf U_ICU_ENTRY_POINT_RENAME(u_sprintf) +#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u) +#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf) +#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u) +#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare) +#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare) +#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter) +#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst) +#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast) +#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase) +#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub) +#define u_strFromPunycode U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode) +#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32) +#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub) +#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8) +#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient) +#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub) +#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS) +#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than) +#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8) +#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower) +#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode) +#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle) +#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32) +#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub) +#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8) +#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub) +#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper) +#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS) +#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp) +#define u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat) +#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr) +#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32) +#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp) +#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder) +#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold) +#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy) +#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn) +#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen) +#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp) +#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat) +#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp) +#define u_strncmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder) +#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy) +#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk) +#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr) +#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32) +#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr) +#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn) +#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr) +#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r) +#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars) +#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s) +#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars) +#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars) +#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower) +#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle) +#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper) +#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy) +#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy) +#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape) +#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt) +#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString) +#define u_versionFromUString U_ICU_ENTRY_POINT_RENAME(u_versionFromUString) +#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString) +#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage) +#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError) +#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf) +#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u) +#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf) +#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u) +#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage) +#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError) +#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf) +#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u) +#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf) +#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u) +#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf) +#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u) +#define u_writeDiff U_ICU_ENTRY_POINT_RENAME(u_writeDiff) +#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun) +#define u_writeIdenticalLevelRunTwoChars U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRunTwoChars) +#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts) +#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close) +#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs) +#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns) +#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection) +#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass) +#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback) +#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass) +#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection) +#define ubidi_getJoiningGroup U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup) +#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType) +#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength) +#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt) +#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels) +#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex) +#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap) +#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun) +#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue) +#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory) +#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror) +#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket) +#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType) +#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel) +#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex) +#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph) +#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex) +#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength) +#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode) +#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions) +#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength) +#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns) +#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton) +#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText) +#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex) +#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap) +#define ubidi_getVisualRun U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun) +#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap) +#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl) +#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse) +#define ubidi_isJoinControl U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl) +#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored) +#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR) +#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open) +#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized) +#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR) +#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical) +#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual) +#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback) +#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext) +#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse) +#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine) +#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara) +#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode) +#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions) +#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered) +#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse) +#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode) +#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close) +#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable) +#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current) +#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first) +#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following) +#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable) +#define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType) +#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus) +#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec) +#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary) +#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last) +#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next) +#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open) +#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules) +#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding) +#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous) +#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText) +#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone) +#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText) +#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText) +#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap) +#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add) +#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear) +#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField) +#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone) +#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close) +#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable) +#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo) +#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get) +#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute) +#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable) +#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID) +#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings) +#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType) +#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone) +#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference) +#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange) +#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale) +#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit) +#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType) +#define ucal_getMillis U_ICU_ENTRY_POINT_RENAME(ucal_getMillis) +#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow) +#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion) +#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName) +#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID) +#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID) +#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate) +#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType) +#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition) +#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID) +#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime) +#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet) +#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend) +#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open) +#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones) +#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration) +#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones) +#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll) +#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set) +#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute) +#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate) +#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime) +#define ucal_setDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone) +#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange) +#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis) +#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone) +#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure) +#define ucase_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts) +#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) +#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) +#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) +#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton) +#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) +#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) +#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) +#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive) +#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted) +#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding) +#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower) +#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle) +#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper) +#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower) +#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle) +#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper) +#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close) +#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator) +#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale) +#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions) +#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle) +#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8) +#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open) +#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator) +#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale) +#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions) +#define ucasemap_toTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle) +#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase) +#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower) +#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle) +#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper) +#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts) +#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames) +#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne) +#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup) +#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup) +#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup) +#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup) +#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup) +#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32) +#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets) +#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode) +#define ucnv_MBCSGetType U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType) +#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode) +#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte) +#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar) +#define ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets) +#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters) +#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter) +#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter) +#define ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes) +#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub) +#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars) +#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub) +#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars) +#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close) +#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames) +#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert) +#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx) +#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases) +#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable) +#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards) +#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter) +#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter) +#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage) +#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData) +#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature) +#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU) +#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU) +#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet) +#define ucnv_extInitialMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU) +#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU) +#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU) +#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU) +#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator) +#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache) +#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic) +#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars) +#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending) +#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes) +#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode) +#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8) +#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC) +#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias) +#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases) +#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName) +#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID) +#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName) +#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet) +#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName) +#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName) +#define ucnv_getFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack) +#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars) +#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars) +#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize) +#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize) +#define ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName) +#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar) +#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet) +#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform) +#define ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard) +#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName) +#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters) +#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars) +#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack) +#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType) +#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet) +#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount) +#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters) +#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName) +#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare) +#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare) +#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous) +#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth) +#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load) +#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData) +#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open) +#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames) +#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID) +#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage) +#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames) +#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU) +#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset) +#define ucnv_resetFromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode) +#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode) +#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone) +#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName) +#define ucnv_setFallback U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback) +#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack) +#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars) +#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString) +#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack) +#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap) +#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases) +#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic) +#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars) +#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending) +#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint) +#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars) +#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode) +#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload) +#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady) +#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback) +#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close) +#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open) +#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized) +#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString) +#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8) +#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize) +#define ucol_allocWeights U_ICU_ENTRY_POINT_RENAME(ucol_allocWeights) +#define ucol_assembleTailoringTable U_ICU_ENTRY_POINT_RENAME(ucol_assembleTailoringTable) +#define ucol_buildPermutationTable U_ICU_ENTRY_POINT_RENAME(ucol_buildPermutationTable) +#define ucol_calcSortKey U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKey) +#define ucol_calcSortKeySimpleTertiary U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKeySimpleTertiary) +#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary) +#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close) +#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements) +#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable) +#define ucol_createElements U_ICU_ENTRY_POINT_RENAME(ucol_createElements) +#define ucol_doCE U_ICU_ENTRY_POINT_RENAME(ucol_doCE) +#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal) +#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals) +#define ucol_findReorderingEntry U_ICU_ENTRY_POINT_RENAME(ucol_findReorderingEntry) +#define ucol_forceHanImplicit U_ICU_ENTRY_POINT_RENAME(ucol_forceHanImplicit) +#define ucol_forgetUCA U_ICU_ENTRY_POINT_RENAME(ucol_forgetUCA) +#define ucol_freeOffsetBuffer U_ICU_ENTRY_POINT_RENAME(ucol_freeOffsetBuffer) +#define ucol_getAttribute U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute) +#define ucol_getAttributeOrDefault U_ICU_ENTRY_POINT_RENAME(ucol_getAttributeOrDefault) +#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable) +#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound) +#define ucol_getCEStrengthDifference U_ICU_ENTRY_POINT_RENAME(ucol_getCEStrengthDifference) +#define ucol_getCollationKey U_ICU_ENTRY_POINT_RENAME(ucol_getCollationKey) +#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions) +#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions) +#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName) +#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes) +#define ucol_getFirstCE U_ICU_ENTRY_POINT_RENAME(ucol_getFirstCE) +#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent) +#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues) +#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale) +#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords) +#define ucol_getLeadBytesForReorderCode U_ICU_ENTRY_POINT_RENAME(ucol_getLeadBytesForReorderCode) +#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale) +#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType) +#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion) +#define ucol_getNextCE U_ICU_ENTRY_POINT_RENAME(ucol_getNextCE) +#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset) +#define ucol_getPrevCE U_ICU_ENTRY_POINT_RENAME(ucol_getPrevCE) +#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes) +#define ucol_getReorderCodesForLeadByte U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodesForLeadByte) +#define ucol_getRules U_ICU_ENTRY_POINT_RENAME(ucol_getRules) +#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx) +#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString) +#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey) +#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength) +#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet) +#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion) +#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet) +#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop) +#define ucol_getVersion U_ICU_ENTRY_POINT_RENAME(ucol_getVersion) +#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater) +#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual) +#define ucol_initBuffers U_ICU_ENTRY_POINT_RENAME(ucol_initBuffers) +#define ucol_initCollator U_ICU_ENTRY_POINT_RENAME(ucol_initCollator) +#define ucol_initInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_initInverseUCA) +#define ucol_initUCA U_ICU_ENTRY_POINT_RENAME(ucol_initUCA) +#define ucol_inv_getNextCE U_ICU_ENTRY_POINT_RENAME(ucol_inv_getNextCE) +#define ucol_inv_getPrevCE U_ICU_ENTRY_POINT_RENAME(ucol_inv_getPrevCE) +#define ucol_isTailored U_ICU_ENTRY_POINT_RENAME(ucol_isTailored) +#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode) +#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary) +#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys) +#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next) +#define ucol_nextProcessed U_ICU_ENTRY_POINT_RENAME(ucol_nextProcessed) +#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart) +#define ucol_nextWeight U_ICU_ENTRY_POINT_RENAME(ucol_nextWeight) +#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString) +#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open) +#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales) +#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary) +#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements) +#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString) +#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules) +#define ucol_openRulesForImport U_ICU_ENTRY_POINT_RENAME(ucol_openRulesForImport) +#define ucol_open_internal U_ICU_ENTRY_POINT_RENAME(ucol_open_internal) +#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen) +#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous) +#define ucol_previousProcessed U_ICU_ENTRY_POINT_RENAME(ucol_previousProcessed) +#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder) +#define ucol_prv_getSpecialCE U_ICU_ENTRY_POINT_RENAME(ucol_prv_getSpecialCE) +#define ucol_prv_getSpecialPrevCE U_ICU_ENTRY_POINT_RENAME(ucol_prv_getSpecialPrevCE) +#define ucol_reset U_ICU_ENTRY_POINT_RENAME(ucol_reset) +#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop) +#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone) +#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder) +#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute) +#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset) +#define ucol_setOptionsFromHeader U_ICU_ENTRY_POINT_RENAME(ucol_setOptionsFromHeader) +#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes) +#define ucol_setReqValidLocales U_ICU_ENTRY_POINT_RENAME(ucol_setReqValidLocales) +#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength) +#define ucol_setText U_ICU_ENTRY_POINT_RENAME(ucol_setText) +#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop) +#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll) +#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter) +#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8) +#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap) +#define ucol_swapBinary U_ICU_ENTRY_POINT_RENAME(ucol_swapBinary) +#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA) +#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder) +#define ucol_tok_assembleTokenList U_ICU_ENTRY_POINT_RENAME(ucol_tok_assembleTokenList) +#define ucol_tok_closeTokenList U_ICU_ENTRY_POINT_RENAME(ucol_tok_closeTokenList) +#define ucol_tok_getNextArgument U_ICU_ENTRY_POINT_RENAME(ucol_tok_getNextArgument) +#define ucol_tok_getRulesFromBundle U_ICU_ENTRY_POINT_RENAME(ucol_tok_getRulesFromBundle) +#define ucol_tok_initTokenList U_ICU_ENTRY_POINT_RENAME(ucol_tok_initTokenList) +#define ucol_tok_parseNextToken U_ICU_ENTRY_POINT_RENAME(ucol_tok_parseNextToken) +#define ucol_updateInternalState U_ICU_ENTRY_POINT_RENAME(ucol_updateInternalState) +#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close) +#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect) +#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll) +#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter) +#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets) +#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence) +#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets) +#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage) +#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName) +#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars) +#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled) +#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open) +#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding) +#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset) +#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText) +#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies) +#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale) +#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate) +#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits) +#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale) +#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName) +#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode) +#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName) +#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement) +#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable) +#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies) +#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register) +#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister) +#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern) +#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative) +#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone) +#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close) +#define udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable) +#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols) +#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format) +#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart) +#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable) +#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar) +#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext) +#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType) +#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat) +#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols) +#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient) +#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open) +#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse) +#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar) +#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener) +#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart) +#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar) +#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext) +#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient) +#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat) +#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols) +#define udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField) +#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern) +#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate) +#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime) +#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener) +#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData) +#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close) +#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper) +#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize) +#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo) +#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize) +#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength) +#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory) +#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory) +#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open) +#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice) +#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper) +#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData) +#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError) +#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16) +#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32) +#define udata_setAppData U_ICU_ENTRY_POINT_RENAME(udata_setAppData) +#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData) +#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess) +#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader) +#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock) +#define udatpg_addPattern U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern) +#define udatpg_clone U_ICU_ENTRY_POINT_RENAME(udatpg_clone) +#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close) +#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat) +#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName) +#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton) +#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern) +#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) +#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) +#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) +#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton) +#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton) +#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open) +#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons) +#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty) +#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons) +#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes) +#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions) +#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat) +#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName) +#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat) +#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal) +#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap) +#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close) +#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format) +#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open) +#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close) +#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count) +#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next) +#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault) +#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration) +#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration) +#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration) +#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset) +#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext) +#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault) +#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit) +#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer) +#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io) +#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit) +#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch) +#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32) +#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou) +#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close) +#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode) +#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue) +#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex) +#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength) +#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate) +#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars) +#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble) +#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64) +#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong) +#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject) +#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType) +#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars) +#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric) +#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit) +#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open) +#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou) +#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64) +#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop) +#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance) +#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender) +#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close) +#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString) +#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars) +#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars) +#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong) +#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet) +#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars) +#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString) +#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count) +#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable) +#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet) +#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals) +#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet) +#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find) +#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get) +#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti) +#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString) +#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars) +#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars) +#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong) +#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet) +#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars) +#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString) +#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget) +#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti) +#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init) +#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput) +#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi) +#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove) +#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei) +#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement) +#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open) +#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize) +#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put) +#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti) +#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove) +#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll) +#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement) +#define uhash_removei U_ICU_ENTRY_POINT_RENAME(uhash_removei) +#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator) +#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter) +#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher) +#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy) +#define uhash_setValueComparator U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator) +#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter) +#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII) +#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode) +#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close) +#define uidna_compare U_ICU_ENTRY_POINT_RENAME(uidna_compare) +#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII) +#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8) +#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode) +#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8) +#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII) +#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8) +#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode) +#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8) +#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46) +#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII) +#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode) +#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32) +#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState) +#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32) +#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32) +#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator) +#define uiter_setReplaceable U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable) +#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState) +#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString) +#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE) +#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8) +#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close) +#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext) +#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling) +#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale) +#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName) +#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName) +#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName) +#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName) +#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open) +#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext) +#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName) +#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName) +#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName) +#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName) +#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList) +#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList) +#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator) +#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString) +#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values) +#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList) +#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList) +#define ulist_getListFromEnum U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum) +#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize) +#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext) +#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value) +#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList) +#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator) +#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage) +#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP) +#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags) +#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize) +#define uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable) +#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag) +#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable) +#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName) +#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation) +#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry) +#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID) +#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID) +#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault) +#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry) +#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword) +#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue) +#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage) +#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName) +#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript) +#define uloc_getDisplayScriptInContext U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScriptInContext) +#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant) +#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country) +#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language) +#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries) +#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages) +#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue) +#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID) +#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage) +#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation) +#define uloc_getLocaleForLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID) +#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName) +#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent) +#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript) +#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback) +#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant) +#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags) +#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList) +#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords) +#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault) +#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue) +#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag) +#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close) +#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion) +#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter) +#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet) +#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern) +#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator) +#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem) +#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute) +#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize) +#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open) +#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute) +#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry) +#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage) +#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript) +#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern) +#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe) +#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone) +#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close) +#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format) +#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale) +#define umsg_open U_ICU_ENTRY_POINT_RENAME(umsg_open) +#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse) +#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale) +#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern) +#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat) +#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse) +#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock) +#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock) +#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance) +#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append) +#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close) +#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair) +#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass) +#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition) +#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance) +#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance) +#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance) +#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance) +#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance) +#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance) +#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition) +#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter) +#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore) +#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert) +#define unorm2_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized) +#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize) +#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend) +#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered) +#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck) +#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes) +#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap) +#define unorm_closeIter U_ICU_ENTRY_POINT_RENAME(unorm_closeIter) +#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare) +#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate) +#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16) +#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck) +#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized) +#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions) +#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next) +#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize) +#define unorm_openIter U_ICU_ENTRY_POINT_RENAME(unorm_openIter) +#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous) +#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck) +#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions) +#define unorm_setIter U_ICU_ENTRY_POINT_RENAME(unorm_setIter) +#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern) +#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone) +#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close) +#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable) +#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format) +#define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal) +#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble) +#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency) +#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64) +#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable) +#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute) +#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable) +#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute) +#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType) +#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol) +#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute) +#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open) +#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse) +#define unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal) +#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble) +#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency) +#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64) +#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable) +#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute) +#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute) +#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol) +#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute) +#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern) +#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close) +#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription) +#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName) +#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix) +#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic) +#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open) +#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames) +#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName) +#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close) +#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open) +#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType) +#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select) +#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary) +#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary) +#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration) +#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext) +#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel) +#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary) +#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName) +#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal) +#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel) +#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus) +#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName) +#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile) +#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName) +#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init) +#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint) +#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary) +#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug) +#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary) +#define uplug_removePlug U_ICU_ENTRY_POINT_RENAME(uplug_removePlug) +#define uplug_setContext U_ICU_ENTRY_POINT_RENAME(uplug_setContext) +#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel) +#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName) +#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload) +#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource) +#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts) +#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy) +#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic) +#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower) +#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc) +#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil) +#define uprv_cnttab_addContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_addContraction) +#define uprv_cnttab_changeContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_changeContraction) +#define uprv_cnttab_changeLastCE U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_changeLastCE) +#define uprv_cnttab_clone U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_clone) +#define uprv_cnttab_close U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_close) +#define uprv_cnttab_constructTable U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_constructTable) +#define uprv_cnttab_findCE U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_findCE) +#define uprv_cnttab_findCP U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_findCP) +#define uprv_cnttab_getCE U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_getCE) +#define uprv_cnttab_insertContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_insertContraction) +#define uprv_cnttab_isTailored U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_isTailored) +#define uprv_cnttab_open U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_open) +#define uprv_cnttab_setContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_setContraction) +#define uprv_collIterateAtEnd U_ICU_ENTRY_POINT_RENAME(uprv_collIterateAtEnd) +#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames) +#define uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames) +#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii) +#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic) +#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii) +#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID) +#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix) +#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii) +#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic) +#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus) +#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault) +#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding) +#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus) +#define uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus) +#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus) +#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding) +#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus) +#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString) +#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet) +#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet) +#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString) +#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus) +#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus) +#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus) +#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs) +#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd) +#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd) +#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass) +#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString) +#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare) +#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal) +#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal) +#define uprv_decNumberCompareTotalMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag) +#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy) +#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs) +#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate) +#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign) +#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide) +#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger) +#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp) +#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA) +#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32) +#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString) +#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32) +#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD) +#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert) +#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal) +#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal) +#define uprv_decNumberLn U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn) +#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10) +#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB) +#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax) +#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag) +#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin) +#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag) +#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus) +#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply) +#define uprv_decNumberNextMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus) +#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus) +#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward) +#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize) +#define uprv_decNumberOr U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr) +#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus) +#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower) +#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize) +#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce) +#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder) +#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear) +#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale) +#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate) +#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum) +#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB) +#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD) +#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift) +#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot) +#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract) +#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString) +#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32) +#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact) +#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue) +#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString) +#define uprv_decNumberToUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32) +#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim) +#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion) +#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor) +#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero) +#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject) +#define uprv_delete_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_delete_collIterate) +#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close) +#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open) +#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func) +#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy) +#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii) +#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii) +#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower) +#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs) +#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor) +#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax) +#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin) +#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod) +#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free) +#define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters) +#define uprv_getDefaultCodepage U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultCodepage) +#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID) +#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity) +#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength) +#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues) +#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN) +#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime) +#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName) +#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) +#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties) +#define uprv_init_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_init_collIterate) +#define uprv_init_pce U_ICU_ENTRY_POINT_RENAME(uprv_init_pce) +#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) +#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) +#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) +#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString) +#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString) +#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN) +#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity) +#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity) +#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou) +#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log) +#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc) +#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile) +#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max) +#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa) +#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr) +#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min) +#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf) +#define uprv_new_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_new_collIterate) +#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency) +#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute) +#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow) +#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10) +#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc) +#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round) +#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray) +#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch) +#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare) +#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup) +#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp) +#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup) +#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp) +#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError) +#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone) +#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper) +#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc) +#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname) +#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset) +#define uprv_uca_addAnElement U_ICU_ENTRY_POINT_RENAME(uprv_uca_addAnElement) +#define uprv_uca_assembleTable U_ICU_ENTRY_POINT_RENAME(uprv_uca_assembleTable) +#define uprv_uca_canonicalClosure U_ICU_ENTRY_POINT_RENAME(uprv_uca_canonicalClosure) +#define uprv_uca_closeTempTable U_ICU_ENTRY_POINT_RENAME(uprv_uca_closeTempTable) +#define uprv_uca_getCodePointFromRaw U_ICU_ENTRY_POINT_RENAME(uprv_uca_getCodePointFromRaw) +#define uprv_uca_getImplicitFromRaw U_ICU_ENTRY_POINT_RENAME(uprv_uca_getImplicitFromRaw) +#define uprv_uca_getRawFromCodePoint U_ICU_ENTRY_POINT_RENAME(uprv_uca_getRawFromCodePoint) +#define uprv_uca_getRawFromImplicit U_ICU_ENTRY_POINT_RENAME(uprv_uca_getRawFromImplicit) +#define uprv_uca_initImplicitConstants U_ICU_ENTRY_POINT_RENAME(uprv_uca_initImplicitConstants) +#define uprv_uca_initTempTable U_ICU_ENTRY_POINT_RENAME(uprv_uca_initTempTable) +#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator) +#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator) +#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile) +#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray) +#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close) +#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact) +#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler) +#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes) +#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray) +#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow) +#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue) +#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open) +#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue) +#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement) +#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText) +#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail) +#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText) +#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone) +#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close) +#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end) +#define uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64) +#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find) +#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64) +#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext) +#define uregex_flags U_ICU_ENTRY_POINT_RENAME(uregex_flags) +#define uregex_getFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback) +#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback) +#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit) +#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText) +#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit) +#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText) +#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group) +#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount) +#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText) +#define uregex_groupUTextDeep U_ICU_ENTRY_POINT_RENAME(uregex_groupUTextDeep) +#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds) +#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds) +#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd) +#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt) +#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64) +#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches) +#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64) +#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open) +#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC) +#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText) +#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern) +#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText) +#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText) +#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd) +#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64) +#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart) +#define uregex_regionStart64 U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64) +#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll) +#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText) +#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst) +#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText) +#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd) +#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset) +#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64) +#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback) +#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback) +#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion) +#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64) +#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart) +#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit) +#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText) +#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit) +#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText) +#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split) +#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText) +#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start) +#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64) +#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt) +#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds) +#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds) +#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt) +#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual) +#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains) +#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable) +#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions) +#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType) +#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion) +#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType) +#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode) +#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues) +#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode) +#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode) +#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode) +#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType) +#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close) +#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb) +#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems) +#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource) +#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource) +#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary) +#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex) +#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey) +#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback) +#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent) +#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt) +#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector) +#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey) +#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues) +#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale) +#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType) +#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal) +#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName) +#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource) +#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString) +#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize) +#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString) +#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex) +#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey) +#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback) +#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType) +#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt) +#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String) +#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex) +#define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey) +#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion) +#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey) +#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber) +#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal) +#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext) +#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject) +#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open) +#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales) +#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect) +#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn) +#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU) +#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator) +#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap) +#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters) +#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun) +#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode) +#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName) +#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString) +#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString) +#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript) +#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions) +#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName) +#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage) +#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript) +#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased) +#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft) +#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun) +#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun) +#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun) +#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText) +#define usearch_close U_ICU_ENTRY_POINT_RENAME(usearch_close) +#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first) +#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following) +#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute) +#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator) +#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator) +#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength) +#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart) +#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText) +#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset) +#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern) +#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText) +#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical) +#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact) +#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical) +#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact) +#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last) +#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next) +#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open) +#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator) +#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding) +#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous) +#define usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset) +#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search) +#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards) +#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute) +#define usearch_setBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator) +#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator) +#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset) +#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern) +#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText) +#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add) +#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll) +#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints) +#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange) +#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString) +#define uset_applyIntPropertyValue U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue) +#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern) +#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias) +#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt) +#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear) +#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone) +#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed) +#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close) +#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver) +#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact) +#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement) +#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll) +#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains) +#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll) +#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints) +#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone) +#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange) +#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome) +#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString) +#define uset_equals U_ICU_ENTRY_POINT_RENAME(uset_equals) +#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze) +#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem) +#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount) +#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange) +#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount) +#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet) +#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf) +#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty) +#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen) +#define uset_open U_ICU_ENTRY_POINT_RENAME(uset_open) +#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty) +#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern) +#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions) +#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove) +#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll) +#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings) +#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange) +#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString) +#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern) +#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain) +#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll) +#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize) +#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains) +#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set) +#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne) +#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size) +#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span) +#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack) +#define uset_spanBackUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8) +#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8) +#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern) +#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable) +#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8) +#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString) +#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check) +#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8) +#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString) +#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone) +#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close) +#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars) +#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales) +#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet) +#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks) +#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet) +#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet) +#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet) +#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet) +#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel) +#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton) +#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8) +#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString) +#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open) +#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized) +#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource) +#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize) +#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars) +#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales) +#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet) +#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks) +#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel) +#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap) +#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close) +#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open) +#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType) +#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare) +#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap) +#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN) +#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN) +#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN) +#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold) +#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower) +#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle) +#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper) +#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map) +#define ustrcase_setTempCaseMapLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_setTempCaseMapLocale) +#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At) +#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone) +#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close) +#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy) +#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32) +#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals) +#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract) +#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze) +#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex) +#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex) +#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData) +#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive) +#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable) +#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32) +#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength) +#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32) +#define utext_next32From U_ICU_ENTRY_POINT_RENAME(utext_next32From) +#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator) +#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString) +#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable) +#define utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars) +#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8) +#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString) +#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32) +#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From) +#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace) +#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex) +#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup) +#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody) +#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody) +#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes) +#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody) +#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody) +#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64) +#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue) +#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64) +#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup) +#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data) +#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry) +#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit) +#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format) +#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName) +#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions) +#define utrace_getLevel U_ICU_ENTRY_POINT_RENAME(utrace_getLevel) +#define utrace_level U_ICU_ENTRY_POINT_RENAME(utrace_level) +#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions) +#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel) +#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat) +#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone) +#define utrans_close U_ICU_ENTRY_POINT_RENAME(utrans_close) +#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs) +#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID) +#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID) +#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID) +#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open) +#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs) +#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse) +#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU) +#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register) +#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator) +#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter) +#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules) +#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans) +#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental) +#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars) +#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars) +#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup) +#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister) +#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID) +#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone) +#define utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed) +#define utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close) +#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum) +#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate) +#define utrie2_freeze U_ICU_ENTRY_POINT_RENAME(utrie2_freeze) +#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie) +#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32) +#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit) +#define utrie2_getVersion U_ICU_ENTRY_POINT_RENAME(utrie2_getVersion) +#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex) +#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex) +#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen) +#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open) +#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy) +#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized) +#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize) +#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32) +#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit) +#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32) +#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap) +#define utrie2_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie2_swapAnyVersion) +#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone) +#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close) +#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset) +#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum) +#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32) +#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData) +#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open) +#define utrie_serialize U_ICU_ENTRY_POINT_RENAME(utrie_serialize) +#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32) +#define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32) +#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap) +#define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize) +#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy) +#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone) +#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close) +#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules) +#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals) +#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID) +#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified) +#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition) +#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset) +#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2) +#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3) +#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition) +#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset) +#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID) +#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL) +#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules) +#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime) +#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData) +#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID) +#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified) +#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset) +#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL) +#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime) +#define vzone_write U_ICU_ENTRY_POINT_RENAME(vzone_write) +#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart) +#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple) +#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close) +#define zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals) +#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings) +#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName) +#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset) +#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo) +#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom) +#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo) +#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone) +#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close) +#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals) +#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID) +#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom) +#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID) +#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime) +#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo) +#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open) +#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty) +#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom) +#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime) +#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo) + +#endif + +#endif diff --git a/Source/WTF/icu/unicode/uscript.h b/Source/WTF/icu/unicode/uscript.h new file mode 100644 index 000000000..57255c4f9 --- /dev/null +++ b/Source/WTF/icu/unicode/uscript.h @@ -0,0 +1,627 @@ +/* + ********************************************************************** + * Copyright (C) 1997-2013, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * + * File USCRIPT.H + * + * Modification History: + * + * Date Name Description + * 07/06/2001 Ram Creation. + ****************************************************************************** + */ + +#ifndef USCRIPT_H +#define USCRIPT_H +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Unicode Script Information + */ + +/** + * Constants for ISO 15924 script codes. + * + * Many of these script codes - those from Unicode's ScriptNames.txt - + * are character property values for Unicode's Script property. + * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/). + * + * Starting with ICU 3.6, constants for most ISO 15924 script codes + * are included (currently excluding private-use codes Qaaa..Qabx). + * For scripts for which there are codes in ISO 15924 but which are not + * used in the Unicode Character Database (UCD), there are no Unicode characters + * associated with those scripts. + * + * For example, there are no characters that have a UCD script code of + * Hans or Hant. All Han ideographs have the Hani script code. + * The Hans and Hant script codes are used with CLDR data. + * + * ISO 15924 script codes are included for use with CLDR and similar. + * + * @stable ICU 2.2 + */ +typedef enum UScriptCode { + /* + * Note: UScriptCode constants and their ISO script code comments + * are parsed by preparseucd.py. + * It matches lines like + * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * / + */ + + /** @stable ICU 2.2 */ + USCRIPT_INVALID_CODE = -1, + /** @stable ICU 2.2 */ + USCRIPT_COMMON = 0, /* Zyyy */ + /** @stable ICU 2.2 */ + USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ + /** @stable ICU 2.2 */ + USCRIPT_ARABIC = 2, /* Arab */ + /** @stable ICU 2.2 */ + USCRIPT_ARMENIAN = 3, /* Armn */ + /** @stable ICU 2.2 */ + USCRIPT_BENGALI = 4, /* Beng */ + /** @stable ICU 2.2 */ + USCRIPT_BOPOMOFO = 5, /* Bopo */ + /** @stable ICU 2.2 */ + USCRIPT_CHEROKEE = 6, /* Cher */ + /** @stable ICU 2.2 */ + USCRIPT_COPTIC = 7, /* Copt */ + /** @stable ICU 2.2 */ + USCRIPT_CYRILLIC = 8, /* Cyrl */ + /** @stable ICU 2.2 */ + USCRIPT_DESERET = 9, /* Dsrt */ + /** @stable ICU 2.2 */ + USCRIPT_DEVANAGARI = 10, /* Deva */ + /** @stable ICU 2.2 */ + USCRIPT_ETHIOPIC = 11, /* Ethi */ + /** @stable ICU 2.2 */ + USCRIPT_GEORGIAN = 12, /* Geor */ + /** @stable ICU 2.2 */ + USCRIPT_GOTHIC = 13, /* Goth */ + /** @stable ICU 2.2 */ + USCRIPT_GREEK = 14, /* Grek */ + /** @stable ICU 2.2 */ + USCRIPT_GUJARATI = 15, /* Gujr */ + /** @stable ICU 2.2 */ + USCRIPT_GURMUKHI = 16, /* Guru */ + /** @stable ICU 2.2 */ + USCRIPT_HAN = 17, /* Hani */ + /** @stable ICU 2.2 */ + USCRIPT_HANGUL = 18, /* Hang */ + /** @stable ICU 2.2 */ + USCRIPT_HEBREW = 19, /* Hebr */ + /** @stable ICU 2.2 */ + USCRIPT_HIRAGANA = 20, /* Hira */ + /** @stable ICU 2.2 */ + USCRIPT_KANNADA = 21, /* Knda */ + /** @stable ICU 2.2 */ + USCRIPT_KATAKANA = 22, /* Kana */ + /** @stable ICU 2.2 */ + USCRIPT_KHMER = 23, /* Khmr */ + /** @stable ICU 2.2 */ + USCRIPT_LAO = 24, /* Laoo */ + /** @stable ICU 2.2 */ + USCRIPT_LATIN = 25, /* Latn */ + /** @stable ICU 2.2 */ + USCRIPT_MALAYALAM = 26, /* Mlym */ + /** @stable ICU 2.2 */ + USCRIPT_MONGOLIAN = 27, /* Mong */ + /** @stable ICU 2.2 */ + USCRIPT_MYANMAR = 28, /* Mymr */ + /** @stable ICU 2.2 */ + USCRIPT_OGHAM = 29, /* Ogam */ + /** @stable ICU 2.2 */ + USCRIPT_OLD_ITALIC = 30, /* Ital */ + /** @stable ICU 2.2 */ + USCRIPT_ORIYA = 31, /* Orya */ + /** @stable ICU 2.2 */ + USCRIPT_RUNIC = 32, /* Runr */ + /** @stable ICU 2.2 */ + USCRIPT_SINHALA = 33, /* Sinh */ + /** @stable ICU 2.2 */ + USCRIPT_SYRIAC = 34, /* Syrc */ + /** @stable ICU 2.2 */ + USCRIPT_TAMIL = 35, /* Taml */ + /** @stable ICU 2.2 */ + USCRIPT_TELUGU = 36, /* Telu */ + /** @stable ICU 2.2 */ + USCRIPT_THAANA = 37, /* Thaa */ + /** @stable ICU 2.2 */ + USCRIPT_THAI = 38, /* Thai */ + /** @stable ICU 2.2 */ + USCRIPT_TIBETAN = 39, /* Tibt */ + /** Canadian_Aboriginal script. @stable ICU 2.6 */ + USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */ + /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ + USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, + /** @stable ICU 2.2 */ + USCRIPT_YI = 41, /* Yiii */ + /* New scripts in Unicode 3.2 */ + /** @stable ICU 2.2 */ + USCRIPT_TAGALOG = 42, /* Tglg */ + /** @stable ICU 2.2 */ + USCRIPT_HANUNOO = 43, /* Hano */ + /** @stable ICU 2.2 */ + USCRIPT_BUHID = 44, /* Buhd */ + /** @stable ICU 2.2 */ + USCRIPT_TAGBANWA = 45, /* Tagb */ + + /* New scripts in Unicode 4 */ + /** @stable ICU 2.6 */ + USCRIPT_BRAILLE = 46, /* Brai */ + /** @stable ICU 2.6 */ + USCRIPT_CYPRIOT = 47, /* Cprt */ + /** @stable ICU 2.6 */ + USCRIPT_LIMBU = 48, /* Limb */ + /** @stable ICU 2.6 */ + USCRIPT_LINEAR_B = 49, /* Linb */ + /** @stable ICU 2.6 */ + USCRIPT_OSMANYA = 50, /* Osma */ + /** @stable ICU 2.6 */ + USCRIPT_SHAVIAN = 51, /* Shaw */ + /** @stable ICU 2.6 */ + USCRIPT_TAI_LE = 52, /* Tale */ + /** @stable ICU 2.6 */ + USCRIPT_UGARITIC = 53, /* Ugar */ + + /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ + USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */ + + /* New scripts in Unicode 4.1 */ + /** @stable ICU 3.4 */ + USCRIPT_BUGINESE = 55, /* Bugi */ + /** @stable ICU 3.4 */ + USCRIPT_GLAGOLITIC = 56, /* Glag */ + /** @stable ICU 3.4 */ + USCRIPT_KHAROSHTHI = 57, /* Khar */ + /** @stable ICU 3.4 */ + USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */ + /** @stable ICU 3.4 */ + USCRIPT_NEW_TAI_LUE = 59, /* Talu */ + /** @stable ICU 3.4 */ + USCRIPT_TIFINAGH = 60, /* Tfng */ + /** @stable ICU 3.4 */ + USCRIPT_OLD_PERSIAN = 61, /* Xpeo */ + + /* New script codes from ISO 15924 */ + /** @stable ICU 3.6 */ + USCRIPT_BALINESE = 62, /* Bali */ + /** @stable ICU 3.6 */ + USCRIPT_BATAK = 63, /* Batk */ + /** @stable ICU 3.6 */ + USCRIPT_BLISSYMBOLS = 64, /* Blis */ + /** @stable ICU 3.6 */ + USCRIPT_BRAHMI = 65, /* Brah */ + /** @stable ICU 3.6 */ + USCRIPT_CHAM = 66, /* Cham */ + /** @stable ICU 3.6 */ + USCRIPT_CIRTH = 67, /* Cirt */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */ + /** @stable ICU 3.6 */ + USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */ + /** @stable ICU 3.6 */ + USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */ + /** @stable ICU 3.6 */ + USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */ + /** @stable ICU 3.6 */ + USCRIPT_KHUTSURI = 72, /* Geok */ + /** @stable ICU 3.6 */ + USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */ + /** @stable ICU 3.6 */ + USCRIPT_TRADITIONAL_HAN = 74, /* Hant */ + /** @stable ICU 3.6 */ + USCRIPT_PAHAWH_HMONG = 75, /* Hmng */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_HUNGARIAN = 76, /* Hung */ + /** @stable ICU 3.6 */ + USCRIPT_HARAPPAN_INDUS = 77, /* Inds */ + /** @stable ICU 3.6 */ + USCRIPT_JAVANESE = 78, /* Java */ + /** @stable ICU 3.6 */ + USCRIPT_KAYAH_LI = 79, /* Kali */ + /** @stable ICU 3.6 */ + USCRIPT_LATIN_FRAKTUR = 80, /* Latf */ + /** @stable ICU 3.6 */ + USCRIPT_LATIN_GAELIC = 81, /* Latg */ + /** @stable ICU 3.6 */ + USCRIPT_LEPCHA = 82, /* Lepc */ + /** @stable ICU 3.6 */ + USCRIPT_LINEAR_A = 83, /* Lina */ + /** @stable ICU 4.6 */ + USCRIPT_MANDAIC = 84, /* Mand */ + /** @stable ICU 3.6 */ + USCRIPT_MANDAEAN = USCRIPT_MANDAIC, + /** @stable ICU 3.6 */ + USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */ + /** @stable ICU 4.6 */ + USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */ + /** @stable ICU 3.6 */ + USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, + /** @stable ICU 3.6 */ + USCRIPT_NKO = 87, /* Nkoo */ + /** @stable ICU 3.6 */ + USCRIPT_ORKHON = 88, /* Orkh */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_PERMIC = 89, /* Perm */ + /** @stable ICU 3.6 */ + USCRIPT_PHAGS_PA = 90, /* Phag */ + /** @stable ICU 3.6 */ + USCRIPT_PHOENICIAN = 91, /* Phnx */ + /** @stable ICU 52 */ + USCRIPT_MIAO = 92, /* Plrd */ + /** @stable ICU 3.6 */ + USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, + /** @stable ICU 3.6 */ + USCRIPT_RONGORONGO = 93, /* Roro */ + /** @stable ICU 3.6 */ + USCRIPT_SARATI = 94, /* Sara */ + /** @stable ICU 3.6 */ + USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */ + /** @stable ICU 3.6 */ + USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */ + /** @stable ICU 3.6 */ + USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */ + /** @stable ICU 3.6 */ + USCRIPT_TENGWAR = 98, /* Teng */ + /** @stable ICU 3.6 */ + USCRIPT_VAI = 99, /* Vaii */ + /** @stable ICU 3.6 */ + USCRIPT_VISIBLE_SPEECH = 100,/* Visp */ + /** @stable ICU 3.6 */ + USCRIPT_CUNEIFORM = 101,/* Xsux */ + /** @stable ICU 3.6 */ + USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */ + /** @stable ICU 3.6 */ + USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ + + /* New script codes from ISO 15924 */ + /** @stable ICU 3.8 */ + USCRIPT_CARIAN = 104,/* Cari */ + /** @stable ICU 3.8 */ + USCRIPT_JAPANESE = 105,/* Jpan */ + /** @stable ICU 3.8 */ + USCRIPT_LANNA = 106,/* Lana */ + /** @stable ICU 3.8 */ + USCRIPT_LYCIAN = 107,/* Lyci */ + /** @stable ICU 3.8 */ + USCRIPT_LYDIAN = 108,/* Lydi */ + /** @stable ICU 3.8 */ + USCRIPT_OL_CHIKI = 109,/* Olck */ + /** @stable ICU 3.8 */ + USCRIPT_REJANG = 110,/* Rjng */ + /** @stable ICU 3.8 */ + USCRIPT_SAURASHTRA = 111,/* Saur */ + /** @stable ICU 3.8 */ + USCRIPT_SIGN_WRITING = 112,/* Sgnw */ + /** @stable ICU 3.8 */ + USCRIPT_SUNDANESE = 113,/* Sund */ + /** @stable ICU 3.8 */ + USCRIPT_MOON = 114,/* Moon */ + /** @stable ICU 3.8 */ + USCRIPT_MEITEI_MAYEK = 115,/* Mtei */ + + /* New script codes from ISO 15924 */ + /** @stable ICU 4.0 */ + USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */ + /** @stable ICU 4.0 */ + USCRIPT_AVESTAN = 117,/* Avst */ + /** @stable ICU 4.0 */ + USCRIPT_CHAKMA = 118,/* Cakm */ + /** @stable ICU 4.0 */ + USCRIPT_KOREAN = 119,/* Kore */ + /** @stable ICU 4.0 */ + USCRIPT_KAITHI = 120,/* Kthi */ + /** @stable ICU 4.0 */ + USCRIPT_MANICHAEAN = 121,/* Mani */ + /** @stable ICU 4.0 */ + USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */ + /** @stable ICU 4.0 */ + USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */ + /** @stable ICU 4.0 */ + USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */ + /** @stable ICU 4.0 */ + USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */ + /** @stable ICU 4.0 */ + USCRIPT_SAMARITAN = 126,/* Samr */ + /** @stable ICU 4.0 */ + USCRIPT_TAI_VIET = 127,/* Tavt */ + /** @stable ICU 4.0 */ + USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */ + /** @stable ICU 4.0 */ + USCRIPT_SYMBOLS = 129,/* Zsym */ + + /* New script codes from ISO 15924 */ + /** @stable ICU 4.4 */ + USCRIPT_BAMUM = 130,/* Bamu */ + /** @stable ICU 4.4 */ + USCRIPT_LISU = 131,/* Lisu */ + /** @stable ICU 4.4 */ + USCRIPT_NAKHI_GEBA = 132,/* Nkgb */ + /** @stable ICU 4.4 */ + USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */ + + /* New script codes from ISO 15924 */ + /** @stable ICU 4.6 */ + USCRIPT_BASSA_VAH = 134,/* Bass */ + /** @stable ICU 4.6 */ + USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl */ + /** @stable ICU 4.6 */ + USCRIPT_ELBASAN = 136,/* Elba */ + /** @stable ICU 4.6 */ + USCRIPT_GRANTHA = 137,/* Gran */ + /** @stable ICU 4.6 */ + USCRIPT_KPELLE = 138,/* Kpel */ + /** @stable ICU 4.6 */ + USCRIPT_LOMA = 139,/* Loma */ + /** @stable ICU 4.6 */ + USCRIPT_MENDE = 140,/* Mend */ + /** @stable ICU 4.6 */ + USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */ + /** @stable ICU 4.6 */ + USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */ + /** @stable ICU 4.6 */ + USCRIPT_NABATAEAN = 143,/* Nbat */ + /** @stable ICU 4.6 */ + USCRIPT_PALMYRENE = 144,/* Palm */ + /** @stable ICU 4.6 */ + USCRIPT_SINDHI = 145,/* Sind */ + /** @stable ICU 4.6 */ + USCRIPT_WARANG_CITI = 146,/* Wara */ + + /** @stable ICU 4.8 */ + USCRIPT_AFAKA = 147,/* Afak */ + /** @stable ICU 4.8 */ + USCRIPT_JURCHEN = 148,/* Jurc */ + /** @stable ICU 4.8 */ + USCRIPT_MRO = 149,/* Mroo */ + /** @stable ICU 4.8 */ + USCRIPT_NUSHU = 150,/* Nshu */ + /** @stable ICU 4.8 */ + USCRIPT_SHARADA = 151,/* Shrd */ + /** @stable ICU 4.8 */ + USCRIPT_SORA_SOMPENG = 152,/* Sora */ + /** @stable ICU 4.8 */ + USCRIPT_TAKRI = 153,/* Takr */ + /** @stable ICU 4.8 */ + USCRIPT_TANGUT = 154,/* Tang */ + /** @stable ICU 4.8 */ + USCRIPT_WOLEAI = 155,/* Wole */ + + /** @stable ICU 49 */ + USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */ + /** @stable ICU 49 */ + USCRIPT_KHOJKI = 157,/* Khoj */ + /** @stable ICU 49 */ + USCRIPT_TIRHUTA = 158,/* Tirh */ + + /** @stable ICU 52 */ + USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */ + /** @stable ICU 52 */ + USCRIPT_MAHAJANI = 160,/* Mahj */ + + /* Private use codes from Qaaa - Qabx are not supported */ + + /** @stable ICU 2.2 */ + USCRIPT_CODE_LIMIT = 161 +} UScriptCode; + +/** + * Gets script codes associated with the given locale or ISO 15924 abbreviation or name. + * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym". + * Fills in USCRIPT_LATIN given "en" OR "en_US" + * If required capacity is greater than capacity of the destination buffer then the error code + * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned + * + * <p>Note: To search by short or long script alias only, use + * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does + * a fast lookup with no access of the locale data. + * @param nameOrAbbrOrLocale name of the script, as given in + * PropertyValueAliases.txt, or ISO 15924 code or locale + * @param fillIn the UScriptCode buffer to fill in the script code + * @param capacity the capacity (size) fo UScriptCode buffer passed in. + * @param err the error status code. + * @return The number of script codes filled in the buffer passed in + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err); + +/** + * Gets a script name associated with the given script code. + * Returns "Malayam" given USCRIPT_MALAYALAM + * @param scriptCode UScriptCode enum + * @return script long name as given in + * PropertyValueAliases.txt, or NULL if scriptCode is invalid + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +uscript_getName(UScriptCode scriptCode); + +/** + * Gets a script name associated with the given script code. + * Returns "Mlym" given USCRIPT_MALAYALAM + * @param scriptCode UScriptCode enum + * @return script abbreviated name as given in + * PropertyValueAliases.txt, or NULL if scriptCode is invalid + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +uscript_getShortName(UScriptCode scriptCode); + +/** + * Gets the script code associated with the given codepoint. + * Returns USCRIPT_MALAYALAM given 0x0D02 + * @param codepoint UChar32 codepoint + * @param err the error status code. + * @return The UScriptCode, or 0 if codepoint is invalid + * @stable ICU 2.4 + */ +U_STABLE UScriptCode U_EXPORT2 +uscript_getScript(UChar32 codepoint, UErrorCode *err); + +/** + * Do the Script_Extensions of code point c contain script sc? + * If c does not have explicit Script_Extensions, then this tests whether + * c has the Script property value sc. + * + * Some characters are commonly used in multiple scripts. + * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + * + * The Script_Extensions property is provisional. It may be modified or removed + * in future versions of the Unicode Standard, and thus in ICU. + * @param c code point + * @param sc script code + * @return TRUE if sc is in Script_Extensions(c) + * @stable ICU 49 + */ +U_STABLE UBool U_EXPORT2 +uscript_hasScript(UChar32 c, UScriptCode sc); + +/** + * Writes code point c's Script_Extensions as a list of UScriptCode values + * to the output scripts array and returns the number of script codes. + * - If c does have Script_Extensions, then the Script property value + * (normally Common or Inherited) is not included. + * - If c does not have Script_Extensions, then the one Script code is written to the output array. + * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. + * In other words, if the return value is 1, + * then the output array contains exactly c's single Script code. + * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes. + * + * Some characters are commonly used in multiple scripts. + * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + * + * If there are more than capacity script codes to be written, then + * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. + * (Usual ICU buffer handling behavior.) + * + * The Script_Extensions property is provisional. It may be modified or removed + * in future versions of the Unicode Standard, and thus in ICU. + * @param c code point + * @param scripts output script code array + * @param capacity capacity of the scripts array + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return number of script codes in c's Script_Extensions, or 1 for the single Script value, + * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity + * @stable ICU 49 + */ +U_STABLE int32_t U_EXPORT2 +uscript_getScriptExtensions(UChar32 c, + UScriptCode *scripts, int32_t capacity, + UErrorCode *errorCode); + +#ifndef U_HIDE_DRAFT_API + +/** + * Script usage constants. + * See UAX #31 Unicode Identifier and Pattern Syntax. + * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers + * + * @draft ICU 51 + */ +typedef enum UScriptUsage { + /** Not encoded in Unicode. @draft ICU 51 */ + USCRIPT_USAGE_NOT_ENCODED, + /** Unknown script usage. @draft ICU 51 */ + USCRIPT_USAGE_UNKNOWN, + /** Candidate for Exclusion from Identifiers. @draft ICU 51 */ + USCRIPT_USAGE_EXCLUDED, + /** Limited Use script. @draft ICU 51 */ + USCRIPT_USAGE_LIMITED_USE, + /** Aspirational Use script. @draft ICU 51 */ + USCRIPT_USAGE_ASPIRATIONAL, + /** Recommended script. @draft ICU 51 */ + USCRIPT_USAGE_RECOMMENDED +} UScriptUsage; + +/** + * Writes the script sample character string. + * This string normally consists of one code point but might be longer. + * The string is empty if the script is not encoded. + * + * @param script script code + * @param dest output string array + * @param capacity number of UChars in the dest array + * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input + * @return the string length, even if U_BUFFER_OVERFLOW_ERROR + * @draft ICU 51 + */ +U_DRAFT int32_t U_EXPORT2 +uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN +class UnicodeString; +U_NAMESPACE_END + +/** + * Returns the script sample character string. + * This string normally consists of one code point but might be longer. + * The string is empty if the script is not encoded. + * + * @param script script code + * @return the sample character string + * @draft ICU 51 + */ +U_COMMON_API icu::UnicodeString U_EXPORT2 +uscript_getSampleUnicodeString(UScriptCode script); + +#endif + +/** + * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. + * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode. + * + * @param script script code + * @return script usage + * @see UScriptUsage + * @draft ICU 51 + */ +U_DRAFT UScriptUsage U_EXPORT2 +uscript_getUsage(UScriptCode script); + +/** + * Returns TRUE if the script is written right-to-left. + * For example, Arab and Hebr. + * + * @param script script code + * @return TRUE if the script is right-to-left + * @draft ICU 51 + */ +U_DRAFT UBool U_EXPORT2 +uscript_isRightToLeft(UScriptCode script); + +/** + * Returns TRUE if the script allows line breaks between letters (excluding hyphenation). + * Such a script typically requires dictionary-based line breaking. + * For example, Hani and Thai. + * + * @param script script code + * @return TRUE if the script allows line breaks between letters + * @draft ICU 51 + */ +U_DRAFT UBool U_EXPORT2 +uscript_breaksBetweenLetters(UScriptCode script); + +/** + * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary. + * For example, Latn and Cyrl. + * + * @param script script code + * @return TRUE if the script is cased + * @draft ICU 51 + */ +U_DRAFT UBool U_EXPORT2 +uscript_isCased(UScriptCode script); + +#endif /* U_HIDE_DRAFT_API */ + +#endif diff --git a/Source/WTF/icu/unicode/uset.h b/Source/WTF/icu/unicode/uset.h new file mode 100644 index 000000000..40510cd41 --- /dev/null +++ b/Source/WTF/icu/unicode/uset.h @@ -0,0 +1,1124 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uset.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002mar07 +* created by: Markus W. Scherer +* +* C version of UnicodeSet. +*/ + + +/** + * \file + * \brief C API: Unicode Set + * + * <p>This is a C wrapper around the C++ UnicodeSet class.</p> + */ + +#ifndef __USET_H__ +#define __USET_H__ + +#include "unicode/utypes.h" +#include "unicode/uchar.h" +#include "unicode/localpointer.h" + +#ifndef UCNV_H +struct USet; +/** + * A UnicodeSet. Use the uset_* API to manipulate. Create with + * uset_open*, and destroy with uset_close. + * @stable ICU 2.4 + */ +typedef struct USet USet; +#endif + +/** + * Bitmask values to be passed to uset_openPatternOptions() or + * uset_applyPattern() taking an option parameter. + * @stable ICU 2.4 + */ +enum { + /** + * Ignore white space within patterns unless quoted or escaped. + * @stable ICU 2.4 + */ + USET_IGNORE_SPACE = 1, + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This performs a full + * closure over case mappings, e.g. U+017F for s. + * + * The resulting set is a superset of the input for the code points but + * not for the strings. + * It performs a case mapping closure of the code points and adds + * full case folding strings for the code points, and reduces strings of + * the original set to their full case folding equivalents. + * + * This is designed for case-insensitive matches, for example + * in regular expressions. The full code point case closure allows checking of + * an input character directly against the closure set. + * Strings are matched by comparing the case-folded form from the closure + * set with an incremental case folding of the string in question. + * + * The closure set will also contain single code points if the original + * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). + * This is not necessary (that is, redundant) for the above matching method + * but results in the same closure sets regardless of whether the original + * set contained the code point or a string. + * + * @stable ICU 2.4 + */ + USET_CASE_INSENSITIVE = 2, + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, + * title-, and uppercase mappings as well as the case folding + * of each existing element in the set. + * @stable ICU 3.2 + */ + USET_ADD_CASE_MAPPINGS = 4 +}; + +/** + * Argument values for whether span() and similar functions continue while + * the current character is contained vs. not contained in the set. + * + * The functionality is straightforward for sets with only single code points, + * without strings (which is the common case): + * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE + * work the same. + * - span() and spanBack() partition any string the same way when + * alternating between span(USET_SPAN_NOT_CONTAINED) and + * span(either "contained" condition). + * - Using a complemented (inverted) set and the opposite span conditions + * yields the same results. + * + * When a set contains multi-code point strings, then these statements may not + * be true, depending on the strings in the set (for example, whether they + * overlap with each other) and the string that is processed. + * For a set with strings: + * - The complement of the set contains the opposite set of code points, + * but the same set of strings. + * Therefore, complementing both the set and the span conditions + * may yield different results. + * - When starting spans at different positions in a string + * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different + * because a set string may start before the later position. + * - span(USET_SPAN_SIMPLE) may be shorter than + * span(USET_SPAN_CONTAINED) because it will not recursively try + * all possible paths. + * For example, with a set which contains the three strings "xy", "xya" and "ax", + * span("xyax", USET_SPAN_CONTAINED) will return 4 but + * span("xyax", USET_SPAN_SIMPLE) will return 3. + * span(USET_SPAN_SIMPLE) will never be longer than + * span(USET_SPAN_CONTAINED). + * - With either "contained" condition, span() and spanBack() may partition + * a string in different ways. + * For example, with a set which contains the two strings "ab" and "ba", + * and when processing the string "aba", + * span() will yield contained/not-contained boundaries of { 0, 2, 3 } + * while spanBack() will yield boundaries of { 0, 1, 3 }. + * + * Note: If it is important to get the same boundaries whether iterating forward + * or backward through a string, then either only span() should be used and + * the boundaries cached for backward operation, or an ICU BreakIterator + * could be used. + * + * Note: Unpaired surrogates are treated like surrogate code points. + * Similarly, set strings match only on code point boundaries, + * never in the middle of a surrogate pair. + * Illegal UTF-8 sequences are treated like U+FFFD. + * When processing UTF-8 strings, malformed set strings + * (strings with unpaired surrogates which cannot be converted to UTF-8) + * are ignored. + * + * @stable ICU 3.8 + */ +typedef enum USetSpanCondition { + /** + * Continue a span() while there is no set element at the current position. + * Stops before the first set element (character or string). + * (For code points only, this is like while contains(current)==FALSE). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of characters that are not in the set, + * and none of its strings overlap with the span. + * + * @stable ICU 3.8 + */ + USET_SPAN_NOT_CONTAINED = 0, + /** + * Continue a span() while there is a set element at the current position. + * (For characters only, this is like while contains(current)==TRUE). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of set elements (characters or strings) that are in the set. + * + * If a set contains strings, then the span will be the longest substring + * matching any of the possible concatenations of set elements (characters or strings). + * (There must be a single, non-overlapping concatenation of characters or strings.) + * This is equivalent to a POSIX regular expression for (OR of each set element)*. + * + * @stable ICU 3.8 + */ + USET_SPAN_CONTAINED = 1, + /** + * Continue a span() while there is a set element at the current position. + * (For characters only, this is like while contains(current)==TRUE). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of set elements (characters or strings) that are in the set. + * + * If a set only contains single characters, then this is the same + * as USET_SPAN_CONTAINED. + * + * If a set contains strings, then the span will be the longest substring + * with a match at each position with the longest single set element (character or string). + * + * Use this span condition together with other longest-match algorithms, + * such as ICU converters (ucnv_getUnicodeSet()). + * + * @stable ICU 3.8 + */ + USET_SPAN_SIMPLE = 2, + /** + * One more than the last span condition. + * @stable ICU 3.8 + */ + USET_SPAN_CONDITION_COUNT +} USetSpanCondition; + +enum { + /** + * Capacity of USerializedSet::staticArray. + * Enough for any single-code point set. + * Also provides padding for nice sizeof(USerializedSet). + * @stable ICU 2.4 + */ + USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 +}; + +/** + * A serialized form of a Unicode set. Limited manipulations are + * possible directly on a serialized set. See below. + * @stable ICU 2.4 + */ +typedef struct USerializedSet { + /** + * The serialized Unicode Set. + * @stable ICU 2.4 + */ + const uint16_t *array; + /** + * The length of the array that contains BMP characters. + * @stable ICU 2.4 + */ + int32_t bmpLength; + /** + * The total length of the array. + * @stable ICU 2.4 + */ + int32_t length; + /** + * A small buffer for the array to reduce memory allocations. + * @stable ICU 2.4 + */ + uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; +} USerializedSet; + +/********************************************************************* + * USet API + *********************************************************************/ + +/** + * Create an empty USet object. + * Equivalent to uset_open(1, 0). + * @return a newly created USet. The caller must call uset_close() on + * it when done. + * @stable ICU 4.2 + */ +U_STABLE USet* U_EXPORT2 +uset_openEmpty(void); + +/** + * Creates a USet object that contains the range of characters + * start..end, inclusive. If <code>start > end</code> + * then an empty set is created (same as using uset_openEmpty()). + * @param start first character of the range, inclusive + * @param end last character of the range, inclusive + * @return a newly created USet. The caller must call uset_close() on + * it when done. + * @stable ICU 2.4 + */ +U_STABLE USet* U_EXPORT2 +uset_open(UChar32 start, UChar32 end); + +/** + * Creates a set from the given pattern. See the UnicodeSet class + * description for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param patternLength the length of the pattern, or -1 if null + * terminated + * @param ec the error code + * @stable ICU 2.4 + */ +U_STABLE USet* U_EXPORT2 +uset_openPattern(const UChar* pattern, int32_t patternLength, + UErrorCode* ec); + +/** + * Creates a set from the given pattern. See the UnicodeSet class + * description for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param patternLength the length of the pattern, or -1 if null + * terminated + * @param options bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param ec the error code + * @stable ICU 2.4 + */ +U_STABLE USet* U_EXPORT2 +uset_openPatternOptions(const UChar* pattern, int32_t patternLength, + uint32_t options, + UErrorCode* ec); + +/** + * Disposes of the storage used by a USet object. This function should + * be called exactly once for objects returned by uset_open(). + * @param set the object to dispose of + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_close(USet* set); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUSetPointer + * "Smart pointer" class, closes a USet via uset_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); + +U_NAMESPACE_END + +#endif + +/** + * Returns a copy of this object. + * If this set is frozen, then the clone will be frozen as well. + * Use uset_cloneAsThawed() for a mutable clone of a frozen set. + * @param set the original set + * @return the newly allocated copy of the set + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_STABLE USet * U_EXPORT2 +uset_clone(const USet *set); + +/** + * Determines whether the set has been frozen (made immutable) or not. + * See the ICU4J Freezable interface for details. + * @param set the set + * @return TRUE/FALSE for whether the set has been frozen + * @see uset_freeze + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_STABLE UBool U_EXPORT2 +uset_isFrozen(const USet *set); + +/** + * Freeze the set (make it immutable). + * Once frozen, it cannot be unfrozen and is therefore thread-safe + * until it is deleted. + * See the ICU4J Freezable interface for details. + * Freezing the set may also make some operations faster, for example + * uset_contains() and uset_span(). + * A frozen set will not be modified. (It remains frozen.) + * @param set the set + * @return the same set, now frozen + * @see uset_isFrozen + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_STABLE void U_EXPORT2 +uset_freeze(USet *set); + +/** + * Clone the set and make the clone mutable. + * See the ICU4J Freezable interface for details. + * @param set the set + * @return the mutable clone + * @see uset_freeze + * @see uset_isFrozen + * @see uset_clone + * @stable ICU 3.8 + */ +U_STABLE USet * U_EXPORT2 +uset_cloneAsThawed(const USet *set); + +/** + * Causes the USet object to represent the range <code>start - end</code>. + * If <code>start > end</code> then this USet is set to an empty range. + * A frozen set will not be modified. + * @param set the object to set to the given range + * @param start first character in the set, inclusive + * @param end last character in the set, inclusive + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_set(USet* set, + UChar32 start, UChar32 end); + +/** + * Modifies the set to represent the set specified by the given + * pattern. See the UnicodeSet class description for the syntax of + * the pattern language. See also the User Guide chapter about UnicodeSet. + * <em>Empties the set passed before applying the pattern.</em> + * A frozen set will not be modified. + * @param set The set to which the pattern is to be applied. + * @param pattern A pointer to UChar string specifying what characters are in the set. + * The character at pattern[0] must be a '['. + * @param patternLength The length of the UChar string. -1 if NUL terminated. + * @param options A bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param status Returns an error if the pattern cannot be parsed. + * @return Upon successful parse, the value is either + * the index of the character after the closing ']' + * of the parsed pattern. + * If the status code indicates failure, then the return value + * is the index of the error in the source. + * + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uset_applyPattern(USet *set, + const UChar *pattern, int32_t patternLength, + uint32_t options, + UErrorCode *status); + +/** + * Modifies the set to contain those code points which have the given value + * for the given binary or enumerated property, as returned by + * u_getIntPropertyValue. Prior contents of this set are lost. + * A frozen set will not be modified. + * + * @param set the object to contain the code points defined by the property + * + * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 + * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 + * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. + * + * @param value a value in the range u_getIntPropertyMinValue(prop).. + * u_getIntPropertyMaxValue(prop), with one exception. If prop is + * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but + * rather a mask value produced by U_GET_GC_MASK(). This allows grouped + * categories such as [:L:] to be represented. + * + * @param ec error code input/output parameter + * + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_applyIntPropertyValue(USet* set, + UProperty prop, int32_t value, UErrorCode* ec); + +/** + * Modifies the set to contain those code points which have the + * given value for the given property. Prior contents of this + * set are lost. + * A frozen set will not be modified. + * + * @param set the object to contain the code points defined by the given + * property and value alias + * + * @param prop a string specifying a property alias, either short or long. + * The name is matched loosely. See PropertyAliases.txt for names and a + * description of loose matching. If the value string is empty, then this + * string is interpreted as either a General_Category value alias, a Script + * value alias, a binary property alias, or a special ID. Special IDs are + * matched loosely and correspond to the following sets: + * + * "ANY" = [\\u0000-\\U0010FFFF], + * "ASCII" = [\\u0000-\\u007F], + * "Assigned" = [:^Cn:]. + * + * @param propLength the length of the prop, or -1 if NULL + * + * @param value a string specifying a value alias, either short or long. + * The name is matched loosely. See PropertyValueAliases.txt for names + * and a description of loose matching. In addition to aliases listed, + * numeric values and canonical combining classes may be expressed + * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string + * may also be empty. + * + * @param valueLength the length of the value, or -1 if NULL + * + * @param ec error code input/output parameter + * + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_applyPropertyAlias(USet* set, + const UChar *prop, int32_t propLength, + const UChar *value, int32_t valueLength, + UErrorCode* ec); + +/** + * Return true if the given position, in the given pattern, appears + * to be the start of a UnicodeSet pattern. + * + * @param pattern a string specifying the pattern + * @param patternLength the length of the pattern, or -1 if NULL + * @param pos the given position + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_resemblesPattern(const UChar *pattern, int32_t patternLength, + int32_t pos); + +/** + * Returns a string representation of this set. If the result of + * calling this function is passed to a uset_openPattern(), it + * will produce another set that is equal to this one. + * @param set the set + * @param result the string to receive the rules, may be NULL + * @param resultCapacity the capacity of result, may be 0 if result is NULL + * @param escapeUnprintable if TRUE then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are those other than + * U+000A, U+0020..U+007E. + * @param ec error code. + * @return length of string, possibly larger than resultCapacity + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_toPattern(const USet* set, + UChar* result, int32_t resultCapacity, + UBool escapeUnprintable, + UErrorCode* ec); + +/** + * Adds the given character to the given USet. After this call, + * uset_contains(set, c) will return TRUE. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param c the character to add + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_add(USet* set, UChar32 c); + +/** + * Adds all of the elements in the specified set to this set if + * they're not already present. This operation effectively + * modifies this set so that its value is the <i>union</i> of the two + * sets. The behavior of this operation is unspecified if the specified + * collection is modified while the operation is in progress. + * A frozen set will not be modified. + * + * @param set the object to which to add the set + * @param additionalSet the source set whose elements are to be added to this set. + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uset_addAll(USet* set, const USet *additionalSet); + +/** + * Adds the given range of characters to the given USet. After this call, + * uset_contains(set, start, end) will return TRUE. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param start the first character of the range to add, inclusive + * @param end the last character of the range to add, inclusive + * @stable ICU 2.2 + */ +U_STABLE void U_EXPORT2 +uset_addRange(USet* set, UChar32 start, UChar32 end); + +/** + * Adds the given string to the given USet. After this call, + * uset_containsString(set, str, strLen) will return TRUE. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param str the string to add + * @param strLen the length of the string or -1 if null terminated. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_addString(USet* set, const UChar* str, int32_t strLen); + +/** + * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} + * If this set already any particular character, it has no effect on that character. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param str the source string + * @param strLen the length of the string or -1 if null terminated. + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen); + +/** + * Removes the given character from the given USet. After this call, + * uset_contains(set, c) will return FALSE. + * A frozen set will not be modified. + * @param set the object from which to remove the character + * @param c the character to remove + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_remove(USet* set, UChar32 c); + +/** + * Removes the given range of characters from the given USet. After this call, + * uset_contains(set, start, end) will return FALSE. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param start the first character of the range to remove, inclusive + * @param end the last character of the range to remove, inclusive + * @stable ICU 2.2 + */ +U_STABLE void U_EXPORT2 +uset_removeRange(USet* set, UChar32 start, UChar32 end); + +/** + * Removes the given string to the given USet. After this call, + * uset_containsString(set, str, strLen) will return FALSE. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param str the string to remove + * @param strLen the length of the string or -1 if null terminated. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_removeString(USet* set, const UChar* str, int32_t strLen); + +/** + * Removes from this set all of its elements that are contained in the + * specified set. This operation effectively modifies this + * set so that its value is the <i>asymmetric set difference</i> of + * the two sets. + * A frozen set will not be modified. + * @param set the object from which the elements are to be removed + * @param removeSet the object that defines which elements will be + * removed from this set + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_removeAll(USet* set, const USet* removeSet); + +/** + * Retain only the elements in this set that are contained in the + * specified range. If <code>start > end</code> then an empty range is + * retained, leaving the set empty. This is equivalent to + * a boolean logic AND, or a set INTERSECTION. + * A frozen set will not be modified. + * + * @param set the object for which to retain only the specified range + * @param start first character, inclusive, of range to be retained + * to this set. + * @param end last character, inclusive, of range to be retained + * to this set. + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_retain(USet* set, UChar32 start, UChar32 end); + +/** + * Retains only the elements in this set that are contained in the + * specified set. In other words, removes from this set all of + * its elements that are not contained in the specified set. This + * operation effectively modifies this set so that its value is + * the <i>intersection</i> of the two sets. + * A frozen set will not be modified. + * + * @param set the object on which to perform the retain + * @param retain set that defines which elements this set will retain + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_retainAll(USet* set, const USet* retain); + +/** + * Reallocate this objects internal structures to take up the least + * possible space, without changing this object's value. + * A frozen set will not be modified. + * + * @param set the object on which to perfrom the compact + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_compact(USet* set); + +/** + * Inverts this set. This operation modifies this set so that + * its value is its complement. This operation does not affect + * the multicharacter strings, if any. + * A frozen set will not be modified. + * @param set the set + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_complement(USet* set); + +/** + * Complements in this set all elements contained in the specified + * set. Any character in the other set will be removed if it is + * in this set, or will be added if it is not in this set. + * A frozen set will not be modified. + * + * @param set the set with which to complement + * @param complement set that defines which elements will be xor'ed + * from this set. + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_complementAll(USet* set, const USet* complement); + +/** + * Removes all of the elements from this set. This set will be + * empty after this call returns. + * A frozen set will not be modified. + * @param set the set + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_clear(USet* set); + +/** + * Close this set over the given attribute. For the attribute + * USET_CASE, the result is to modify this set so that: + * + * 1. For each character or string 'a' in this set, all strings or + * characters 'b' such that foldCase(a) == foldCase(b) are added + * to this set. + * + * 2. For each string 'e' in the resulting set, if e != + * foldCase(e), 'e' will be removed. + * + * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] + * + * (Here foldCase(x) refers to the operation u_strFoldCase, and a + * == b denotes that the contents are the same, not pointer + * comparison.) + * + * A frozen set will not be modified. + * + * @param set the set + * + * @param attributes bitmask for attributes to close over. + * Currently only the USET_CASE bit is supported. Any undefined bits + * are ignored. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uset_closeOver(USet* set, int32_t attributes); + +/** + * Remove all strings from this set. + * + * @param set the set + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uset_removeAllStrings(USet* set); + +/** + * Returns TRUE if the given USet contains no characters and no + * strings. + * @param set the set + * @return true if set is empty + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_isEmpty(const USet* set); + +/** + * Returns TRUE if the given USet contains the given character. + * This function works faster with a frozen set. + * @param set the set + * @param c The codepoint to check for within the set + * @return true if set contains c + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_contains(const USet* set, UChar32 c); + +/** + * Returns TRUE if the given USet contains all characters c + * where start <= c && c <= end. + * @param set the set + * @param start the first character of the range to test, inclusive + * @param end the last character of the range to test, inclusive + * @return TRUE if set contains the range + * @stable ICU 2.2 + */ +U_STABLE UBool U_EXPORT2 +uset_containsRange(const USet* set, UChar32 start, UChar32 end); + +/** + * Returns TRUE if the given USet contains the given string. + * @param set the set + * @param str the string + * @param strLen the length of the string or -1 if null terminated. + * @return true if set contains str + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_containsString(const USet* set, const UChar* str, int32_t strLen); + +/** + * Returns the index of the given character within this set, where + * the set is ordered by ascending code point. If the character + * is not in this set, return -1. The inverse of this method is + * <code>charAt()</code>. + * @param set the set + * @param c the character to obtain the index for + * @return an index from 0..size()-1, or -1 + * @stable ICU 3.2 + */ +U_STABLE int32_t U_EXPORT2 +uset_indexOf(const USet* set, UChar32 c); + +/** + * Returns the character at the given index within this set, where + * the set is ordered by ascending code point. If the index is + * out of range, return (UChar32)-1. The inverse of this method is + * <code>indexOf()</code>. + * @param set the set + * @param charIndex an index from 0..size()-1 to obtain the char for + * @return the character at the given index, or (UChar32)-1. + * @stable ICU 3.2 + */ +U_STABLE UChar32 U_EXPORT2 +uset_charAt(const USet* set, int32_t charIndex); + +/** + * Returns the number of characters and strings contained in the given + * USet. + * @param set the set + * @return a non-negative integer counting the characters and strings + * contained in set + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_size(const USet* set); + +/** + * Returns the number of items in this set. An item is either a range + * of characters or a single multicharacter string. + * @param set the set + * @return a non-negative integer counting the character ranges + * and/or strings contained in set + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_getItemCount(const USet* set); + +/** + * Returns an item of this set. An item is either a range of + * characters or a single multicharacter string. + * @param set the set + * @param itemIndex a non-negative integer in the range 0.. + * uset_getItemCount(set)-1 + * @param start pointer to variable to receive first character + * in range, inclusive + * @param end pointer to variable to receive last character in range, + * inclusive + * @param str buffer to receive the string, may be NULL + * @param strCapacity capacity of str, or 0 if str is NULL + * @param ec error code + * @return the length of the string (>= 2), or 0 if the item is a + * range, in which case it is the range *start..*end, or -1 if + * itemIndex is out of range + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_getItem(const USet* set, int32_t itemIndex, + UChar32* start, UChar32* end, + UChar* str, int32_t strCapacity, + UErrorCode* ec); + +/** + * Returns true if set1 contains all the characters and strings + * of set2. It answers the question, 'Is set1 a superset of set2?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_containsAll(const USet* set1, const USet* set2); + +/** + * Returns true if this set contains all the characters + * of the given string. This is does not check containment of grapheme + * clusters, like uset_containsString. + * @param set set of characters to be checked for containment + * @param str string containing codepoints to be checked for containment + * @param strLen the length of the string or -1 if null terminated. + * @return true if the test condition is met + * @stable ICU 3.4 + */ +U_STABLE UBool U_EXPORT2 +uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen); + +/** + * Returns true if set1 contains none of the characters and strings + * of set2. It answers the question, 'Is set1 a disjoint set of set2?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_containsNone(const USet* set1, const USet* set2); + +/** + * Returns true if set1 contains some of the characters and strings + * of set2. It answers the question, 'Does set1 and set2 have an intersection?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_containsSome(const USet* set1, const USet* set2); + +/** + * Returns the length of the initial substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Similar to the strspn() C library function. + * Unpaired surrogates are treated according to contains() of their surrogate code points. + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the length of the initial substring according to the spanCondition; + * 0 if the start of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_STABLE int32_t U_EXPORT2 +uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the start of the trailing substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Unpaired surrogates are treated according to contains() of their surrogate code points. + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the start of the trailing substring according to the spanCondition; + * the string length if the end of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_STABLE int32_t U_EXPORT2 +uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the length of the initial substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Similar to the strspn() C library function. + * Malformed byte sequences are treated according to contains(0xfffd). + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string (UTF-8) + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the length of the initial substring according to the spanCondition; + * 0 if the start of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_STABLE int32_t U_EXPORT2 +uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the start of the trailing substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Malformed byte sequences are treated according to contains(0xfffd). + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string (UTF-8) + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the start of the trailing substring according to the spanCondition; + * the string length if the end of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_STABLE int32_t U_EXPORT2 +uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns true if set1 contains all of the characters and strings + * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_equals(const USet* set1, const USet* set2); + +/********************************************************************* + * Serialized set API + *********************************************************************/ + +/** + * Serializes this set into an array of 16-bit integers. Serialization + * (currently) only records the characters in the set; multicharacter + * strings are ignored. + * + * The array + * has following format (each line is one 16-bit integer): + * + * length = (n+2*m) | (m!=0?0x8000:0) + * bmpLength = n; present if m!=0 + * bmp[0] + * bmp[1] + * ... + * bmp[n-1] + * supp-high[0] + * supp-low[0] + * supp-high[1] + * supp-low[1] + * ... + * supp-high[m-1] + * supp-low[m-1] + * + * The array starts with a header. After the header are n bmp + * code points, then m supplementary code points. Either n or m + * or both may be zero. n+2*m is always <= 0x7FFF. + * + * If there are no supplementary characters (if m==0) then the + * header is one 16-bit integer, 'length', with value n. + * + * If there are supplementary characters (if m!=0) then the header + * is two 16-bit integers. The first, 'length', has value + * (n+2*m)|0x8000. The second, 'bmpLength', has value n. + * + * After the header the code points are stored in ascending order. + * Supplementary code points are stored as most significant 16 + * bits followed by least significant 16 bits. + * + * @param set the set + * @param dest pointer to buffer of destCapacity 16-bit integers. + * May be NULL only if destCapacity is zero. + * @param destCapacity size of dest, or zero. Must not be negative. + * @param pErrorCode pointer to the error code. Will be set to + * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to + * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity. + * @return the total length of the serialized format, including + * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other + * than U_BUFFER_OVERFLOW_ERROR. + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode); + +/** + * Given a serialized array, fill in the given serialized set object. + * @param fillSet pointer to result + * @param src pointer to start of array + * @param srcLength length of array + * @return true if the given array is valid, otherwise false + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength); + +/** + * Set the USerializedSet to contain the given character (and nothing + * else). + * @param fillSet pointer to result + * @param c The codepoint to set + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c); + +/** + * Returns TRUE if the given USerializedSet contains the given + * character. + * @param set the serialized set + * @param c The codepoint to check for within the set + * @return true if set contains c + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_serializedContains(const USerializedSet* set, UChar32 c); + +/** + * Returns the number of disjoint ranges of characters contained in + * the given serialized set. Ignores any strings contained in the + * set. + * @param set the serialized set + * @return a non-negative integer counting the character ranges + * contained in set + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_getSerializedRangeCount(const USerializedSet* set); + +/** + * Returns a range of characters contained in the given serialized + * set. + * @param set the serialized set + * @param rangeIndex a non-negative integer in the range 0.. + * uset_getSerializedRangeCount(set)-1 + * @param pStart pointer to variable to receive first character + * in range, inclusive + * @param pEnd pointer to variable to receive last character in range, + * inclusive + * @return true if rangeIndex is valid, otherwise false + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, + UChar32* pStart, UChar32* pEnd); + +#endif diff --git a/Source/WTF/icu/unicode/ustring.h b/Source/WTF/icu/unicode/ustring.h new file mode 100644 index 000000000..d2ea31c67 --- /dev/null +++ b/Source/WTF/icu/unicode/ustring.h @@ -0,0 +1,1703 @@ +/* +********************************************************************** +* Copyright (C) 1998-2012, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File ustring.h +* +* Modification History: +* +* Date Name Description +* 12/07/98 bertrand Creation. +****************************************************************************** +*/ + +#ifndef USTRING_H +#define USTRING_H + +#include "unicode/utypes.h" +#include "unicode/putil.h" +#include "unicode/uiter.h" + +/** + * \def UBRK_TYPEDEF_UBREAK_ITERATOR + * @internal + */ + +#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR +# define UBRK_TYPEDEF_UBREAK_ITERATOR +/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/ + typedef struct UBreakIterator UBreakIterator; +#endif + +/** + * \file + * \brief C API: Unicode string handling functions + * + * These C API functions provide general Unicode string handling. + * + * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h> + * functions. (For example, they do not check for bad arguments like NULL string pointers.) + * In some cases, only the thread-safe variant of such a function is implemented here + * (see u_strtok_r()). + * + * Other functions provide more Unicode-specific functionality like locale-specific + * upper/lower-casing and string comparison in code point order. + * + * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. + * UTF-16 encodes each Unicode code point with either one or two UChar code units. + * (This is the default form of Unicode, and a forward-compatible extension of the original, + * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 + * in 1996.) + * + * Some APIs accept a 32-bit UChar32 value for a single code point. + * + * ICU also handles 16-bit Unicode text with unpaired surrogates. + * Such text is not well-formed UTF-16. + * Code-point-related functions treat unpaired surrogates as surrogate code points, + * i.e., as separate units. + * + * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), + * it is much more efficient even for random access because the code unit values + * for single-unit characters vs. lead units vs. trail units are completely disjoint. + * This means that it is easy to determine character (code point) boundaries from + * random offsets in the string. + * + * Unicode (UTF-16) string processing is optimized for the single-unit case. + * Although it is important to support supplementary characters + * (which use pairs of lead/trail code units called "surrogates"), + * their occurrence is rare. Almost all characters in modern use require only + * a single UChar code unit (i.e., their code point values are <=0xffff). + * + * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html). + * For a discussion of the handling of unpaired surrogates see also + * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18. + */ + +/** + * \defgroup ustring_ustrlen String Length + * \ingroup ustring_strlen + */ +/*@{*/ +/** + * Determine the length of an array of UChar. + * + * @param s The array of UChars, NULL (U+0000) terminated. + * @return The number of UChars in <code>chars</code>, minus the terminator. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strlen(const UChar *s); +/*@}*/ + +/** + * Count Unicode code points in the length UChar code units of the string. + * A code point may occupy either one or two UChar code units. + * Counting code points involves reading all code units. + * + * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h). + * + * @param s The input string. + * @param length The number of UChar code units to be checked, or -1 to count all + * code points before the first NUL (U+0000). + * @return The number of code points in the specified code units. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_countChar32(const UChar *s, int32_t length); + +/** + * Check if the string contains more Unicode code points than a certain number. + * This is more efficient than counting all code points in the entire string + * and comparing that number with a threshold. + * This function may not need to scan the string at all if the length is known + * (not -1 for NUL-termination) and falls within a certain range, and + * never needs to count more than 'number+1' code points. + * Logically equivalent to (u_countChar32(s, length)>number). + * A Unicode code point may occupy either one or two UChar code units. + * + * @param s The input string. + * @param length The length of the string, or -1 if it is NUL-terminated. + * @param number The number of code points in the string is compared against + * the 'number' parameter. + * @return Boolean value for whether the string contains more Unicode code points + * than 'number'. Same as (u_countChar32(s, length)>number). + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number); + +/** + * Concatenate two ustrings. Appends a copy of <code>src</code>, + * including the null terminator, to <code>dst</code>. The initial copied + * character from <code>src</code> overwrites the null terminator in <code>dst</code>. + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strcat(UChar *dst, + const UChar *src); + +/** + * Concatenate two ustrings. + * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>. + * Adds a terminating NUL. + * If src is too long, then only <code>n-1</code> characters will be copied + * before the terminating NUL. + * If <code>n<=0</code> then dst is not modified. + * + * @param dst The destination string. + * @param src The source string (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to append; no-op if <=0. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strncat(UChar *dst, + const UChar *src, + int32_t n); + +/** + * Find the first occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search (NUL-terminated). + * @param substring The substring to find (NUL-terminated). + * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strrstr + * @see u_strFindFirst + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strstr(const UChar *s, const UChar *substring); + +/** + * Find the first occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search. + * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. + * @param substring The substring to find (NUL-terminated). + * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. + * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); + +/** + * Find the first occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The BMP code point to find. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr32 + * @see u_memchr + * @see u_strstr + * @see u_strFindFirst + */ +U_STABLE UChar * U_EXPORT2 +u_strchr(const UChar *s, UChar c); + +/** + * Find the first occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The code point to find. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr + * @see u_memchr32 + * @see u_strstr + * @see u_strFindFirst + */ +U_STABLE UChar * U_EXPORT2 +u_strchr32(const UChar *s, UChar32 c); + +/** + * Find the last occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search (NUL-terminated). + * @param substring The substring to find (NUL-terminated). + * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindFirst + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrstr(const UChar *s, const UChar *substring); + +/** + * Find the last occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search. + * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. + * @param substring The substring to find (NUL-terminated). + * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. + * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); + +/** + * Find the last occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The BMP code point to find. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr32 + * @see u_memrchr + * @see u_strrstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrchr(const UChar *s, UChar c); + +/** + * Find the last occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The code point to find. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr + * @see u_memchr32 + * @see u_strrstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrchr32(const UChar *s, UChar32 c); + +/** + * Locates the first occurrence in the string <code>string</code> of any of the characters + * in the string <code>matchSet</code>. + * Works just like C's strpbrk but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return A pointer to the character in <code>string</code> that matches one of the + * characters in <code>matchSet</code>, or NULL if no such character is found. + * @stable ICU 2.0 + */ +U_STABLE UChar * U_EXPORT2 +u_strpbrk(const UChar *string, const UChar *matchSet); + +/** + * Returns the number of consecutive characters in <code>string</code>, + * beginning with the first, that do not occur somewhere in <code>matchSet</code>. + * Works just like C's strcspn but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return The number of initial characters in <code>string</code> that do not + * occur in <code>matchSet</code>. + * @see u_strspn + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcspn(const UChar *string, const UChar *matchSet); + +/** + * Returns the number of consecutive characters in <code>string</code>, + * beginning with the first, that occur somewhere in <code>matchSet</code>. + * Works just like C's strspn but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return The number of initial characters in <code>string</code> that do + * occur in <code>matchSet</code>. + * @see u_strcspn + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strspn(const UChar *string, const UChar *matchSet); + +/** + * The string tokenizer API allows an application to break a string into + * tokens. Unlike strtok(), the saveState (the current pointer within the + * original string) is maintained in saveState. In the first call, the + * argument src is a pointer to the string. In subsequent calls to + * return successive tokens of that string, src must be specified as + * NULL. The value saveState is set by this function to maintain the + * function's position within the string, and on each subsequent call + * you must give this argument the same variable. This function does + * handle surrogate pairs. This function is similar to the strtok_r() + * the POSIX Threads Extension (1003.1c-1995) version. + * + * @param src String containing token(s). This string will be modified. + * After the first call to u_strtok_r(), this argument must + * be NULL to get to the next token. + * @param delim Set of delimiter characters (Unicode code points). + * @param saveState The current pointer within the original string, + * which is set by this function. The saveState + * parameter should the address of a local variable of type + * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use + * &myLocalSaveState for this parameter). + * @return A pointer to the next token found in src, or NULL + * when there are no more tokens. + * @stable ICU 2.0 + */ +U_STABLE UChar * U_EXPORT2 +u_strtok_r(UChar *src, + const UChar *delim, + UChar **saveState); + +/** + * Compare two Unicode strings for bitwise equality (code unit order). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative + * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive + * value if <code>s1</code> is bitwise greater than <code>s2</code>. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcmp(const UChar *s1, + const UChar *s2); + +/** + * Compare two Unicode strings in code point order. + * See u_strCompare for details. + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcmpCodePointOrder(const UChar *s1, const UChar *s2); + +/** + * Compare two Unicode strings (binary order). + * + * The comparison can be done in code unit order or in code point order. + * They differ only in UTF-16 when + * comparing supplementary code points (U+10000..U+10ffff) + * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). + * In code unit order, high BMP code points sort after supplementary code points + * because they are stored as pairs of surrogates which are at U+d800..U+dfff. + * + * This functions works with strings of different explicitly specified lengths + * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. + * NUL-terminated strings are possible with length arguments of -1. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param codePointOrder Choose between code unit order (FALSE) + * and code point order (TRUE). + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_strCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + UBool codePointOrder); + +/** + * Compare two Unicode strings (binary order) + * as presented by UCharIterator objects. + * Works otherwise just like u_strCompare(). + * + * Both iterators are reset to their start positions. + * When the function returns, it is undefined where the iterators + * have stopped. + * + * @param iter1 First source string iterator. + * @param iter2 Second source string iterator. + * @param codePointOrder Choose between code unit order (FALSE) + * and code point order (TRUE). + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @see u_strCompare + * + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); + +#ifndef U_COMPARE_CODE_POINT_ORDER +/* see also unistr.h and unorm.h */ +/** + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: + * Compare strings in code point order instead of code unit order. + * @stable ICU 2.2 + */ +#define U_COMPARE_CODE_POINT_ORDER 0x8000 +#endif + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to + * u_strCompare(u_strFoldCase(s1, options), + * u_strFoldCase(s2, options), + * (options&U_COMPARE_CODE_POINT_ORDER)!=0). + * + * The comparison can be done in UTF-16 code unit order or in code point order. + * They differ only when comparing supplementary code points (U+10000..U+10ffff) + * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). + * In code unit order, high BMP code points sort after supplementary code points + * because they are stored as pairs of surrogates which are at U+d800..U+dfff. + * + * This functions works with strings of different explicitly specified lengths + * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. + * NUL-terminated strings are possible with length arguments of -1. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_strCaseCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Compare two ustrings for bitwise equality. + * Compares at most <code>n</code> characters. + * + * @param ucs1 A string to compare (can be NULL/invalid if n<=0). + * @param ucs2 A string to compare (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to compare; always returns 0 if n<=0. + * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative + * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive + * value if <code>s1</code> is bitwise greater than <code>s2</code>. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncmp(const UChar *ucs1, + const UChar *ucs2, + int32_t n); + +/** + * Compare two Unicode strings in code point order. + * This is different in UTF-16 from u_strncmp() if supplementary characters are present. + * For details, see u_strCompare(). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param n The maximum number of characters to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), + * u_strFoldCase(s2, at most n, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param n The maximum number of characters each string to case-fold and then compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), + * u_strFoldCase(s2, n, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param length The number of characters in each string to case-fold and then compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options); + +/** + * Copy a ustring. Adds a null terminator. + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strcpy(UChar *dst, + const UChar *src); + +/** + * Copy a ustring. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * + * @param dst The destination string. + * @param src The source string (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to copy; no-op if <=0. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strncpy(UChar *dst, + const UChar *src, + int32_t n); + +#if !UCONFIG_NO_CONVERSION + +/** + * Copy a byte string encoded in the default codepage to a ustring. + * Adds a null terminator. + * Performs a host byte to UChar conversion + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst, + const char *src ); + +/** + * Copy a byte string encoded in the default codepage to a ustring. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * Performs a host byte to UChar conversion + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst, + const char *src, + int32_t n); + +/** + * Copy ustring to a byte string encoded in the default codepage. + * Adds a null terminator. + * Performs a UChar to host byte conversion + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE char* U_EXPORT2 u_austrcpy(char *dst, + const UChar *src ); + +/** + * Copy ustring to a byte string encoded in the default codepage. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * Performs a UChar to host byte conversion + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE char* U_EXPORT2 u_austrncpy(char *dst, + const UChar *src, + int32_t n ); + +#endif + +/** + * Synonym for memcpy(), but with UChars only. + * @param dest The destination string + * @param src The source string (can be NULL/invalid if count<=0) + * @param count The number of characters to copy; no-op if <=0 + * @return A pointer to <code>dest</code> + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memcpy(UChar *dest, const UChar *src, int32_t count); + +/** + * Synonym for memmove(), but with UChars only. + * @param dest The destination string + * @param src The source string (can be NULL/invalid if count<=0) + * @param count The number of characters to move; no-op if <=0 + * @return A pointer to <code>dest</code> + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memmove(UChar *dest, const UChar *src, int32_t count); + +/** + * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>. + * + * @param dest The destination string. + * @param c The character to initialize the string. + * @param count The maximum number of characters to set. + * @return A pointer to <code>dest</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memset(UChar *dest, UChar c, int32_t count); + +/** + * Compare the first <code>count</code> UChars of each buffer. + * + * @param buf1 The first string to compare. + * @param buf2 The second string to compare. + * @param count The maximum number of UChars to compare. + * @return When buf1 < buf2, a negative number is returned. + * When buf1 == buf2, 0 is returned. + * When buf1 > buf2, a positive number is returned. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count); + +/** + * Compare two Unicode strings in code point order. + * This is different in UTF-16 from u_memcmp() if supplementary characters are present. + * For details, see u_strCompare(). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param count The maximum number of characters to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count); + +/** + * Find the first occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The BMP code point to find. + * @param count The length of the string. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr + * @see u_memchr32 + * @see u_strFindFirst + */ +U_STABLE UChar* U_EXPORT2 +u_memchr(const UChar *s, UChar c, int32_t count); + +/** + * Find the first occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The code point to find. + * @param count The length of the string. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr32 + * @see u_memchr + * @see u_strFindFirst + */ +U_STABLE UChar* U_EXPORT2 +u_memchr32(const UChar *s, UChar32 c, int32_t count); + +/** + * Find the last occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The BMP code point to find. + * @param count The length of the string. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr + * @see u_memrchr32 + * @see u_strFindLast + */ +U_STABLE UChar* U_EXPORT2 +u_memrchr(const UChar *s, UChar c, int32_t count); + +/** + * Find the last occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The code point to find. + * @param count The length of the string. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr32 + * @see u_memrchr + * @see u_strFindLast + */ +U_STABLE UChar* U_EXPORT2 +u_memrchr32(const UChar *s, UChar32 c, int32_t count); + +/** + * Unicode String literals in C. + * We need one macro to declare a variable for the string + * and to statically preinitialize it if possible, + * and a second macro to dynamically intialize such a string variable if necessary. + * + * The macros are defined for maximum performance. + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * A pair of macros for a single string must be used with the same + * parameters. + * The string parameter must be a C string literal. + * The length of the string, not including the terminating + * <code>NUL</code>, must be specified as a constant. + * The U_STRING_DECL macro should be invoked exactly once for one + * such string variable before it is used. + * + * Usage: + * <pre> + * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11); + * U_STRING_DECL(ustringVar2, "jumps 5%", 8); + * static UBool didInit=FALSE; + * + * int32_t function() { + * if(!didInit) { + * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11); + * U_STRING_INIT(ustringVar2, "jumps 5%", 8); + * didInit=TRUE; + * } + * return u_strcmp(ustringVar1, ustringVar2); + * } + * </pre> + * + * Note that the macros will NOT consistently work if their argument is another <code>#define</code>. + * The following will not work on all platforms, don't use it. + * + * <pre> + * #define GLUCK "Mr. Gluck" + * U_STRING_DECL(var, GLUCK, 9) + * U_STRING_INIT(var, GLUCK, 9) + * </pre> + * + * Instead, use the string literal "Mr. Gluck" as the argument to both macro + * calls. + * + * + * @stable ICU 2.0 + */ +#if defined(U_DECLARE_UTF16) +# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs) + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) +# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY +# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#else +# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1] + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1) +#endif + +/** + * Unescape a string of characters and write the resulting + * Unicode characters to the destination buffer. The following escape + * sequences are recognized: + * + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] + * \\Uhhhhhhhh 8 hex digits + * \\xhh 1-2 hex digits + * \\x{h...} 1-8 hex digits + * \\ooo 1-3 octal digits; o in [0-7] + * \\cX control-X; X is masked with 0x1F + * + * as well as the standard ANSI C escapes: + * + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + * + * Anything else following a backslash is generically escaped. For + * example, "[a\\-z]" returns "[a-z]". + * + * If an escape sequence is ill-formed, this method returns an empty + * string. An example of an ill-formed sequence is "\\u" followed by + * fewer than 4 hex digits. + * + * The above characters are recognized in the compiler's codepage, + * that is, they are coded as 'u', '\\', etc. Characters that are + * not parts of escape sequences are converted using u_charsToUChars(). + * + * This function is similar to UnicodeString::unescape() but not + * identical to it. The latter takes a source UnicodeString, so it + * does escape recognition but no conversion. + * + * @param src a zero-terminated string of invariant characters + * @param dest pointer to buffer to receive converted and unescaped + * text and, if there is room, a zero terminator. May be NULL for + * preflighting, in which case no UChars will be written, but the + * return value will still be valid. On error, an empty string is + * stored here (if possible). + * @param destCapacity the number of UChars that may be written at + * dest. Ignored if dest == NULL. + * @return the length of unescaped string. + * @see u_unescapeAt + * @see UnicodeString#unescape() + * @see UnicodeString#unescapeAt() + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_unescape(const char *src, + UChar *dest, int32_t destCapacity); + +U_CDECL_BEGIN +/** + * Callback function for u_unescapeAt() that returns a character of + * the source text given an offset and a context pointer. The context + * pointer will be whatever is passed into u_unescapeAt(). + * + * @param offset pointer to the offset that will be passed to u_unescapeAt(). + * @param context an opaque pointer passed directly into u_unescapeAt() + * @return the character represented by the escape sequence at + * offset + * @see u_unescapeAt + * @stable ICU 2.0 + */ +typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context); +U_CDECL_END + +/** + * Unescape a single sequence. The character at offset-1 is assumed + * (without checking) to be a backslash. This method takes a callback + * pointer to a function that returns the UChar at a given offset. By + * varying this callback, ICU functions are able to unescape char* + * strings, UnicodeString objects, and UFILE pointers. + * + * If offset is out of range, or if the escape sequence is ill-formed, + * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape() + * for a list of recognized sequences. + * + * @param charAt callback function that returns a UChar of the source + * text given an offset and a context pointer. + * @param offset pointer to the offset that will be passed to charAt. + * The offset value will be updated upon return to point after the + * last parsed character of the escape sequence. On error the offset + * is unchanged. + * @param length the number of characters in the source text. The + * last character of the source text is considered to be at offset + * length-1. + * @param context an opaque pointer passed directly into charAt. + * @return the character represented by the escape sequence at + * offset, or (UChar32)0xFFFFFFFF on error. + * @see u_unescape() + * @see UnicodeString#unescape() + * @see UnicodeString#unescapeAt() + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_unescapeAt(UNESCAPE_CHAR_AT charAt, + int32_t *offset, + int32_t length, + void *context); + +/** + * Uppercase the characters in a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strToUpper(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +/** + * Lowercase the characters in a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strToLower(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** + * Titlecase a string. + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (NULL), then a standard titlecase + * break iterator is opened. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.1 + */ +U_STABLE int32_t U_EXPORT2 +u_strToTitle(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UBreakIterator *titleIter, + const char *locale, + UErrorCode *pErrorCode); + +#endif + +/** + * Case-folds the characters in a string. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strFoldCase(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + uint32_t options, + UErrorCode *pErrorCode); + +#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION +/** + * Convert a UTF-16 string to a wchar_t string. + * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then + * this function simply calls the fast, dedicated function for that. + * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE wchar_t* U_EXPORT2 +u_strToWCS(wchar_t *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); +/** + * Convert a wchar_t string to UTF-16. + * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then + * this function simply calls the fast, dedicated function for that. + * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromWCS(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const wchar_t *src, + int32_t srcLength, + UErrorCode *pErrorCode); +#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */ + +/** + * Convert a UTF-16 string to UTF-8. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + * @see u_strToUTF8WithSub + * @see u_strFromUTF8 + */ +U_STABLE char* U_EXPORT2 +u_strToUTF8(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + * @see u_strFromUTF8WithSub + * @see u_strFromUTF8Lenient + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF8(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-8. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * Same as u_strToUTF8() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strToUTF8 + * @see u_strFromUTF8WithSub + * @stable ICU 3.6 + */ +U_STABLE char* U_EXPORT2 +u_strToUTF8WithSub(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * Same as u_strFromUTF8() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF8 + * @see u_strFromUTF8Lenient + * @see u_strToUTF8WithSub + * @stable ICU 3.6 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF8WithSub(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * + * Same as u_strFromUTF8() except that this function is designed to be very fast, + * which it achieves by being lenient about malformed UTF-8 sequences. + * This function is intended for use in environments where UTF-8 text is + * expected to be well-formed. + * + * Its semantics are: + * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text. + * - The function will not read beyond the input string, nor write beyond + * the destCapacity. + * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not + * be well-formed UTF-16. + * The function will resynchronize to valid code point boundaries + * within a small number of code points after an illegal sequence. + * - Non-shortest forms are not detected and will result in "spoofing" output. + * + * For further performance improvement, if srcLength is given (>=0), + * then it must be destCapacity>=srcLength. + * + * There is no inverse u_strToUTF8Lenient() function because there is practically + * no performance gain from not checking that a UTF-16 string is well-formed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * Unlike for other ICU functions, if srcLength>=0 then it + * must be destCapacity>=srcLength. + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * Unlike for other ICU functions, if srcLength>=0 but + * destCapacity<srcLength, then *pDestLength will be set to srcLength + * (and U_BUFFER_OVERFLOW_ERROR will be set) + * regardless of the actual result length. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF8 + * @see u_strFromUTF8WithSub + * @see u_strToUTF8WithSub + * @stable ICU 3.6 + */ +U_STABLE UChar * U_EXPORT2 +u_strFromUTF8Lenient(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-32. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @see u_strToUTF32WithSub + * @see u_strFromUTF32 + * @stable ICU 2.0 + */ +U_STABLE UChar32* U_EXPORT2 +u_strToUTF32(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-32 string to UTF-16. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @see u_strFromUTF32WithSub + * @see u_strToUTF32 + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF32(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-32. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * Same as u_strToUTF32() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strToUTF32 + * @see u_strFromUTF32WithSub + * @stable ICU 4.2 + */ +U_STABLE UChar32* U_EXPORT2 +u_strToUTF32WithSub(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-32 string to UTF-16. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * Same as u_strFromUTF32() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF32 + * @see u_strToUTF32WithSub + * @stable ICU 4.2 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF32WithSub(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a 16-bit Unicode string to Java Modified UTF-8. + * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8 + * + * This function behaves according to the documentation for Java DataOutput.writeUTF() + * except that it does not encode the output length in the destination buffer + * and does not have an output length restriction. + * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String) + * + * The input string need not be well-formed UTF-16. + * (Therefore there is no subchar parameter.) + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @stable ICU 4.4 + * @see u_strToUTF8WithSub + * @see u_strFromJavaModifiedUTF8WithSub + */ +U_STABLE char* U_EXPORT2 +u_strToJavaModifiedUTF8( + char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a Java Modified UTF-8 string to a 16-bit Unicode string. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * This function behaves according to the documentation for Java DataInput.readUTF() + * except that it takes a length parameter rather than + * interpreting the first two input bytes as the length. + * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF() + * + * The output string may not be well-formed UTF-16. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF8WithSub + * @see u_strFromUTF8Lenient + * @see u_strToJavaModifiedUTF8 + * @stable ICU 4.4 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromJavaModifiedUTF8WithSub( + UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +#endif diff --git a/Source/WTF/icu/unicode/utf.h b/Source/WTF/icu/unicode/utf.h new file mode 100644 index 000000000..f5954fe9f --- /dev/null +++ b/Source/WTF/icu/unicode/utf.h @@ -0,0 +1,223 @@ +/* +******************************************************************************* +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: Code point macros + * + * This file defines macros for checking whether a code point is + * a surrogate or a non-character etc. + * + * The UChar and UChar32 data types for Unicode code units and code points + * are defined in umachine.h because they can be machine-dependent. + * + * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h + * and itself includes utf8.h and utf16.h after some + * common definitions. + * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be + * included explicitly if their definitions are used. + * + * utf8.h and utf16.h define macros for efficiently getting code points + * in and out of UTF-8/16 strings. + * utf16.h macros have "U16_" prefixes. + * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling. + * + * ICU mostly processes 16-bit Unicode strings. + * Most of the time, such strings are well-formed UTF-16. + * Single, unpaired surrogates must be handled as well, and are treated in ICU + * like regular code points where possible. + * (Pairs of surrogate code points are indistinguishable from supplementary + * code points encoded as pairs of supplementary code units.) + * + * In fact, almost all Unicode code points in normal text (>99%) + * are on the BMP (<=U+ffff) and even <=U+d7ff. + * ICU functions handle supplementary code points (U+10000..U+10ffff) + * but are optimized for the much more frequently occurring BMP code points. + * + * umachine.h defines UChar to be an unsigned 16-bit integer. + * Where available, UChar is defined to be a char16_t + * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t. + * + * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit + * Unicode code point (Unicode scalar value, 0..0x10ffff). + * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as + * the definition of UChar. For details see the documentation for UChar32 itself. + * + * utf.h defines a small number of C macros for single Unicode code points. + * These are simple checks for surrogates and non-characters. + * For actual Unicode character properties see uchar.h. + * + * By default, string operations must be done with error checking in case + * a string is not well-formed UTF-16. + * The macros will detect if a surrogate code unit is unpaired + * (lead unit without trail unit or vice versa) and just return the unit itself + * as the code point. + * + * The regular "safe" macros require that the initial, passed-in string index + * is within bounds. They only check the index when they read more than one + * code unit. This is usually done with code similar to the following loop: + * <pre>while(i<length) { + * U16_NEXT(s, i, length, c); + * // use c + * }</pre> + * + * When it is safe to assume that text is well-formed UTF-16 + * (does not contain single, unpaired surrogates), then one can use + * U16_..._UNSAFE macros. + * These do not check for proper code unit sequences or truncated text and may + * yield wrong results or even cause a crash if they are used with "malformed" + * text. + * In practice, U16_..._UNSAFE macros will produce slightly less code but + * should not be faster because the processing is only different when a + * surrogate code unit is detected, which will be rare. + * + * Similarly for UTF-8, there are "safe" macros without a suffix, + * and U8_..._UNSAFE versions. + * The performance differences are much larger here because UTF-8 provides so + * many opportunities for malformed sequences. + * The unsafe UTF-8 macros are entirely implemented inside the macro definitions + * and are fast, while the safe UTF-8 macros call functions for all but the + * trivial (ASCII) cases. + * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common + * characters inline as well.) + * + * Unlike with UTF-16, malformed sequences cannot be expressed with distinct + * code point values (0..U+10ffff). They are indicated with negative values instead. + * + * For more information see the ICU User Guide Strings chapter + * (http://userguide.icu-project.org/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + * + * @stable ICU 2.4 + */ + +#ifndef __UTF_H__ +#define __UTF_H__ + +#include "unicode/umachine.h" +/* include the utfXX.h after the following definitions */ + +/* single-code point definitions -------------------------------------------- */ + +/** + * Is this code point a Unicode noncharacter? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ + (uint32_t)(c)<=0x10ffff) + +/** + * Is c a Unicode code point value (0..U+10ffff) + * that can be assigned a character? + * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + ((uint32_t)(c)>0xdfff && \ + (uint32_t)(c)<=0x10ffff && \ + !U_IS_UNICODE_NONCHAR(c))) + +/** + * Is this code point a BMP code point (U+0000..U+ffff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.8 + */ +#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff) + +/** + * Is this code point a supplementary code point (U+10000..U+10ffff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.8 + */ +#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff) + +/** + * Is this code point a lead surrogate (U+d800..U+dbff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code point a trail surrogate (U+dc00..U+dfff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code point a surrogate (U+d800..U+dfff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a trail surrogate? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 4.2 + */ +#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) + +/* include the utfXX.h ------------------------------------------------------ */ + +#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS + +#include "unicode/utf8.h" +#include "unicode/utf16.h" + +/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */ +#include "unicode/utf_old.h" + +#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */ + +#endif /* __UTF_H__ */ diff --git a/Source/WTF/icu/unicode/utf16.h b/Source/WTF/icu/unicode/utf16.h new file mode 100644 index 000000000..bdd88a8b9 --- /dev/null +++ b/Source/WTF/icu/unicode/utf16.h @@ -0,0 +1,623 @@ +/* +******************************************************************************* +* +* Copyright (C) 1999-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf16.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 16-bit Unicode handling macros + * + * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (http://userguide.icu-project.org/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + */ + +#ifndef __UTF16_H__ +#define __UTF16_H__ + +#include "unicode/umachine.h" +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) + +/** + * Is this code unit a lead surrogate (U+d800..U+dbff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code unit a trail surrogate (U+dc00..U+dfff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code unit a surrogate (U+d800..U+dfff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a trail surrogate? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 4.2 + */ +#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) + +/** + * Helper constant for U16_GET_SUPPLEMENTARY. + * @internal + */ +#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) + +/** + * Get a supplementary code point value (U+10000..U+10ffff) + * from its lead and trail surrogates. + * The result is undefined if the input values are not + * lead and trail surrogates. + * + * @param lead lead surrogate (U+d800..U+dbff) + * @param trail trail surrogate (U+dc00..U+dfff) + * @return supplementary code point (U+10000..U+10ffff) + * @stable ICU 2.4 + */ +#define U16_GET_SUPPLEMENTARY(lead, trail) \ + (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) + + +/** + * Get the lead surrogate (0xd800..0xdbff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return lead surrogate (U+d800..U+dbff) for supplementary + * @stable ICU 2.4 + */ +#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) + +/** + * Get the trail surrogate (0xdc00..0xdfff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return trail surrogate (U+dc00..U+dfff) for supplementary + * @stable ICU 2.4 + */ +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +/** + * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) + * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). + * @param c 32-bit code point + * @return 1 or 2 + * @stable ICU 2.4 + */ +#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** + * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). + * @return 2 + * @stable ICU 2.4 + */ +#define U16_MAX_LENGTH 2 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * The result is undefined if the offset points to a single, unpaired surrogate. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_GET + * @stable ICU 2.4 + */ +#define U16_GET_UNSAFE(s, i, c) { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ + } else { \ + (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ + } \ + } \ +} + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then that itself + * will be returned as the code point. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_GET_UNSAFE + * @stable ICU 2.4 + */ +#define U16_GET(s, start, i, length, c) { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } else { \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ + } \ +} + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset points to a single, unpaired lead surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_NEXT + * @stable ICU 2.4 + */ +#define U16_NEXT_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ + } \ +} + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then that itself + * will be returned as the code point. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_NEXT(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + uint16_t __c2; \ + if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const UChar * string buffer + * @param i string offset + * @param c code point to append + * @see U16_APPEND + * @stable ICU 2.4 + */ +#define U16_APPEND_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a surrogate pair is written, checks for sufficient space in the string. + * If the code point is not valid or a trail surrogate does not fit, + * then isError is set to TRUE. + * + * @param s const UChar * string buffer + * @param i string offset, must be i<capacity + * @param capacity size of the string buffer + * @param c code point to append + * @param isError output UBool set to TRUE if an error occurs, otherwise not modified + * @see U16_APPEND_UNSAFE + * @stable ICU 2.4 + */ +#define U16_APPEND(s, i, capacity, c, isError) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } else /* c>0x10ffff or not enough space */ { \ + (isError)=TRUE; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_FWD_1 + * @stable ICU 2.4 + */ +#define U16_FWD_1_UNSAFE(s, i) { \ + if(U16_IS_LEAD((s)[(i)++])) { \ + ++(i); \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @see U16_FWD_1_UNSAFE + * @stable ICU 2.4 + */ +#define U16_FWD_1(s, i, length) { \ + if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_FWD_N + * @stable ICU 2.4 + */ +#define U16_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param n number of code points to skip + * @see U16_FWD_N_UNSAFE + * @stable ICU 2.4 + */ +#define U16_FWD_N(s, i, length, n) { \ + int32_t __N=(n); \ + while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U16_FWD_1(s, i, length); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_START + * @stable ICU 2.4 + */ +#define U16_SET_CP_START_UNSAFE(s, i) { \ + if(U16_IS_TRAIL((s)[i])) { \ + --(i); \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i + * @see U16_SET_CP_START_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_START(s, start, i) { \ + if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind a single, unpaired trail surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_PREV + * @stable ICU 2.4 + */ +#define U16_PREV_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then that itself + * will be returned as the code point. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @param c output UChar32 variable + * @see U16_PREV_UNSAFE + * @stable ICU 2.4 + */ +#define U16_PREV(s, start, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + uint16_t __c2; \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_BACK_1 + * @stable ICU 2.4 + */ +#define U16_BACK_1_UNSAFE(s, i) { \ + if(U16_IS_TRAIL((s)[--(i)])) { \ + --(i); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @see U16_BACK_1_UNSAFE + * @stable ICU 2.4 + */ +#define U16_BACK_1(s, start, i) { \ + if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_BACK_N + * @stable ICU 2.4 + */ +#define U16_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start start of string + * @param i string offset, must be start<i + * @param n number of code points to skip + * @see U16_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U16_BACK_N(s, start, i, n) { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U16_BACK_1(s, start, i); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \ + if(U16_IS_LEAD((s)[(i)-1])) { \ + ++(i); \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, start<=i<=length + * @param length int32_t string length + * @see U16_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT(s, start, i, length) { \ + if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} + +#endif diff --git a/Source/WTF/icu/unicode/utf8.h b/Source/WTF/icu/unicode/utf8.h new file mode 100644 index 000000000..21e5f3d04 --- /dev/null +++ b/Source/WTF/icu/unicode/utf8.h @@ -0,0 +1,830 @@ +/* +******************************************************************************* +* +* Copyright (C) 1999-2013, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf8.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 8-bit Unicode handling macros + * + * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (http://userguide.icu-project.org/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + */ + +#ifndef __UTF8_H__ +#define __UTF8_H__ + +#include "unicode/umachine.h" +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* internal definitions ----------------------------------------------------- */ + +/** + * \var utf8_countTrailBytes + * Internal array with numbers of trail bytes for any given byte used in + * lead byte position. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable, + * and should not be hidden when other internal functions are hidden (otherwise + * public macros would fail to compile). + * @internal + */ +#ifdef U_UTF8_IMPL +U_EXPORT const uint8_t +#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) +U_CFUNC const uint8_t +#else +U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/ +#endif +utf8_countTrailBytes[256]; + +/** + * Counts the trail bytes for a UTF-8 lead byte. + * Returns 0 for 0..0xbf as well as for 0xfe and 0xff. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * + * Note: Beginning with ICU 50, the implementation uses a multi-condition expression + * which was shown in 2012 (on x86-64) to compile to fast, branch-free code. + * leadByte is evaluated multiple times. + * + * The pre-ICU 50 implementation used the exported array utf8_countTrailBytes: + * #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte]) + * leadByte was evaluated exactly once. + * + * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. + * @internal + */ +#define U8_COUNT_TRAIL_BYTES(leadByte) \ + ((leadByte)<0xf0 ? \ + ((leadByte)>=0xc0)+((leadByte)>=0xe0) : \ + (leadByte)<0xfe ? 3+((leadByte)>=0xf8)+((leadByte)>=0xfc) : 0) + +/** + * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence. + * The maximum supported lead byte is 0xf4 corresponding to U+10FFFF. + * leadByte might be evaluated multiple times. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * + * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. + * @internal + */ +#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \ + (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0)) + +/** + * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * @internal + */ +#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) + +/** + * Function for handling "next code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_STABLE UChar32 U_EXPORT2 +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); + +/** + * Function for handling "append code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_STABLE int32_t U_EXPORT2 +utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError); + +/** + * Function for handling "previous code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_STABLE UChar32 U_EXPORT2 +utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); + +/** + * Function for handling "skip backward one code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_STABLE int32_t U_EXPORT2 +utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_SINGLE(c) (((c)&0x80)==0) + +/** + * Is this code unit (byte) a UTF-8 lead byte? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e) + +/** + * Is this code unit (byte) a UTF-8 trail byte? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80) + +/** + * How many code units (bytes) are used for the UTF-8 encoding + * of this Unicode code point? + * @param c 32-bit code point + * @return 1..4, or 0 if c is a surrogate or not a Unicode code point + * @stable ICU 2.4 + */ +#define U8_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xd7ff ? 3 : \ + ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ + ((uint32_t)(c)<=0xffff ? 3 : 4)\ + ) \ + ) \ + ) \ + ) + +/** + * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). + * @return 4 + * @stable ICU 2.4 + */ +#define U8_MAX_LENGTH 4 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * The result is undefined if the offset points to an illegal UTF-8 + * byte sequence. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_GET + * @stable ICU 2.4 + */ +#define U8_GET_UNSAFE(s, i, c) { \ + int32_t _u8_get_unsafe_index=(int32_t)(i); \ + U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ + U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ +} + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to an illegal UTF-8 byte sequence, then + * c is set to a negative value. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset + * @param i int32_t string offset, must be start<=i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_GET_UNSAFE + * @stable ICU 2.4 + */ +#define U8_GET(s, start, i, length, c) { \ + int32_t _u8_get_index=(i); \ + U8_SET_CP_START(s, start, _u8_get_index); \ + U8_NEXT(s, _u8_get_index, length, c); \ +} + +#ifndef U_HIDE_DRAFT_API +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to an illegal UTF-8 byte sequence, then + * c is set to U+FFFD. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_GET() if that distinction is important. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset + * @param i int32_t string offset, must be start<=i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_GET + * @draft ICU 51 + */ +#define U8_GET_OR_FFFD(s, start, i, length, c) { \ + int32_t _u8_get_index=(i); \ + U8_SET_CP_START(s, start, _u8_get_index); \ + U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \ +} +#endif /* U_HIDE_DRAFT_API */ + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * The result is undefined if the offset points to a trail byte + * or an illegal UTF-8 sequence. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_NEXT + * @stable ICU 2.4 + */ +#define U8_NEXT_UNSAFE(s, i, c) { \ + (c)=(uint8_t)(s)[(i)++]; \ + if((c)>=0x80) { \ + if((c)<0xe0) { \ + (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \ + } else if((c)<0xf0) { \ + /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ + (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \ + (i)+=2; \ + } else { \ + (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \ + (i)+=3; \ + } \ + } \ +} + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * If the offset points to a trail byte or an illegal UTF-8 sequence, then + * c is set to a negative value. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_NEXT(s, i, length, c) { \ + (c)=(uint8_t)(s)[(i)++]; \ + if((c)>=0x80) { \ + uint8_t __t1, __t2; \ + if( /* handle U+1000..U+CFFF inline */ \ + (0xe0<(c) && (c)<=0xec) && \ + (((i)+1)<(length) || (length)<0) && \ + (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \ + (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \ + ) { \ + /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ + (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \ + (i)+=2; \ + } else if( /* handle U+0080..U+07FF inline */ \ + ((c)<0xe0 && (c)>=0xc2) && \ + ((i)!=(length)) && \ + (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \ + ) { \ + (c)=(((c)&0x1f)<<6)|__t1; \ + ++(i); \ + } else { \ + /* function call for "complicated" and error cases */ \ + (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \ + } \ + } \ +} + +#ifndef U_HIDE_DRAFT_API +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * If the offset points to a trail byte or an illegal UTF-8 sequence, then + * c is set to U+FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_NEXT() if that distinction is important. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_NEXT + * @draft ICU 51 + */ +#define U8_NEXT_OR_FFFD(s, i, length, c) { \ + (c)=(uint8_t)(s)[(i)++]; \ + if((c)>=0x80) { \ + uint8_t __t1, __t2; \ + if( /* handle U+1000..U+CFFF inline */ \ + (0xe0<(c) && (c)<=0xec) && \ + (((i)+1)<(length) || (length)<0) && \ + (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \ + (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \ + ) { \ + /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ + (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \ + (i)+=2; \ + } else if( /* handle U+0080..U+07FF inline */ \ + ((c)<0xe0 && (c)>=0xc2) && \ + ((i)!=(length)) && \ + (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \ + ) { \ + (c)=(((c)&0x1f)<<6)|__t1; \ + ++(i); \ + } else { \ + /* function call for "complicated" and error cases */ \ + (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \ + } \ + } \ +} +#endif /* U_HIDE_DRAFT_API */ + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const uint8_t * string buffer + * @param i string offset + * @param c code point to append + * @see U8_APPEND + * @stable ICU 2.4 + */ +#define U8_APPEND_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + if((uint32_t)(c)<=0x7ff) { \ + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + } else { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a non-ASCII code point is written, checks for sufficient space in the string. + * If the code point is not valid or trail bytes do not fit, + * then isError is set to TRUE. + * + * @param s const uint8_t * string buffer + * @param i int32_t string offset, must be i<capacity + * @param capacity int32_t size of the string buffer + * @param c UChar32 code point to append + * @param isError output UBool set to TRUE if an error occurs, otherwise not modified + * @see U8_APPEND_UNSAFE + * @stable ICU 2.4 + */ +#define U8_APPEND(s, i, capacity, c, isError) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \ + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + } else { \ + (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_FWD_1 + * @stable ICU 2.4 + */ +#define U8_FWD_1_UNSAFE(s, i) { \ + (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @see U8_FWD_1_UNSAFE + * @stable ICU 2.4 + */ +#define U8_FWD_1(s, i, length) { \ + uint8_t __b=(uint8_t)(s)[(i)++]; \ + if(U8_IS_LEAD(__b)) { \ + uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \ + if((i)+__count>(length) && (length)>=0) { \ + __count=(uint8_t)((length)-(i)); \ + } \ + while(__count>0 && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + --__count; \ + } \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_FWD_N + * @stable ICU 2.4 + */ +#define U8_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param n number of code points to skip + * @see U8_FWD_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_FWD_N(s, i, length, n) { \ + int32_t __N=(n); \ + while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U8_FWD_1(s, i, length); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_START + * @stable ICU 2.4 + */ +#define U8_SET_CP_START_UNSAFE(s, i) { \ + while(U8_IS_TRAIL((s)[i])) { --(i); } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<=i + * @see U8_SET_CP_START_UNSAFE + * @stable ICU 2.4 + */ +#define U8_SET_CP_START(s, start, i) { \ + if(U8_IS_TRAIL((s)[(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (i)); \ + } \ +} + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind an illegal UTF-8 sequence. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_PREV + * @stable ICU 2.4 + */ +#define U8_PREV_UNSAFE(s, i, c) { \ + (c)=(uint8_t)(s)[--(i)]; \ + if(U8_IS_TRAIL(c)) { \ + uint8_t __b, __count=1, __shift=6; \ +\ + /* c is a trail byte */ \ + (c)&=0x3f; \ + for(;;) { \ + __b=(uint8_t)(s)[--(i)]; \ + if(__b>=0xc0) { \ + U8_MASK_LEAD_BYTE(__b, __count); \ + (c)|=(UChar32)__b<<__shift; \ + break; \ + } else { \ + (c)|=(UChar32)(__b&0x3f)<<__shift; \ + ++__count; \ + __shift+=6; \ + } \ + } \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_PREV_UNSAFE + * @stable ICU 2.4 + */ +#define U8_PREV(s, start, i, c) { \ + (c)=(uint8_t)(s)[--(i)]; \ + if((c)>=0x80) { \ + (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \ + } \ +} + +#ifndef U_HIDE_DRAFT_API +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_PREV() if that distinction is important. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_PREV + * @draft ICU 51 + */ +#define U8_PREV_OR_FFFD(s, start, i, c) { \ + (c)=(uint8_t)(s)[--(i)]; \ + if((c)>=0x80) { \ + (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \ + } \ +} +#endif /* U_HIDE_DRAFT_API */ + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_BACK_1 + * @stable ICU 2.4 + */ +#define U8_BACK_1_UNSAFE(s, i) { \ + while(U8_IS_TRAIL((s)[--(i)])) {} \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @see U8_BACK_1_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_1(s, start, i) { \ + if(U8_IS_TRAIL((s)[--(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (i)); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_BACK_N + * @stable ICU 2.4 + */ +#define U8_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start int32_t index of the start of the string + * @param i int32_t string offset, must be start<i + * @param n number of code points to skip + * @see U8_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_N(s, start, i, n) { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U8_BACK_1(s, start, i); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \ + U8_BACK_1_UNSAFE(s, i); \ + U8_FWD_1_UNSAFE(s, i); \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<=i<=length + * @param length int32_t string length + * @see U8_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT(s, start, i, length) { \ + if((start)<(i) && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U8_BACK_1(s, start, i); \ + U8_FWD_1(s, i, length); \ + } \ +} + +#endif diff --git a/Source/WTF/icu/unicode/utf_old.h b/Source/WTF/icu/unicode/utf_old.h new file mode 100644 index 000000000..f9125b1dd --- /dev/null +++ b/Source/WTF/icu/unicode/utf_old.h @@ -0,0 +1,1169 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf_old.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002sep21 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: Deprecated macros for Unicode string handling + */ + +/** + * + * The macros in utf_old.h are all deprecated and their use discouraged. + * Some of the design principles behind the set of UTF macros + * have changed or proved impractical. + * Almost all of the old "UTF macros" are at least renamed. + * If you are looking for a new equivalent to an old macro, please see the + * comment at the old one. + * + * Brief summary of reasons for deprecation: + * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing) + * was impractical. + * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing) + * was of little use and impractical. + * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE + * selection framework: UTF32_ macros (all trivial) + * and UTF_ default and intermediate macros (all aliases). + * - The selection framework also caused many macro aliases. + * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2). + * - Change of language in Unicode standard: + * Growing distinction between internal x-bit Unicode strings and external UTF-x + * forms, with the former more lenient. + * Suggests renaming of UTF16_ macros to U16_. + * - The prefix "UTF_" without a width number confused some users. + * - "Safe" append macros needed the addition of an error indicator output. + * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values + * to indicate error conditions. + * - The use of the "_CHAR" infix for code point operations confused some users. + * + * More details: + * + * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32 + * for string processing, and among unsafe/safe/strict default macros for that. + * + * It proved nearly impossible to write non-trivial, high-performance code + * that is UTF-generic. + * Unsafe default macros would be dangerous for default string processing, + * and the main reason for the "strict" versions disappeared: + * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal. + * The only other conditions that "strict" checked for were non-characters, + * which are valid during processing. Only during text input/output should they + * be checked, and at that time other well-formedness checks may be + * necessary or useful as well. + * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR + * or U_IS_UNICODE_CHAR. + * + * The old UTF8_..._SAFE macros also used some normal Unicode code points + * to indicate malformed sequences. + * The new UTF8_ macros without suffix use negative values instead. + * + * The entire contents of utf32.h was moved here without replacement + * because all those macros were trivial and + * were meaningful only in the framework of choosing the UTF size. + * + * See Jitterbug 2150 and its discussion on the ICU mailing list + * in September 2002. + * + * <hr> + * + * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation: + * + * <p>The original concept for these files was for ICU to allow + * in principle to set which UTF (UTF-8/16/32) is used internally + * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type + * accordingly. UTF-16 was the default.</p> + * + * <p>This concept has been abandoned. + * A lot of the ICU source code assumes UChar strings are in UTF-16. + * This is especially true for low-level code like + * conversion, normalization, and collation. + * The utf.h header enforces the default of UTF-16. + * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p> + * + * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then + * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p> + * + * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit + * Unicode code point (Unicode scalar value, 0..0x10ffff). + * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as + * the definition of UChar. For details see the documentation for UChar32 itself.</p> + * + * <p>utf.h also defines a number of C macros for handling single Unicode code points and + * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual + * implementations of those macros and then aliases one set of them (for UTF-16) for general use. + * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while + * the general alias macros always begin with UTF_...</p> + * + * <p>Many string operations can be done with or without error checking. + * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe" + * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause + * program failures if the strings are not well-formed. The safe macros have an additional, boolean + * parameter "strict". If strict is FALSE, then only illegal sequences are detected. + * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates). + * Safe macros return special error code points for illegal/irregular sequences: + * Typically, U+ffff, or values that would result in a code unit sequence of the same length + * as the erroneous input sequence.<br> + * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and + * they do not have start/length parameters for boundary checking.</p> + * + * <p>Here, the macros are aliased in two steps: + * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are + * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures. + * Then, in a second step, the default, general alias macros are set to use either the unsafe or + * the safe/not strict (default) or the safe/strict macro; + * these general macros do not have a strictness parameter.</p> + * + * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict. + * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for + * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p> + * + * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix. + * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias. + * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds, + * then the _UNSAFE version may be used. + * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p> + * + * <hr> + * + * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead. + */ + +#ifndef __UTF_OLD_H__ +#define __UTF_OLD_H__ + +#ifndef U_HIDE_DEPRECATED_API + +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" + +/* Formerly utf.h, part 1 --------------------------------------------------- */ + +#ifdef U_USE_UTF_DEPRECATES +/** + * Unicode string and array offset and index type. + * ICU always counts Unicode code units (UChars) for + * string offsets, indexes, and lengths, not Unicode code points. + * + * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release. + */ +typedef int32_t UTextOffset; +#endif + +/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF_SIZE 16 + +/** + * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations + * with strict=FALSE. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_SAFE +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#undef UTF_UNSAFE +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#undef UTF_STRICT + +/** + * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8, + * which need 1 or 2 bytes in UTF-8: + * \code + * U+0015 = NAK = Negative Acknowledge, C0 control character + * U+009f = highest C1 control character + * \endcode + * + * These are used by UTF8_..._SAFE macros so that they can return an error value + * that needs the same number of code units (bytes) as were seen by + * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID(). + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF8_ERROR_VALUE_1 0x15 + +/** + * See documentation on UTF8_ERROR_VALUE_1 for details. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF8_ERROR_VALUE_2 0x9f + +/** + * Error value for all UTFs. This code point value will be set by macros with error + * checking if an error is detected. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_ERROR_VALUE 0xffff + +/** + * Is a given 32-bit code an error value + * as returned by one of the macros for any UTF? + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_IS_ERROR(c) \ + (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) + +/** + * This is a combined macro: Is c a valid Unicode value _and_ not an error code? + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_IS_VALID(c) \ + (UTF_IS_UNICODE_CHAR(c) && \ + (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) + +/** + * Is this code unit or code point a surrogate (U+d800..U+dfff)? + * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h. + */ +#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) + +/** + * Is a given 32-bit code point a Unicode noncharacter? + * + * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h. + */ +#define UTF_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ + (uint32_t)(c)<=0x10ffff) + +/** + * Is a given 32-bit value a Unicode code point value (0..U+10ffff) + * that can be assigned a character? + * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h. + */ +#define UTF_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + ((uint32_t)(c)>0xdfff && \ + (uint32_t)(c)<=0x10ffff && \ + !UTF_IS_UNICODE_NONCHAR(c))) + +/* Formerly utf8.h ---------------------------------------------------------- */ + +/** + * Count the trail bytes for a UTF-8 lead byte. + * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h. + */ +#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) + +/** + * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. + * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h. + */ +#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) + +/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */ +#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) +/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */ +#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) +/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */ +#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) + +/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */ +#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) + +/** + * Given the lead character, how many bytes are taken by this code point. + * ICU does not deal with code points >0x10ffff + * unless necessary for advancing in the byte stream. + * + * These length macros take into account that for values >0x10ffff + * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff + * with 3 bytes. + * Code point comparisons need to be in uint32_t because UChar32 + * may be a signed type, and negative values must be recognized. + * + * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h. + */ +#if 1 +# define UTF8_CHAR_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ + ) \ + ) +#else +# define UTF8_CHAR_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xffff ? 3 : \ + ((uint32_t)(c)<=0x10ffff ? 4 : \ + ((uint32_t)(c)<=0x3ffffff ? 5 : \ + ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ + ) \ + ) \ + ) \ + ) \ + ) +#endif + +/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */ +#define UTF8_MAX_CHAR_LENGTH 4 + +/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF8_ARRAY_SIZE(size) ((5*(size))/2) + +/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */ +#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ + int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ + UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ + UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ +} + +/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */ +#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ + int32_t _utf8_get_char_safe_index=(int32_t)(i); \ + UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ + UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ +} + +/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */ +#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ + if((uint8_t)((c)-0xc0)<0x35) { \ + uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ + UTF8_MASK_LEAD_BYTE(c, __count); \ + switch(__count) { \ + /* each following branch falls through to the next one */ \ + case 3: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + case 2: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + case 1: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + /* no other branches to optimize switch() */ \ + break; \ + } \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */ +#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + if((uint32_t)(c)<=0x7ff) { \ + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + } else { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF8_FWD_1_UNSAFE(s, i) { \ + (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ +} + +/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF8_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF8_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ + while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ +} + +/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */ +#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ + (c)=(s)[(i)++]; \ + if((c)>=0x80) { \ + if(UTF8_IS_LEAD(c)) { \ + (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ + } else { \ + (c)=UTF8_ERROR_VALUE_1; \ + } \ + } \ +} + +/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */ +#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */ +#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) + +/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */ +#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */ +#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */ +#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ + if(UTF8_IS_TRAIL(c)) { \ + uint8_t __b, __count=1, __shift=6; \ +\ + /* c is a trail byte */ \ + (c)&=0x3f; \ + for(;;) { \ + __b=(s)[--(i)]; \ + if(__b>=0xc0) { \ + UTF8_MASK_LEAD_BYTE(__b, __count); \ + (c)|=(UChar32)__b<<__shift; \ + break; \ + } else { \ + (c)|=(UChar32)(__b&0x3f)<<__shift; \ + ++__count; \ + __shift+=6; \ + } \ + } \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF8_BACK_1_UNSAFE(s, i) { \ + while(UTF8_IS_TRAIL((s)[--(i)])) {} \ +} + +/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF8_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF8_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ + UTF8_BACK_1_UNSAFE(s, i); \ + UTF8_FWD_1_UNSAFE(s, i); \ +} + +/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */ +#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ + (c)=(s)[--(i)]; \ + if((c)>=0x80) { \ + if((c)<=0xbf) { \ + (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ + } else { \ + (c)=UTF8_ERROR_VALUE_1; \ + } \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */ +#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */ +#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */ +#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) + +/* Formerly utf16.h --------------------------------------------------------- */ + +/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */ +#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) + +/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */ +#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) + +/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */ +#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) + +/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */ +#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) + +/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */ +#define UTF16_GET_PAIR_VALUE(first, second) \ + (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) + +/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ +#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) + +/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ +#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ +#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) + +/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ +#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) + +/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */ +#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) + +/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */ +#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) + +/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */ +#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) + +/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */ +#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) + +/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */ +#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */ +#define UTF16_MAX_CHAR_LENGTH 2 + +/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF16_ARRAY_SIZE(size) (size) + +/** + * Get a single code point from an offset that points to any + * of the code units that belong to that code point. + * Assume 0<=i<length. + * + * This could be used for iteration together with + * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(), + * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and + * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that. + * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. + */ +#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[i]; \ + if(UTF_IS_SURROGATE(c)) { \ + if(UTF_IS_SURROGATE_FIRST(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ + } else { \ + (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ + } \ + } \ +} + +/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */ +#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ + (c)=(s)[i]; \ + if(UTF_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(UTF_IS_SURROGATE_FIRST(c)) { \ + if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ + (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched first surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else { \ + if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ + (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched second surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ +#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ + if(UTF_IS_FIRST_SURROGATE(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ +#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF16_FWD_1_UNSAFE(s, i) { \ + if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ + ++(i); \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF16_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF16_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ + if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ + --(i); \ + } \ +} + +/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */ +#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ + (c)=(s)[(i)++]; \ + if(UTF_IS_FIRST_SURROGATE(c)) { \ + uint16_t __c2; \ + if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched first surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + /* unmatched second surrogate or other non-character */ \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ +#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else if((uint32_t)(c)<=0x10ffff) { \ + if((i)+1<(length)) { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } else /* not enough space */ { \ + (s)[(i)++]=UTF_ERROR_VALUE; \ + } \ + } else /* c>0x10ffff, write error value */ { \ + (s)[(i)++]=UTF_ERROR_VALUE; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ +#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ +#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ +#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ +#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ + if(UTF_IS_SECOND_SURROGATE(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF16_BACK_1_UNSAFE(s, i) { \ + if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ + --(i); \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF16_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF16_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ + if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ + ++(i); \ + } \ +} + +/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ +#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ + (c)=(s)[--(i)]; \ + if(UTF_IS_SECOND_SURROGATE(c)) { \ + uint16_t __c2; \ + if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched second surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + /* unmatched first surrogate or other non-character */ \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ +#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ +#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */ +#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) + +/* Formerly utf32.h --------------------------------------------------------- */ + +/* +* Old documentation: +* +* This file defines macros to deal with UTF-32 code units and code points. +* Signatures and semantics are the same as for the similarly named macros +* in utf16.h. +* utf32.h is included by utf.h after unicode/umachine.h</p> +* and some common definitions. +* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros. +* Compound statements (curly braces {}) must be used for if-else-while... +* bodies and all macro statements should be terminated with semicolon.</p> +*/ + +/* internal definitions ----------------------------------------------------- */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_SAFE(c, strict) \ + (!(strict) ? \ + (uint32_t)(c)<=0x10ffff : \ + UTF_IS_UNICODE_CHAR(c)) + +/* + * For the semantics of all of these macros, see utf16.h. + * The UTF-32 versions are trivial because any code point is + * encoded using exactly one code unit. + */ + +/* single-code point definitions -------------------------------------------- */ + +/* classes of code unit values */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_SINGLE(uchar) 1 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_LEAD(uchar) 0 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_TRAIL(uchar) 0 + +/* number of code units per code point */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEED_MULTIPLE_UCHAR(c) 0 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_CHAR_LENGTH(c) 1 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_MAX_CHAR_LENGTH 1 + +/* average number of code units compared to UTF-16 */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_ARRAY_SIZE(size) (size) + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[i]; \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ + (c)=(s)[i]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/* definitions with forward iteration --------------------------------------- */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \ + (s)[(i)++]=(c); \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_1_UNSAFE(s, i) { \ + ++(i); \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_N_UNSAFE(s, i, n) { \ + (i)+=(n); \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ + (c)=(s)[(i)++]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ + if((uint32_t)(c)<=0x10ffff) { \ + (s)[(i)++]=(c); \ + } else /* c>0x10ffff, write 0xfffd */ { \ + (s)[(i)++]=0xfffd; \ + } \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_1_SAFE(s, i, length) { \ + ++(i); \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_N_SAFE(s, i, length, n) { \ + if(((i)+=(n))>(length)) { \ + (i)=(length); \ + } \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ +} + +/* definitions with backward iteration -------------------------------------- */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_1_UNSAFE(s, i) { \ + --(i); \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_N_UNSAFE(s, i, n) { \ + (i)-=(n); \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ + (c)=(s)[--(i)]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_1_SAFE(s, start, i) { \ + --(i); \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_N_SAFE(s, start, i, n) { \ + (i)-=(n); \ + if((i)<(start)) { \ + (i)=(start); \ + } \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ +} + +/* Formerly utf.h, part 2 --------------------------------------------------- */ + +/** + * Estimate the number of code units for a string based on the number of UTF-16 code units. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) + +/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */ +#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */ +#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ +#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */ +#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ +#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ +#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) + + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ +#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) + + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ +#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) + + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ +#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) + + +/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ +#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ +#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ +#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) + + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ +#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) + + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */ +#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) + +/* Define default macros (UTF-16 "safe") ------------------------------------ */ + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * Same as UTF16_IS_SINGLE. + * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h. + */ +#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) + +/** + * Is this code unit the first one of several (a lead surrogate)? + * Same as UTF16_IS_LEAD. + * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. + */ +#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) + +/** + * Is this code unit one of several but not the first one (a trail surrogate)? + * Same as UTF16_IS_TRAIL. + * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. + */ +#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) + +/** + * Does this code point require multiple code units (is it a supplementary code point)? + * Same as UTF16_NEED_MULTIPLE_UCHAR. + * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead. + */ +#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) + +/** + * How many code units are used to encode this code point (1 or 2)? + * Same as UTF16_CHAR_LENGTH. + * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. + */ +#define UTF_CHAR_LENGTH(c) U16_LENGTH(c) + +/** + * How many code units are used at most for any Unicode code point (2)? + * Same as UTF16_MAX_CHAR_LENGTH. + * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. + */ +#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH + +/** + * Set c to the code point that contains the code unit i. + * i could point to the lead or the trail surrogate for the code point. + * i is not modified. + * Same as UTF16_GET_CHAR. + * \pre 0<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h. + */ +#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) + +/** + * Set c to the code point that starts at code unit i + * and advance i to beyond the code units of this code point (post-increment). + * i must point to the first code unit of a code point. + * Otherwise c is set to the trail unit (surrogate) itself. + * Same as UTF16_NEXT_CHAR. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h. + */ +#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) + +/** + * Append the code units of code point c to the string at index i + * and advance i to beyond the new code units (post-increment). + * The code units beginning at index i will be overwritten. + * Same as UTF16_APPEND_CHAR. + * \pre 0<=c<=0x10ffff + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. + */ +#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) + +/** + * Advance i to beyond the code units of the code point that begins at i. + * I.e., advance i by one code point. + * Same as UTF16_FWD_1. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. + */ +#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) + +/** + * Advance i to beyond the code units of the n code points where the first one begins at i. + * I.e., advance i by n code points. + * Same as UT16_FWD_N. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. + */ +#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) + +/** + * Take the random-access index i and adjust it so that it points to the beginning + * of a code point. + * The input index points to any code unit of a code point and is moved to point to + * the first code unit of the same code point. i is never incremented. + * In other words, if i points to a trail surrogate that is preceded by a matching + * lead surrogate, then i is decremented. Otherwise it is not modified. + * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index. + * Same as UTF16_SET_CHAR_START. + * \pre start<=i<length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. + */ +#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) + +/** + * Set c to the code point that has code units before i + * and move i backward (towards the beginning of the string) + * to the first code unit of this code point (pre-increment). + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_PREV_CHAR. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h. + */ +#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) + +/** + * Move i backward (towards the beginning of the string) + * to the first code unit of the code point that has code units before i. + * I.e., move i backward by one code point. + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_BACK_1. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. + */ +#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) + +/** + * Move i backward (towards the beginning of the string) + * to the first code unit of the n code points that have code units before i. + * I.e., move i backward by n code points. + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_BACK_N. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. + */ +#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) + +/** + * Take the random-access index i and adjust it so that it points beyond + * a code point. The input index points beyond any code unit + * of a code point and is moved to point beyond the last code unit of the same + * code point. i is never decremented. + * In other words, if i points to a trail surrogate that is preceded by a matching + * lead surrogate, then i is incremented. Otherwise it is not modified. + * This can be used to start an iteration with UTF_PREV_CHAR() from a random index. + * Same as UTF16_SET_CHAR_LIMIT. + * \pre start<i<=length + * \post start<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. + */ +#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) + +#endif /* U_HIDE_DEPRECATED_API */ + +#endif + diff --git a/Source/WTF/icu/unicode/utypes.h b/Source/WTF/icu/unicode/utypes.h new file mode 100644 index 000000000..8f924c9d1 --- /dev/null +++ b/Source/WTF/icu/unicode/utypes.h @@ -0,0 +1,723 @@ +/* +********************************************************************** +* Copyright (C) 1996-2012, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* FILE NAME : UTYPES.H (formerly ptypes.h) +* +* Date Name Description +* 12/11/96 helena Creation. +* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32, +* uint8, uint16, and uint32. +* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as +* well as C++. +* Modified to use memcpy() for uprv_arrayCopy() fns. +* 04/14/97 aliu Added TPlatformUtilities. +* 05/07/97 aliu Added import/export specifiers (replacing the old +* broken EXT_CLASS). Added version number for our +* code. Cleaned up header. +* 6/20/97 helena Java class name change. +* 08/11/98 stephen UErrorCode changed from typedef to enum +* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3 +* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t +* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066) +* 04/20/99 stephen Cleaned up & reworked for autoconf. +* Renamed to utypes.h. +* 05/05/99 stephen Changed to use <inttypes.h> +* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here. +******************************************************************************* +*/ + +#ifndef UTYPES_H +#define UTYPES_H + + +#include "unicode/umachine.h" +#include "unicode/uversion.h" +#include "unicode/uconfig.h" +#include <float.h> + +#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS +# include "unicode/utf.h" +#endif + +/*! + * \file + * \brief Basic definitions for ICU, for both C and C++ APIs + * + * This file defines basic types, constants, and enumerations directly or + * indirectly by including other header files, especially utf.h for the + * basic character and string definitions and umachine.h for consistent + * integer and other types. + */ + + +/** + * \def U_SHOW_CPLUSPLUS_API + * @internal + */ +#ifdef __cplusplus +# ifndef U_SHOW_CPLUSPLUS_API +# define U_SHOW_CPLUSPLUS_API 0 +# endif +#else +# undef U_SHOW_CPLUSPLUS_API +# define U_SHOW_CPLUSPLUS_API 0 +#endif + +/** @{ API visibility control */ + +/** + * \def U_HIDE_DRAFT_API + * Define this to 1 to request that draft API be "hidden" + * @internal + */ +/** + * \def U_HIDE_INTERNAL_API + * Define this to 1 to request that internal API be "hidden" + * @internal + */ +#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API) +#define U_HIDE_DRAFT_API 1 +#endif +#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API) +#define U_HIDE_INTERNAL_API 1 +#endif + +/** @} */ + +/*===========================================================================*/ +/* ICUDATA naming scheme */ +/*===========================================================================*/ + +/** + * \def U_ICUDATA_TYPE_LETTER + * + * This is a platform-dependent string containing one letter: + * - b for big-endian, ASCII-family platforms + * - l for little-endian, ASCII-family platforms + * - e for big-endian, EBCDIC-family platforms + * This letter is part of the common data file name. + * @stable ICU 2.0 + */ + +/** + * \def U_ICUDATA_TYPE_LITLETTER + * The non-string form of U_ICUDATA_TYPE_LETTER + * @stable ICU 2.0 + */ +#if U_CHARSET_FAMILY +# if U_IS_BIG_ENDIAN + /* EBCDIC - should always be BE */ +# define U_ICUDATA_TYPE_LETTER "e" +# define U_ICUDATA_TYPE_LITLETTER e +# else +# error "Don't know what to do with little endian EBCDIC!" +# define U_ICUDATA_TYPE_LETTER "x" +# define U_ICUDATA_TYPE_LITLETTER x +# endif +#else +# if U_IS_BIG_ENDIAN + /* Big-endian ASCII */ +# define U_ICUDATA_TYPE_LETTER "b" +# define U_ICUDATA_TYPE_LITLETTER b +# else + /* Little-endian ASCII */ +# define U_ICUDATA_TYPE_LETTER "l" +# define U_ICUDATA_TYPE_LITLETTER l +# endif +#endif + +/** + * A single string literal containing the icudata stub name. i.e. 'icudt18e' for + * ICU 1.8.x on EBCDIC, etc.. + * @stable ICU 2.0 + */ +#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER +#ifndef U_HIDE_INTERNAL_API +#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */ +#define U_USE_USRDATA 1 /**< @internal */ +#endif /* U_HIDE_INTERNAL_API */ + +/** + * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library. + * Defined as a literal, not a string. + * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string + * from the corresponding macro invocation, _before_ other macro substitutions. + * Need a nested \#defines to get the actual version numbers rather than + * the literal text U_ICU_VERSION_MAJOR_NUM into the name. + * The net result will be something of the form + * \#define U_ICU_ENTRY_POINT icudt19_dat + * @stable ICU 2.4 + */ +#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME) + +#ifndef U_HIDE_INTERNAL_API +/** + * Do not use. Note that it's OK for the 2nd argument to be undefined (literal). + * @internal + */ +#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff) + +/** + * Do not use. + * @internal + */ +#ifndef U_DEF_ICUDATA_ENTRY_POINT +/* affected by symbol renaming. See platform.h */ +#ifndef U_LIB_SUFFIX_C_NAME +#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat +#else +#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat +#endif +#endif +#endif /* U_HIDE_INTERNAL_API */ + +/** + * \def NULL + * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C. + * @stable ICU 2.0 + */ +#ifndef NULL +#ifdef __cplusplus +#define NULL 0 +#else +#define NULL ((void *)0) +#endif +#endif + +/*===========================================================================*/ +/* Calendar/TimeZone data types */ +/*===========================================================================*/ + +/** + * Date and Time data type. + * This is a primitive data type that holds the date and time + * as the number of milliseconds since 1970-jan-01, 00:00 UTC. + * UTC leap seconds are ignored. + * @stable ICU 2.0 + */ +typedef double UDate; + +/** The number of milliseconds per second @stable ICU 2.0 */ +#define U_MILLIS_PER_SECOND (1000) +/** The number of milliseconds per minute @stable ICU 2.0 */ +#define U_MILLIS_PER_MINUTE (60000) +/** The number of milliseconds per hour @stable ICU 2.0 */ +#define U_MILLIS_PER_HOUR (3600000) +/** The number of milliseconds per day @stable ICU 2.0 */ +#define U_MILLIS_PER_DAY (86400000) + +/** + * Maximum UDate value + * @stable ICU 4.8 + */ +#define U_DATE_MAX DBL_MAX + +/** + * Minimum UDate value + * @stable ICU 4.8 + */ +#define U_DATE_MIN -U_DATE_MAX + +/*===========================================================================*/ +/* Shared library/DLL import-export API control */ +/*===========================================================================*/ + +/* + * Control of symbol import/export. + * ICU is separated into three libraries. + */ + +/** + * \def U_COMBINED_IMPLEMENTATION + * Set to export library symbols from inside the ICU library + * when all of ICU is in a single library. + * This can be set as a compiler option while building ICU, and it + * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc. + * @stable ICU 2.0 + */ + +/** + * \def U_DATA_API + * Set to export library symbols from inside the stubdata library, + * and to import them from outside. + * @stable ICU 3.0 + */ + +/** + * \def U_COMMON_API + * Set to export library symbols from inside the common library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_I18N_API + * Set to export library symbols from inside the i18n library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_LAYOUT_API + * Set to export library symbols from inside the layout engine library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_LAYOUTEX_API + * Set to export library symbols from inside the layout extensions library, + * and to import them from outside. + * @stable ICU 2.6 + */ + +/** + * \def U_IO_API + * Set to export library symbols from inside the ustdio library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_TOOLUTIL_API + * Set to export library symbols from inside the toolutil library, + * and to import them from outside. + * @stable ICU 3.4 + */ + +#if defined(U_COMBINED_IMPLEMENTATION) +#define U_DATA_API U_EXPORT +#define U_COMMON_API U_EXPORT +#define U_I18N_API U_EXPORT +#define U_LAYOUT_API U_EXPORT +#define U_LAYOUTEX_API U_EXPORT +#define U_IO_API U_EXPORT +#define U_TOOLUTIL_API U_EXPORT +#elif defined(U_STATIC_IMPLEMENTATION) +#define U_DATA_API +#define U_COMMON_API +#define U_I18N_API +#define U_LAYOUT_API +#define U_LAYOUTEX_API +#define U_IO_API +#define U_TOOLUTIL_API +#elif defined(U_COMMON_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_EXPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_I18N_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_EXPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_LAYOUT_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_EXPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_LAYOUTEX_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_EXPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_IO_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_EXPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_TOOLUTIL_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_EXPORT +#else +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#endif + +/** + * \def U_STANDARD_CPP_NAMESPACE + * Control of C++ Namespace + * @stable ICU 2.0 + */ +#ifdef __cplusplus +#define U_STANDARD_CPP_NAMESPACE :: +#else +#define U_STANDARD_CPP_NAMESPACE +#endif + + +/*===========================================================================*/ +/* Global delete operator */ +/*===========================================================================*/ + +/* + * The ICU4C library must not use the global new and delete operators. + * These operators here are defined to enable testing for this. + * See Jitterbug 2581 for details of why this is necessary. + * + * Verification that ICU4C's memory usage is correct, i.e., + * that global new/delete are not used: + * + * a) Check for imports of global new/delete (see uobject.cpp for details) + * b) Verify that new is never imported. + * c) Verify that delete is only imported from object code for interface/mixin classes. + * d) Add global delete and delete[] only for the ICU4C library itself + * and define them in a way that crashes or otherwise easily shows a problem. + * + * The following implements d). + * The operator implementations crash; this is intentional and used for library debugging. + * + * Note: This is currently only done on Windows because + * some Linux/Unix compilers have problems with defining global new/delete. + * On Windows, it is _MSC_VER>=1200 for MSVC 6.0 and higher. + */ +#if defined(__cplusplus) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION)) + +#ifndef U_HIDE_INTERNAL_API +/** + * Global operator new, defined only inside ICU4C, must not be used. + * Crashes intentionally. + * @internal + */ +inline void * +operator new(size_t /*size*/) { + char *q=NULL; + *q=5; /* break it */ + return q; +} + +#ifdef _Ret_bytecap_ +/* This is only needed to suppress a Visual C++ 2008 warning for operator new[]. */ +_Ret_bytecap_(_Size) +#endif +/** + * Global operator new[], defined only inside ICU4C, must not be used. + * Crashes intentionally. + * @internal + */ +inline void * +operator new[](size_t /*size*/) { + char *q=NULL; + *q=5; /* break it */ + return q; +} + +/** + * Global operator delete, defined only inside ICU4C, must not be used. + * Crashes intentionally. + * @internal + */ +inline void +operator delete(void * /*p*/) { + char *q=NULL; + *q=5; /* break it */ +} + +/** + * Global operator delete[], defined only inside ICU4C, must not be used. + * Crashes intentionally. + * @internal + */ +inline void +operator delete[](void * /*p*/) { + char *q=NULL; + *q=5; /* break it */ +} + +#endif /* U_HIDE_INTERNAL_API */ +#endif + +/*===========================================================================*/ +/* UErrorCode */ +/*===========================================================================*/ + +/** + * Error code to replace exception handling, so that the code is compatible with all C++ compilers, + * and to use the same mechanism for C and C++. + * + * \par + * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode + * first test if(U_FAILURE(errorCode)) { return immediately; } + * so that in a chain of such functions the first one that sets an error code + * causes the following ones to not perform any operations. + * + * \par + * Error codes should be tested using U_FAILURE() and U_SUCCESS(). + * @stable ICU 2.0 + */ +typedef enum UErrorCode { + /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird + * and is that way because VC++ debugger displays first encountered constant, + * which is not the what the code is used for + */ + + U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */ + + U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */ + + U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */ + + U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */ + + U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */ + + U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */ + + U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */ + + U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */ + + U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */ + + U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */ + + U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */ + + + U_ZERO_ERROR = 0, /**< No error, no warning. */ + + U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */ + U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */ + U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */ + U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */ + U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */ + U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */ + U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */ + U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */ + U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */ + U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */ + U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */ + U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */ + U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */ + U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */ + U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ + U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ + U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ + U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */ + U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ + U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ + U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ + U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ + U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ + U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. + It is very possible that a circular alias definition has occured */ + U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ + U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ + U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ + U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */ + U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */ + U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */ + + U_STANDARD_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for standard errors */ + /* + * the error code range 0x10000 0x10100 are reserved for Transliterator + */ + U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */ + U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */ + U_MALFORMED_RULE, /**< Elements of a rule are misplaced */ + U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/ + U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */ + U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/ + U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */ + U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */ + U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */ + U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */ + U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */ + U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */ + U_MISSING_OPERATOR, /**< A rule contains no operator */ + U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */ + U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */ + U_MULTIPLE_CURSORS, /**< More than one cursor */ + U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */ + U_TRAILING_BACKSLASH, /**< A dangling backslash */ + U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */ + U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */ + U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */ + U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */ + U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */ + U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */ + U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ + U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */ + U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ + U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */ + U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ + U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ + U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ + U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */ + U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */ + U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */ + U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */ + U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */ + U_PARSE_ERROR_LIMIT, /**< The limit for Transliterator errors */ + + /* + * the error code range 0x10100 0x10200 are reserved for formatting API parsing error + */ + U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */ + U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */ + U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */ + U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */ + U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */ + U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */ + U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */ + U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */ + U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */ + U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */ + U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */ + U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */ + U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */ + U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */ + U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */ + U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */ + U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */ + U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ + U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ + U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ + U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */ + + /* + * the error code range 0x10200 0x102ff are reserved for Break Iterator related error + */ + U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */ + U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */ + U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */ + U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ + U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ + U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */ + U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */ + U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ + U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ + U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */ + U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */ + U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ + U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ + U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ + U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */ + U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */ + + /* + * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs + */ + U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */ + U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */ + U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */ + U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */ + U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */ + U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */ + U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */ + U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */ + U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */ + U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */ + U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */ + U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */ + U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */ + U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */ + U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/ + U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. */ + U_REGEX_MISSING_CLOSE_BRACKET, /**< Missing closing bracket on a bracket expression. */ + U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */ + U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */ + U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */ + U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */ + U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */ + + /* + * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes + */ + U_IDNA_PROHIBITED_ERROR=0x10400, + U_IDNA_ERROR_START=0x10400, + U_IDNA_UNASSIGNED_ERROR, + U_IDNA_CHECK_BIDI_ERROR, + U_IDNA_STD3_ASCII_RULES_ERROR, + U_IDNA_ACE_PREFIX_ERROR, + U_IDNA_VERIFICATION_ERROR, + U_IDNA_LABEL_TOO_LONG_ERROR, + U_IDNA_ZERO_LENGTH_LABEL_ERROR, + U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR, + U_IDNA_ERROR_LIMIT, + /* + * Aliases for StringPrep + */ + U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR, + U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR, + U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR, + + /* + * The error code in the range 0x10500-0x105ff are reserved for Plugin related error codes + */ + U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */ + U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */ + U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */ + U_PLUGIN_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for plugin errors */ + + U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */ +} UErrorCode; + +/* Use the following to determine if an UErrorCode represents */ +/* operational success or failure. */ + +#ifdef __cplusplus + /** + * Does the error code indicate success? + * @stable ICU 2.0 + */ + static + inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); } + /** + * Does the error code indicate a failure? + * @stable ICU 2.0 + */ + static + inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); } +#else + /** + * Does the error code indicate success? + * @stable ICU 2.0 + */ +# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR) + /** + * Does the error code indicate a failure? + * @stable ICU 2.0 + */ +# define U_FAILURE(x) ((x)>U_ZERO_ERROR) +#endif + +/** + * Return a string for a UErrorCode value. + * The string will be the same as the name of the error code constant + * in the UErrorCode enum above. + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +u_errorName(UErrorCode code); + + +#endif /* _UTYPES */ diff --git a/Source/WTF/icu/unicode/uvernum.h b/Source/WTF/icu/unicode/uvernum.h new file mode 100644 index 000000000..bd0b0c989 --- /dev/null +++ b/Source/WTF/icu/unicode/uvernum.h @@ -0,0 +1,167 @@ +/* +******************************************************************************* +* Copyright (C) 2000-2013, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uvernum.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* Updated by: Steven R. Loomis +* +*/ + +/** + * \file + * \brief C API: definitions of ICU version numbers + * + * This file is included by uversion.h and other files. This file contains only + * macros and definitions. The actual version numbers are defined here. + */ + + /* + * IMPORTANT: When updating version, the following things need to be done: + * source/common/unicode/uvernum.h - this file: update major, minor, + * patchlevel, suffix, version, short version constants, namespace, + * renaming macro, and copyright + * + * The following files need to be updated as well, which can be done + * by running the UNIX makefile target 'update-windows-makefiles' in icu/source. + * + * + * source/common/common.vcproj - update 'Output file name' on the link tab so + * that it contains the new major/minor combination + * source/i18n/i18n.vcproj - same as for the common.vcproj + * source/layout/layout.vcproj - same as for the common.vcproj + * source/layoutex/layoutex.vcproj - same + * source/stubdata/stubdata.vcproj - same as for the common.vcproj + * source/io/io.vcproj - same as for the common.vcproj + * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains + * the new major/minor combination and the Unicode version. + */ + +#ifndef UVERNUM_H +#define UVERNUM_H + +/** The standard copyright notice that gets compiled into each library. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING \ + " Copyright (C) 2013, International Business Machines Corporation and others. All Rights Reserved. " + +/** The current ICU major version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_MAJOR_NUM 52 + +/** The current ICU minor version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_MINOR_NUM 1 + +/** The current ICU patchlevel version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_PATCHLEVEL_NUM 0 + +/** The current ICU build level version as an integer. + * This value is for use by ICU clients. It defaults to 0. + * @stable ICU 4.0 + */ +#ifndef U_ICU_VERSION_BUILDLEVEL_NUM +#define U_ICU_VERSION_BUILDLEVEL_NUM 0 +#endif + +/** Glued version suffix for renamers + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SUFFIX _52 + +/** + * \def U_DEF2_ICU_ENTRY_POINT_RENAME + * @internal + */ +/** + * \def U_DEF_ICU_ENTRY_POINT_RENAME + * @internal + */ +/** Glued version suffix function for renamers + * This value will change in the subsequent releases of ICU. + * If a custom suffix (such as matching library suffixes) is desired, this can be modified. + * Note that if present, platform.h may contain an earlier definition of this macro. + * \def U_ICU_ENTRY_POINT_RENAME + * @stable ICU 4.2 + */ + +#ifndef U_ICU_ENTRY_POINT_RENAME +#ifdef U_HAVE_LIB_SUFFIX +#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z +#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) +#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME) +#else +#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y +#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y) +#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX) +#endif +#endif + +/** The current ICU library version as a dotted-decimal string. The patchlevel + * only appears in this string if it non-zero. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION "52.1" + +/** The current ICU library major/minor version as a string without dots, for library name suffixes. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SHORT "52" + +#ifndef U_HIDE_INTERNAL_API +/** Data version in ICU4C. + * @internal ICU 4.4 Internal Use Only + **/ +#define U_ICU_DATA_VERSION "52.1" +#endif /* U_HIDE_INTERNAL_API */ + +/*=========================================================================== + * ICU collation framework version information + * Version info that can be obtained from a collator is affected by these + * numbers in a secret and magic way. Please use collator version as whole + *=========================================================================== + */ + +/** + * Collation runtime version (sort key generator, strcoll). + * If the version is different, sort keys for the same string could be different. + * This value may change in subsequent releases of ICU. + * @stable ICU 2.4 + */ +#define UCOL_RUNTIME_VERSION 7 + +/** + * Collation builder code version. + * When this is different, the same tailoring might result + * in assigning different collation elements to code points. + * This value may change in subsequent releases of ICU. + * @stable ICU 2.4 + */ +#define UCOL_BUILDER_VERSION 8 + +/** + * This is the version of collation tailorings. + * This value may change in subsequent releases of ICU. + * @stable ICU 2.4 + */ +#define UCOL_TAILORINGS_VERSION 1 + +#endif diff --git a/Source/WTF/icu/unicode/uversion.h b/Source/WTF/icu/unicode/uversion.h new file mode 100644 index 000000000..74e309105 --- /dev/null +++ b/Source/WTF/icu/unicode/uversion.h @@ -0,0 +1,193 @@ +/* +******************************************************************************* +* Copyright (C) 2000-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uversion.h +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* +* Gets included by utypes.h and Windows .rc files +*/ + +/** + * \file + * \brief C API: API for accessing ICU version numbers. + */ +/*===========================================================================*/ +/* Main ICU version information */ +/*===========================================================================*/ + +#ifndef UVERSION_H +#define UVERSION_H + +#include "unicode/umachine.h" + +/* Actual version info lives in uvernum.h */ +#include "unicode/uvernum.h" + +/** Maximum length of the copyright string. + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING_LENGTH 128 + +/** An ICU version consists of up to 4 numbers from 0..255. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_LENGTH 4 + +/** In a string, ICU version fields are delimited by dots. + * @stable ICU 2.4 + */ +#define U_VERSION_DELIMITER '.' + +/** The maximum length of an ICU version string. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_STRING_LENGTH 20 + +/** The binary form of a version on ICU APIs is an array of 4 uint8_t. + * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)). + * @stable ICU 2.4 + */ +typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; + +/*===========================================================================*/ +/* C++ namespace if supported. Versioned unless versioning is disabled. */ +/*===========================================================================*/ + +/** + * \def U_NAMESPACE_BEGIN + * This is used to begin a declaration of a public ICU C++ API. + * When not compiling for C++, it does nothing. + * When compiling for C++, it begins an extern "C++" linkage block (to protect + * against cases in which an external client includes ICU header files inside + * an extern "C" linkage block). + * + * It also begins a versioned-ICU-namespace block. + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_END + * This is used to end a declaration of a public ICU C++ API. + * When not compiling for C++, it does nothing. + * When compiling for C++, it ends the extern "C++" block begun by + * U_NAMESPACE_BEGIN. + * + * It also ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN. + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_USE + * This is used to specify that the rest of the code uses the + * public ICU C++ API namespace. + * This is invoked by default; we recommend that you turn it off: + * See the "Recommended Build Options" section of the ICU4C readme + * (http://source.icu-project.org/repos/icu/icu/trunk/readme.html#RecBuild) + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_QUALIFIER + * This is used to qualify that a function or class is part of + * the public ICU C++ API namespace. + * + * This macro is unnecessary since ICU 49 requires namespace support. + * You can just use "icu::" instead. + * @stable ICU 2.4 + */ + +/* Define namespace symbols if the compiler supports it. */ +#ifdef __cplusplus +# if U_DISABLE_RENAMING +# define U_ICU_NAMESPACE icu + namespace U_ICU_NAMESPACE { } +# else +# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu) + namespace U_ICU_NAMESPACE { } + namespace icu = U_ICU_NAMESPACE; +# endif + +# define U_NAMESPACE_BEGIN extern "C++" { namespace U_ICU_NAMESPACE { +# define U_NAMESPACE_END } } +# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE; +# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: + +# ifndef U_USING_ICU_NAMESPACE +# define U_USING_ICU_NAMESPACE 1 +# endif +# if U_USING_ICU_NAMESPACE + U_NAMESPACE_USE +# endif +#else +# define U_NAMESPACE_BEGIN +# define U_NAMESPACE_END +# define U_NAMESPACE_USE +# define U_NAMESPACE_QUALIFIER +#endif + +/*===========================================================================*/ +/* General version helper functions. Definitions in putil.c */ +/*===========================================================================*/ + +/** + * Parse a string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. + * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A string with dotted-decimal version information, + * with up to four non-negative number fields with + * values of up to 255 each. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +u_versionFromString(UVersionInfo versionArray, const char *versionString); + +/** + * Parse a Unicode string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. + * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A Unicode string with dotted-decimal version + * information, with up to four non-negative number + * fields with values of up to 255 each. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +u_versionFromUString(UVersionInfo versionArray, const UChar *versionString); + + +/** + * Write a string with dotted-decimal version information according + * to the input UVersionInfo. + * Definition of this function lives in putil.c + * + * @param versionArray The version information to be written as a string. + * @param versionString A string buffer that will be filled in with + * a string corresponding to the numeric version + * information in versionArray. + * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +u_versionToString(const UVersionInfo versionArray, char *versionString); + +/** + * Gets the ICU release version. The version array stores the version information + * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. + * Definition of this function lives in putil.c + * + * @param versionArray the version # information, the result will be filled in + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_getVersion(UVersionInfo versionArray); +#endif |