diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-04-10 09:28:39 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-04-10 09:28:39 +0000 |
commit | 32761a6cee1d0dee366b885b7b9c777e67885688 (patch) | |
tree | d6bec92bebfb216f4126356e55518842c2f476a1 /Source/WTF/icu | |
parent | a4e969f4965059196ca948db781e52f7cfebf19e (diff) | |
download | WebKitGtk-tarball-32761a6cee1d0dee366b885b7b9c777e67885688.tar.gz |
webkitgtk-2.4.11webkitgtk-2.4.11
Diffstat (limited to 'Source/WTF/icu')
36 files changed, 0 insertions, 27651 deletions
diff --git a/Source/WTF/icu/LICENSE b/Source/WTF/icu/LICENSE deleted file mode 100644 index 385d130cd..000000000 --- a/Source/WTF/icu/LICENSE +++ /dev/null @@ -1,25 +0,0 @@ -COPYRIGHT AND PERMISSION NOTICE - -Copyright (c) 1995-2006 International Business Machines Corporation and others - -All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy of this -software and associated documentation files (the "Software"), to deal in the Software -without restriction, including without limitation the rights to use, copy, modify, -merge, publish, distribute, and/or sell copies of the Software, and to permit persons -to whom the Software is furnished to do so, provided that the above copyright notice(s) -and this permission notice appear in all copies of the Software and that both the above -copyright notice(s) and this permission notice appear in supporting documentation. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR -PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER -OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR -CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR -PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING -OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -Except as contained in this notice, the name of a copyright holder shall not be used in -advertising or otherwise to promote the sale, use or other dealings in this Software -without prior written authorization of the copyright holder. diff --git a/Source/WTF/icu/README b/Source/WTF/icu/README deleted file mode 100644 index 389e2e801..000000000 --- a/Source/WTF/icu/README +++ /dev/null @@ -1,4 +0,0 @@ -The headers in this directory are for compiling on Mac OS X 10.4. 
-The Mac OS X 10.4 release includes the ICU binary, but not ICU headers. -For other platforms, installed ICU headers should be used rather than these. -They are specific to Mac OS X 10.4. diff --git a/Source/WTF/icu/unicode/bytestream.h b/Source/WTF/icu/unicode/bytestream.h deleted file mode 100644 index 174aa38af..000000000 --- a/Source/WTF/icu/unicode/bytestream.h +++ /dev/null @@ -1,257 +0,0 @@ -// Copyright (C) 2009-2012, International Business Machines -// Corporation and others. All Rights Reserved. -// -// Copyright 2007 Google Inc. All Rights Reserved. -// Author: sanjay@google.com (Sanjay Ghemawat) -// -// Abstract interface that consumes a sequence of bytes (ByteSink). -// -// Used so that we can write a single piece of code that can operate -// on a variety of output string types. -// -// Various implementations of this interface are provided: -// ByteSink: -// CheckedArrayByteSink Write to a flat array, with bounds checking -// StringByteSink Write to an STL string - -// This code is a contribution of Google code, and the style used here is -// a compromise between the original Google code and the ICU coding guidelines. -// For example, data types are ICU-ified (size_t,int->int32_t), -// and API comments doxygen-ified, but function names and behavior are -// as in the original, if possible. -// Assertion-style error handling, not available in ICU, was changed to -// parameter "pinning" similar to UnicodeString. -// -// In addition, this is only a partial port of the original Google code, -// limited to what was needed so far. The (nearly) complete original code -// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib -// (see ICU ticket 6765, r25517). - -#ifndef __BYTESTREAM_H__ -#define __BYTESTREAM_H__ - -/** - * \file - * \brief C++ API: Interface for writing bytes, and implementation classes. 
- */ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/std_string.h" - -U_NAMESPACE_BEGIN - -/** - * A ByteSink can be filled with bytes. - * @stable ICU 4.2 - */ -class U_COMMON_API ByteSink : public UMemory { -public: - /** - * Default constructor. - * @stable ICU 4.2 - */ - ByteSink() { } - /** - * Virtual destructor. - * @stable ICU 4.2 - */ - virtual ~ByteSink(); - - /** - * Append "bytes[0,n-1]" to this. - * @param bytes the pointer to the bytes - * @param n the number of bytes; must be non-negative - * @stable ICU 4.2 - */ - virtual void Append(const char* bytes, int32_t n) = 0; - - /** - * Returns a writable buffer for appending and writes the buffer's capacity to - * *result_capacity. Guarantees *result_capacity>=min_capacity. - * May return a pointer to the caller-owned scratch buffer which must have - * scratch_capacity>=min_capacity. - * The returned buffer is only valid until the next operation - * on this ByteSink. - * - * After writing at most *result_capacity bytes, call Append() with the - * pointer returned from this function and the number of bytes written. - * Many Append() implementations will avoid copying bytes if this function - * returned an internal buffer. - * - * Partial usage example: - * int32_t capacity; - * char* buffer = sink->GetAppendBuffer(..., &capacity); - * ... Write n bytes into buffer, with n <= capacity. - * sink->Append(buffer, n); - * In many implementations, that call to Append will avoid copying bytes. - * - * If the ByteSink allocates or reallocates an internal buffer, it should use - * the desired_capacity_hint if appropriate. - * If a caller cannot provide a reasonable guess at the desired capacity, - * it should pass desired_capacity_hint=0. - * - * If a non-scratch buffer is returned, the caller may only pass - * a prefix to it to Append(). - * That is, it is not correct to pass an interior pointer to Append(). - * - * The default implementation always returns the scratch buffer. 
- * - * @param min_capacity required minimum capacity of the returned buffer; - * must be non-negative - * @param desired_capacity_hint desired capacity of the returned buffer; - * must be non-negative - * @param scratch default caller-owned buffer - * @param scratch_capacity capacity of the scratch buffer - * @param result_capacity pointer to an integer which will be set to the - * capacity of the returned buffer - * @return a buffer with *result_capacity>=min_capacity - * @stable ICU 4.2 - */ - virtual char* GetAppendBuffer(int32_t min_capacity, - int32_t desired_capacity_hint, - char* scratch, int32_t scratch_capacity, - int32_t* result_capacity); - - /** - * Flush internal buffers. - * Some byte sinks use internal buffers or provide buffering - * and require calling Flush() at the end of the stream. - * The ByteSink should be ready for further Append() calls after Flush(). - * The default implementation of Flush() does nothing. - * @stable ICU 4.2 - */ - virtual void Flush(); - -private: - ByteSink(const ByteSink &); // copy constructor not implemented - ByteSink &operator=(const ByteSink &); // assignment operator not implemented -}; - -// ------------------------------------------------------------- -// Some standard implementations - -/** - * Implementation of ByteSink that writes to a flat byte array, - * with bounds-checking: - * This sink will not write more than capacity bytes to outbuf. - * If more than capacity bytes are Append()ed, then excess bytes are ignored, - * and Overflowed() will return true. - * Overflow does not cause a runtime error. - * @stable ICU 4.2 - */ -class U_COMMON_API CheckedArrayByteSink : public ByteSink { -public: - /** - * Constructs a ByteSink that will write to outbuf[0..capacity-1]. - * @param outbuf buffer to write to - * @param capacity size of the buffer - * @stable ICU 4.2 - */ - CheckedArrayByteSink(char* outbuf, int32_t capacity); - /** - * Destructor. 
- * @stable ICU 4.2 - */ - virtual ~CheckedArrayByteSink(); - /** - * Returns the sink to its original state, without modifying the buffer. - * Useful for reusing both the buffer and the sink for multiple streams. - * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0 - * and Overflowed()=FALSE. - * @return *this - * @stable ICU 4.6 - */ - virtual CheckedArrayByteSink& Reset(); - /** - * Append "bytes[0,n-1]" to this. - * @param bytes the pointer to the bytes - * @param n the number of bytes; must be non-negative - * @stable ICU 4.2 - */ - virtual void Append(const char* bytes, int32_t n); - /** - * Returns a writable buffer for appending and writes the buffer's capacity to - * *result_capacity. For details see the base class documentation. - * @param min_capacity required minimum capacity of the returned buffer; - * must be non-negative - * @param desired_capacity_hint desired capacity of the returned buffer; - * must be non-negative - * @param scratch default caller-owned buffer - * @param scratch_capacity capacity of the scratch buffer - * @param result_capacity pointer to an integer which will be set to the - * capacity of the returned buffer - * @return a buffer with *result_capacity>=min_capacity - * @stable ICU 4.2 - */ - virtual char* GetAppendBuffer(int32_t min_capacity, - int32_t desired_capacity_hint, - char* scratch, int32_t scratch_capacity, - int32_t* result_capacity); - /** - * Returns the number of bytes actually written to the sink. - * @return number of bytes written to the buffer - * @stable ICU 4.2 - */ - int32_t NumberOfBytesWritten() const { return size_; } - /** - * Returns true if any bytes were discarded, i.e., if there was an - * attempt to write more than 'capacity' bytes. - * @return TRUE if more than 'capacity' bytes were Append()ed - * @stable ICU 4.2 - */ - UBool Overflowed() const { return overflowed_; } - /** - * Returns the number of bytes appended to the sink. 
- * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten() - * else they return the same number. - * @return number of bytes written to the buffer - * @stable ICU 4.6 - */ - int32_t NumberOfBytesAppended() const { return appended_; } -private: - char* outbuf_; - const int32_t capacity_; - int32_t size_; - int32_t appended_; - UBool overflowed_; - CheckedArrayByteSink(); ///< default constructor not implemented - CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented - CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented -}; - -#if U_HAVE_STD_STRING - -/** - * Implementation of ByteSink that writes to a "string". - * The StringClass is usually instantiated with a std::string. - * @stable ICU 4.2 - */ -template<typename StringClass> -class StringByteSink : public ByteSink { - public: - /** - * Constructs a ByteSink that will append bytes to the dest string. - * @param dest pointer to string object to append to - * @stable ICU 4.2 - */ - StringByteSink(StringClass* dest) : dest_(dest) { } - /** - * Append "bytes[0,n-1]" to this. 
- * @param data the pointer to the bytes - * @param n the number of bytes; must be non-negative - * @stable ICU 4.2 - */ - virtual void Append(const char* data, int32_t n) { dest_->append(data, n); } - private: - StringClass* dest_; - StringByteSink(); ///< default constructor not implemented - StringByteSink(const StringByteSink &); ///< copy constructor not implemented - StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented -}; - -#endif - -U_NAMESPACE_END - -#endif // __BYTESTREAM_H__ diff --git a/Source/WTF/icu/unicode/localpointer.h b/Source/WTF/icu/unicode/localpointer.h deleted file mode 100644 index e3ccb2581..000000000 --- a/Source/WTF/icu/unicode/localpointer.h +++ /dev/null @@ -1,304 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2009-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: localpointer.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009nov13 -* created by: Markus W. Scherer -*/ - -#ifndef __LOCALPOINTER_H__ -#define __LOCALPOINTER_H__ - -/** - * \file - * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code. - * - * These classes are inspired by - * - std::auto_ptr - * - boost::scoped_ptr & boost::scoped_array - * - Taligent Safe Pointers (TOnlyPointerTo) - * - * but none of those provide for all of the goals for ICU smart pointers: - * - Smart pointer owns the object and releases it when it goes out of scope. - * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust. - * - ICU-compatible: No exceptions. - * - Need to be able to orphan/release the pointer and its ownership. - * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects. 
- * - * For details see http://site.icu-project.org/design/cpp/scoped_ptr - */ - -#include "unicode/utypes.h" - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * "Smart pointer" base class; do not use directly: use LocalPointer etc. - * - * Base class for smart pointer classes that do not throw exceptions. - * - * Do not use this base class directly, since it does not delete its pointer. - * A subclass must implement methods that delete the pointer: - * Destructor and adoptInstead(). - * - * There is no operator T *() provided because the programmer must decide - * whether to use getAlias() (without transfer of ownership) or orpan() - * (with transfer of ownership and NULLing of the pointer). - * - * @see LocalPointer - * @see LocalArray - * @see U_DEFINE_LOCAL_OPEN_POINTER - * @stable ICU 4.4 - */ -template<typename T> -class LocalPointerBase { -public: - /** - * Constructor takes ownership. - * @param p simple pointer to an object that is adopted - * @stable ICU 4.4 - */ - explicit LocalPointerBase(T *p=NULL) : ptr(p) {} - /** - * Destructor deletes the object it owns. - * Subclass must override: Base class does nothing. - * @stable ICU 4.4 - */ - ~LocalPointerBase() { /* delete ptr; */ } - /** - * NULL check. - * @return TRUE if ==NULL - * @stable ICU 4.4 - */ - UBool isNull() const { return ptr==NULL; } - /** - * NULL check. - * @return TRUE if !=NULL - * @stable ICU 4.4 - */ - UBool isValid() const { return ptr!=NULL; } - /** - * Comparison with a simple pointer, so that existing code - * with ==NULL need not be changed. - * @param other simple pointer for comparison - * @return true if this pointer value equals other - * @stable ICU 4.4 - */ - bool operator==(const T *other) const { return ptr==other; } - /** - * Comparison with a simple pointer, so that existing code - * with !=NULL need not be changed. 
- * @param other simple pointer for comparison - * @return true if this pointer value differs from other - * @stable ICU 4.4 - */ - bool operator!=(const T *other) const { return ptr!=other; } - /** - * Access without ownership change. - * @return the pointer value - * @stable ICU 4.4 - */ - T *getAlias() const { return ptr; } - /** - * Access without ownership change. - * @return the pointer value as a reference - * @stable ICU 4.4 - */ - T &operator*() const { return *ptr; } - /** - * Access without ownership change. - * @return the pointer value - * @stable ICU 4.4 - */ - T *operator->() const { return ptr; } - /** - * Gives up ownership; the internal pointer becomes NULL. - * @return the pointer value; - * caller becomes responsible for deleting the object - * @stable ICU 4.4 - */ - T *orphan() { - T *p=ptr; - ptr=NULL; - return p; - } - /** - * Deletes the object it owns, - * and adopts (takes ownership of) the one passed in. - * Subclass must override: Base class does not delete the object. - * @param p simple pointer to an object that is adopted - * @stable ICU 4.4 - */ - void adoptInstead(T *p) { - // delete ptr; - ptr=p; - } -protected: - /** - * Actual pointer. - * @internal - */ - T *ptr; -private: - // No comparison operators with other LocalPointerBases. - bool operator==(const LocalPointerBase &other); - bool operator!=(const LocalPointerBase &other); - // No ownership transfer: No copy constructor, no assignment operator. - LocalPointerBase(const LocalPointerBase &other); - void operator=(const LocalPointerBase &other); - // No heap allocation. Use only on the stack. - static void * U_EXPORT2 operator new(size_t size); - static void * U_EXPORT2 operator new[](size_t size); -#if U_HAVE_PLACEMENT_NEW - static void * U_EXPORT2 operator new(size_t, void *ptr); -#endif -}; - -/** - * "Smart pointer" class, deletes objects via the standard C++ delete operator. - * For most methods see the LocalPointerBase base class. 
- * - * Usage example: - * \code - * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005)); - * int32_t length=s->length(); // 2 - * UChar lead=s->charAt(0); // 0xd900 - * if(some condition) { return; } // no need to explicitly delete the pointer - * s.adoptInstead(new UnicodeString((UChar)0xfffc)); - * length=s->length(); // 1 - * // no need to explicitly delete the pointer - * \endcode - * - * @see LocalPointerBase - * @stable ICU 4.4 - */ -template<typename T> -class LocalPointer : public LocalPointerBase<T> { -public: - /** - * Constructor takes ownership. - * @param p simple pointer to an object that is adopted - * @stable ICU 4.4 - */ - explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {} - /** - * Destructor deletes the object it owns. - * @stable ICU 4.4 - */ - ~LocalPointer() { - delete LocalPointerBase<T>::ptr; - } - /** - * Deletes the object it owns, - * and adopts (takes ownership of) the one passed in. - * @param p simple pointer to an object that is adopted - * @stable ICU 4.4 - */ - void adoptInstead(T *p) { - delete LocalPointerBase<T>::ptr; - LocalPointerBase<T>::ptr=p; - } -}; - -/** - * "Smart pointer" class, deletes objects via the C++ array delete[] operator. - * For most methods see the LocalPointerBase base class. - * Adds operator[] for array item access. - * - * Usage example: - * \code - * LocalArray<UnicodeString> a(new UnicodeString[2]); - * a[0].append((UChar)0x61); - * if(some condition) { return; } // no need to explicitly delete the array - * a.adoptInstead(new UnicodeString[4]); - * a[3].append((UChar)0x62).append((UChar)0x63).reverse(); - * // no need to explicitly delete the array - * \endcode - * - * @see LocalPointerBase - * @stable ICU 4.4 - */ -template<typename T> -class LocalArray : public LocalPointerBase<T> { -public: - /** - * Constructor takes ownership. 
- * @param p simple pointer to an array of T objects that is adopted - * @stable ICU 4.4 - */ - explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {} - /** - * Destructor deletes the array it owns. - * @stable ICU 4.4 - */ - ~LocalArray() { - delete[] LocalPointerBase<T>::ptr; - } - /** - * Deletes the array it owns, - * and adopts (takes ownership of) the one passed in. - * @param p simple pointer to an array of T objects that is adopted - * @stable ICU 4.4 - */ - void adoptInstead(T *p) { - delete[] LocalPointerBase<T>::ptr; - LocalPointerBase<T>::ptr=p; - } - /** - * Array item access (writable). - * No index bounds check. - * @param i array index - * @return reference to the array item - * @stable ICU 4.4 - */ - T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; } -}; - -/** - * \def U_DEFINE_LOCAL_OPEN_POINTER - * "Smart pointer" definition macro, deletes objects via the closeFunction. - * Defines a subclass of LocalPointerBase which works just - * like LocalPointer<Type> except that this subclass will use the closeFunction - * rather than the C++ delete operator. - * - * Requirement: The closeFunction must tolerate a NULL pointer. - * (We could add a NULL check here but it is normally redundant.) 
- * - * Usage example: - * \code - * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode)); - * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), - * utf8Out, (int32_t)sizeof(utf8Out), - * utf8In, utf8InLength, &errorCode); - * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap - * \endcode - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ - class LocalPointerClassName : public LocalPointerBase<Type> { \ - public: \ - explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \ - ~LocalPointerClassName() { closeFunction(ptr); } \ - void adoptInstead(Type *p) { \ - closeFunction(ptr); \ - ptr=p; \ - } \ - } - -U_NAMESPACE_END - -#endif /* U_SHOW_CPLUSPLUS_API */ -#endif /* __LOCALPOINTER_H__ */ diff --git a/Source/WTF/icu/unicode/parseerr.h b/Source/WTF/icu/unicode/parseerr.h deleted file mode 100644 index 44ff00811..000000000 --- a/Source/WTF/icu/unicode/parseerr.h +++ /dev/null @@ -1,92 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 1999-2005, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 03/14/00 aliu Creation. -* 06/27/00 aliu Change from C++ class to C struct -********************************************************************** -*/ -#ifndef PARSEERR_H -#define PARSEERR_H - -#include "unicode/utypes.h" - - -/** - * \file - * \brief C API: Parse Error Information - */ -/** - * The capacity of the context strings in UParseError. - * @stable ICU 2.0 - */ -enum { U_PARSE_CONTEXT_LEN = 16 }; - -/** - * A UParseError struct is used to returned detailed information about - * parsing errors. 
It is used by ICU parsing engines that parse long - * rules, patterns, or programs, where the text being parsed is long - * enough that more information than a UErrorCode is needed to - * localize the error. - * - * <p>The line, offset, and context fields are optional; parsing - * engines may choose not to use to use them. - * - * <p>The preContext and postContext strings include some part of the - * context surrounding the error. If the source text is "let for=7" - * and "for" is the error (e.g., because it is a reserved word), then - * some examples of what a parser might produce are the following: - * - * <pre> - * preContext postContext - * "" "" The parser does not support context - * "let " "=7" Pre- and post-context only - * "let " "for=7" Pre- and post-context and error text - * "" "for" Error text only - * </pre> - * - * <p>Examples of engines which use UParseError (or may use it in the - * future) are Transliterator, RuleBasedBreakIterator, and - * RegexPattern. - * - * @stable ICU 2.0 - */ -typedef struct UParseError { - - /** - * The line on which the error occured. If the parser uses this - * field, it sets it to the line number of the source text line on - * which the error appears, which will be be a value >= 1. If the - * parse does not support line numbers, the value will be <= 0. - * @stable ICU 2.0 - */ - int32_t line; - - /** - * The character offset to the error. If the line field is >= 1, - * then this is the offset from the start of the line. Otherwise, - * this is the offset from the start of the text. If the parser - * does not support this field, it will have a value < 0. - * @stable ICU 2.0 - */ - int32_t offset; - - /** - * Textual context before the error. Null-terminated. The empty - * string if not supported by parser. - * @stable ICU 2.0 - */ - UChar preContext[U_PARSE_CONTEXT_LEN]; - - /** - * The error itself and/or textual context after the error. - * Null-terminated. The empty string if not supported by parser. 
- * @stable ICU 2.0 - */ - UChar postContext[U_PARSE_CONTEXT_LEN]; - -} UParseError; - -#endif diff --git a/Source/WTF/icu/unicode/platform.h b/Source/WTF/icu/unicode/platform.h deleted file mode 100644 index 1b2ab306e..000000000 --- a/Source/WTF/icu/unicode/platform.h +++ /dev/null @@ -1,755 +0,0 @@ -/* -****************************************************************************** -* -* Copyright (C) 1997-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : platform.h -* -* Date Name Description -* 05/13/98 nos Creation (content moved here from ptypes.h). -* 03/02/99 stephen Added AS400 support. -* 03/30/99 stephen Added Linux support. -* 04/13/99 stephen Reworked for autoconf. -****************************************************************************** -*/ - -#ifndef _PLATFORM_H -#define _PLATFORM_H - -#include "unicode/uconfig.h" -#include "unicode/uvernum.h" - -/** - * \file - * \brief Basic types for the platform. - * - * This file used to be generated by autoconf/configure. - * Starting with ICU 49, platform.h is a normal source file, - * to simplify cross-compiling and working with non-autoconf/make build systems. - * - * When a value in this file does not work on a platform, then please - * try to derive it from the U_PLATFORM value - * (for which we might need a new value constant in rare cases) - * and/or from other macros that are predefined by the compiler - * or defined in standard (POSIX or platform or compiler) headers. - * - * As a temporary workaround, you can add an explicit <code>#define</code> for some macros - * before it is first tested, or add an equivalent -D macro definition - * to the compiler's command line. - * - * Note: Some compilers provide ways to show the predefined macros. 
- * For example, with gcc you can compile an empty .c file and have the compiler - * print the predefined macros with - * \code - * gcc -E -dM -x c /dev/null | sort - * \endcode - * (You can provide an actual empty .c file rather than /dev/null. - * <code>-x c++</code> is for C++.) - */ - -/** - * Define some things so that they can be documented. - * @internal - */ -#ifdef U_IN_DOXYGEN -/* - * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented. - * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented. - */ - -/* None for now. */ -#endif - -/** - * \def U_PLATFORM - * The U_PLATFORM macro defines the platform we're on. - * - * We used to define one different, value-less macro per platform. - * That made it hard to know the set of relevant platforms and macros, - * and hard to deal with variants of platforms. - * - * Starting with ICU 49, we define platforms as numeric macros, - * with ranges of values for related platforms and their variants. - * The U_PLATFORM macro is set to one of these values. - * - * Historical note from the Solaris Wikipedia article: - * AT&T and Sun collaborated on a project to merge the most popular Unix variants - * on the market at that time: BSD, System V, and Xenix. - * This became Unix System V Release 4 (SVR4). - * - * @internal - */ - -/** Unknown platform. @internal */ -#define U_PF_UNKNOWN 0 -/** Windows @internal */ -#define U_PF_WINDOWS 1000 -/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */ -#define U_PF_MINGW 1800 -/** - * Cygwin. Windows, calls to cygwin1.dll for Posix functions, - * using MSVC or GNU gcc and binutils. - * @internal - */ -#define U_PF_CYGWIN 1900 -/* Reserve 2000 for U_PF_UNIX? */ -/** HP-UX is based on UNIX System V. 
@internal */ -#define U_PF_HPUX 2100 -/** Solaris is a Unix operating system based on SVR4. @internal */ -#define U_PF_SOLARIS 2600 -/** BSD is a UNIX operating system derivative. @internal */ -#define U_PF_BSD 3000 -/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */ -#define U_PF_AIX 3100 -/** IRIX is based on UNIX System V with BSD extensions. @internal */ -#define U_PF_IRIX 3200 -/** - * Darwin is a POSIX-compliant operating system, composed of code developed by Apple, - * as well as code derived from NeXTSTEP, BSD, and other projects, - * built around the Mach kernel. - * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based. - * (Original description modified from WikiPedia.) - * @internal - */ -#define U_PF_DARWIN 3500 -/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */ -#define U_PF_IPHONE 3550 -/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */ -#define U_PF_QNX 3700 -/** Linux is a Unix-like operating system. @internal */ -#define U_PF_LINUX 4000 -/** Android is based on Linux. @internal */ -#define U_PF_ANDROID 4050 -/** "Classic" Mac OS (1984-2001) @internal */ -#define U_PF_CLASSIC_MACOS 8000 -/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ -#define U_PF_OS390 9000 -/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */ -#define U_PF_OS400 9400 - -#ifdef U_PLATFORM - /* Use the predefined value. */ -#elif defined(__MINGW32__) -# define U_PLATFORM U_PF_MINGW -#elif defined(__CYGWIN__) -# define U_PLATFORM U_PF_CYGWIN -#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) -# define U_PLATFORM U_PF_WINDOWS -#elif defined(__ANDROID__) -# define U_PLATFORM U_PF_ANDROID - /* Android wchar_t support depends on the API level. 
*/ -# include <android/api-level.h> -#elif defined(linux) || defined(__linux__) || defined(__linux) -# define U_PLATFORM U_PF_LINUX -#elif defined(__APPLE__) && defined(__MACH__) -# include <TargetConditionals.h> -# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */ -# define U_PLATFORM U_PF_IPHONE -# else -# define U_PLATFORM U_PF_DARWIN -# endif -#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__) -# define U_PLATFORM U_PF_BSD -#elif defined(sun) || defined(__sun) - /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */ -# define U_PLATFORM U_PF_SOLARIS -# if defined(__GNUC__) - /* Solaris/GCC needs this header file to get the proper endianness. Normally, this - * header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h - * is included which does not include this header file. - */ -# include <sys/isa_defs.h> -# endif -#elif defined(_AIX) || defined(__TOS_AIX__) -# define U_PLATFORM U_PF_AIX -#elif defined(_hpux) || defined(hpux) || defined(__hpux) -# define U_PLATFORM U_PF_HPUX -#elif defined(sgi) || defined(__sgi) -# define U_PLATFORM U_PF_IRIX -#elif defined(macintosh) -# define U_PLATFORM U_PF_CLASSIC_MACOS -#elif defined(__QNX__) || defined(__QNXNTO__) -# define U_PLATFORM U_PF_QNX -#elif defined(__TOS_MVS__) -# define U_PLATFORM U_PF_OS390 -#elif defined(__OS400__) || defined(__TOS_OS400__) -# define U_PLATFORM U_PF_OS400 -#else -# define U_PLATFORM U_PF_UNKNOWN -#endif - -/** - * \def CYGWINMSVC - * Defined if this is Windows with Cygwin, but using MSVC rather than gcc. - * Otherwise undefined. - * @internal - */ -/* Commented out because this is already set in mh-cygwin-msvc -#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER) -# define CYGWINMSVC -#endif -*/ - -/** - * \def U_PLATFORM_USES_ONLY_WIN32_API - * Defines whether the platform uses only the Win32 API. 
- * Set to 1 for Windows/MSVC and MinGW but not Cygwin. - * @internal - */ -#ifdef U_PLATFORM_USES_ONLY_WIN32_API - /* Use the predefined value. */ -#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC) -# define U_PLATFORM_USES_ONLY_WIN32_API 1 -#else - /* Cygwin implements POSIX. */ -# define U_PLATFORM_USES_ONLY_WIN32_API 0 -#endif - -/** - * \def U_PLATFORM_HAS_WIN32_API - * Defines whether the Win32 API is available on the platform. - * Set to 1 for Windows/MSVC, MinGW and Cygwin. - * @internal - */ -#ifdef U_PLATFORM_HAS_WIN32_API - /* Use the predefined value. */ -#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN -# define U_PLATFORM_HAS_WIN32_API 1 -#else -# define U_PLATFORM_HAS_WIN32_API 0 -#endif - -/** - * \def U_PLATFORM_IMPLEMENTS_POSIX - * Defines whether the platform implements (most of) the POSIX API. - * Set to 1 for Cygwin and most other platforms. - * @internal - */ -#ifdef U_PLATFORM_IMPLEMENTS_POSIX - /* Use the predefined value. */ -#elif U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_CLASSIC_MACOS -# define U_PLATFORM_IMPLEMENTS_POSIX 0 -#else -# define U_PLATFORM_IMPLEMENTS_POSIX 1 -#endif - -/** - * \def U_PLATFORM_IS_LINUX_BASED - * Defines whether the platform is Linux or one of its derivatives. - * @internal - */ -#ifdef U_PLATFORM_IS_LINUX_BASED - /* Use the predefined value. */ -#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= U_PF_ANDROID -# define U_PLATFORM_IS_LINUX_BASED 1 -#else -# define U_PLATFORM_IS_LINUX_BASED 0 -#endif - -/** - * \def U_PLATFORM_IS_DARWIN_BASED - * Defines whether the platform is Darwin or one of its derivatives. - * @internal - */ -#ifdef U_PLATFORM_IS_DARWIN_BASED - /* Use the predefined value. */ -#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE -# define U_PLATFORM_IS_DARWIN_BASED 1 -#else -# define U_PLATFORM_IS_DARWIN_BASED 0 -#endif - -/** - * \def U_HAVE_STDINT_H - * Defines whether stdint.h is available. It is a C99 standard header. 
- * We used to include inttypes.h which includes stdint.h but we usually do not need - * the additional definitions from inttypes.h. - * @internal - */ -#ifdef U_HAVE_STDINT_H - /* Use the predefined value. */ -#elif U_PLATFORM_USES_ONLY_WIN32_API -# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600) - /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */ -# define U_HAVE_STDINT_H 1 -# else -# define U_HAVE_STDINT_H 0 -# endif -#elif U_PLATFORM == U_PF_SOLARIS - /* Solaris has inttypes.h but not stdint.h. */ -# define U_HAVE_STDINT_H 0 -#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) - /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ -# define U_HAVE_STDINT_H 0 -#else -# define U_HAVE_STDINT_H 1 -#endif - -/** - * \def U_HAVE_INTTYPES_H - * Defines whether inttypes.h is available. It is a C99 standard header. - * We include inttypes.h where it is available but stdint.h is not. - * @internal - */ -#ifdef U_HAVE_INTTYPES_H - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_SOLARIS - /* Solaris has inttypes.h but not stdint.h. */ -# define U_HAVE_INTTYPES_H 1 -#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) - /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ -# define U_HAVE_INTTYPES_H 1 -#else - /* Most platforms have both inttypes.h and stdint.h, or neither. */ -# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H -#endif - -/** - * \def U_IOSTREAM_SOURCE - * Defines what support for C++ streams is available. - * - * If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available - * (the ISO/IEC C++ FDIS was published in November 1997), and then - * one should qualify streams using the std namespace in ICU header - * files. - * Starting with ICU 49, this is the only supported version. 
- * - * If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is - * available instead (in June 1985 Stroustrup published - * "An Extensible I/O Facility for C++" at the summer USENIX conference). - * Starting with ICU 49, this version is not supported any more. - * - * If U_IOSTREAM_SOURCE is 0 (or any value less than 199711), - * then C++ streams are not available and - * support for them will be silently suppressed in ICU. - * - * @internal - */ -#ifndef U_IOSTREAM_SOURCE -#define U_IOSTREAM_SOURCE 199711 -#endif - -/** - * \def U_HAVE_STD_STRING - * Defines whether the standard C++ (STL) <string> header is available. - * @internal - */ -#ifdef U_HAVE_STD_STRING - /* Use the predefined value. */ -#else -# define U_HAVE_STD_STRING 1 -#endif - -/*===========================================================================*/ -/** @{ Compiler and environment features */ -/*===========================================================================*/ - -/** - * \def U_GCC_MAJOR_MINOR - * Indicates whether the compiler is gcc (test for != 0), - * and if so, contains its major (times 100) and minor version numbers. - * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0. - * - * For example, for testing for whether we have gcc, and whether it's 4.6 or higher, - * use "#if U_GCC_MAJOR_MINOR >= 406". - * @internal - */ -#ifdef __GNUC__ -# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__) -#else -# define U_GCC_MAJOR_MINOR 0 -#endif - -/** - * \def U_IS_BIG_ENDIAN - * Determines the endianness of the platform. - * @internal - */ -#ifdef U_IS_BIG_ENDIAN - /* Use the predefined value. 
*/ -#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) -# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN) -#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) - /* gcc */ -# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) -# define U_IS_BIG_ENDIAN 1 -#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN) -# define U_IS_BIG_ENDIAN 0 -#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__) - /* These platforms do not appear to predefine any endianness macros. */ -# define U_IS_BIG_ENDIAN 1 -#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0) - /* HPPA do not appear to predefine any endianness macros. */ -# define U_IS_BIG_ENDIAN 1 -#elif defined(sparc) || defined(__sparc) || defined(__sparc__) - /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */ -# define U_IS_BIG_ENDIAN 1 -#else -# define U_IS_BIG_ENDIAN 0 -#endif - -/** - * \def U_HAVE_PLACEMENT_NEW - * Determines whether to override placement new and delete for STL. - * @stable ICU 2.6 - */ -#ifdef U_HAVE_PLACEMENT_NEW - /* Use the predefined value. */ -#elif defined(__BORLANDC__) -# define U_HAVE_PLACEMENT_NEW 0 -#else -# define U_HAVE_PLACEMENT_NEW 1 -#endif - -/** - * \def U_HAVE_DEBUG_LOCATION_NEW - * Define this to define the MFC debug version of the operator new. - * - * @stable ICU 3.4 - */ -#ifdef U_HAVE_DEBUG_LOCATION_NEW - /* Use the predefined value. 
*/ -#elif defined(_MSC_VER) -# define U_HAVE_DEBUG_LOCATION_NEW 1 -#else -# define U_HAVE_DEBUG_LOCATION_NEW 0 -#endif - -/* Compatibility with non clang compilers */ -#ifndef __has_attribute -# define __has_attribute(x) 0 -#endif - -/** - * \def U_MALLOC_ATTR - * Attribute to mark functions as malloc-like - * @internal - */ -#if defined(__GNUC__) && __GNUC__>=3 -# define U_MALLOC_ATTR __attribute__ ((__malloc__)) -#else -# define U_MALLOC_ATTR -#endif - -/** - * \def U_ALLOC_SIZE_ATTR - * Attribute to specify the size of the allocated buffer for malloc-like functions - * @internal - */ -#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || __has_attribute(alloc_size) -# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X))) -# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y))) -#else -# define U_ALLOC_SIZE_ATTR(X) -# define U_ALLOC_SIZE_ATTR2(X,Y) -#endif - -/** @} */ - -/*===========================================================================*/ -/** @{ Character data types */ -/*===========================================================================*/ - -/** - * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform. - * @stable ICU 2.0 - */ -#define U_ASCII_FAMILY 0 - -/** - * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform. - * @stable ICU 2.0 - */ -#define U_EBCDIC_FAMILY 1 - -/** - * \def U_CHARSET_FAMILY - * - * <p>These definitions allow to specify the encoding of text - * in the char data type as defined by the platform and the compiler. - * It is enough to determine the code point values of "invariant characters", - * which are the ones shared by all encodings that are in use - * on a given platform.</p> - * - * <p>Those "invariant characters" should be all the uppercase and lowercase - * latin letters, the digits, the space, and "basic punctuation". 
- * Also, '\\n', '\\r', '\\t' should be available.</p> - * - * <p>The list of "invariant characters" is:<br> - * \code - * A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? _ - * \endcode - * <br> - * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p> - * - * <p>This matches the IBM Syntactic Character Set (CS 640).</p> - * - * <p>In other words, all the graphic characters in 7-bit ASCII should - * be safely accessible except the following:</p> - * - * \code - * '\' <backslash> - * '[' <left bracket> - * ']' <right bracket> - * '{' <left brace> - * '}' <right brace> - * '^' <circumflex> - * '~' <tilde> - * '!' <exclamation mark> - * '#' <number sign> - * '|' <vertical line> - * '$' <dollar sign> - * '@' <commercial at> - * '`' <grave accent> - * \endcode - * @stable ICU 2.0 - */ -#ifdef U_CHARSET_FAMILY - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB) -# define U_CHARSET_FAMILY U_EBCDIC_FAMILY -#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__) -# define U_CHARSET_FAMILY U_EBCDIC_FAMILY -#else -# define U_CHARSET_FAMILY U_ASCII_FAMILY -#endif - -/** - * \def U_CHARSET_IS_UTF8 - * - * Hardcode the default charset to UTF-8. - * - * If this is set to 1, then - * - ICU will assume that all non-invariant char*, StringPiece, std::string etc. 
- * contain UTF-8 text, regardless of what the system API uses - * - some ICU code will use fast functions like u_strFromUTF8() - * rather than the more general and more heavy-weight conversion API (ucnv.h) - * - ucnv_getDefaultName() always returns "UTF-8" - * - ucnv_setDefaultName() is disabled and will not change the default charset - * - static builds of ICU are smaller - * - more functionality is available with the UCONFIG_NO_CONVERSION build-time - * configuration option (see unicode/uconfig.h) - * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable - * - * @stable ICU 4.2 - * @see UCONFIG_NO_CONVERSION - */ -#ifdef U_CHARSET_IS_UTF8 - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_ANDROID || U_PLATFORM_IS_DARWIN_BASED -# define U_CHARSET_IS_UTF8 1 -#else -# define U_CHARSET_IS_UTF8 0 -#endif - -/** @} */ - -/*===========================================================================*/ -/** @{ Information about wchar support */ -/*===========================================================================*/ - -/** - * \def U_HAVE_WCHAR_H - * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default. - * - * @stable ICU 2.0 - */ -#ifdef U_HAVE_WCHAR_H - /* Use the predefined value. */ -#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9 - /* - * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t. - * The type and header existed, but the library functions did not work as expected. - * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway. - */ -# define U_HAVE_WCHAR_H 0 -#else -# define U_HAVE_WCHAR_H 1 -#endif - -/** - * \def U_SIZEOF_WCHAR_T - * U_SIZEOF_WCHAR_T==sizeof(wchar_t) - * - * @stable ICU 2.0 - */ -#ifdef U_SIZEOF_WCHAR_T - /* Use the predefined value. */ -#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9) || U_PLATFORM == U_PF_CLASSIC_MACOS - /* - * Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring. 
- * Newer Mac OS X has size 4. - */ -# define U_SIZEOF_WCHAR_T 1 -#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN -# define U_SIZEOF_WCHAR_T 2 -#elif U_PLATFORM == U_PF_AIX - /* - * AIX 6.1 information, section "Wide character data representation": - * "... the wchar_t datatype is 32-bit in the 64-bit environment and - * 16-bit in the 32-bit environment." - * and - * "All locales use Unicode for their wide character code values (process code), - * except the IBM-eucTW codeset." - */ -# ifdef __64BIT__ -# define U_SIZEOF_WCHAR_T 4 -# else -# define U_SIZEOF_WCHAR_T 2 -# endif -#elif U_PLATFORM == U_PF_OS390 - /* - * z/OS V1R11 information center, section "LP64 | ILP32": - * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes. - * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes." - */ -# ifdef _LP64 -# define U_SIZEOF_WCHAR_T 4 -# else -# define U_SIZEOF_WCHAR_T 2 -# endif -#elif U_PLATFORM == U_PF_OS400 -# if defined(__UTF32__) - /* - * LOCALETYPE(*LOCALEUTF) is specified. - * Wide-character strings are in UTF-32, - * narrow-character strings are in UTF-8. - */ -# define U_SIZEOF_WCHAR_T 4 -# elif defined(__UCS2__) - /* - * LOCALETYPE(*LOCALEUCS2) is specified. - * Wide-character strings are in UCS-2, - * narrow-character strings are in EBCDIC. - */ -# define U_SIZEOF_WCHAR_T 2 -#else - /* - * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified. - * Wide-character strings are in 16-bit EBCDIC, - * narrow-character strings are in EBCDIC. - */ -# define U_SIZEOF_WCHAR_T 2 -# endif -#else -# define U_SIZEOF_WCHAR_T 4 -#endif - -#ifndef U_HAVE_WCSCPY -#define U_HAVE_WCSCPY U_HAVE_WCHAR_H -#endif - -/** @} */ - -/** - * \def U_HAVE_CHAR16_T - * Defines whether the char16_t type is available for UTF-16 - * and u"abc" UTF-16 string literals are supported. - * This is a new standard type and standard string literal syntax in C++0x - * but has been available in some compilers before. 
- * @internal - */ -#ifdef U_HAVE_CHAR16_T - /* Use the predefined value. */ -#else - /* - * Notes: - * Visual Studio 10 (_MSC_VER>=1600) defines char16_t but - * does not support u"abc" string literals. - * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but - * does not support u"abc" string literals. - * C++11 and C11 require support for UTF-16 literals - */ -# if (defined(__cplusplus) && __cplusplus >= 201103L) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) -# define U_HAVE_CHAR16_T 1 -# else -# define U_HAVE_CHAR16_T 0 -# endif -#endif - -/** - * @{ - * \def U_DECLARE_UTF16 - * Do not use this macro because it is not defined on all platforms. - * Use the UNICODE_STRING or U_STRING_DECL macros instead. - * @internal - */ -#ifdef U_DECLARE_UTF16 - /* Use the predefined value. */ -#elif U_HAVE_CHAR16_T \ - || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \ - || (defined(__HP_aCC) && __HP_aCC >= 035000) \ - || (defined(__HP_cc) && __HP_cc >= 111106) -# define U_DECLARE_UTF16(string) u ## string -#elif U_SIZEOF_WCHAR_T == 2 \ - && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__))) -# define U_DECLARE_UTF16(string) L ## string -#else - /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */ -#endif - -/** @} */ - -/*===========================================================================*/ -/** @{ Symbol import-export control */ -/*===========================================================================*/ - -#ifdef U_EXPORT - /* Use the predefined value. 
*/ -#elif defined(U_STATIC_IMPLEMENTATION) -# define U_EXPORT -#elif defined(__GNUC__) -# define U_EXPORT __attribute__((visibility("default"))) -#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \ - || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) -# define U_EXPORT __global -/*#elif defined(__HP_aCC) || defined(__HP_cc) -# define U_EXPORT __declspec(dllexport)*/ -#elif defined(_MSC_VER) -# define U_EXPORT __declspec(dllexport) -#else -# define U_EXPORT -#endif - -/* U_CALLCONV is releated to U_EXPORT2 */ -#ifdef U_EXPORT2 - /* Use the predefined value. */ -#elif defined(_MSC_VER) -# define U_EXPORT2 __cdecl -#else -# define U_EXPORT2 -#endif - -#ifdef U_IMPORT - /* Use the predefined value. */ -#elif defined(_MSC_VER) - /* Windows needs to export/import data. */ -# define U_IMPORT __declspec(dllimport) -#else -# define U_IMPORT -#endif - -/** - * \def U_CALLCONV - * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary - * in callback function typedefs to make sure that the calling convention - * is compatible. - * - * This is only used for non-ICU-API functions. - * When a function is a public ICU API, - * you must use the U_CAPI and U_EXPORT2 qualifiers. - * @stable ICU 2.0 - */ -#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) -# define U_CALLCONV __cdecl -#else -# define U_CALLCONV U_EXPORT2 -#endif - -/* @} */ - -#endif diff --git a/Source/WTF/icu/unicode/ptypes.h b/Source/WTF/icu/unicode/ptypes.h deleted file mode 100644 index b7f711603..000000000 --- a/Source/WTF/icu/unicode/ptypes.h +++ /dev/null @@ -1,126 +0,0 @@ -/* -****************************************************************************** -* -* Copyright (C) 1997-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : ptypes.h -* -* Date Name Description -* 05/13/98 nos Creation (content moved here from ptypes.h). 
-* 03/02/99 stephen Added AS400 support. -* 03/30/99 stephen Added Linux support. -* 04/13/99 stephen Reworked for autoconf. -* 09/18/08 srl Moved basic types back to ptypes.h from platform.h -****************************************************************************** -*/ - -/** - * \file - * \brief C API: Definitions of integer types of various widths - */ - -#ifndef _PTYPES_H -#define _PTYPES_H - -/** - * \def __STDC_LIMIT_MACROS - * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations - * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested. - * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code - * that uses such limit macros. - * @internal - */ -#ifndef __STDC_LIMIT_MACROS -#define __STDC_LIMIT_MACROS -#endif - -/* NULL, size_t, wchar_t */ -#include <stddef.h> - -/* - * If all compilers provided all of the C99 headers and types, - * we would just unconditionally #include <stdint.h> here - * and not need any of the stuff after including platform.h. - */ - -/* Find out if we have stdint.h etc. */ -#include "unicode/platform.h" - -/*===========================================================================*/ -/* Generic data types */ -/*===========================================================================*/ - -/* If your platform does not have the <stdint.h> header, you may - need to edit the typedefs in the #else section below. - Use #if...#else...#endif with predefined compiler macros if possible. */ -#if U_HAVE_STDINT_H - -/* - * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>. - * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc. - * which we almost never use, plus stuff like imaxabs() which we never use. - */ -#include <stdint.h> - -#if U_PLATFORM == U_PF_OS390 -/* The features header is needed to get (u)int64_t sometimes. 
*/ -#include <features.h> -/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */ -#if !defined(__uint8_t) -#define __uint8_t 1 -typedef unsigned char uint8_t; -#endif -#endif /* U_PLATFORM == U_PF_OS390 */ - -#elif U_HAVE_INTTYPES_H - -# include <inttypes.h> - -#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */ - -#if ! U_HAVE_INT8_T -typedef signed char int8_t; -#endif - -#if ! U_HAVE_UINT8_T -typedef unsigned char uint8_t; -#endif - -#if ! U_HAVE_INT16_T -typedef signed short int16_t; -#endif - -#if ! U_HAVE_UINT16_T -typedef unsigned short uint16_t; -#endif - -#if ! U_HAVE_INT32_T -typedef signed int int32_t; -#endif - -#if ! U_HAVE_UINT32_T -typedef unsigned int uint32_t; -#endif - -#if ! U_HAVE_INT64_T -#ifdef _MSC_VER - typedef signed __int64 int64_t; -#else - typedef signed long long int64_t; -#endif -#endif - -#if ! U_HAVE_UINT64_T -#ifdef _MSC_VER - typedef unsigned __int64 uint64_t; -#else - typedef unsigned long long uint64_t; -#endif -#endif - -#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */ - -#endif /* _PTYPES_H */ diff --git a/Source/WTF/icu/unicode/putil.h b/Source/WTF/icu/unicode/putil.h deleted file mode 100644 index 6fc7e9cd5..000000000 --- a/Source/WTF/icu/unicode/putil.h +++ /dev/null @@ -1,164 +0,0 @@ -/* -****************************************************************************** -* -* Copyright (C) 1997-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* FILE NAME : putil.h -* -* Date Name Description -* 05/14/98 nos Creation (content moved here from utypes.h). -* 06/17/99 erm Added IEEE_754 -* 07/22/98 stephen Added IEEEremainder, max, min, trunc -* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity -* 08/24/98 stephen Added longBitsFromDouble -* 03/02/99 stephen Removed openFile(). Added AS400 support. 
-* 04/15/99 stephen Converted to C -* 11/15/99 helena Integrated S/390 changes for IEEE support. -* 01/11/00 helena Added u_getVersion. -****************************************************************************** -*/ - -#ifndef PUTIL_H -#define PUTIL_H - -#include "unicode/utypes.h" - /** - * \file - * \brief C API: Platform Utilities - */ - -/*==========================================================================*/ -/* Platform utilities */ -/*==========================================================================*/ - -/** - * Platform utilities isolates the platform dependencies of the - * libarary. For each platform which this code is ported to, these - * functions may have to be re-implemented. - */ - -/** - * Return the ICU data directory. - * The data directory is where common format ICU data files (.dat files) - * are loaded from. Note that normal use of the built-in ICU - * facilities does not require loading of an external data file; - * unless you are adding custom data to ICU, the data directory - * does not need to be set. - * - * The data directory is determined as follows: - * If u_setDataDirectory() has been called, that is it, otherwise - * if the ICU_DATA environment variable is set, use that, otherwise - * If a data directory was specifed at ICU build time - * <code> - * \code - * #define ICU_DATA_DIR "path" - * \endcode - * </code> use that, - * otherwise no data directory is available. - * - * @return the data directory, or an empty string ("") if no data directory has - * been specified. - * - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 u_getDataDirectory(void); - -/** - * Set the ICU data directory. - * The data directory is where common format ICU data files (.dat files) - * are loaded from. Note that normal use of the built-in ICU - * facilities does not require loading of an external data file; - * unless you are adding custom data to ICU, the data directory - * does not need to be set. 
- * - * This function should be called at most once in a process, before the - * first ICU operation (e.g., u_init()) that will require the loading of an - * ICU data file. - * This function is not thread-safe. Use it before calling ICU APIs from - * multiple threads. - * - * @param directory The directory to be set. - * - * @see u_init - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); - -/** - * @{ - * Filesystem file and path separator characters. - * Example: '/' and ':' on Unix, '\\' and ';' on Windows. - * @stable ICU 2.0 - */ -#if U_PLATFORM == U_PF_CLASSIC_MACOS -# define U_FILE_SEP_CHAR ':' -# define U_FILE_ALT_SEP_CHAR ':' -# define U_PATH_SEP_CHAR ';' -# define U_FILE_SEP_STRING ":" -# define U_FILE_ALT_SEP_STRING ":" -# define U_PATH_SEP_STRING ";" -#elif U_PLATFORM_USES_ONLY_WIN32_API -# define U_FILE_SEP_CHAR '\\' -# define U_FILE_ALT_SEP_CHAR '/' -# define U_PATH_SEP_CHAR ';' -# define U_FILE_SEP_STRING "\\" -# define U_FILE_ALT_SEP_STRING "/" -# define U_PATH_SEP_STRING ";" -#else -# define U_FILE_SEP_CHAR '/' -# define U_FILE_ALT_SEP_CHAR '/' -# define U_PATH_SEP_CHAR ':' -# define U_FILE_SEP_STRING "/" -# define U_FILE_ALT_SEP_STRING "/" -# define U_PATH_SEP_STRING ":" -#endif - -/** @} */ - -/** - * Convert char characters to UChar characters. - * This utility function is useful only for "invariant characters" - * that are encoded in the platform default encoding. - * They are a small, constant subset of the encoding and include - * just the latin letters, digits, and some punctuation. - * For details, see U_CHARSET_FAMILY. - * - * @param cs Input string, points to <code>length</code> - * character bytes from a subset of the platform encoding. - * @param us Output string, points to memory for <code>length</code> - * Unicode characters. - * @param length The number of characters to convert; this may - * include the terminating <code>NUL</code>. 
- * - * @see U_CHARSET_FAMILY - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -u_charsToUChars(const char *cs, UChar *us, int32_t length); - -/** - * Convert UChar characters to char characters. - * This utility function is useful only for "invariant characters" - * that can be encoded in the platform default encoding. - * They are a small, constant subset of the encoding and include - * just the latin letters, digits, and some punctuation. - * For details, see U_CHARSET_FAMILY. - * - * @param us Input string, points to <code>length</code> - * Unicode characters that can be encoded with the - * codepage-invariant subset of the platform encoding. - * @param cs Output string, points to memory for <code>length</code> - * character bytes. - * @param length The number of characters to convert; this may - * include the terminating <code>NUL</code>. - * - * @see U_CHARSET_FAMILY - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -u_UCharsToChars(const UChar *us, char *cs, int32_t length); - -#endif diff --git a/Source/WTF/icu/unicode/rep.h b/Source/WTF/icu/unicode/rep.h deleted file mode 100644 index 4c7eae140..000000000 --- a/Source/WTF/icu/unicode/rep.h +++ /dev/null @@ -1,261 +0,0 @@ -/* -************************************************************************** -* Copyright (C) 1999-2012, International Business Machines Corporation and -* others. All Rights Reserved. -************************************************************************** -* Date Name Description -* 11/17/99 aliu Creation. Ported from java. Modified to -* match current UnicodeString API. Forced -* to use name "handleReplaceBetween" because -* of existing methods in UnicodeString. 
-************************************************************************** -*/ - -#ifndef REP_H -#define REP_H - -#include "unicode/uobject.h" - -/** - * \file - * \brief C++ API: Replaceable String - */ - -U_NAMESPACE_BEGIN - -class UnicodeString; - -/** - * <code>Replaceable</code> is an abstract base class representing a - * string of characters that supports the replacement of a range of - * itself with a new string of characters. It is used by APIs that - * change a piece of text while retaining metadata. Metadata is data - * other than the Unicode characters returned by char32At(). One - * example of metadata is style attributes; another is an edit - * history, marking each character with an author and revision number. - * - * <p>An implicit aspect of the <code>Replaceable</code> API is that - * during a replace operation, new characters take on the metadata of - * the old characters. For example, if the string "the <b>bold</b> - * font" has range (4, 8) replaced with "strong", then it becomes "the - * <b>strong</b> font". - * - * <p><code>Replaceable</code> specifies ranges using a start - * offset and a limit offset. The range of characters thus specified - * includes the characters at offset start..limit-1. That is, the - * start offset is inclusive, and the limit offset is exclusive. - * - * <p><code>Replaceable</code> also includes API to access characters - * in the string: <code>length()</code>, <code>charAt()</code>, - * <code>char32At()</code>, and <code>extractBetween()</code>. - * - * <p>For a subclass to support metadata, typical behavior of - * <code>replace()</code> is the following: - * <ul> - * <li>Set the metadata of the new text to the metadata of the first - * character replaced</li> - * <li>If no characters are replaced, use the metadata of the - * previous character</li> - * <li>If there is no previous character (i.e. start == 0), use the - * following character</li> - * <li>If there is no following character (i.e. 
the replaceable was - * empty), use default metadata.</li> - * <li>If the code point U+FFFF is seen, it should be interpreted as - * a special marker having no metadata - * </li> - * </ul> - * If this is not the behavior, the subclass should document any differences. - * @author Alan Liu - * @stable ICU 2.0 - */ -class U_COMMON_API Replaceable : public UObject { - -public: - /** - * Destructor. - * @stable ICU 2.0 - */ - virtual ~Replaceable(); - - /** - * Returns the number of 16-bit code units in the text. - * @return number of 16-bit code units in text - * @stable ICU 1.8 - */ - inline int32_t length() const; - - /** - * Returns the 16-bit code unit at the given offset into the text. - * @param offset an integer between 0 and <code>length()</code>-1 - * inclusive - * @return 16-bit code unit of text at given offset - * @stable ICU 1.8 - */ - inline UChar charAt(int32_t offset) const; - - /** - * Returns the 32-bit code point at the given 16-bit offset into - * the text. This assumes the text is stored as 16-bit code units - * with surrogate pairs intermixed. If the offset of a leading or - * trailing code unit of a surrogate pair is given, return the - * code point of the surrogate pair. - * - * @param offset an integer between 0 and <code>length()</code>-1 - * inclusive - * @return 32-bit code point of text at given offset - * @stable ICU 1.8 - */ - inline UChar32 char32At(int32_t offset) const; - - /** - * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) - * into the UnicodeString <tt>target</tt>. - * @param start offset of first character which will be copied - * @param limit offset immediately following the last character to - * be copied - * @param target UnicodeString into which to copy characters. - * The result is delivered in <TT>target</TT>; the function itself returns nothing. - * @stable ICU 2.1 - */ - virtual void extractBetween(int32_t start, - int32_t limit, - UnicodeString& target) const = 0; - - /** - * Replaces a substring of this object with the given text.
If the - * characters being replaced have metadata, the new characters - * that replace them should be given the same metadata. - * - * <p>Subclasses must ensure that if the text between start and - * limit is equal to the replacement text, that replace has no - * effect. That is, any metadata - * should be unaffected. In addition, subclasses are encouraged to - * check for initial and trailing identical characters, and make a - * smaller replacement if possible. This will preserve as much - * metadata as possible. - * @param start the beginning index, inclusive; <code>0 <= start - * <= limit</code>. - * @param limit the ending index, exclusive; <code>start <= limit - * <= length()</code>. - * @param text the text to replace characters <code>start</code> - * to <code>limit - 1</code> - * @stable ICU 2.0 - */ - virtual void handleReplaceBetween(int32_t start, - int32_t limit, - const UnicodeString& text) = 0; - // Note: All other methods in this class take the names of - // existing UnicodeString methods. This method is the exception. - // It is named differently because all replace methods of - // UnicodeString return a UnicodeString&. The 'between' is - // required in order to conform to the UnicodeString naming - // convention; API taking start/length are named <operation>, and - // those taking start/limit are named <operationBetween>. The - // 'handle' is added because 'replaceBetween' and - // 'doReplaceBetween' are already taken. - - /** - * Copies a substring of this object, retaining metadata. - * This method is used to duplicate or reorder substrings. - * The destination index must not overlap the source range. - * - * @param start the beginning index, inclusive; <code>0 <= start <= - * limit</code>. - * @param limit the ending index, exclusive; <code>start <= limit <= - * length()</code>. - * @param dest the destination index. The characters from - * <code>start..limit-1</code> will be copied to <code>dest</code>. 
- * Implementations of this method may assume that <code>dest <= start || - * dest >= limit</code>. - * @stable ICU 2.0 - */ - virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0; - - /** - * Returns true if this object contains metadata. If a - * Replaceable object has metadata, calls to the Replaceable API - * must be made so as to preserve metadata. If it does not, calls - * to the Replaceable API may be optimized to improve performance. - * The default implementation returns true. - * @return true if this object contains metadata - * @stable ICU 2.2 - */ - virtual UBool hasMetaData() const; - - /** - * Clone this object, an instance of a subclass of Replaceable. - * Clones can be used concurrently in multiple threads. - * If a subclass does not implement clone(), or if an error occurs, - * then NULL is returned. - * The clone functions in all subclasses return a pointer to a Replaceable - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see getDynamicClassID - * @stable ICU 2.6 - */ - virtual Replaceable *clone() const; - -protected: - - /** - * Default constructor. - * @stable ICU 2.4 - */ - inline Replaceable(); - - /* - * Assignment operator not declared. The compiler will provide one - * which does nothing since this class does not contain any data members. - * API/code coverage may show the assignment operator as present and - * untested - ignore. - * Subclasses need this assignment operator if they use compiler-provided - * assignment operators of their own. An alternative to not declaring one - * here would be to declare and empty-implement a protected or public one. - Replaceable &Replaceable::operator=(const Replaceable &); - */ - - /** - * Virtual version of length(). - * @stable ICU 2.4 - */ - virtual int32_t getLength() const = 0; - - /** - * Virtual version of charAt(). 
- * @stable ICU 2.4 - */ - virtual UChar getCharAt(int32_t offset) const = 0; - - /** - * Virtual version of char32At(). - * @stable ICU 2.4 - */ - virtual UChar32 getChar32At(int32_t offset) const = 0; -}; - -inline Replaceable::Replaceable() {} - -inline int32_t -Replaceable::length() const { - return getLength(); -} - -inline UChar -Replaceable::charAt(int32_t offset) const { - return getCharAt(offset); -} - -inline UChar32 -Replaceable::char32At(int32_t offset) const { - return getChar32At(offset); -} - -// There is no rep.cpp, see unistr.cpp for Replaceable function implementations. - -U_NAMESPACE_END - -#endif diff --git a/Source/WTF/icu/unicode/std_string.h b/Source/WTF/icu/unicode/std_string.h deleted file mode 100644 index 67b1d6c5a..000000000 --- a/Source/WTF/icu/unicode/std_string.h +++ /dev/null @@ -1,34 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2009-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: std_string.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009feb19 -* created by: Markus W. Scherer -*/ - -#ifndef __STD_STRING_H__ -#define __STD_STRING_H__ - -/** - * \file - * \brief C++ API: Central ICU header for including the C++ standard <string> - * header and for related definitions. - */ - -#include "unicode/utypes.h" - -#if U_HAVE_STD_STRING - -#include <string> - -#endif // U_HAVE_STD_STRING - -#endif // __STD_STRING_H__ diff --git a/Source/WTF/icu/unicode/strenum.h b/Source/WTF/icu/unicode/strenum.h deleted file mode 100644 index 3dbe21c6b..000000000 --- a/Source/WTF/icu/unicode/strenum.h +++ /dev/null @@ -1,276 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. 
All Rights Reserved. -* -******************************************************************************* -*/ - -#ifndef STRENUM_H -#define STRENUM_H - -#include "unicode/uobject.h" -#include "unicode/unistr.h" - -/** - * \file - * \brief C++ API: String Enumeration - */ - -U_NAMESPACE_BEGIN - -/** - * Base class for 'pure' C++ implementations of uenum api. Adds a - * method that returns the next UnicodeString since in C++ this can - * be a common storage format for strings. - * - * <p>The model is that the enumeration is over strings maintained by - * a 'service.' At any point, the service might change, invalidating - * the enumerator (though this is expected to be rare). The iterator - * returns an error if this has occurred. Lack of the error is no - * guarantee that the service didn't change immediately after the - * call, so the returned string still might not be 'valid' on - * subsequent use.</p> - * - * <p>Strings may take the form of const char*, const UChar*, or const - * UnicodeString*. The type you get is determined by the variant of - * 'next' that you call. In general the StringEnumeration is - * optimized for one of these types, but all StringEnumerations can - * return all types. Returned strings are each terminated with a NUL. - * Depending on the service data, they might also include embedded NUL - * characters, so API is provided to optionally return the true - * length, counting the embedded NULs but not counting the terminating - * NUL.</p> - * - * <p>The pointers returned by next, unext, and snext become invalid - * upon any subsequent call to the enumeration's destructor, next, - * unext, snext, or reset.</p> - * - * ICU 2.8 adds some default implementations and helper functions - * for subclasses. - * - * @stable ICU 2.4 - */ -class U_COMMON_API StringEnumeration : public UObject { -public: - /** - * Destructor.
- * @stable ICU 2.4 - */ - virtual ~StringEnumeration(); - - /** - * Clone this object, an instance of a subclass of StringEnumeration. - * Clones can be used concurrently in multiple threads. - * If a subclass does not implement clone(), or if an error occurs, - * then NULL is returned. - * The clone functions in all subclasses return a base class pointer - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see getDynamicClassID - * @stable ICU 2.8 - */ - virtual StringEnumeration *clone() const; - - /** - * <p>Return the number of elements that the iterator traverses. If - * the iterator is out of sync with its service, status is set to - * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p> - * - * <p>The return value will not change except possibly as a result of - * a subsequent call to reset, or if the iterator becomes out of sync.</p> - * - * <p>This is a convenience function. It can end up being very - * expensive as all the items might have to be pre-fetched - * (depending on the storage format of the data being - * traversed).</p> - * - * @param status the error code. - * @return number of elements in the iterator. - * - * @stable ICU 2.4 */ - virtual int32_t count(UErrorCode& status) const = 0; - - /** - * <p>Returns the next element as a NUL-terminated char*. If there - * are no more elements, returns NULL. If the resultLength pointer - * is not NULL, the length of the string (not counting the - * terminating NUL) is returned at that address. If an error - * status is returned, the value at resultLength is undefined.</p> - * - * <p>The returned pointer is owned by this iterator and must not be - * deleted by the caller. 
The pointer is valid until the next call - * to next, unext, snext, reset, or the enumerator's destructor.</p> - * - * <p>If the iterator is out of sync with its service, status is set - * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> - * - * <p>If the native service string is a UChar* string, it is - * converted to char* with the invariant converter. If the - * conversion fails (because a character cannot be converted) then - * status is set to U_INVARIANT_CONVERSION_ERROR and the return - * value is undefined (though not NULL).</p> - * - * Starting with ICU 2.8, the default implementation calls snext() - * and handles the conversion. - * Either next() or snext() must be implemented differently by a subclass. - * - * @param status the error code. - * @param resultLength a pointer to receive the length, can be NULL. - * @return a pointer to the string, or NULL. - * - * @stable ICU 2.4 - */ - virtual const char* next(int32_t *resultLength, UErrorCode& status); - - /** - * <p>Returns the next element as a NUL-terminated UChar*. If there - * are no more elements, returns NULL. If the resultLength pointer - * is not NULL, the length of the string (not counting the - * terminating NUL) is returned at that address. If an error - * status is returned, the value at resultLength is undefined.</p> - * - * <p>The returned pointer is owned by this iterator and must not be - * deleted by the caller. The pointer is valid until the next call - * to next, unext, snext, reset, or the enumerator's destructor.</p> - * - * <p>If the iterator is out of sync with its service, status is set - * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> - * - * Starting with ICU 2.8, the default implementation calls snext() - * and handles the conversion. - * - * @param status the error code. - * @param resultLength a pointer to receive the length, can be NULL. - * @return a pointer to the string, or NULL.
- * - * @stable ICU 2.4 - */ - virtual const UChar* unext(int32_t *resultLength, UErrorCode& status); - - /** - * <p>Returns the next element as a UnicodeString*. If there are no - * more elements, returns NULL.</p> - * - * <p>The returned pointer is owned by this iterator and must not be - * deleted by the caller. The pointer is valid until the next call - * to next, unext, snext, reset, or the enumerator's destructor.</p> - * - * <p>If the iterator is out of sync with its service, status is set - * to U_ENUM_OUT_OF_SYNC_ERROR and NULL is returned.</p> - * - * Starting with ICU 2.8, the default implementation calls next() - * and handles the conversion. - * Either next() or snext() must be implemented differently by a subclass. - * - * @param status the error code. - * @return a pointer to the string, or NULL. - * - * @stable ICU 2.4 - */ - virtual const UnicodeString* snext(UErrorCode& status); - - /** - * <p>Resets the iterator. This re-establishes sync with the - * service and rewinds the iterator to start at the first - * element.</p> - * - * <p>Previous pointers returned by next, unext, or snext become - * invalid, and the value returned by count might change.</p> - * - * @param status the error code. - * - * @stable ICU 2.4 - */ - virtual void reset(UErrorCode& status) = 0; - - /** - * Compares this enumeration to other to check if both are equal - * - * @param that The other string enumeration to compare this object to - * @return TRUE if the enumerations are equal. FALSE if not. - * @stable ICU 3.6 - */ - virtual UBool operator==(const StringEnumeration& that)const; - /** - * Compares this enumeration to other to check if both are not equal - * - * @param that The other string enumeration to compare this object to - * @return TRUE if the enumerations are not equal. FALSE if they are equal. - * @stable ICU 3.6 - */ - virtual UBool operator!=(const StringEnumeration& that)const; - -protected: - /** - * UnicodeString field for use with default implementations and subclasses.
- * @stable ICU 2.8 - */ - UnicodeString unistr; - /** - * char * default buffer for use with default implementations and subclasses. - * @stable ICU 2.8 - */ - char charsBuffer[32]; - /** - * char * buffer for use with default implementations and subclasses. - * Allocated in constructor and in ensureCharsCapacity(). - * @stable ICU 2.8 - */ - char *chars; - /** - * Capacity of chars, for use with default implementations and subclasses. - * @stable ICU 2.8 - */ - int32_t charsCapacity; - - /** - * Default constructor for use with default implementations and subclasses. - * @stable ICU 2.8 - */ - StringEnumeration(); - - /** - * Ensures that chars is at least as large as the requested capacity. - * For use with default implementations and subclasses. - * - * @param capacity Requested capacity. - * @param status ICU in/out error code. - * @stable ICU 2.8 - */ - void ensureCharsCapacity(int32_t capacity, UErrorCode &status); - - /** - * Converts s to Unicode and sets unistr to the result. - * For use with default implementations and subclasses, - * especially for implementations of snext() in terms of next(). - * This is provided with a helper function instead of a default implementation - * of snext() to avoid potential infinite loops between next() and snext(). - * - * For example: - * \code - * const UnicodeString* snext(UErrorCode& status) { - * int32_t resultLength=0; - * const char *s=next(&resultLength, status); - * return setChars(s, resultLength, status); - * } - * \endcode - * - * @param s String to be converted to Unicode. - * @param length Length of the string. - * @param status ICU in/out error code. - * @return A pointer to unistr. 
- * @stable ICU 2.8 - */ - UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status); -}; - -U_NAMESPACE_END - -/* STRENUM_H */ -#endif diff --git a/Source/WTF/icu/unicode/stringpiece.h b/Source/WTF/icu/unicode/stringpiece.h deleted file mode 100644 index b29571d4a..000000000 --- a/Source/WTF/icu/unicode/stringpiece.h +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (C) 2009-2013, International Business Machines -// Corporation and others. All Rights Reserved. -// -// Copyright 2001 and onwards Google Inc. -// Author: Sanjay Ghemawat - -// This code is a contribution of Google code, and the style used here is -// a compromise between the original Google code and the ICU coding guidelines. -// For example, data types are ICU-ified (size_t,int->int32_t), -// and API comments doxygen-ified, but function names and behavior are -// as in the original, if possible. -// Assertion-style error handling, not available in ICU, was changed to -// parameter "pinning" similar to UnicodeString. -// -// In addition, this is only a partial port of the original Google code, -// limited to what was needed so far. The (nearly) complete original code -// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib -// (see ICU ticket 6765, r25517). - -#ifndef __STRINGPIECE_H__ -#define __STRINGPIECE_H__ - -/** - * \file - * \brief C++ API: StringPiece: Read-only byte string wrapper class. - */ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/std_string.h" - -// Arghh! I wish C++ literals were "string". - -U_NAMESPACE_BEGIN - -/** - * A string-like object that points to a sized piece of memory. - * - * We provide non-explicit singleton constructors so users can pass - * in a "const char*" or a "string" wherever a "StringPiece" is - * expected. - * - * Functions or methods may use const StringPiece& parameters to accept either - * a "const char*" or a "string" value that will be implicitly converted to - * a StringPiece. 
- * - * Systematic usage of StringPiece is encouraged as it will reduce unnecessary - * conversions from "const char*" to "string" and back again. - * - * @stable ICU 4.2 - */ -class U_COMMON_API StringPiece : public UMemory { - private: - const char* ptr_; - int32_t length_; - - public: - /** - * Default constructor, creates an empty StringPiece. - * @stable ICU 4.2 - */ - StringPiece() : ptr_(NULL), length_(0) { } - /** - * Constructs from a NUL-terminated const char * pointer. - * @param str a NUL-terminated const char * pointer - * @stable ICU 4.2 - */ - StringPiece(const char* str); -#if U_HAVE_STD_STRING - /** - * Constructs from a std::string. - * @stable ICU 4.2 - */ - StringPiece(const std::string& str) - : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } -#endif - /** - * Constructs from a const char * pointer and a specified length. - * @param offset a const char * pointer (need not be terminated) - * @param len the length of the string; must be non-negative - * @stable ICU 4.2 - */ - StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } - /** - * Substring of another StringPiece. - * @param x the other StringPiece - * @param pos start position in x; must be non-negative and <= x.length(). - * @stable ICU 4.2 - */ - StringPiece(const StringPiece& x, int32_t pos); - /** - * Substring of another StringPiece. - * @param x the other StringPiece - * @param pos start position in x; must be non-negative and <= x.length(). - * @param len length of the substring; - * must be non-negative and will be pinned to at most x.length() - pos. - * @stable ICU 4.2 - */ - StringPiece(const StringPiece& x, int32_t pos, int32_t len); - - /** - * Returns the string pointer. May be NULL if it is empty. - * - * data() may return a pointer to a buffer with embedded NULs, and the - * returned buffer may or may not be null terminated. Therefore it is - * typically a mistake to pass data() to a routine that expects a NUL - * terminated string. 
- * @return the string pointer - * @stable ICU 4.2 - */ - const char* data() const { return ptr_; } - /** - * Returns the string length. Same as length(). - * @return the string length - * @stable ICU 4.2 - */ - int32_t size() const { return length_; } - /** - * Returns the string length. Same as size(). - * @return the string length - * @stable ICU 4.2 - */ - int32_t length() const { return length_; } - /** - * Returns whether the string is empty. - * @return TRUE if the string is empty - * @stable ICU 4.2 - */ - UBool empty() const { return length_ == 0; } - - /** - * Sets to an empty string. - * @stable ICU 4.2 - */ - void clear() { ptr_ = NULL; length_ = 0; } - - /** - * Reset the stringpiece to refer to new data. - * @param xdata pointer to the new string data. Need not be nul terminated. - * @param len the length of the new data - * @stable ICU 4.8 - */ - void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } - - /** - * Reset the stringpiece to refer to new data. - * @param str a pointer to a NUL-terminated string. - * @stable ICU 4.8 - */ - void set(const char* str); - - /** - * Removes the first n string units. - * @param n prefix length, must be non-negative and <=length() - * @stable ICU 4.2 - */ - void remove_prefix(int32_t n) { - if (n >= 0) { - if (n > length_) { - n = length_; - } - ptr_ += n; - length_ -= n; - } - } - - /** - * Removes the last n string units. - * @param n suffix length, must be non-negative and <=length() - * @stable ICU 4.2 - */ - void remove_suffix(int32_t n) { - if (n >= 0) { - if (n <= length_) { - length_ -= n; - } else { - length_ = 0; - } - } - } - - /** - * Maximum integer, used as a default value for substring methods. - * @stable ICU 4.2 - */ - static const int32_t npos; // = 0x7fffffff; - - /** - * Returns a substring of this StringPiece. - * @param pos start position; must be non-negative and <= length().
- * @param len length of the substring; - * must be non-negative and will be pinned to at most length() - pos. - * @return the substring StringPiece - * @stable ICU 4.2 - */ - StringPiece substr(int32_t pos, int32_t len = npos) const { - return StringPiece(*this, pos, len); - } -}; - -/** - * Global operator == for StringPiece - * @param x The first StringPiece to compare. - * @param y The second StringPiece to compare. - * @return TRUE if the string data is equal - * @stable ICU 4.8 - */ -U_EXPORT UBool U_EXPORT2 -operator==(const StringPiece& x, const StringPiece& y); - -/** - * Global operator != for StringPiece - * @param x The first StringPiece to compare. - * @param y The second StringPiece to compare. - * @return TRUE if the string data is not equal - * @stable ICU 4.8 - */ -inline UBool operator!=(const StringPiece& x, const StringPiece& y) { - return !(x == y); -} - -U_NAMESPACE_END - -#endif // __STRINGPIECE_H__ diff --git a/Source/WTF/icu/unicode/uchar.h b/Source/WTF/icu/unicode/uchar.h deleted file mode 100644 index 1a5b71b46..000000000 --- a/Source/WTF/icu/unicode/uchar.h +++ /dev/null @@ -1,3331 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 1997-2013, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File UCHAR.H -* -* Modification History: -* -* Date Name Description -* 04/02/97 aliu Creation. -* 03/29/99 helena Updated for C APIs. -* 4/15/99 Madhu Updated for C Implementation and Javadoc -* 5/20/99 Madhu Added the function u_getVersion() -* 8/19/1999 srl Upgraded scripts to Unicode 3.0 -* 8/27/1999 schererm UCharDirection constants: U_... -* 11/11/1999 weiv added u_isalnum(), cleaned comments -* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). 
-****************************************************************************** -*/ - -#ifndef UCHAR_H -#define UCHAR_H - -#include "unicode/utypes.h" - -U_CDECL_BEGIN - -/*==========================================================================*/ -/* Unicode version number */ -/*==========================================================================*/ -/** - * Unicode version number, default for the current ICU version. - * The actual Unicode Character Database (UCD) data is stored in uprops.dat - * and may be generated from UCD files from a different Unicode version. - * Call u_getUnicodeVersion to get the actual Unicode version of the data. - * - * @see u_getUnicodeVersion - * @stable ICU 2.0 - */ -#define U_UNICODE_VERSION "6.3" - -/** - * \file - * \brief C API: Unicode Properties - * - * This C API provides low-level access to the Unicode Character Database. - * In addition to raw property values, some convenience functions calculate - * derived properties, for example for Java-style programming. - * - * Unicode assigns each code point (not just assigned character) values for - * many properties. - * Most of them are simple boolean flags, or constants from a small enumerated list. - * For some properties, values are strings or other relatively more complex types. - * - * For more information see - * "About the Unicode Character Database" (http://www.unicode.org/ucd/) - * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html). - * - * Many functions are designed to match java.lang.Character functions. - * See the individual function documentation, - * and see the JDK 1.4 java.lang.Character documentation - * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html - * - * There are also functions that provide easy migration from C/POSIX functions - * like isblank(). 
Their use is generally discouraged because the C/POSIX - * standards do not define their semantics beyond the ASCII range, which means - * that different implementations exhibit very different behavior. - * Instead, Unicode properties should be used directly. - * - * There are also only a few, broad C/POSIX character classes, and they tend - * to be used for conflicting purposes. For example, the "isalpha()" class - * is sometimes used to determine word boundaries, while a more sophisticated - * approach would at least distinguish initial letters from continuation - * characters (the latter including combining marks). - * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) - * Another example: There is no "istitle()" class for titlecase characters. - * - * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. - * ICU implements them according to the Standard Recommendations in - * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions - * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
- * - * API access for C/POSIX character classes is as follows: - * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC) - * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE) - * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE) - * - punct: u_ispunct(c) - * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER - * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT) - * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM) - * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE) - * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK) - * - cntrl: u_charType(c)==U_CONTROL_CHAR - * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH) - * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT) - * - * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match, - * the Standard Recommendations in UTS #18. Instead, they match Java - * functions according to their API documentation. - * - * \htmlonly - * The C/POSIX character classes are also available in UnicodeSet patterns, - * using patterns like [:graph:] or \p{graph}. - * \endhtmlonly - * - * Note: There are several ICU whitespace functions. - * Comparison: - * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; - * most of general categories "Z" (separators) + most whitespace ISO controls - * (including no-break spaces, but excluding IS1..IS4 and ZWSP) - * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces - * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) - * - u_isspace: Z + whitespace ISO controls (including no-break spaces) - * - u_isblank: "horizontal spaces" = TAB + Zs - ZWSP - */ - -/** - * Constants. - */ - -/** The lowest Unicode code point value. Code points are non-negative. 
@stable ICU 2.0 */ -#define UCHAR_MIN_VALUE 0 - -/** - * The highest Unicode code point value (scalar value) according to - * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up). - * For a single character, UChar32 is a simple type that can hold any code point value. - * - * @see UChar32 - * @stable ICU 2.0 - */ -#define UCHAR_MAX_VALUE 0x10ffff - -/** - * Get a single-bit bit set (a flag) from a bit number 0..31. - * @stable ICU 2.1 - */ -#define U_MASK(x) ((uint32_t)1<<(x)) - -/** - * Selection constants for Unicode properties. - * These constants are used in functions like u_hasBinaryProperty to select - * one of the Unicode properties. - * - * The properties APIs are intended to reflect Unicode properties as defined - * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). - * For details about the properties see http://www.unicode.org/ucd/ . - * For names of Unicode properties see the UCD file PropertyAliases.txt. - * - * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, - * then properties marked with "new in Unicode 3.2" are not or not fully available. - * Check u_getUnicodeVersion to be sure. - * - * @see u_hasBinaryProperty - * @see u_getIntPropertyValue - * @see u_getUnicodeVersion - * @stable ICU 2.1 - */ -typedef enum UProperty { - /* - * Note: UProperty constants are parsed by preparseucd.py. - * It matches lines like - * UCHAR_<Unicode property name>=<integer>, - */ - - /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that - debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, - rather than UCHAR_BINARY_START. Likewise for other *_START - identifiers. */ - - /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. - Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ - UCHAR_ALPHABETIC=0, - /** First constant for binary Unicode properties. 
@stable ICU 2.1 */ - UCHAR_BINARY_START=UCHAR_ALPHABETIC, - /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */ - UCHAR_ASCII_HEX_DIGIT=1, - /** Binary property Bidi_Control. - Format controls which have specific functions - in the Bidi Algorithm. @stable ICU 2.1 */ - UCHAR_BIDI_CONTROL=2, - /** Binary property Bidi_Mirrored. - Characters that may change display in RTL text. - Same as u_isMirrored. - See Bidi Algorithm, UTR 9. @stable ICU 2.1 */ - UCHAR_BIDI_MIRRORED=3, - /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */ - UCHAR_DASH=4, - /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). - Ignorable in most processing. - <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ - UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5, - /** Binary property Deprecated (new in Unicode 3.2). - The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ - UCHAR_DEPRECATED=6, - /** Binary property Diacritic. Characters that linguistically modify - the meaning of another character to which they apply. @stable ICU 2.1 */ - UCHAR_DIACRITIC=7, - /** Binary property Extender. - Extend the value or shape of a preceding alphabetic character, - e.g., length and iteration marks. @stable ICU 2.1 */ - UCHAR_EXTENDER=8, - /** Binary property Full_Composition_Exclusion. - CompositionExclusions.txt+Singleton Decompositions+ - Non-Starter Decompositions. @stable ICU 2.1 */ - UCHAR_FULL_COMPOSITION_EXCLUSION=9, - /** Binary property Grapheme_Base (new in Unicode 3.2). - For programmatic determination of grapheme cluster boundaries. - [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ - UCHAR_GRAPHEME_BASE=10, - /** Binary property Grapheme_Extend (new in Unicode 3.2). - For programmatic determination of grapheme cluster boundaries. 
- Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ - UCHAR_GRAPHEME_EXTEND=11, - /** Binary property Grapheme_Link (new in Unicode 3.2). - For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ - UCHAR_GRAPHEME_LINK=12, - /** Binary property Hex_Digit. - Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ - UCHAR_HEX_DIGIT=13, - /** Binary property Hyphen. Dashes used to mark connections - between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */ - UCHAR_HYPHEN=14, - /** Binary property ID_Continue. - Characters that can continue an identifier. - DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." - ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ - UCHAR_ID_CONTINUE=15, - /** Binary property ID_Start. - Characters that can start an identifier. - Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ - UCHAR_ID_START=16, - /** Binary property Ideographic. - CJKV ideographs. @stable ICU 2.1 */ - UCHAR_IDEOGRAPHIC=17, - /** Binary property IDS_Binary_Operator (new in Unicode 3.2). - For programmatic determination of - Ideographic Description Sequences. @stable ICU 2.1 */ - UCHAR_IDS_BINARY_OPERATOR=18, - /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). - For programmatic determination of - Ideographic Description Sequences. @stable ICU 2.1 */ - UCHAR_IDS_TRINARY_OPERATOR=19, - /** Binary property Join_Control. - Format controls for cursive joining and ligation. @stable ICU 2.1 */ - UCHAR_JOIN_CONTROL=20, - /** Binary property Logical_Order_Exception (new in Unicode 3.2). - Characters that do not use logical order and - require special handling in most processing. @stable ICU 2.1 */ - UCHAR_LOGICAL_ORDER_EXCEPTION=21, - /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. - Ll+Other_Lowercase @stable ICU 2.1 */ - UCHAR_LOWERCASE=22, - /** Binary property Math. 
Sm+Other_Math @stable ICU 2.1 */ - UCHAR_MATH=23, - /** Binary property Noncharacter_Code_Point. - Code points that are explicitly defined as illegal - for the encoding of characters. @stable ICU 2.1 */ - UCHAR_NONCHARACTER_CODE_POINT=24, - /** Binary property Quotation_Mark. @stable ICU 2.1 */ - UCHAR_QUOTATION_MARK=25, - /** Binary property Radical (new in Unicode 3.2). - For programmatic determination of - Ideographic Description Sequences. @stable ICU 2.1 */ - UCHAR_RADICAL=26, - /** Binary property Soft_Dotted (new in Unicode 3.2). - Characters with a "soft dot", like i or j. - An accent placed on these characters causes - the dot to disappear. @stable ICU 2.1 */ - UCHAR_SOFT_DOTTED=27, - /** Binary property Terminal_Punctuation. - Punctuation characters that generally mark - the end of textual units. @stable ICU 2.1 */ - UCHAR_TERMINAL_PUNCTUATION=28, - /** Binary property Unified_Ideograph (new in Unicode 3.2). - For programmatic determination of - Ideographic Description Sequences. @stable ICU 2.1 */ - UCHAR_UNIFIED_IDEOGRAPH=29, - /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. - Lu+Other_Uppercase @stable ICU 2.1 */ - UCHAR_UPPERCASE=30, - /** Binary property White_Space. - Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. - Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ - UCHAR_WHITE_SPACE=31, - /** Binary property XID_Continue. - ID_Continue modified to allow closure under - normalization forms NFKC and NFKD. @stable ICU 2.1 */ - UCHAR_XID_CONTINUE=32, - /** Binary property XID_Start. ID_Start modified to allow - closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ - UCHAR_XID_START=33, - /** Binary property Case_Sensitive. Either the source of a case - mapping or _in_ the target of a case mapping. Not the same as - the general category Cased_Letter. @stable ICU 2.6 */ - UCHAR_CASE_SENSITIVE=34, - /** Binary property STerm (new in Unicode 4.0.1). - Sentence Terminal. 
Used in UAX #29: Text Boundaries - (http://www.unicode.org/reports/tr29/) - @stable ICU 3.0 */ - UCHAR_S_TERM=35, - /** Binary property Variation_Selector (new in Unicode 4.0.1). - Indicates all those characters that qualify as Variation Selectors. - For details on the behavior of these characters, - see StandardizedVariants.html and 15.6 Variation Selectors. - @stable ICU 3.0 */ - UCHAR_VARIATION_SELECTOR=36, - /** Binary property NFD_Inert. - ICU-specific property for characters that are inert under NFD, - i.e., they do not interact with adjacent characters. - See the documentation for the Normalizer2 class and the - Normalizer2::isInert() method. - @stable ICU 3.0 */ - UCHAR_NFD_INERT=37, - /** Binary property NFKD_Inert. - ICU-specific property for characters that are inert under NFKD, - i.e., they do not interact with adjacent characters. - See the documentation for the Normalizer2 class and the - Normalizer2::isInert() method. - @stable ICU 3.0 */ - UCHAR_NFKD_INERT=38, - /** Binary property NFC_Inert. - ICU-specific property for characters that are inert under NFC, - i.e., they do not interact with adjacent characters. - See the documentation for the Normalizer2 class and the - Normalizer2::isInert() method. - @stable ICU 3.0 */ - UCHAR_NFC_INERT=39, - /** Binary property NFKC_Inert. - ICU-specific property for characters that are inert under NFKC, - i.e., they do not interact with adjacent characters. - See the documentation for the Normalizer2 class and the - Normalizer2::isInert() method. - @stable ICU 3.0 */ - UCHAR_NFKC_INERT=40, - /** Binary Property Segment_Starter. - ICU-specific property for characters that are starters in terms of - Unicode normalization and combining character sequences. - They have ccc=0 and do not occur in non-initial position of the - canonical decomposition of any character - (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). 
- ICU uses this property for segmenting a string for generating a set of - canonically equivalent strings, e.g. for canonical closure while - processing collation tailoring rules. - @stable ICU 3.0 */ - UCHAR_SEGMENT_STARTER=41, - /** Binary property Pattern_Syntax (new in Unicode 4.1). - See UAX #31 Identifier and Pattern Syntax - (http://www.unicode.org/reports/tr31/) - @stable ICU 3.4 */ - UCHAR_PATTERN_SYNTAX=42, - /** Binary property Pattern_White_Space (new in Unicode 4.1). - See UAX #31 Identifier and Pattern Syntax - (http://www.unicode.org/reports/tr31/) - @stable ICU 3.4 */ - UCHAR_PATTERN_WHITE_SPACE=43, - /** Binary property alnum (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_ALNUM=44, - /** Binary property blank (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_BLANK=45, - /** Binary property graph (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_GRAPH=46, - /** Binary property print (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_PRINT=47, - /** Binary property xdigit (a C/POSIX character class). - Implemented according to the UTS #18 Annex C Standard Recommendation. - See the uchar.h file documentation. - @stable ICU 3.4 */ - UCHAR_POSIX_XDIGIT=48, - /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */ - UCHAR_CASED=49, - /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */ - UCHAR_CASE_IGNORABLE=50, - /** Binary property Changes_When_Lowercased. 
@stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_LOWERCASED=51, - /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_UPPERCASED=52, - /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_TITLECASED=53, - /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_CASEFOLDED=54, - /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_CASEMAPPED=55, - /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ - UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, - /** One more than the last constant for binary Unicode properties. @stable ICU 2.1 */ - UCHAR_BINARY_LIMIT=57, - - /** Enumerated property Bidi_Class. - Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ - UCHAR_BIDI_CLASS=0x1000, - /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ - UCHAR_INT_START=UCHAR_BIDI_CLASS, - /** Enumerated property Block. - Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */ - UCHAR_BLOCK=0x1001, - /** Enumerated property Canonical_Combining_Class. - Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ - UCHAR_CANONICAL_COMBINING_CLASS=0x1002, - /** Enumerated property Decomposition_Type. - Returns UDecompositionType values. @stable ICU 2.2 */ - UCHAR_DECOMPOSITION_TYPE=0x1003, - /** Enumerated property East_Asian_Width. - See http://www.unicode.org/reports/tr11/ - Returns UEastAsianWidth values. @stable ICU 2.2 */ - UCHAR_EAST_ASIAN_WIDTH=0x1004, - /** Enumerated property General_Category. - Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ - UCHAR_GENERAL_CATEGORY=0x1005, - /** Enumerated property Joining_Group. - Returns UJoiningGroup values. @stable ICU 2.2 */ - UCHAR_JOINING_GROUP=0x1006, - /** Enumerated property Joining_Type. - Returns UJoiningType values. @stable ICU 2.2 */ - UCHAR_JOINING_TYPE=0x1007, - /** Enumerated property Line_Break. 
- Returns ULineBreak values. @stable ICU 2.2 */ - UCHAR_LINE_BREAK=0x1008, - /** Enumerated property Numeric_Type. - Returns UNumericType values. @stable ICU 2.2 */ - UCHAR_NUMERIC_TYPE=0x1009, - /** Enumerated property Script. - Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ - UCHAR_SCRIPT=0x100A, - /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. - Returns UHangulSyllableType values. @stable ICU 2.6 */ - UCHAR_HANGUL_SYLLABLE_TYPE=0x100B, - /** Enumerated property NFD_Quick_Check. - Returns UNormalizationCheckResult values. @stable ICU 3.0 */ - UCHAR_NFD_QUICK_CHECK=0x100C, - /** Enumerated property NFKD_Quick_Check. - Returns UNormalizationCheckResult values. @stable ICU 3.0 */ - UCHAR_NFKD_QUICK_CHECK=0x100D, - /** Enumerated property NFC_Quick_Check. - Returns UNormalizationCheckResult values. @stable ICU 3.0 */ - UCHAR_NFC_QUICK_CHECK=0x100E, - /** Enumerated property NFKC_Quick_Check. - Returns UNormalizationCheckResult values. @stable ICU 3.0 */ - UCHAR_NFKC_QUICK_CHECK=0x100F, - /** Enumerated property Lead_Canonical_Combining_Class. - ICU-specific property for the ccc of the first code point - of the decomposition, or lccc(c)=ccc(NFD(c)[0]). - Useful for checking for canonically ordered text; - see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . - Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ - UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010, - /** Enumerated property Trail_Canonical_Combining_Class. - ICU-specific property for the ccc of the last code point - of the decomposition, or tccc(c)=ccc(NFD(c)[last]). - Useful for checking for canonically ordered text; - see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . - Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ - UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011, - /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). 
- Used in UAX #29: Text Boundaries - (http://www.unicode.org/reports/tr29/) - Returns UGraphemeClusterBreak values. @stable ICU 3.4 */ - UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012, - /** Enumerated property Sentence_Break (new in Unicode 4.1). - Used in UAX #29: Text Boundaries - (http://www.unicode.org/reports/tr29/) - Returns USentenceBreak values. @stable ICU 3.4 */ - UCHAR_SENTENCE_BREAK=0x1013, - /** Enumerated property Word_Break (new in Unicode 4.1). - Used in UAX #29: Text Boundaries - (http://www.unicode.org/reports/tr29/) - Returns UWordBreakValues values. @stable ICU 3.4 */ - UCHAR_WORD_BREAK=0x1014, - /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). - Used in UAX #9: Unicode Bidirectional Algorithm - (http://www.unicode.org/reports/tr9/) - Returns UBidiPairedBracketType values. @stable ICU 52 */ - UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, - /** One more than the last constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ - UCHAR_INT_LIMIT=0x1016, - - /** Bitmask property General_Category_Mask. - This is the General_Category property returned as a bit mask. - When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), - returns bit masks for UCharCategory values where exactly one bit is set. - When used with u_getPropertyValueName() and u_getPropertyValueEnum(), - a multi-bit mask is used for sets of categories like "Letters". - Mask values should be cast to uint32_t. - @stable ICU 2.4 */ - UCHAR_GENERAL_CATEGORY_MASK=0x2000, - /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */ - UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, - /** One more than the last constant for bit-mask Unicode properties. @stable ICU 2.4 */ - UCHAR_MASK_LIMIT=0x2001, - - /** Double property Numeric_Value. - Corresponds to u_getNumericValue. @stable ICU 2.4 */ - UCHAR_NUMERIC_VALUE=0x3000, - /** First constant for double Unicode properties. 
@stable ICU 2.4 */ - UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, - /** One more than the last constant for double Unicode properties. @stable ICU 2.4 */ - UCHAR_DOUBLE_LIMIT=0x3001, - - /** String property Age. - Corresponds to u_charAge. @stable ICU 2.4 */ - UCHAR_AGE=0x4000, - /** First constant for string Unicode properties. @stable ICU 2.4 */ - UCHAR_STRING_START=UCHAR_AGE, - /** String property Bidi_Mirroring_Glyph. - Corresponds to u_charMirror. @stable ICU 2.4 */ - UCHAR_BIDI_MIRRORING_GLYPH=0x4001, - /** String property Case_Folding. - Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ - UCHAR_CASE_FOLDING=0x4002, -#ifndef U_HIDE_DEPRECATED_API - /** Deprecated string property ISO_Comment. - Corresponds to u_getISOComment. @deprecated ICU 49 */ - UCHAR_ISO_COMMENT=0x4003, -#endif /* U_HIDE_DEPRECATED_API */ - /** String property Lowercase_Mapping. - Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ - UCHAR_LOWERCASE_MAPPING=0x4004, - /** String property Name. - Corresponds to u_charName. @stable ICU 2.4 */ - UCHAR_NAME=0x4005, - /** String property Simple_Case_Folding. - Corresponds to u_foldCase. @stable ICU 2.4 */ - UCHAR_SIMPLE_CASE_FOLDING=0x4006, - /** String property Simple_Lowercase_Mapping. - Corresponds to u_tolower. @stable ICU 2.4 */ - UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007, - /** String property Simple_Titlecase_Mapping. - Corresponds to u_totitle. @stable ICU 2.4 */ - UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008, - /** String property Simple_Uppercase_Mapping. - Corresponds to u_toupper. @stable ICU 2.4 */ - UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009, - /** String property Titlecase_Mapping. - Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ - UCHAR_TITLECASE_MAPPING=0x400A, -#ifndef U_HIDE_DEPRECATED_API - /** String property Unicode_1_Name. - This property is of little practical value. - Beginning with ICU 49, ICU APIs return an empty string for this property. - Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). 
@deprecated ICU 49 */ - UCHAR_UNICODE_1_NAME=0x400B, -#endif /* U_HIDE_DEPRECATED_API */ - /** String property Uppercase_Mapping. - Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ - UCHAR_UPPERCASE_MAPPING=0x400C, - /** String property Bidi_Paired_Bracket (new in Unicode 6.3). - Corresponds to u_getBidiPairedBracket. @stable ICU 52 */ - UCHAR_BIDI_PAIRED_BRACKET=0x400D, - /** One more than the last constant for string Unicode properties. @stable ICU 2.4 */ - UCHAR_STRING_LIMIT=0x400E, - - /** Miscellaneous property Script_Extensions (new in Unicode 6.0). - Some characters are commonly used in multiple scripts. - For more information, see UAX #24: http://www.unicode.org/reports/tr24/. - Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. - @stable ICU 4.6 */ - UCHAR_SCRIPT_EXTENSIONS=0x7000, - /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ - UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, - /** One more than the last constant for Unicode properties with unusual value types. - * @stable ICU 4.6 */ - UCHAR_OTHER_PROPERTY_LIMIT=0x7001, - /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ - UCHAR_INVALID_CODE = -1 -} UProperty; - -/** - * Data for enumerated Unicode general category types. - * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html . - * @stable ICU 2.0 - */ -typedef enum UCharCategory -{ - /* - * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. - * It matches pairs of lines like - * / ** <Unicode 2-letter General_Category value> comment... * / - * U_<[A-Z_]+> = <integer>, - */ - - /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */ - U_UNASSIGNED = 0, - /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) 
@stable ICU 2.0 */ - U_GENERAL_OTHER_TYPES = 0, - /** Lu @stable ICU 2.0 */ - U_UPPERCASE_LETTER = 1, - /** Ll @stable ICU 2.0 */ - U_LOWERCASE_LETTER = 2, - /** Lt @stable ICU 2.0 */ - U_TITLECASE_LETTER = 3, - /** Lm @stable ICU 2.0 */ - U_MODIFIER_LETTER = 4, - /** Lo @stable ICU 2.0 */ - U_OTHER_LETTER = 5, - /** Mn @stable ICU 2.0 */ - U_NON_SPACING_MARK = 6, - /** Me @stable ICU 2.0 */ - U_ENCLOSING_MARK = 7, - /** Mc @stable ICU 2.0 */ - U_COMBINING_SPACING_MARK = 8, - /** Nd @stable ICU 2.0 */ - U_DECIMAL_DIGIT_NUMBER = 9, - /** Nl @stable ICU 2.0 */ - U_LETTER_NUMBER = 10, - /** No @stable ICU 2.0 */ - U_OTHER_NUMBER = 11, - /** Zs @stable ICU 2.0 */ - U_SPACE_SEPARATOR = 12, - /** Zl @stable ICU 2.0 */ - U_LINE_SEPARATOR = 13, - /** Zp @stable ICU 2.0 */ - U_PARAGRAPH_SEPARATOR = 14, - /** Cc @stable ICU 2.0 */ - U_CONTROL_CHAR = 15, - /** Cf @stable ICU 2.0 */ - U_FORMAT_CHAR = 16, - /** Co @stable ICU 2.0 */ - U_PRIVATE_USE_CHAR = 17, - /** Cs @stable ICU 2.0 */ - U_SURROGATE = 18, - /** Pd @stable ICU 2.0 */ - U_DASH_PUNCTUATION = 19, - /** Ps @stable ICU 2.0 */ - U_START_PUNCTUATION = 20, - /** Pe @stable ICU 2.0 */ - U_END_PUNCTUATION = 21, - /** Pc @stable ICU 2.0 */ - U_CONNECTOR_PUNCTUATION = 22, - /** Po @stable ICU 2.0 */ - U_OTHER_PUNCTUATION = 23, - /** Sm @stable ICU 2.0 */ - U_MATH_SYMBOL = 24, - /** Sc @stable ICU 2.0 */ - U_CURRENCY_SYMBOL = 25, - /** Sk @stable ICU 2.0 */ - U_MODIFIER_SYMBOL = 26, - /** So @stable ICU 2.0 */ - U_OTHER_SYMBOL = 27, - /** Pi @stable ICU 2.0 */ - U_INITIAL_PUNCTUATION = 28, - /** Pf @stable ICU 2.0 */ - U_FINAL_PUNCTUATION = 29, - /** One higher than the last enum UCharCategory constant. @stable ICU 2.0 */ - U_CHAR_CATEGORY_COUNT -} UCharCategory; - -/** - * U_GC_XX_MASK constants are bit flags corresponding to Unicode - * general category values. - * For each category, the nth bit is set if the numeric value of the - * corresponding UCharCategory constant is n. 
- * - * There are also some U_GC_Y_MASK constants for groups of general categories - * like L for all letter categories. - * - * @see u_charType - * @see U_GET_GC_MASK - * @see UCharCategory - * @stable ICU 2.1 - */ -#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) -/** Mask constant for a UCharCategory. 
@stable ICU 2.1 */ -#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_CS_MASK U_MASK(U_SURROGATE) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) - -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) -/** Mask constant for a UCharCategory. @stable ICU 2.1 */ -#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) - - -/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ -#define U_GC_L_MASK \ - (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) - -/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ -#define U_GC_LC_MASK \ - (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) - -/** Mask constant for multiple UCharCategory bits (M Marks). 
@stable ICU 2.1 */ -#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) - -/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */ -#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) - -/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */ -#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) - -/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */ -#define U_GC_C_MASK \ - (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) - -/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */ -#define U_GC_P_MASK \ - (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ - U_GC_PI_MASK|U_GC_PF_MASK) - -/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */ -#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) - -/** - * This specifies the language directional property of a character set. - * @stable ICU 2.0 - */ -typedef enum UCharDirection { - /* - * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. - * It matches pairs of lines like - * / ** <Unicode 1..3-letter Bidi_Class value> comment... 
* / - * U_<[A-Z_]+> = <integer>, - */ - - /** L @stable ICU 2.0 */ - U_LEFT_TO_RIGHT = 0, - /** R @stable ICU 2.0 */ - U_RIGHT_TO_LEFT = 1, - /** EN @stable ICU 2.0 */ - U_EUROPEAN_NUMBER = 2, - /** ES @stable ICU 2.0 */ - U_EUROPEAN_NUMBER_SEPARATOR = 3, - /** ET @stable ICU 2.0 */ - U_EUROPEAN_NUMBER_TERMINATOR = 4, - /** AN @stable ICU 2.0 */ - U_ARABIC_NUMBER = 5, - /** CS @stable ICU 2.0 */ - U_COMMON_NUMBER_SEPARATOR = 6, - /** B @stable ICU 2.0 */ - U_BLOCK_SEPARATOR = 7, - /** S @stable ICU 2.0 */ - U_SEGMENT_SEPARATOR = 8, - /** WS @stable ICU 2.0 */ - U_WHITE_SPACE_NEUTRAL = 9, - /** ON @stable ICU 2.0 */ - U_OTHER_NEUTRAL = 10, - /** LRE @stable ICU 2.0 */ - U_LEFT_TO_RIGHT_EMBEDDING = 11, - /** LRO @stable ICU 2.0 */ - U_LEFT_TO_RIGHT_OVERRIDE = 12, - /** AL @stable ICU 2.0 */ - U_RIGHT_TO_LEFT_ARABIC = 13, - /** RLE @stable ICU 2.0 */ - U_RIGHT_TO_LEFT_EMBEDDING = 14, - /** RLO @stable ICU 2.0 */ - U_RIGHT_TO_LEFT_OVERRIDE = 15, - /** PDF @stable ICU 2.0 */ - U_POP_DIRECTIONAL_FORMAT = 16, - /** NSM @stable ICU 2.0 */ - U_DIR_NON_SPACING_MARK = 17, - /** BN @stable ICU 2.0 */ - U_BOUNDARY_NEUTRAL = 18, - /** FSI @stable ICU 52 */ - U_FIRST_STRONG_ISOLATE = 19, - /** LRI @stable ICU 52 */ - U_LEFT_TO_RIGHT_ISOLATE = 20, - /** RLI @stable ICU 52 */ - U_RIGHT_TO_LEFT_ISOLATE = 21, - /** PDI @stable ICU 52 */ - U_POP_DIRECTIONAL_ISOLATE = 22, - /** @stable ICU 2.0 */ - U_CHAR_DIRECTION_COUNT -} UCharDirection; - -/** - * Bidi Paired Bracket Type constants. - * - * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE - * @stable ICU 52 - */ -typedef enum UBidiPairedBracketType { - /* - * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. - * It matches lines like - * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name> - */ - - /** Not a paired bracket. @stable ICU 52 */ - U_BPT_NONE, - /** Open paired bracket. @stable ICU 52 */ - U_BPT_OPEN, - /** Close paired bracket. 
@stable ICU 52 */ - U_BPT_CLOSE, - /** @stable ICU 52 */ - U_BPT_COUNT /* 3 */ -} UBidiPairedBracketType; - -/** - * Constants for Unicode blocks, see the Unicode Data file Blocks.txt - * @stable ICU 2.0 - */ -enum UBlockCode { - /* - * Note: UBlockCode constants are parsed by preparseucd.py. - * It matches lines like - * UBLOCK_<Unicode Block value name> = <integer>, - */ - - /** New No_Block value in Unicode 4. @stable ICU 2.6 */ - UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ - - /** @stable ICU 2.0 */ - UBLOCK_BASIC_LATIN = 1, /*[0000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ - - /** @stable ICU 2.0 */ - UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ - - /** - * Unicode 3.2 renames this block to "Greek and Coptic". 
- * @stable ICU 2.0 - */ - UBLOCK_GREEK =8, /*[0370]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CYRILLIC =9, /*[0400]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARMENIAN =10, /*[0530]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HEBREW =11, /*[0590]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARABIC =12, /*[0600]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SYRIAC =13, /*[0700]*/ - - /** @stable ICU 2.0 */ - UBLOCK_THAANA =14, /*[0780]*/ - - /** @stable ICU 2.0 */ - UBLOCK_DEVANAGARI =15, /*[0900]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BENGALI =16, /*[0980]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GURMUKHI =17, /*[0A00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GUJARATI =18, /*[0A80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ORIYA =19, /*[0B00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_TAMIL =20, /*[0B80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_TELUGU =21, /*[0C00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KANNADA =22, /*[0C80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MALAYALAM =23, /*[0D00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SINHALA =24, /*[0D80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_THAI =25, /*[0E00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LAO =26, /*[0E80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_TIBETAN =27, /*[0F00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MYANMAR =28, /*[1000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GEORGIAN =29, /*[10A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HANGUL_JAMO =30, /*[1100]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ETHIOPIC =31, /*[1200]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CHEROKEE =32, /*[13A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ - - /** @stable ICU 2.0 */ - UBLOCK_OGHAM =34, /*[1680]*/ - - /** @stable ICU 2.0 */ - UBLOCK_RUNIC =35, /*[16A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KHMER =36, /*[1780]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MONGOLIAN =37, /*[1800]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ - - /** @stable ICU 2.0 */ - 
UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ - - /** - * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". - * @stable ICU 2.0 - */ - UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ - - /** @stable ICU 2.0 */ - UBLOCK_NUMBER_FORMS =45, /*[2150]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARROWS =46, /*[2190]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ - - /** @stable ICU 2.0 */ - UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BOX_DRAWING =52, /*[2500]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ - - /** @stable ICU 2.0 */ - UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ - - /** @stable ICU 2.0 */ - UBLOCK_DINGBATS =56, /*[2700]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HIRAGANA =62, /*[3040]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KATAKANA =63, /*[30A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_BOPOMOFO =64, /*[3100]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ - - /** @stable ICU 2.0 */ - UBLOCK_KANBUN =66, /*[3190]*/ - - /** @stable ICU 2.0 */ - 
UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_YI_SYLLABLES =72, /*[A000]*/ - - /** @stable ICU 2.0 */ - UBLOCK_YI_RADICALS =73, /*[A490]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ - - /** @stable ICU 2.0 */ - UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ - - /** - * Same as UBLOCK_PRIVATE_USE. - * Until Unicode 3.1.1, the corresponding block name was "Private Use", - * and multiple code point ranges had this block. - * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and - * adds separate blocks for the supplementary PUAs. - * - * @stable ICU 2.0 - */ - UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/ - /** - * Same as UBLOCK_PRIVATE_USE_AREA. - * Until Unicode 3.1.1, the corresponding block name was "Private Use", - * and multiple code point ranges had this block. - * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and - * adds separate blocks for the supplementary PUAs. 
- * - * @stable ICU 2.0 - */ - UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, - - /** @stable ICU 2.0 */ - UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ - - /** @stable ICU 2.0 */ - UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ - - /** @stable ICU 2.0 */ - UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ - - /** @stable ICU 2.0 */ - UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ - - /** @stable ICU 2.0 */ - UBLOCK_SPECIALS =86, /*[FFF0]*/ - - /** @stable ICU 2.0 */ - UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ - - /* New blocks in Unicode 3.1 */ - - /** @stable ICU 2.0 */ - UBLOCK_OLD_ITALIC = 88, /*[10300]*/ - /** @stable ICU 2.0 */ - UBLOCK_GOTHIC = 89, /*[10330]*/ - /** @stable ICU 2.0 */ - UBLOCK_DESERET = 90, /*[10400]*/ - /** @stable ICU 2.0 */ - UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ - /** @stable ICU 2.0 */ - UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ - /** @stable ICU 2.0 */ - UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ - /** @stable ICU 2.0 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ - /** @stable ICU 2.0 */ - UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/ - /** @stable ICU 2.0 */ - UBLOCK_TAGS = 96, /*[E0000]*/ - - /* New blocks in Unicode 3.2 */ - - /** @stable ICU 3.0 */ - UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/ - /** - * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
- * @stable ICU 2.2 - */ - UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, - /** @stable ICU 2.2 */ - UBLOCK_TAGALOG = 98, /*[1700]*/ - /** @stable ICU 2.2 */ - UBLOCK_HANUNOO = 99, /*[1720]*/ - /** @stable ICU 2.2 */ - UBLOCK_BUHID = 100, /*[1740]*/ - /** @stable ICU 2.2 */ - UBLOCK_TAGBANWA = 101, /*[1760]*/ - /** @stable ICU 2.2 */ - UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ - /** @stable ICU 2.2 */ - UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ - /** @stable ICU 2.2 */ - UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ - /** @stable ICU 2.2 */ - UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ - /** @stable ICU 2.2 */ - UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ - - /* New blocks in Unicode 4 */ - - /** @stable ICU 2.6 */ - UBLOCK_LIMBU = 111, /*[1900]*/ - /** @stable ICU 2.6 */ - UBLOCK_TAI_LE = 112, /*[1950]*/ - /** @stable ICU 2.6 */ - UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ - /** @stable ICU 2.6 */ - UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ - /** @stable ICU 2.6 */ - UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ - /** @stable ICU 2.6 */ - UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ - /** @stable ICU 2.6 */ - UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ - /** @stable ICU 2.6 */ - UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ - /** @stable ICU 2.6 */ - UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ - /** @stable ICU 2.6 */ - UBLOCK_UGARITIC = 120, /*[10380]*/ - /** @stable ICU 2.6 */ - UBLOCK_SHAVIAN = 121, /*[10450]*/ - /** @stable ICU 2.6 */ - UBLOCK_OSMANYA = 122, /*[10480]*/ - /** @stable ICU 2.6 */ - UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ - /** @stable 
ICU 2.6 */ - UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ - /** @stable ICU 2.6 */ - UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ - - /* New blocks in Unicode 4.1 */ - - /** @stable ICU 3.4 */ - UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/ - /** @stable ICU 3.4 */ - UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/ - /** @stable ICU 3.4 */ - UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/ - /** @stable ICU 3.4 */ - UBLOCK_BUGINESE = 129, /*[1A00]*/ - /** @stable ICU 3.4 */ - UBLOCK_CJK_STROKES = 130, /*[31C0]*/ - /** @stable ICU 3.4 */ - UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/ - /** @stable ICU 3.4 */ - UBLOCK_COPTIC = 132, /*[2C80]*/ - /** @stable ICU 3.4 */ - UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/ - /** @stable ICU 3.4 */ - UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/ - /** @stable ICU 3.4 */ - UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/ - /** @stable ICU 3.4 */ - UBLOCK_GLAGOLITIC = 136, /*[2C00]*/ - /** @stable ICU 3.4 */ - UBLOCK_KHAROSHTHI = 137, /*[10A00]*/ - /** @stable ICU 3.4 */ - UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/ - /** @stable ICU 3.4 */ - UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/ - /** @stable ICU 3.4 */ - UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/ - /** @stable ICU 3.4 */ - UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/ - /** @stable ICU 3.4 */ - UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/ - /** @stable ICU 3.4 */ - UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/ - /** @stable ICU 3.4 */ - UBLOCK_TIFINAGH = 144, /*[2D30]*/ - /** @stable ICU 3.4 */ - UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/ - - /* New blocks in Unicode 5.0 */ - - /** @stable ICU 3.6 */ - UBLOCK_NKO = 146, /*[07C0]*/ - /** @stable ICU 3.6 */ - UBLOCK_BALINESE = 147, /*[1B00]*/ - /** @stable ICU 3.6 */ - UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/ - /** @stable ICU 3.6 */ - UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/ - /** @stable ICU 3.6 */ - UBLOCK_PHAGS_PA = 150, /*[A840]*/ - /** @stable ICU 3.6 */ - UBLOCK_PHOENICIAN = 151, 
/*[10900]*/ - /** @stable ICU 3.6 */ - UBLOCK_CUNEIFORM = 152, /*[12000]*/ - /** @stable ICU 3.6 */ - UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/ - /** @stable ICU 3.6 */ - UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/ - - /* New blocks in Unicode 5.1 */ - - /** @stable ICU 4.0 */ - UBLOCK_SUNDANESE = 155, /*[1B80]*/ - /** @stable ICU 4.0 */ - UBLOCK_LEPCHA = 156, /*[1C00]*/ - /** @stable ICU 4.0 */ - UBLOCK_OL_CHIKI = 157, /*[1C50]*/ - /** @stable ICU 4.0 */ - UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/ - /** @stable ICU 4.0 */ - UBLOCK_VAI = 159, /*[A500]*/ - /** @stable ICU 4.0 */ - UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/ - /** @stable ICU 4.0 */ - UBLOCK_SAURASHTRA = 161, /*[A880]*/ - /** @stable ICU 4.0 */ - UBLOCK_KAYAH_LI = 162, /*[A900]*/ - /** @stable ICU 4.0 */ - UBLOCK_REJANG = 163, /*[A930]*/ - /** @stable ICU 4.0 */ - UBLOCK_CHAM = 164, /*[AA00]*/ - /** @stable ICU 4.0 */ - UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/ - /** @stable ICU 4.0 */ - UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/ - /** @stable ICU 4.0 */ - UBLOCK_LYCIAN = 167, /*[10280]*/ - /** @stable ICU 4.0 */ - UBLOCK_CARIAN = 168, /*[102A0]*/ - /** @stable ICU 4.0 */ - UBLOCK_LYDIAN = 169, /*[10920]*/ - /** @stable ICU 4.0 */ - UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/ - /** @stable ICU 4.0 */ - UBLOCK_DOMINO_TILES = 171, /*[1F030]*/ - - /* New blocks in Unicode 5.2 */ - - /** @stable ICU 4.4 */ - UBLOCK_SAMARITAN = 172, /*[0800]*/ - /** @stable ICU 4.4 */ - UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/ - /** @stable ICU 4.4 */ - UBLOCK_TAI_THAM = 174, /*[1A20]*/ - /** @stable ICU 4.4 */ - UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/ - /** @stable ICU 4.4 */ - UBLOCK_LISU = 176, /*[A4D0]*/ - /** @stable ICU 4.4 */ - UBLOCK_BAMUM = 177, /*[A6A0]*/ - /** @stable ICU 4.4 */ - UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/ - /** @stable ICU 4.4 */ - UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/ - /** @stable ICU 4.4 */ - UBLOCK_HANGUL_JAMO_EXTENDED_A = 
180, /*[A960]*/ - /** @stable ICU 4.4 */ - UBLOCK_JAVANESE = 181, /*[A980]*/ - /** @stable ICU 4.4 */ - UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/ - /** @stable ICU 4.4 */ - UBLOCK_TAI_VIET = 183, /*[AA80]*/ - /** @stable ICU 4.4 */ - UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/ - /** @stable ICU 4.4 */ - UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/ - /** @stable ICU 4.4 */ - UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/ - /** @stable ICU 4.4 */ - UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/ - /** @stable ICU 4.4 */ - UBLOCK_AVESTAN = 188, /*[10B00]*/ - /** @stable ICU 4.4 */ - UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/ - /** @stable ICU 4.4 */ - UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/ - /** @stable ICU 4.4 */ - UBLOCK_OLD_TURKIC = 191, /*[10C00]*/ - /** @stable ICU 4.4 */ - UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/ - /** @stable ICU 4.4 */ - UBLOCK_KAITHI = 193, /*[11080]*/ - /** @stable ICU 4.4 */ - UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/ - /** @stable ICU 4.4 */ - UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/ - /** @stable ICU 4.4 */ - UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/ - /** @stable ICU 4.4 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/ - - /* New blocks in Unicode 6.0 */ - - /** @stable ICU 4.6 */ - UBLOCK_MANDAIC = 198, /*[0840]*/ - /** @stable ICU 4.6 */ - UBLOCK_BATAK = 199, /*[1BC0]*/ - /** @stable ICU 4.6 */ - UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/ - /** @stable ICU 4.6 */ - UBLOCK_BRAHMI = 201, /*[11000]*/ - /** @stable ICU 4.6 */ - UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/ - /** @stable ICU 4.6 */ - UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/ - /** @stable ICU 4.6 */ - UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/ - /** @stable ICU 4.6 */ - UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/ - /** @stable ICU 4.6 */ - UBLOCK_EMOTICONS = 206, /*[1F600]*/ - /** @stable ICU 4.6 */ - UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/ - /** @stable ICU 4.6 */ - 
UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/ - /** @stable ICU 4.6 */ - UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/ - - /* New blocks in Unicode 6.1 */ - - /** @stable ICU 49 */ - UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/ - /** @stable ICU 49 */ - UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/ - /** @stable ICU 49 */ - UBLOCK_CHAKMA = 212, /*[11100]*/ - /** @stable ICU 49 */ - UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/ - /** @stable ICU 49 */ - UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/ - /** @stable ICU 49 */ - UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/ - /** @stable ICU 49 */ - UBLOCK_MIAO = 216, /*[16F00]*/ - /** @stable ICU 49 */ - UBLOCK_SHARADA = 217, /*[11180]*/ - /** @stable ICU 49 */ - UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/ - /** @stable ICU 49 */ - UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/ - /** @stable ICU 49 */ - UBLOCK_TAKRI = 220, /*[11680]*/ - - /** @stable ICU 2.0 */ - UBLOCK_COUNT = 221, - - /** @stable ICU 2.0 */ - UBLOCK_INVALID_CODE=-1 -}; - -/** @stable ICU 2.0 */ -typedef enum UBlockCode UBlockCode; - -/** - * East Asian Width constants. - * - * @see UCHAR_EAST_ASIAN_WIDTH - * @see u_getIntPropertyValue - * @stable ICU 2.2 - */ -typedef enum UEastAsianWidth { - /* - * Note: UEastAsianWidth constants are parsed by preparseucd.py. - * It matches lines like - * U_EA_<Unicode East_Asian_Width value name> - */ - - U_EA_NEUTRAL, /*[N]*/ - U_EA_AMBIGUOUS, /*[A]*/ - U_EA_HALFWIDTH, /*[H]*/ - U_EA_FULLWIDTH, /*[F]*/ - U_EA_NARROW, /*[Na]*/ - U_EA_WIDE, /*[W]*/ - U_EA_COUNT -} UEastAsianWidth; - -/** - * Selector constants for u_charName(). - * u_charName() returns the "modern" name of a - * Unicode character; or the name that was defined in - * Unicode version 1.0, before the Unicode standard merged - * with ISO-10646; or an "extended" name that gives each - * Unicode code point a unique name. 
- * - * @see u_charName - * @stable ICU 2.0 - */ -typedef enum UCharNameChoice { - /** Unicode character name (Name property). @stable ICU 2.0 */ - U_UNICODE_CHAR_NAME, -#ifndef U_HIDE_DEPRECATED_API - /** - * The Unicode_1_Name property value which is of little practical value. - * Beginning with ICU 49, ICU APIs return an empty string for this name choice. - * @deprecated ICU 49 - */ - U_UNICODE_10_CHAR_NAME, -#endif /* U_HIDE_DEPRECATED_API */ - /** Standard or synthetic character name. @stable ICU 2.0 */ - U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2, - /** Corrected name from NameAliases.txt. @stable ICU 4.4 */ - U_CHAR_NAME_ALIAS, - /** @stable ICU 2.0 */ - U_CHAR_NAME_CHOICE_COUNT -} UCharNameChoice; - -/** - * Selector constants for u_getPropertyName() and - * u_getPropertyValueName(). These selectors are used to choose which - * name is returned for a given property or value. All properties and - * values have a long name. Most have a short name, but some do not. - * Unicode allows for additional names, beyond the long and short - * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where - * i=1, 2,... - * - * @see u_getPropertyName() - * @see u_getPropertyValueName() - * @stable ICU 2.4 - */ -typedef enum UPropertyNameChoice { - U_SHORT_PROPERTY_NAME, - U_LONG_PROPERTY_NAME, - U_PROPERTY_NAME_CHOICE_COUNT -} UPropertyNameChoice; - -/** - * Decomposition Type constants. - * - * @see UCHAR_DECOMPOSITION_TYPE - * @stable ICU 2.2 - */ -typedef enum UDecompositionType { - /* - * Note: UDecompositionType constants are parsed by preparseucd.py. 
- * It matches lines like - * U_DT_<Unicode Decomposition_Type value name> - */ - - U_DT_NONE, /*[none]*/ - U_DT_CANONICAL, /*[can]*/ - U_DT_COMPAT, /*[com]*/ - U_DT_CIRCLE, /*[enc]*/ - U_DT_FINAL, /*[fin]*/ - U_DT_FONT, /*[font]*/ - U_DT_FRACTION, /*[fra]*/ - U_DT_INITIAL, /*[init]*/ - U_DT_ISOLATED, /*[iso]*/ - U_DT_MEDIAL, /*[med]*/ - U_DT_NARROW, /*[nar]*/ - U_DT_NOBREAK, /*[nb]*/ - U_DT_SMALL, /*[sml]*/ - U_DT_SQUARE, /*[sqr]*/ - U_DT_SUB, /*[sub]*/ - U_DT_SUPER, /*[sup]*/ - U_DT_VERTICAL, /*[vert]*/ - U_DT_WIDE, /*[wide]*/ - U_DT_COUNT /* 18 */ -} UDecompositionType; - -/** - * Joining Type constants. - * - * @see UCHAR_JOINING_TYPE - * @stable ICU 2.2 - */ -typedef enum UJoiningType { - /* - * Note: UJoiningType constants are parsed by preparseucd.py. - * It matches lines like - * U_JT_<Unicode Joining_Type value name> - */ - - U_JT_NON_JOINING, /*[U]*/ - U_JT_JOIN_CAUSING, /*[C]*/ - U_JT_DUAL_JOINING, /*[D]*/ - U_JT_LEFT_JOINING, /*[L]*/ - U_JT_RIGHT_JOINING, /*[R]*/ - U_JT_TRANSPARENT, /*[T]*/ - U_JT_COUNT /* 6 */ -} UJoiningType; - -/** - * Joining Group constants. - * - * @see UCHAR_JOINING_GROUP - * @stable ICU 2.2 - */ -typedef enum UJoiningGroup { - /* - * Note: UJoiningGroup constants are parsed by preparseucd.py. 
- * It matches lines like - * U_JG_<Unicode Joining_Group value name> - */ - - U_JG_NO_JOINING_GROUP, - U_JG_AIN, - U_JG_ALAPH, - U_JG_ALEF, - U_JG_BEH, - U_JG_BETH, - U_JG_DAL, - U_JG_DALATH_RISH, - U_JG_E, - U_JG_FEH, - U_JG_FINAL_SEMKATH, - U_JG_GAF, - U_JG_GAMAL, - U_JG_HAH, - U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */ - U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL, - U_JG_HE, - U_JG_HEH, - U_JG_HEH_GOAL, - U_JG_HETH, - U_JG_KAF, - U_JG_KAPH, - U_JG_KNOTTED_HEH, - U_JG_LAM, - U_JG_LAMADH, - U_JG_MEEM, - U_JG_MIM, - U_JG_NOON, - U_JG_NUN, - U_JG_PE, - U_JG_QAF, - U_JG_QAPH, - U_JG_REH, - U_JG_REVERSED_PE, - U_JG_SAD, - U_JG_SADHE, - U_JG_SEEN, - U_JG_SEMKATH, - U_JG_SHIN, - U_JG_SWASH_KAF, - U_JG_SYRIAC_WAW, - U_JG_TAH, - U_JG_TAW, - U_JG_TEH_MARBUTA, - U_JG_TETH, - U_JG_WAW, - U_JG_YEH, - U_JG_YEH_BARREE, - U_JG_YEH_WITH_TAIL, - U_JG_YUDH, - U_JG_YUDH_HE, - U_JG_ZAIN, - U_JG_FE, /**< @stable ICU 2.6 */ - U_JG_KHAPH, /**< @stable ICU 2.6 */ - U_JG_ZHAIN, /**< @stable ICU 2.6 */ - U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */ - U_JG_FARSI_YEH, /**< @stable ICU 4.4 */ - U_JG_NYA, /**< @stable ICU 4.4 */ - U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */ - U_JG_COUNT -} UJoiningGroup; - -/** - * Grapheme Cluster Break constants. - * - * @see UCHAR_GRAPHEME_CLUSTER_BREAK - * @stable ICU 3.4 - */ -typedef enum UGraphemeClusterBreak { - /* - * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py. 
- * It matches lines like - * U_GCB_<Unicode Grapheme_Cluster_Break value name> - */ - - U_GCB_OTHER = 0, /*[XX]*/ - U_GCB_CONTROL = 1, /*[CN]*/ - U_GCB_CR = 2, /*[CR]*/ - U_GCB_EXTEND = 3, /*[EX]*/ - U_GCB_L = 4, /*[L]*/ - U_GCB_LF = 5, /*[LF]*/ - U_GCB_LV = 6, /*[LV]*/ - U_GCB_LVT = 7, /*[LVT]*/ - U_GCB_T = 8, /*[T]*/ - U_GCB_V = 9, /*[V]*/ - U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ - U_GCB_PREPEND = 11, /*[PP]*/ - U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ - U_GCB_COUNT = 13 -} UGraphemeClusterBreak; - -/** - * Word Break constants. - * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.) - * - * @see UCHAR_WORD_BREAK - * @stable ICU 3.4 - */ -typedef enum UWordBreakValues { - /* - * Note: UWordBreakValues constants are parsed by preparseucd.py. - * It matches lines like - * U_WB_<Unicode Word_Break value name> - */ - - U_WB_OTHER = 0, /*[XX]*/ - U_WB_ALETTER = 1, /*[LE]*/ - U_WB_FORMAT = 2, /*[FO]*/ - U_WB_KATAKANA = 3, /*[KA]*/ - U_WB_MIDLETTER = 4, /*[ML]*/ - U_WB_MIDNUM = 5, /*[MN]*/ - U_WB_NUMERIC = 6, /*[NU]*/ - U_WB_EXTENDNUMLET = 7, /*[EX]*/ - U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ - U_WB_EXTEND = 9, /*[Extend]*/ - U_WB_LF = 10, /*[LF]*/ - U_WB_MIDNUMLET =11, /*[MB]*/ - U_WB_NEWLINE =12, /*[NL]*/ - U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ - U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ - U_WB_SINGLE_QUOTE = 15, /*[SQ]*/ - U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/ - U_WB_COUNT = 17 -} UWordBreakValues; - -/** - * Sentence Break constants. - * - * @see UCHAR_SENTENCE_BREAK - * @stable ICU 3.4 - */ -typedef enum USentenceBreak { - /* - * Note: USentenceBreak constants are parsed by preparseucd.py. 
- * It matches lines like - * U_SB_<Unicode Sentence_Break value name> - */ - - U_SB_OTHER = 0, /*[XX]*/ - U_SB_ATERM = 1, /*[AT]*/ - U_SB_CLOSE = 2, /*[CL]*/ - U_SB_FORMAT = 3, /*[FO]*/ - U_SB_LOWER = 4, /*[LO]*/ - U_SB_NUMERIC = 5, /*[NU]*/ - U_SB_OLETTER = 6, /*[LE]*/ - U_SB_SEP = 7, /*[SE]*/ - U_SB_SP = 8, /*[SP]*/ - U_SB_STERM = 9, /*[ST]*/ - U_SB_UPPER = 10, /*[UP]*/ - U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ - U_SB_EXTEND = 12, /*[EX]*/ - U_SB_LF = 13, /*[LF]*/ - U_SB_SCONTINUE = 14, /*[SC]*/ - U_SB_COUNT = 15 -} USentenceBreak; - -/** - * Line Break constants. - * - * @see UCHAR_LINE_BREAK - * @stable ICU 2.2 - */ -typedef enum ULineBreak { - /* - * Note: ULineBreak constants are parsed by preparseucd.py. - * It matches lines like - * U_LB_<Unicode Line_Break value name> - */ - - U_LB_UNKNOWN = 0, /*[XX]*/ - U_LB_AMBIGUOUS = 1, /*[AI]*/ - U_LB_ALPHABETIC = 2, /*[AL]*/ - U_LB_BREAK_BOTH = 3, /*[B2]*/ - U_LB_BREAK_AFTER = 4, /*[BA]*/ - U_LB_BREAK_BEFORE = 5, /*[BB]*/ - U_LB_MANDATORY_BREAK = 6, /*[BK]*/ - U_LB_CONTINGENT_BREAK = 7, /*[CB]*/ - U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/ - U_LB_COMBINING_MARK = 9, /*[CM]*/ - U_LB_CARRIAGE_RETURN = 10, /*[CR]*/ - U_LB_EXCLAMATION = 11, /*[EX]*/ - U_LB_GLUE = 12, /*[GL]*/ - U_LB_HYPHEN = 13, /*[HY]*/ - U_LB_IDEOGRAPHIC = 14, /*[ID]*/ - /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */ - U_LB_INSEPARABLE = 15, /*[IN]*/ - U_LB_INSEPERABLE = U_LB_INSEPARABLE, - U_LB_INFIX_NUMERIC = 16, /*[IS]*/ - U_LB_LINE_FEED = 17, /*[LF]*/ - U_LB_NONSTARTER = 18, /*[NS]*/ - U_LB_NUMERIC = 19, /*[NU]*/ - U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/ - U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/ - U_LB_PREFIX_NUMERIC = 22, /*[PR]*/ - U_LB_QUOTATION = 23, /*[QU]*/ - U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/ - U_LB_SURROGATE = 25, /*[SG]*/ - U_LB_SPACE = 26, /*[SP]*/ - U_LB_BREAK_SYMBOLS = 27, /*[SY]*/ - U_LB_ZWSPACE = 28, /*[ZW]*/ - U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 
4/ICU 2.6 */ - U_LB_WORD_JOINER = 30, /*[WJ]*/ - U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */ - U_LB_H3 = 32, /*[H3]*/ - U_LB_JL = 33, /*[JL]*/ - U_LB_JT = 34, /*[JT]*/ - U_LB_JV = 35, /*[JV]*/ - U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ - U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ - U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ - U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */ - U_LB_COUNT = 40 -} ULineBreak; - -/** - * Numeric Type constants. - * - * @see UCHAR_NUMERIC_TYPE - * @stable ICU 2.2 - */ -typedef enum UNumericType { - /* - * Note: UNumericType constants are parsed by preparseucd.py. - * It matches lines like - * U_NT_<Unicode Numeric_Type value name> - */ - - U_NT_NONE, /*[None]*/ - U_NT_DECIMAL, /*[de]*/ - U_NT_DIGIT, /*[di]*/ - U_NT_NUMERIC, /*[nu]*/ - U_NT_COUNT -} UNumericType; - -/** - * Hangul Syllable Type constants. - * - * @see UCHAR_HANGUL_SYLLABLE_TYPE - * @stable ICU 2.6 - */ -typedef enum UHangulSyllableType { - /* - * Note: UHangulSyllableType constants are parsed by preparseucd.py. - * It matches lines like - * U_HST_<Unicode Hangul_Syllable_Type value name> - */ - - U_HST_NOT_APPLICABLE, /*[NA]*/ - U_HST_LEADING_JAMO, /*[L]*/ - U_HST_VOWEL_JAMO, /*[V]*/ - U_HST_TRAILING_JAMO, /*[T]*/ - U_HST_LV_SYLLABLE, /*[LV]*/ - U_HST_LVT_SYLLABLE, /*[LVT]*/ - U_HST_COUNT -} UHangulSyllableType; - -/** - * Check a binary Unicode property for a code point. - * - * Unicode, especially in version 3.2, defines many more properties than the - * original set in UnicodeData.txt. - * - * The properties APIs are intended to reflect Unicode properties as defined - * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). - * For details about the properties see http://www.unicode.org/ucd/ . - * For names of Unicode properties see the UCD file PropertyAliases.txt. 
- * - * Important: If ICU is built with UCD files from Unicode versions below 3.2, - * then properties marked with "new in Unicode 3.2" are not or not fully available. - * - * @param c Code point to test. - * @param which UProperty selector constant, identifies which binary property to check. - * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT. - * @return TRUE or FALSE according to the binary Unicode property value for c. - * Also FALSE if 'which' is out of bounds or if the Unicode version - * does not have data for the property at all, or not for this code point. - * - * @see UProperty - * @see u_getIntPropertyValue - * @see u_getUnicodeVersion - * @stable ICU 2.1 - */ -U_STABLE UBool U_EXPORT2 -u_hasBinaryProperty(UChar32 c, UProperty which); - -/** - * Check if a code point has the Alphabetic Unicode property. - * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). - * This is different from u_isalpha! - * @param c Code point to test - * @return true if the code point has the Alphabetic Unicode property, false otherwise - * - * @see UCHAR_ALPHABETIC - * @see u_isalpha - * @see u_hasBinaryProperty - * @stable ICU 2.1 - */ -U_STABLE UBool U_EXPORT2 -u_isUAlphabetic(UChar32 c); - -/** - * Check if a code point has the Lowercase Unicode property. - * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). - * This is different from u_islower! - * @param c Code point to test - * @return true if the code point has the Lowercase Unicode property, false otherwise - * - * @see UCHAR_LOWERCASE - * @see u_islower - * @see u_hasBinaryProperty - * @stable ICU 2.1 - */ -U_STABLE UBool U_EXPORT2 -u_isULowercase(UChar32 c); - -/** - * Check if a code point has the Uppercase Unicode property. - * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). - * This is different from u_isupper! 
- * @param c Code point to test - * @return true if the code point has the Uppercase Unicode property, false otherwise - * - * @see UCHAR_UPPERCASE - * @see u_isupper - * @see u_hasBinaryProperty - * @stable ICU 2.1 - */ -U_STABLE UBool U_EXPORT2 -u_isUUppercase(UChar32 c); - -/** - * Check if a code point has the White_Space Unicode property. - * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). - * This is different from both u_isspace and u_isWhitespace! - * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. - * - * @param c Code point to test - * @return true if the code point has the White_Space Unicode property, false otherwise. - * - * @see UCHAR_WHITE_SPACE - * @see u_isWhitespace - * @see u_isspace - * @see u_isJavaSpaceChar - * @see u_hasBinaryProperty - * @stable ICU 2.1 - */ -U_STABLE UBool U_EXPORT2 -u_isUWhiteSpace(UChar32 c); - -/** - * Get the property value for an enumerated or integer Unicode property for a code point. - * Also returns binary and mask property values. - * - * Unicode, especially in version 3.2, defines many more properties than the - * original set in UnicodeData.txt. - * - * The properties APIs are intended to reflect Unicode properties as defined - * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). - * For details about the properties see http://www.unicode.org/ . - * For names of Unicode properties see the UCD file PropertyAliases.txt. - * - * Sample usage: - * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); - * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC); - * - * @param c Code point to test. - * @param which UProperty selector constant, identifies which property to check. - * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT - * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT - * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. 
- * @return Numeric value that is directly the property value or, - * for enumerated properties, corresponds to the numeric value of the enumerated - * constant of the respective property value enumeration type - * (cast to enum type if necessary). - * Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties. - * Returns a bit-mask for mask properties. - * Returns 0 if 'which' is out of bounds or if the Unicode version - * does not have data for the property at all, or not for this code point. - * - * @see UProperty - * @see u_hasBinaryProperty - * @see u_getIntPropertyMinValue - * @see u_getIntPropertyMaxValue - * @see u_getUnicodeVersion - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_getIntPropertyValue(UChar32 c, UProperty which); - -/** - * Get the minimum value for an enumerated/integer/binary Unicode property. - * Can be used together with u_getIntPropertyMaxValue - * to allocate arrays of UnicodeSet or similar. - * - * @param which UProperty selector constant, identifies which binary property to check. - * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT - * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. - * @return Minimum value returned by u_getIntPropertyValue for a Unicode property. - * 0 if the property selector is out of range. - * - * @see UProperty - * @see u_hasBinaryProperty - * @see u_getUnicodeVersion - * @see u_getIntPropertyMaxValue - * @see u_getIntPropertyValue - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_getIntPropertyMinValue(UProperty which); - -/** - * Get the maximum value for an enumerated/integer/binary Unicode property. - * Can be used together with u_getIntPropertyMinValue - * to allocate arrays of UnicodeSet or similar. 
- * - * Examples for min/max values (for Unicode 3.2): - * - * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL) - * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA) - * - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE) - * - * For undefined UProperty constant values, min/max values will be 0/-1. - * - * @param which UProperty selector constant, identifies which binary property to check. - * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT - * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. - * @return Maximum value returned by u_getIntPropertyValue for a Unicode property. - * <=0 if the property selector is out of range. - * - * @see UProperty - * @see u_hasBinaryProperty - * @see u_getUnicodeVersion - * @see u_getIntPropertyMaxValue - * @see u_getIntPropertyValue - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_getIntPropertyMaxValue(UProperty which); - -/** - * Get the numeric value for a Unicode code point as defined in the - * Unicode Character Database. - * - * A "double" return type is necessary because - * some numeric values are fractions, negative, or too large for int32_t. - * - * For characters without any numeric values in the Unicode Character Database, - * this function will return U_NO_NUMERIC_VALUE. - * Note: This is different from the Unicode Standard which specifies NaN as the default value. - * (NaN is not available on all platforms.) - * - * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() - * also supports negative values, large values, and fractions, - * while Java's getNumericValue() returns values 10..35 for ASCII letters. - * - * @param c Code point to get the numeric value for. - * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined. - * - * @see U_NO_NUMERIC_VALUE - * @stable ICU 2.2 - */ -U_STABLE double U_EXPORT2 -u_getNumericValue(UChar32 c); - -/** - * Special value that is returned by u_getNumericValue when - * no numeric value is defined for a code point. 
- * - * @see u_getNumericValue - * @stable ICU 2.2 - */ -#define U_NO_NUMERIC_VALUE ((double)-123456789.) - -/** - * Determines whether the specified code point has the general category "Ll" - * (lowercase letter). - * - * Same as java.lang.Character.isLowerCase(). - * - * This misses some characters that are also lowercase but - * have a different general category value. - * In order to include those, use UCHAR_LOWERCASE. - * - * In addition to being equivalent to a Java function, this also serves - * as a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is an Ll lowercase letter - * - * @see UCHAR_LOWERCASE - * @see u_isupper - * @see u_istitle - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_islower(UChar32 c); - -/** - * Determines whether the specified code point has the general category "Lu" - * (uppercase letter). - * - * Same as java.lang.Character.isUpperCase(). - * - * This misses some characters that are also uppercase but - * have a different general category value. - * In order to include those, use UCHAR_UPPERCASE. - * - * In addition to being equivalent to a Java function, this also serves - * as a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is an Lu uppercase letter - * - * @see UCHAR_UPPERCASE - * @see u_islower - * @see u_istitle - * @see u_tolower - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isupper(UChar32 c); - -/** - * Determines whether the specified code point is a titlecase letter. - * True for general category "Lt" (titlecase letter). - * - * Same as java.lang.Character.isTitleCase(). 
- * - * @param c the code point to be tested - * @return TRUE if the code point is an Lt titlecase letter - * - * @see u_isupper - * @see u_islower - * @see u_totitle - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_istitle(UChar32 c); - -/** - * Determines whether the specified code point is a digit character according to Java. - * True for characters with general category "Nd" (decimal digit numbers). - * Beginning with Unicode 4, this is the same as - * testing for the Numeric_Type of Decimal. - * - * Same as java.lang.Character.isDigit(). - * - * In addition to being equivalent to a Java function, this also serves - * as a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a digit character according to Character.isDigit() - * - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isdigit(UChar32 c); - -/** - * Determines whether the specified code point is a letter character. - * True for general categories "L" (letters). - * - * Same as java.lang.Character.isLetter(). - * - * In addition to being equivalent to a Java function, this also serves - * as a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a letter character - * - * @see u_isdigit - * @see u_isalnum - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isalpha(UChar32 c); - -/** - * Determines whether the specified code point is an alphanumeric character - * (letter or digit) according to Java. - * True for characters with general categories - * "L" (letters) and "Nd" (decimal digit numbers). - * - * Same as java.lang.Character.isLetterOrDigit(). 
- * - * In addition to being equivalent to a Java function, this also serves - * as a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit() - * - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isalnum(UChar32 c); - -/** - * Determines whether the specified code point is a hexadecimal digit. - * This is equivalent to u_digit(c, 16)>=0. - * True for characters with general category "Nd" (decimal digit numbers) - * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. - * (That is, for letters with code points - * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) - * - * In order to narrow the definition of hexadecimal digits to only ASCII - * characters, use (c<=0x7f && u_isxdigit(c)). - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a hexadecimal digit - * - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isxdigit(UChar32 c); - -/** - * Determines whether the specified code point is a punctuation character. - * True for characters with general categories "P" (punctuation). - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a punctuation character - * - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_ispunct(UChar32 c); - -/** - * Determines whether the specified code point is a "graphic" character - * (printable, excluding spaces). 
- * TRUE for all characters except those with general categories - * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), - * "Cn" (unassigned), and "Z" (separators). - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a "graphic" character - * - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isgraph(UChar32 c); - -/** - * Determines whether the specified code point is a "blank" or "horizontal space", - * a character that visibly separates words on a line. - * The following are equivalent definitions: - * - * TRUE for Unicode White_Space characters except for "vertical space controls" - * where "vertical space controls" are the following characters: - * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) - * - * same as - * - * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators) - * except Zero Width Space (ZWSP, U+200B). - * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a "blank" - * - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isblank(UChar32 c); - -/** - * Determines whether the specified code point is "defined", - * which usually means that it is assigned a character. - * True for general categories other than "Cn" (other, not assigned), - * i.e., true for all code points mentioned in UnicodeData.txt. - * - * Note that non-character code points (e.g., U+FDD0) are not "defined" - * (they are Cn), but surrogate code points are "defined" (Cs). 
- * - * Same as java.lang.Character.isDefined(). - * - * @param c the code point to be tested - * @return TRUE if the code point is assigned a character - * - * @see u_isdigit - * @see u_isalpha - * @see u_isalnum - * @see u_isupper - * @see u_islower - * @see u_istitle - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isdefined(UChar32 c); - -/** - * Determines if the specified character is a space character or not. - * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the character to be tested - * @return true if the character is a space character; false otherwise. - * - * @see u_isJavaSpaceChar - * @see u_isWhitespace - * @see u_isUWhiteSpace - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isspace(UChar32 c); - -/** - * Determine if the specified code point is a space character according to Java. - * True for characters with general categories "Z" (separators), - * which does not include control codes (e.g., TAB or Line Feed). - * - * Same as java.lang.Character.isSpaceChar(). - * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. - * - * @param c the code point to be tested - * @return TRUE if the code point is a space character according to Character.isSpaceChar() - * - * @see u_isspace - * @see u_isWhitespace - * @see u_isUWhiteSpace - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isJavaSpaceChar(UChar32 c); - -/** - * Determines if the specified code point is a whitespace character according to Java/ICU. 
- * A character is considered to be a Java whitespace character if and only - * if it satisfies one of the following criteria: - * - * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not - * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). - * - It is U+0009 HORIZONTAL TABULATION. - * - It is U+000A LINE FEED. - * - It is U+000B VERTICAL TABULATION. - * - It is U+000C FORM FEED. - * - It is U+000D CARRIAGE RETURN. - * - It is U+001C FILE SEPARATOR. - * - It is U+001D GROUP SEPARATOR. - * - It is U+001E RECORD SEPARATOR. - * - It is U+001F UNIT SEPARATOR. - * - * This API tries to sync with the semantics of Java's - * java.lang.Character.isWhitespace(), but it may not return - * the exact same results because of the Unicode version - * difference. - * - * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) - * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. - * See http://www.unicode.org/versions/Unicode4.0.1/ - * - * Note: There are several ICU whitespace functions; please see the uchar.h - * file documentation for a detailed comparison. - * - * @param c the code point to be tested - * @return TRUE if the code point is a whitespace character according to Java/ICU - * - * @see u_isspace - * @see u_isJavaSpaceChar - * @see u_isUWhiteSpace - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isWhitespace(UChar32 c); - -/** - * Determines whether the specified code point is a control character - * (as defined by this function). - * A control character is one of the following: - * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) - * - U_CONTROL_CHAR (Cc) - * - U_FORMAT_CHAR (Cf) - * - U_LINE_SEPARATOR (Zl) - * - U_PARAGRAPH_SEPARATOR (Zp) - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. 
- * - * @param c the code point to be tested - * @return TRUE if the code point is a control character - * - * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT - * @see u_isprint - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_iscntrl(UChar32 c); - -/** - * Determines whether the specified code point is an ISO control code. - * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). - * - * Same as java.lang.Character.isISOControl(). - * - * @param c the code point to be tested - * @return TRUE if the code point is an ISO control code - * - * @see u_iscntrl - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -u_isISOControl(UChar32 c); - -/** - * Determines whether the specified code point is a printable character. - * True for general categories <em>other</em> than "C" (controls). - * - * This is a C/POSIX migration function. - * See the comments about C/POSIX character classification functions in the - * documentation at the top of this header file. - * - * @param c the code point to be tested - * @return TRUE if the code point is a printable character - * - * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT - * @see u_iscntrl - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isprint(UChar32 c); - -/** - * Determines whether the specified code point is a base character. - * True for general categories "L" (letters), "N" (numbers), - * "Mc" (spacing combining marks), and "Me" (enclosing marks). - * - * Note that this is different from the Unicode definition in - * chapter 3.5, conformance clause D13, - * which defines base characters to be all characters (not Cn) - * that do not graphically combine with preceding characters (M) - * and that are neither control (Cc) or format (Cf) characters. 
- * - * @param c the code point to be tested - * @return TRUE if the code point is a base character according to this function - * - * @see u_isalpha - * @see u_isdigit - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isbase(UChar32 c); - -/** - * Returns the bidirectional category value for the code point, - * which is used in the Unicode bidirectional algorithm - * (UAX #9 http://www.unicode.org/reports/tr9/). - * Note that some <em>unassigned</em> code points have bidi values - * of R or AL because they are in blocks that are reserved - * for Right-To-Left scripts. - * - * Same as java.lang.Character.getDirectionality() - * - * @param c the code point to be tested - * @return the bidirectional category (UCharDirection) value - * - * @see UCharDirection - * @stable ICU 2.0 - */ -U_STABLE UCharDirection U_EXPORT2 -u_charDirection(UChar32 c); - -/** - * Determines whether the code point has the Bidi_Mirrored property. - * This property is set for characters that are commonly used in - * Right-To-Left contexts and need to be displayed with a "mirrored" - * glyph. - * - * Same as java.lang.Character.isMirrored(). - * Same as UCHAR_BIDI_MIRRORED - * - * @param c the code point to be tested - * @return TRUE if the character has the Bidi_Mirrored property - * - * @see UCHAR_BIDI_MIRRORED - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isMirrored(UChar32 c); - -/** - * Maps the specified character to a "mirror-image" character. - * For characters with the Bidi_Mirrored property, implementations - * sometimes need a "poor man's" mapping to another Unicode - * character (code point) such that the default glyph may serve - * as the mirror-image of the default glyph of the specified - * character. This is useful for text conversion to and from - * codepages with visual order, and for displays without glyph - * selection capabilities. 
- * - * @param c the code point to be mapped - * @return another Unicode code point that may serve as a mirror-image - * substitute, or c itself if there is no such mapping or c - * does not have the Bidi_Mirrored property - * - * @see UCHAR_BIDI_MIRRORED - * @see u_isMirrored - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_charMirror(UChar32 c); - -/** - * Maps the specified character to its paired bracket character. - * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). - * Otherwise c itself is returned. - * See http://www.unicode.org/reports/tr9/ - * - * @param c the code point to be mapped - * @return the paired bracket code point, - * or c itself if there is no such mapping - * (Bidi_Paired_Bracket_Type=None) - * - * @see UCHAR_BIDI_PAIRED_BRACKET - * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE - * @see u_charMirror - * @stable ICU 52 - */ -U_STABLE UChar32 U_EXPORT2 -u_getBidiPairedBracket(UChar32 c); - -/** - * Returns the general category value for the code point. - * - * Same as java.lang.Character.getType(). - * - * @param c the code point to be tested - * @return the general category (UCharCategory) value - * - * @see UCharCategory - * @stable ICU 2.0 - */ -U_STABLE int8_t U_EXPORT2 -u_charType(UChar32 c); - -/** - * Get a single-bit bit set for the general category of a character. - * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. - * Same as U_MASK(u_charType(c)). - * - * @param c the code point to be tested - * @return a single-bit mask corresponding to the general category (UCharCategory) value - * - * @see u_charType - * @see UCharCategory - * @see U_GC_CN_MASK - * @stable ICU 2.1 - */ -#define U_GET_GC_MASK(c) U_MASK(u_charType(c)) - -/** - * Callback from u_enumCharTypes(), is called for each contiguous range - * of code points c (where start<=c<limit) - * with the same Unicode general category ("character type"). - * - * The callback function can stop the enumeration by returning FALSE. 
- * - * @param context an opaque pointer, as passed into utrie_enum() - * @param start the first code point in a contiguous range with value - * @param limit one past the last code point in a contiguous range with value - * @param type the general category for all code points in [start..limit[ - * @return FALSE to stop the enumeration - * - * @stable ICU 2.1 - * @see UCharCategory - * @see u_enumCharTypes - */ -typedef UBool U_CALLCONV -UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); - -/** - * Enumerate efficiently all code points with their Unicode general categories. - * - * This is useful for building data structures (e.g., UnicodeSet's), - * for enumerating all assigned code points (type!=U_UNASSIGNED), etc. - * - * For each contiguous range of code points with a given general category ("character type"), - * the UCharEnumTypeRange function is called. - * Adjacent ranges have different types. - * The Unicode Standard guarantees that the numeric value of the type is 0..31. - * - * @param enumRange a pointer to a function that is called for each contiguous range - * of code points with the same general category - * @param context an opaque pointer that is passed on to the callback function - * - * @stable ICU 2.1 - * @see UCharCategory - * @see UCharEnumTypeRange - */ -U_STABLE void U_EXPORT2 -u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context); - -#if !UCONFIG_NO_NORMALIZATION - -/** - * Returns the combining class of the code point as specified in UnicodeData.txt. - * - * @param c the code point of the character - * @return the combining class of the character - * @stable ICU 2.0 - */ -U_STABLE uint8_t U_EXPORT2 -u_getCombiningClass(UChar32 c); - -#endif - -/** - * Returns the decimal digit value of a decimal digit character. - * Such characters have the general category "Nd" (decimal digit numbers) - * and a Numeric_Type of Decimal. 
- * - * Unlike ICU releases before 2.6, no digit values are returned for any - * Han characters because Han number characters are often used with a special - * Chinese-style number format (with characters for powers of 10 in between) - * instead of in decimal-positional notation. - * Unicode 4 explicitly assigns Han number characters the Numeric_Type - * Numeric instead of Decimal. - * See Jitterbug 1483 for more details. - * - * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() - * for complete numeric Unicode properties. - * - * @param c the code point for which to get the decimal digit value - * @return the decimal digit value of c, - * or -1 if c is not a decimal digit character - * - * @see u_getNumericValue - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_charDigitValue(UChar32 c); - -/** - * Returns the Unicode allocation block that contains the character. - * - * @param c the code point to be tested - * @return the block value (UBlockCode) for c - * - * @see UBlockCode - * @stable ICU 2.0 - */ -U_STABLE UBlockCode U_EXPORT2 -ublock_getCode(UChar32 c); - -/** - * Retrieve the name of a Unicode character. - * Depending on <code>nameChoice</code>, the character name written - * into the buffer is the "modern" name or the name that was defined - * in Unicode version 1.0. - * The name contains only "invariant" characters - * like A-Z, 0-9, space, and '-'. - * Unicode 1.0 names are only retrieved if they are different from the modern - * names and if the data file contains the data for them. gennames may or may - * not be called with a command line option to include 1.0 names in unames.dat. - * - * @param code The character (code point) for which to get the name. - * It must be <code>0<=code<=0x10ffff</code>. - * @param nameChoice Selector for which name to get. - * @param buffer Destination address for copying the name. - * The name will always be zero-terminated. 
- * If there is no name, then the buffer will be set to the empty string. - * @param bufferLength <code>==sizeof(buffer)</code> - * @param pErrorCode Pointer to a UErrorCode variable; - * check for <code>U_SUCCESS()</code> after <code>u_charName()</code> - * returns. - * @return The length of the name, or 0 if there is no name for this character. - * If the bufferLength is less than or equal to the length, then the buffer - * contains the truncated name and the returned length indicates the full - * length of the name. - * The length does not include the zero-termination. - * - * @see UCharNameChoice - * @see u_charFromName - * @see u_enumCharNames - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_charName(UChar32 code, UCharNameChoice nameChoice, - char *buffer, int32_t bufferLength, - UErrorCode *pErrorCode); - -#ifndef U_HIDE_DEPRECATED_API -/** - * Returns an empty string. - * Used to return the ISO 10646 comment for a character. - * The Unicode ISO_Comment property is deprecated and has no values. - * - * @param c The character (code point) for which to get the ISO comment. - * It must be <code>0<=c<=0x10ffff</code>. - * @param dest Destination address for copying the comment. - * The comment will be zero-terminated if possible. - * If there is no comment, then the buffer will be set to the empty string. - * @param destCapacity <code>==sizeof(dest)</code> - * @param pErrorCode Pointer to a UErrorCode variable; - * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code> - * returns. - * @return 0 - * - * @deprecated ICU 49 - */ -U_STABLE int32_t U_EXPORT2 -u_getISOComment(UChar32 c, - char *dest, int32_t destCapacity, - UErrorCode *pErrorCode); -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Find a Unicode character by its name and return its code point value. - * The name is matched exactly and completely. - * If the name does not correspond to a code point, <i>pErrorCode</i> - * is set to <code>U_INVALID_CHAR_FOUND</code>. 
- * A Unicode 1.0 name is matched only if it differs from the modern name. - * Unicode names are all uppercase. Extended names are lowercase followed - * by an uppercase hexadecimal number, and within angle brackets. - * - * @param nameChoice Selector for which name to match. - * @param name The name to match. - * @param pErrorCode Pointer to a UErrorCode variable - * @return The Unicode value of the code point with the given name, - * or an undefined value if there is no such code point. - * - * @see UCharNameChoice - * @see u_charName - * @see u_enumCharNames - * @stable ICU 1.7 - */ -U_STABLE UChar32 U_EXPORT2 -u_charFromName(UCharNameChoice nameChoice, - const char *name, - UErrorCode *pErrorCode); - -/** - * Type of a callback function for u_enumCharNames() that gets called - * for each Unicode character with the code point value and - * the character name. - * If such a function returns FALSE, then the enumeration is stopped. - * - * @param context The context pointer that was passed to u_enumCharNames(). - * @param code The Unicode code point for the character with this name. - * @param nameChoice Selector for which kind of names is enumerated. - * @param name The character's name, zero-terminated. - * @param length The length of the name. - * @return TRUE if the enumeration should continue, FALSE to stop it. - * - * @see UCharNameChoice - * @see u_enumCharNames - * @stable ICU 1.7 - */ -typedef UBool U_CALLCONV UEnumCharNamesFn(void *context, - UChar32 code, - UCharNameChoice nameChoice, - const char *name, - int32_t length); - -/** - * Enumerate all assigned Unicode characters between the start and limit - * code points (start inclusive, limit exclusive) and call a function - * for each, passing the code point value and the character name. - * For Unicode 1.0 names, only those are enumerated that differ from the - * modern names. - * - * @param start The first code point in the enumeration range. 
- * @param limit One more than the last code point in the enumeration range - * (the first one after the range). - * @param fn The function that is to be called for each character name. - * @param context An arbitrary pointer that is passed to the function. - * @param nameChoice Selector for which kind of names to enumerate. - * @param pErrorCode Pointer to a UErrorCode variable - * - * @see UCharNameChoice - * @see UEnumCharNamesFn - * @see u_charName - * @see u_charFromName - * @stable ICU 1.7 - */ -U_STABLE void U_EXPORT2 -u_enumCharNames(UChar32 start, UChar32 limit, - UEnumCharNamesFn *fn, - void *context, - UCharNameChoice nameChoice, - UErrorCode *pErrorCode); - -/** - * Return the Unicode name for a given property, as given in the - * Unicode database file PropertyAliases.txt. - * - * In addition, this function maps the property - * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / - * "General_Category_Mask". These names are not in - * PropertyAliases.txt. - * - * @param property UProperty selector other than UCHAR_INVALID_CODE. - * If out of range, NULL is returned. - * - * @param nameChoice selector for which name to get. If out of range, - * NULL is returned. All properties have a long name. Most - * have a short name, but some do not. Unicode allows for - * additional names; if present these will be returned by - * U_LONG_PROPERTY_NAME + i, where i=1, 2,... - * - * @return a pointer to the name, or NULL if either the - * property or the nameChoice is out of range. If a given - * nameChoice returns NULL, then all larger values of - * nameChoice will return NULL, with one exception: if NULL is - * returned for U_SHORT_PROPERTY_NAME, then - * U_LONG_PROPERTY_NAME (and higher) may still return a - * non-NULL value. The returned pointer is valid until - * u_cleanup() is called. 
- * - * @see UProperty - * @see UPropertyNameChoice - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -u_getPropertyName(UProperty property, - UPropertyNameChoice nameChoice); - -/** - * Return the UProperty enum for a given property name, as specified - * in the Unicode database file PropertyAliases.txt. Short, long, and - * any other variants are recognized. - * - * In addition, this function maps the synthetic names "gcm" / - * "General_Category_Mask" to the property - * UCHAR_GENERAL_CATEGORY_MASK. These names are not in - * PropertyAliases.txt. - * - * @param alias the property name to be matched. The name is compared - * using "loose matching" as described in PropertyAliases.txt. - * - * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name - * does not match any property. - * - * @see UProperty - * @stable ICU 2.4 - */ -U_STABLE UProperty U_EXPORT2 -u_getPropertyEnum(const char* alias); - -/** - * Return the Unicode name for a given property value, as given in the - * Unicode database file PropertyValueAliases.txt. - * - * Note: Some of the names in PropertyValueAliases.txt can only be - * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not - * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / - * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" - * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". - * - * @param property UProperty selector constant. - * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT - * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT - * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. - * If out of range, NULL is returned. - * - * @param value selector for a value for the given property. If out - * of range, NULL is returned. In general, valid values range - * from 0 up to some maximum. There are a few exceptions: - * (1.) UCHAR_BLOCK values begin at the non-zero value - * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS - * values are not contiguous and range from 0..240. (3.) 
- * UCHAR_GENERAL_CATEGORY_MASK values are not values of - * UCharCategory, but rather mask values produced by - * U_GET_GC_MASK(). This allows grouped categories such as - * [:L:] to be represented. Mask values range - * non-contiguously from 1..U_GC_P_MASK. - * - * @param nameChoice selector for which name to get. If out of range, - * NULL is returned. All values have a long name. Most have - * a short name, but some do not. Unicode allows for - * additional names; if present these will be returned by - * U_LONG_PROPERTY_NAME + i, where i=1, 2,... - - * @return a pointer to the name, or NULL if either the - * property or the nameChoice is out of range. If a given - * nameChoice returns NULL, then all larger values of - * nameChoice will return NULL, with one exception: if NULL is - * returned for U_SHORT_PROPERTY_NAME, then - * U_LONG_PROPERTY_NAME (and higher) may still return a - * non-NULL value. The returned pointer is valid until - * u_cleanup() is called. - * - * @see UProperty - * @see UPropertyNameChoice - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -u_getPropertyValueName(UProperty property, - int32_t value, - UPropertyNameChoice nameChoice); - -/** - * Return the property value integer for a given value name, as - * specified in the Unicode database file PropertyValueAliases.txt. - * Short, long, and any other variants are recognized. - * - * Note: Some of the names in PropertyValueAliases.txt will only be - * recognized with UCHAR_GENERAL_CATEGORY_MASK, not - * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / - * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" - * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". - * - * @param property UProperty selector constant. - * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT - * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT - * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. - * If out of range, UCHAR_INVALID_CODE is returned. 
- * - * @param alias the value name to be matched. The name is compared - * using "loose matching" as described in - * PropertyValueAliases.txt. - * - * @return a value integer or UCHAR_INVALID_CODE if the given name - * does not match any value of the given property, or if the - * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values - * are not values of UCharCategory, but rather mask values - * produced by U_GET_GC_MASK(). This allows grouped - * categories such as [:L:] to be represented. - * - * @see UProperty - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -u_getPropertyValueEnum(UProperty property, - const char* alias); - -/** - * Determines if the specified character is permissible as the - * first character in an identifier according to Unicode - * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers). - * True for characters with general categories "L" (letters) and "Nl" (letter numbers). - * - * Same as java.lang.Character.isUnicodeIdentifierStart(). - * Same as UCHAR_ID_START - * - * @param c the code point to be tested - * @return TRUE if the code point may start an identifier - * - * @see UCHAR_ID_START - * @see u_isalpha - * @see u_isIDPart - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isIDStart(UChar32 c); - -/** - * Determines if the specified character is permissible - * in an identifier according to Java. - * True for characters with general categories "L" (letters), - * "Nl" (letter numbers), "Nd" (decimal digits), - * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and - * u_isIDIgnorable(c). - * - * Same as java.lang.Character.isUnicodeIdentifierPart(). - * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE) - * except that Unicode recommends to ignore Cf which is less than - * u_isIDIgnorable(c). 
- * - * @param c the code point to be tested - * @return TRUE if the code point may occur in an identifier according to Java - * - * @see UCHAR_ID_CONTINUE - * @see u_isIDStart - * @see u_isIDIgnorable - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isIDPart(UChar32 c); - -/** - * Determines if the specified character should be regarded - * as an ignorable character in an identifier, - * according to Java. - * True for characters with general category "Cf" (format controls) as well as - * non-whitespace ISO controls - * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F). - * - * Same as java.lang.Character.isIdentifierIgnorable(). - * - * Note that Unicode just recommends to ignore Cf (format controls). - * - * @param c the code point to be tested - * @return TRUE if the code point is ignorable in identifiers according to Java - * - * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT - * @see u_isIDStart - * @see u_isIDPart - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isIDIgnorable(UChar32 c); - -/** - * Determines if the specified character is permissible as the - * first character in a Java identifier. - * In addition to u_isIDStart(c), true for characters with - * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). - * - * Same as java.lang.Character.isJavaIdentifierStart(). - * - * @param c the code point to be tested - * @return TRUE if the code point may start a Java identifier - * - * @see u_isJavaIDPart - * @see u_isalpha - * @see u_isIDStart - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isJavaIDStart(UChar32 c); - -/** - * Determines if the specified character is permissible - * in a Java identifier. - * In addition to u_isIDPart(c), true for characters with - * general category "Sc" (currency symbols). - * - * Same as java.lang.Character.isJavaIdentifierPart(). 
- * - * @param c the code point to be tested - * @return TRUE if the code point may occur in a Java identifier - * - * @see u_isIDIgnorable - * @see u_isJavaIDStart - * @see u_isalpha - * @see u_isdigit - * @see u_isIDPart - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -u_isJavaIDPart(UChar32 c); - -/** - * The given character is mapped to its lowercase equivalent according to - * UnicodeData.txt; if the character has no lowercase equivalent, the character - * itself is returned. - * - * Same as java.lang.Character.toLowerCase(). - * - * This function only returns the simple, single-code point case mapping. - * Full case mappings should be used whenever possible because they produce - * better results by working on whole strings. - * They take into account the string context and the language and can map - * to a result string with a different length as appropriate. - * Full case mappings are applied by the string case mapping functions, - * see ustring.h and the UnicodeString class. - * See also the User Guide chapter on C/POSIX migration: - * http://icu-project.org/userguide/posix.html#case_mappings - * - * @param c the code point to be mapped - * @return the Simple_Lowercase_Mapping of the code point, if any; - * otherwise the code point itself. - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_tolower(UChar32 c); - -/** - * The given character is mapped to its uppercase equivalent according to UnicodeData.txt; - * if the character has no uppercase equivalent, the character itself is - * returned. - * - * Same as java.lang.Character.toUpperCase(). - * - * This function only returns the simple, single-code point case mapping. - * Full case mappings should be used whenever possible because they produce - * better results by working on whole strings. - * They take into account the string context and the language and can map - * to a result string with a different length as appropriate. 
- * Full case mappings are applied by the string case mapping functions, - * see ustring.h and the UnicodeString class. - * See also the User Guide chapter on C/POSIX migration: - * http://icu-project.org/userguide/posix.html#case_mappings - * - * @param c the code point to be mapped - * @return the Simple_Uppercase_Mapping of the code point, if any; - * otherwise the code point itself. - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_toupper(UChar32 c); - -/** - * The given character is mapped to its titlecase equivalent - * according to UnicodeData.txt; - * if none is defined, the character itself is returned. - * - * Same as java.lang.Character.toTitleCase(). - * - * This function only returns the simple, single-code point case mapping. - * Full case mappings should be used whenever possible because they produce - * better results by working on whole strings. - * They take into account the string context and the language and can map - * to a result string with a different length as appropriate. - * Full case mappings are applied by the string case mapping functions, - * see ustring.h and the UnicodeString class. - * See also the User Guide chapter on C/POSIX migration: - * http://icu-project.org/userguide/posix.html#case_mappings - * - * @param c the code point to be mapped - * @return the Simple_Titlecase_Mapping of the code point, if any; - * otherwise the code point itself. - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_totitle(UChar32 c); - -/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */ -#define U_FOLD_CASE_DEFAULT 0 - -/** - * Option value for case folding: - * - * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I - * and dotless i appropriately for Turkic languages (tr, az). - * - * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that - * are to be included for default mappings and - * excluded for the Turkic-specific mappings. 
- * - * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that - * are to be excluded for default mappings and - * included for the Turkic-specific mappings. - * - * @stable ICU 2.0 - */ -#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 - -/** - * The given character is mapped to its case folding equivalent according to - * UnicodeData.txt and CaseFolding.txt; - * if the character has no case folding equivalent, the character - * itself is returned. - * - * This function only returns the simple, single-code point case mapping. - * Full case mappings should be used whenever possible because they produce - * better results by working on whole strings. - * They take into account the string context and the language and can map - * to a result string with a different length as appropriate. - * Full case mappings are applied by the string case mapping functions, - * see ustring.h and the UnicodeString class. - * See also the User Guide chapter on C/POSIX migration: - * http://icu-project.org/userguide/posix.html#case_mappings - * - * @param c the code point to be mapped - * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I - * @return the Simple_Case_Folding of the code point, if any; - * otherwise the code point itself. - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_foldCase(UChar32 c, uint32_t options); - -/** - * Returns the decimal digit value of the code point in the - * specified radix. - * - * If the radix is not in the range <code>2<=radix<=36</code> or if the - * value of <code>c</code> is not a valid digit in the specified - * radix, <code>-1</code> is returned. A character is a valid digit - * if at least one of the following is true: - * <ul> - * <li>The character has a decimal digit value. - * Such characters have the general category "Nd" (decimal digit numbers) - * and a Numeric_Type of Decimal. 
- * In this case the value is the character's decimal digit value.</li> - * <li>The character is one of the uppercase Latin letters - * <code>'A'</code> through <code>'Z'</code>. - * In this case the value is <code>c-'A'+10</code>.</li> - * <li>The character is one of the lowercase Latin letters - * <code>'a'</code> through <code>'z'</code>. - * In this case the value is <code>ch-'a'+10</code>.</li> - * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A) - * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) - * are recognized.</li> - * </ul> - * - * Same as java.lang.Character.digit(). - * - * @param ch the code point to be tested. - * @param radix the radix. - * @return the numeric value represented by the character in the - * specified radix, - * or -1 if there is no value or if the value exceeds the radix. - * - * @see UCHAR_NUMERIC_TYPE - * @see u_forDigit - * @see u_charDigitValue - * @see u_isdigit - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_digit(UChar32 ch, int8_t radix); - -/** - * Determines the character representation for a specific digit in - * the specified radix. If the value of <code>radix</code> is not a - * valid radix, or the value of <code>digit</code> is not a valid - * digit in the specified radix, the null character - * (<code>U+0000</code>) is returned. - * <p> - * The <code>radix</code> argument is valid if it is greater than or - * equal to 2 and less than or equal to 36. - * The <code>digit</code> argument is valid if - * <code>0 <= digit < radix</code>. - * <p> - * If the digit is less than 10, then - * <code>'0' + digit</code> is returned. Otherwise, the value - * <code>'a' + digit - 10</code> is returned. - * - * Same as java.lang.Character.forDigit(). - * - * @param digit the number to convert to a character. - * @param radix the radix. - * @return the <code>char</code> representation of the specified digit - * in the specified radix. 
- * - * @see u_digit - * @see u_charDigitValue - * @see u_isdigit - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_forDigit(int32_t digit, int8_t radix); - -/** - * Get the "age" of the code point. - * The "age" is the Unicode version when the code point was first - * designated (as a non-character or for Private Use) - * or assigned a character. - * This can be useful to avoid emitting code points to receiving - * processes that do not accept newer characters. - * The data is from the UCD file DerivedAge.txt. - * - * @param c The code point. - * @param versionArray The Unicode version number array, to be filled in. - * - * @stable ICU 2.1 - */ -U_STABLE void U_EXPORT2 -u_charAge(UChar32 c, UVersionInfo versionArray); - -/** - * Gets the Unicode version information. - * The version array is filled in with the version information - * for the Unicode standard that is currently used by ICU. - * For example, Unicode version 3.1.1 is represented as an array with - * the values { 3, 1, 1, 0 }. - * - * @param versionArray an output array that will be filled in with - * the Unicode version number - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -u_getUnicodeVersion(UVersionInfo versionArray); - -#if !UCONFIG_NO_NORMALIZATION -/** - * Get the FC_NFKC_Closure property string for a character. - * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" - * or for "FNC": http://www.unicode.org/reports/tr15/ - * - * @param c The character (code point) for which to get the FC_NFKC_Closure string. - * It must be <code>0<=c<=0x10ffff</code>. - * @param dest Destination address for copying the string. - * The string will be zero-terminated if possible. - * If there is no FC_NFKC_Closure string, - * then the buffer will be set to the empty string. - * @param destCapacity <code>==sizeof(dest)</code> - * @param pErrorCode Pointer to a UErrorCode variable. - * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. 
- * If the destCapacity is less than or equal to the length, then the buffer - * contains the truncated name and the returned length indicates the full - * length of the name. - * The length does not include the zero-termination. - * - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); - -#endif - - -U_CDECL_END - -#endif /*_UCHAR*/ -/*eof*/ diff --git a/Source/WTF/icu/unicode/ucnv.h b/Source/WTF/icu/unicode/ucnv.h deleted file mode 100644 index c5fc2dc78..000000000 --- a/Source/WTF/icu/unicode/ucnv.h +++ /dev/null @@ -1,2034 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 1999-2013, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** - * ucnv.h: - * External APIs for the ICU's codeset conversion library - * Bertrand A. Damiba - * - * Modification History: - * - * Date Name Description - * 04/04/99 helena Fixed internal header inclusion. - * 05/11/00 helena Added setFallback and usesFallback APIs. - * 06/29/2000 helena Major rewrite of the callback APIs. - * 12/07/2000 srl Update of documentation - */ - -/** - * \file - * \brief C API: Character conversion - * - * <h2>Character Conversion C API</h2> - * - * <p>This API is used to convert codepage or character encoded data to and - * from UTF-16. You can open a converter with {@link ucnv_open() }. With that - * converter, you can get its properties, set options, convert your data and - * close the converter.</p> - * - * <p>Since many software programs recogize different converter names for - * different types of converters, there are other functions in this API to - * iterate over the converter aliases. 
The functions {@link ucnv_getAvailableName() }, - * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the - * more frequently used alias functions to get this information.</p> - * - * <p>When a converter encounters an illegal, irregular, invalid or unmappable character - * its default behavior is to use a substitution character to replace the - * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() } - * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines - * many other callback actions that can be used instead of a character substitution.</p> - * - * <p>More information about this API can be found in our - * <a href="http://icu-project.org/userguide/conversion.html">User's - * Guide</a>.</p> - */ - -#ifndef UCNV_H -#define UCNV_H - -#include "unicode/ucnv_err.h" -#include "unicode/uenum.h" -#include "unicode/localpointer.h" - -#ifndef __USET_H__ - -/** - * USet is the C API type for Unicode sets. - * It is forward-declared here to avoid including the header file if related - * conversion APIs are not used. 
- * See unicode/uset.h - * - * @see ucnv_getUnicodeSet - * @stable ICU 2.6 - */ -struct USet; -/** @stable ICU 2.6 */ -typedef struct USet USet; - -#endif - -#if !UCONFIG_NO_CONVERSION - -U_CDECL_BEGIN - -/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */ -#define UCNV_MAX_CONVERTER_NAME_LENGTH 60 -/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */ -#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH) - -/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ -#define UCNV_SI 0x0F -/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */ -#define UCNV_SO 0x0E - -/** - * Enum for specifying basic types of converters - * @see ucnv_getType - * @stable ICU 2.0 - */ -typedef enum { - /** @stable ICU 2.0 */ - UCNV_UNSUPPORTED_CONVERTER = -1, - /** @stable ICU 2.0 */ - UCNV_SBCS = 0, - /** @stable ICU 2.0 */ - UCNV_DBCS = 1, - /** @stable ICU 2.0 */ - UCNV_MBCS = 2, - /** @stable ICU 2.0 */ - UCNV_LATIN_1 = 3, - /** @stable ICU 2.0 */ - UCNV_UTF8 = 4, - /** @stable ICU 2.0 */ - UCNV_UTF16_BigEndian = 5, - /** @stable ICU 2.0 */ - UCNV_UTF16_LittleEndian = 6, - /** @stable ICU 2.0 */ - UCNV_UTF32_BigEndian = 7, - /** @stable ICU 2.0 */ - UCNV_UTF32_LittleEndian = 8, - /** @stable ICU 2.0 */ - UCNV_EBCDIC_STATEFUL = 9, - /** @stable ICU 2.0 */ - UCNV_ISO_2022 = 10, - - /** @stable ICU 2.0 */ - UCNV_LMBCS_1 = 11, - /** @stable ICU 2.0 */ - UCNV_LMBCS_2, - /** @stable ICU 2.0 */ - UCNV_LMBCS_3, - /** @stable ICU 2.0 */ - UCNV_LMBCS_4, - /** @stable ICU 2.0 */ - UCNV_LMBCS_5, - /** @stable ICU 2.0 */ - UCNV_LMBCS_6, - /** @stable ICU 2.0 */ - UCNV_LMBCS_8, - /** @stable ICU 2.0 */ - UCNV_LMBCS_11, - /** @stable ICU 2.0 */ - UCNV_LMBCS_16, - /** @stable ICU 2.0 */ - UCNV_LMBCS_17, - /** @stable ICU 2.0 */ - UCNV_LMBCS_18, - /** @stable ICU 2.0 */ - UCNV_LMBCS_19, - /** @stable ICU 2.0 */ - UCNV_LMBCS_LAST = UCNV_LMBCS_19, - /** @stable ICU 2.0 
*/ - UCNV_HZ, - /** @stable ICU 2.0 */ - UCNV_SCSU, - /** @stable ICU 2.0 */ - UCNV_ISCII, - /** @stable ICU 2.0 */ - UCNV_US_ASCII, - /** @stable ICU 2.0 */ - UCNV_UTF7, - /** @stable ICU 2.2 */ - UCNV_BOCU1, - /** @stable ICU 2.2 */ - UCNV_UTF16, - /** @stable ICU 2.2 */ - UCNV_UTF32, - /** @stable ICU 2.2 */ - UCNV_CESU8, - /** @stable ICU 2.4 */ - UCNV_IMAP_MAILBOX, - /** @stable ICU 4.8 */ - UCNV_COMPOUND_TEXT, - - /* Number of converter types for which we have conversion routines. */ - UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES -} UConverterType; - -/** - * Enum for specifying which platform a converter ID refers to. - * The use of platform/CCSID is not recommended. See ucnv_openCCSID(). - * - * @see ucnv_getPlatform - * @see ucnv_openCCSID - * @see ucnv_getCCSID - * @stable ICU 2.0 - */ -typedef enum { - UCNV_UNKNOWN = -1, - UCNV_IBM = 0 -} UConverterPlatform; - -/** - * Function pointer for error callback in the codepage to unicode direction. - * Called when an error has occured in conversion to unicode, or on open/close of the callback (see reason). - * @param context Pointer to the callback's private data - * @param args Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param pErrorCode ICU error code in/out parameter. - * For converter callback functions, set to a conversion error - * before the call, and the callback may reset it to U_ZERO_ERROR. - * @see ucnv_setToUCallBack - * @see UConverterToUnicodeArgs - * @stable ICU 2.0 - */ -typedef void (U_EXPORT2 *UConverterToUCallback) ( - const void* context, - UConverterToUnicodeArgs *args, - const char *codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode *pErrorCode); - -/** - * Function pointer for error callback in the unicode to codepage direction. 
- * Called when an error has occured in conversion from unicode, or on open/close of the callback (see reason). - * @param context Pointer to the callback's private data - * @param args Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param pErrorCode ICU error code in/out parameter. - * For converter callback functions, set to a conversion error - * before the call, and the callback may reset it to U_ZERO_ERROR. - * @see ucnv_setFromUCallBack - * @stable ICU 2.0 - */ -typedef void (U_EXPORT2 *UConverterFromUCallback) ( - const void* context, - UConverterFromUnicodeArgs *args, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode *pErrorCode); - -U_CDECL_END - -/** - * Character that separates converter names from options and options from each other. - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_OPTION_SEP_CHAR ',' - -/** - * String version of UCNV_OPTION_SEP_CHAR. - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_OPTION_SEP_STRING "," - -/** - * Character that separates a converter option from its value. - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_VALUE_SEP_CHAR '=' - -/** - * String version of UCNV_VALUE_SEP_CHAR. - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_VALUE_SEP_STRING "=" - -/** - * Converter option for specifying a locale. - * For example, ucnv_open("SCSU,locale=ja", &errorCode); - * See convrtrs.txt. - * - * @see ucnv_open - * @stable ICU 2.0 - */ -#define UCNV_LOCALE_OPTION_STRING ",locale=" - -/** - * Converter option for specifying a version selector (0..9) for some converters. 
- * For example, - * \code - * ucnv_open("UTF-7,version=1", &errorCode); - * \endcode - * See convrtrs.txt. - * - * @see ucnv_open - * @stable ICU 2.4 - */ -#define UCNV_VERSION_OPTION_STRING ",version=" - -/** - * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages. - * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on - * S/390 (z/OS) Unix System Services (Open Edition). - * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode); - * See convrtrs.txt. - * - * @see ucnv_open - * @stable ICU 2.4 - */ -#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl" - -/** - * Do a fuzzy compare of two converter/alias names. - * The comparison is case-insensitive, ignores leading zeroes if they are not - * followed by further digits, and ignores all but letters and digits. - * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent. - * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22 - * at http://www.unicode.org/reports/tr22/ - * - * @param name1 a converter name or alias, zero-terminated - * @param name2 a converter name or alias, zero-terminated - * @return 0 if the names match, or a negative value if the name1 - * lexically precedes name2, or a positive value if the name1 - * lexically follows name2. - * @stable ICU 2.0 - */ -U_STABLE int U_EXPORT2 -ucnv_compareNames(const char *name1, const char *name2); - - -/** - * Creates a UConverter object with the name of a coded character set specified as a C string. - * The actual name will be resolved with the alias file - * using a case-insensitive string comparison that ignores - * leading zeroes and all non-alphanumeric characters. - * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent. - * (See also ucnv_compareNames().) - * If <code>NULL</code> is passed for the converter name, it will create one with the - * getDefaultName return value. 
- * - * <p>A converter name for ICU 1.5 and above may contain options - * like a locale specification to control the specific behavior of - * the newly instantiated converter. - * The meaning of the options depends on the particular converter. - * If an option is not defined for or recognized by a given converter, then it is ignored.</p> - * - * <p>Options are appended to the converter name string, with a - * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and - * also between adjacent options.</p> - * - * <p>If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p> - * - * <p>The conversion behavior and names can vary between platforms. ICU may - * convert some characters differently from other platforms. Details on this topic - * are in the <a href="http://icu-project.org/userguide/conversion.html">User's - * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning - * other than its an alias starting with the letters "cp". Please do not - * associate any meaning to these aliases.</p> - * - * \snippet samples/ucnv/convsamp.cpp ucnv_open - * - * @param converterName Name of the coded character set table. - * This may have options appended to the string. - * IANA alias character set names, IBM CCSIDs starting with "ibm-", - * Windows codepage numbers starting with "windows-" are frequently - * used for this parameter. See ucnv_getAvailableName and - * ucnv_getAlias for a complete list that is available. - * If this parameter is NULL, the default converter will be used. 
- * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> - * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured - * @see ucnv_openU - * @see ucnv_openCCSID - * @see ucnv_getAvailableName - * @see ucnv_getAlias - * @see ucnv_getDefaultName - * @see ucnv_close - * @see ucnv_compareNames - * @stable ICU 2.0 - */ -U_STABLE UConverter* U_EXPORT2 -ucnv_open(const char *converterName, UErrorCode *err); - - -/** - * Creates a Unicode converter with the names specified as unicode string. - * The name should be limited to the ASCII-7 alphanumerics range. - * The actual name will be resolved with the alias file - * using a case-insensitive string comparison that ignores - * leading zeroes and all non-alphanumeric characters. - * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent. - * (See also ucnv_compareNames().) - * If <TT>NULL</TT> is passed for the converter name, it will create - * one with the ucnv_getDefaultName() return value. - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * - * <p>See ucnv_open for the complete details</p> - * @param name Name of the UConverter table in a zero terminated - * Unicode string - * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, - * U_FILE_ACCESS_ERROR</TT> - * @return the created Unicode converter object, or <TT>NULL</TT> if an - * error occured - * @see ucnv_open - * @see ucnv_openCCSID - * @see ucnv_close - * @see ucnv_compareNames - * @stable ICU 2.0 - */ -U_STABLE UConverter* U_EXPORT2 -ucnv_openU(const UChar *name, - UErrorCode *err); - -/** - * Creates a UConverter object from a CCSID number and platform pair. - * Note that the usefulness of this function is limited to platforms with numeric - * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for - * encodings. 
- * - * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related. - * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and - * for some Unicode conversion tables there are multiple CCSIDs. - * Some "alternate" Unicode conversion tables are provided by the - * IBM CDRA conversion table registry. - * The most prominent example of a systematic modification of conversion tables that is - * not provided in the form of conversion table files in the repository is - * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all - * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well. - * - * Only IBM default conversion tables are accessible with ucnv_openCCSID(). - * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated - * with that CCSID. - * - * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM. - * - * In summary, the use of CCSIDs and the associated API functions is not recommended. 
- * - * In order to open a converter with the default IBM CDRA Unicode conversion table, - * you can use this function or use the prefix "ibm-": - * \code - * char name[20]; - * sprintf(name, "ibm-%hu", ccsid); - * cnv=ucnv_open(name, &errorCode); - * \endcode - * - * In order to open a converter with the IBM S/390 Unix System Services variant - * of a Unicode/EBCDIC conversion table, - * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING: - * \code - * char name[20]; - * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid); - * cnv=ucnv_open(name, &errorCode); - * \endcode - * - * In order to open a converter from a Microsoft codepage number, use the prefix "cp": - * \code - * char name[20]; - * sprintf(name, "cp%hu", codepageID); - * cnv=ucnv_open(name, &errorCode); - * \endcode - * - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * - * @param codepage codepage number to create - * @param platform the platform in which the codepage number exists - * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> - * @return the created Unicode converter object, or <TT>NULL</TT> if an error - * occured. - * @see ucnv_open - * @see ucnv_openU - * @see ucnv_close - * @see ucnv_getCCSID - * @see ucnv_getPlatform - * @see UConverterPlatform - * @stable ICU 2.0 - */ -U_STABLE UConverter* U_EXPORT2 -ucnv_openCCSID(int32_t codepage, - UConverterPlatform platform, - UErrorCode * err); - -/** - * <p>Creates a UConverter object specified from a packageName and a converterName.</p> - * - * <p>The packageName and converterName must point to an ICU udata object, as defined by - * <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent. - * Typically, packageName will refer to a (.dat) file, or to a package registered with - * udata_setAppData(). 
Using a full file or directory pathname for packageName is deprecated.</p> - * - * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be - * stored in the converter cache or the alias table. The only way to open further converters - * is call this function multiple times, or use the ucnv_safeClone() function to clone a - * 'master' converter.</p> - * - * <p>A future version of ICU may add alias table lookups and/or caching - * to this function.</p> - * - * <p>Example Use: - * <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code> - * </p> - * - * @param packageName name of the package (equivalent to 'path' in udata_open() call) - * @param converterName name of the data item to be used, without suffix. - * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT> - * @return the created Unicode converter object, or <TT>NULL</TT> if an error occured - * @see udata_open - * @see ucnv_open - * @see ucnv_safeClone - * @see ucnv_close - * @stable ICU 2.2 - */ -U_STABLE UConverter* U_EXPORT2 -ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err); - -/** - * Thread safe converter cloning operation. - * For most efficient operation, pass in a stackBuffer (and a *pBufferSize) - * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space. - * If the buffer size is sufficient, then the clone will use the stack buffer; - * otherwise, it will be allocated, and *pBufferSize will indicate - * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.) - * - * You must ucnv_close() the clone in any case. - * - * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not) - * then *pBufferSize will be changed to a sufficient size - * for cloning this converter, - * without actually cloning the converter ("pure pre-flighting"). 
- * - * If *pBufferSize is greater than zero but not large enough for a stack-based - * clone, then the converter is cloned using newly allocated memory - * and *pBufferSize is changed to the necessary size. - * - * If the converter clone fits into the stack buffer but the stack buffer is not - * sufficiently aligned for the clone, then the clone will use an - * adjusted pointer and use an accordingly smaller buffer size. - * - * @param cnv converter to be cloned - * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> - * user allocated space for the new clone. If NULL new memory will be allocated. - * If buffer is not large enough, new memory will be allocated. - * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations. - * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> - * pointer to size of allocated space. - * @param status to indicate whether the operation went on smoothly or there were errors - * An informational status value, U_SAFECLONE_ALLOCATED_WARNING, - * is used if any allocations were necessary. - * However, it is better to check if *pBufferSize grew for checking for - * allocations because warning codes can be overridden by subsequent - * function calls. - * @return pointer to the new clone - * @stable ICU 2.0 - */ -U_STABLE UConverter * U_EXPORT2 -ucnv_safeClone(const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status); - -#ifndef U_HIDE_DEPRECATED_API - -/** - * \def U_CNV_SAFECLONE_BUFFERSIZE - * Definition of a buffer size that is designed to be large enough for - * converters to be cloned with ucnv_safeClone(). - * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer. - */ -#define U_CNV_SAFECLONE_BUFFERSIZE 1024 - -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Deletes the unicode converter and releases resources associated - * with just this instance. 
- * Does not free up shared converter tables. - * - * @param converter the converter object to be deleted - * @see ucnv_open - * @see ucnv_openU - * @see ucnv_openCCSID - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_close(UConverter * converter); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUConverterPointer - * "Smart pointer" class, closes a UConverter via ucnv_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close); - -U_NAMESPACE_END - -#endif - -/** - * Fills in the output parameter, subChars, with the substitution characters - * as multiple bytes. - * If ucnv_setSubstString() set a Unicode string because the converter is - * stateful, then subChars will be an empty string. - * - * @param converter the Unicode converter - * @param subChars the substitution characters - * @param len on input the capacity of subChars, on output the number - * of bytes copied to it - * @param err the outgoing error status code. - * If the substitution character array is too small, an - * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. - * @see ucnv_setSubstString - * @see ucnv_setSubstChars - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getSubstChars(const UConverter *converter, - char *subChars, - int8_t *len, - UErrorCode *err); - -/** - * Sets the substitution chars when converting from unicode to a codepage. The - * substitution is specified as a string of 1-4 bytes, and may contain - * <TT>NULL</TT> bytes. - * The subChars must represent a single character. The caller needs to know the - * byte sequence of a valid character in the converter's charset. - * For some converters, for example some ISO 2022 variants, only single-byte - * substitution characters may be supported. - * The newer ucnv_setSubstString() function relaxes these limitations. 
- * - * @param converter the Unicode converter - * @param subChars the substitution character byte sequence we want set - * @param len the number of bytes in subChars - * @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if - * len is bigger than the maximum number of bytes allowed in subchars - * @see ucnv_setSubstString - * @see ucnv_getSubstChars - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_setSubstChars(UConverter *converter, - const char *subChars, - int8_t len, - UErrorCode *err); - -/** - * Set a substitution string for converting from Unicode to a charset. - * The caller need not know the charset byte sequence for each charset. - * - * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence - * for a single character, this function takes a Unicode string with - * zero, one or more characters, and immediately verifies that the string can be - * converted to the charset. - * If not, or if the result is too long (more than 32 bytes as of ICU 3.6), - * then the function returns with an error accordingly. - * - * Also unlike ucnv_setSubstChars(), this function works for stateful charsets - * by converting on the fly at the point of substitution rather than setting - * a fixed byte sequence. - * - * @param cnv The UConverter object. - * @param s The Unicode string. - * @param length The number of UChars in s, or -1 for a NUL-terminated string. - * @param err Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * - * @see ucnv_setSubstChars - * @see ucnv_getSubstChars - * @stable ICU 3.6 - */ -U_STABLE void U_EXPORT2 -ucnv_setSubstString(UConverter *cnv, - const UChar *s, - int32_t length, - UErrorCode *err); - -/** - * Fills in the output parameter, errBytes, with the error characters from the - * last failing conversion. 
- * - * @param converter the Unicode converter - * @param errBytes the codepage bytes which were in error - * @param len on input the capacity of errBytes, on output the number of - * bytes which were copied to it - * @param err the error status code. - * If the errBytes array is too small, an - * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getInvalidChars(const UConverter *converter, - char *errBytes, - int8_t *len, - UErrorCode *err); - -/** - * Fills in the output parameter, errUChars, with the error characters from the - * last failing conversion. - * - * @param converter the Unicode converter - * @param errUChars the UChars which were in error - * @param len on input the capacity of errUChars, on output the number of - * UChars which were copied to it - * @param err the error status code. - * If the errUChars array is too small, an - * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getInvalidUChars(const UConverter *converter, - UChar *errUChars, - int8_t *len, - UErrorCode *err); - -/** - * Resets the state of a converter to the default state. This is used - * in the case of an error, to restart a conversion from a known default state. - * It will also empty the internal output buffers. - * @param converter the Unicode converter - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_reset(UConverter *converter); - -/** - * Resets the to-Unicode part of a converter state to the default state. - * This is used in the case of an error to restart a conversion to - * Unicode to a known default state. It will also empty the internal - * output buffers used for the conversion to Unicode codepoints. - * @param converter the Unicode converter - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_resetToUnicode(UConverter *converter); - -/** - * Resets the from-Unicode part of a converter state to the default state. 
- * This is used in the case of an error to restart a conversion from - * Unicode to a known default state. It will also empty the internal output - * buffers used for the conversion from Unicode codepoints. - * @param converter the Unicode converter - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_resetFromUnicode(UConverter *converter); - -/** - * Returns the maximum number of bytes that are output per UChar in conversion - * from Unicode using this converter. - * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING - * to calculate the size of a target buffer for conversion from Unicode. - * - * Note: Before ICU 2.8, this function did not return reliable numbers for - * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS. - * - * This number may not be the same as the maximum number of bytes per - * "conversion unit". In other words, it may not be the intuitively expected - * number of bytes per character that would be published for a charset, - * and may not fulfill any other purpose than the allocation of an output - * buffer of guaranteed sufficient size for a given input length and converter. - * - * Examples for special cases that are taken into account: - * - Supplementary code points may convert to more bytes than BMP code points. - * This function returns bytes per UChar (UTF-16 code unit), not per - * Unicode code point, for efficient buffer allocation. - * - State-shifting output (SI/SO, escapes, etc.) from stateful converters. - * - When m input UChars are converted to n output bytes, then the maximum m/n - * is taken into account. - * - * The number returned here does not take into account - * (see UCNV_GET_MAX_BYTES_FOR_STRING): - * - callbacks which output more than one charset character sequence per call, - * like escape callbacks - * - initial and final non-character bytes that are output by some converters - * (automatic BOMs, initial escape sequence, final SI, etc.) 
- * - * Examples for returned values: - * - SBCS charsets: 1 - * - Shift-JIS: 2 - * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted) - * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_) - * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS) - * - ISO-2022: 3 (always outputs UTF-8) - * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS) - * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS) - * - * @param converter The Unicode converter. - * @return The maximum number of bytes per UChar that are output by ucnv_fromUnicode(), - * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING for buffer allocation. - * - * @see UCNV_GET_MAX_BYTES_FOR_STRING - * @see ucnv_getMinCharSize - * @stable ICU 2.0 - */ -U_STABLE int8_t U_EXPORT2 -ucnv_getMaxCharSize(const UConverter *converter); - -/** - * Calculates the size of a buffer for conversion from Unicode to a charset. - * The calculated size is guaranteed to be sufficient for this conversion. - * - * It takes into account initial and final non-character bytes that are output - * by some converters. - * It does not take into account callbacks which output more than one charset - * character sequence per call, like escape callbacks. - * The default (substitution) callback only outputs one charset character sequence. - * - * @param length Number of UChars to be converted. - * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter - * that will be used. - * @return Size of a buffer that will be large enough to hold the output bytes of - * converting length UChars with the converter that returned the maxCharSize. - * - * @see ucnv_getMaxCharSize - * @stable ICU 2.8 - */ -#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \ - (((int32_t)(length)+10)*(int32_t)(maxCharSize)) - -/** - * Returns the minimum byte length for characters in this codepage. - * This is usually either 1 or 2. 
- * @param converter the Unicode converter - * @return the minimum number of bytes allowed by this particular converter - * @see ucnv_getMaxCharSize - * @stable ICU 2.0 - */ -U_STABLE int8_t U_EXPORT2 -ucnv_getMinCharSize(const UConverter *converter); - -/** - * Returns the display name of the converter passed in based on the Locale - * passed in. If the locale contains no display name, the internal ASCII - * name will be filled in. - * - * @param converter the Unicode converter. - * @param displayLocale is the specific Locale we want to localised for - * @param displayName user provided buffer to be filled in - * @param displayNameCapacity size of displayName Buffer - * @param err error status code - * @return displayNameLength number of UChar needed in displayName - * @see ucnv_getName - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_getDisplayName(const UConverter *converter, - const char *displayLocale, - UChar *displayName, - int32_t displayNameCapacity, - UErrorCode *err); - -/** - * Gets the internal, canonical name of the converter (zero-terminated). - * The lifetime of the returned string will be that of the converter - * passed to this function. - * @param converter the Unicode converter - * @param err UErrorCode status - * @return the internal name of the converter - * @see ucnv_getDisplayName - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getName(const UConverter *converter, UErrorCode *err); - -/** - * Gets a codepage number associated with the converter. This is not guaranteed - * to be the one used to create the converter. Some converters do not represent - * platform registered codepages and return zero for the codepage number. - * The error code fill-in parameter indicates if the codepage number - * is available. - * Does not check if the converter is <TT>NULL</TT> or if converter's data - * table is <TT>NULL</TT>. 
- * - * Important: The use of CCSIDs is not recommended because it is limited - * to only two platforms in principle and only one (UCNV_IBM) in the current - * ICU converter API. - * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely. - * For more details see ucnv_openCCSID(). - * - * @param converter the Unicode converter - * @param err the error status code. - * @return If any error occurs, -1 will be returned; otherwise, the codepage number - * will be returned - * @see ucnv_openCCSID - * @see ucnv_getPlatform - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_getCCSID(const UConverter *converter, - UErrorCode *err); - -/** - * Gets a codepage platform associated with the converter. Currently, - * only <TT>UCNV_IBM</TT> will be returned. - * Does not test if the converter is <TT>NULL</TT> or if converter's data - * table is <TT>NULL</TT>. - * @param converter the Unicode converter - * @param err the error status code. - * @return The codepage platform - * @stable ICU 2.0 - */ -U_STABLE UConverterPlatform U_EXPORT2 -ucnv_getPlatform(const UConverter *converter, - UErrorCode *err); - -/** - * Gets the type of the converter - * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022, - * EBCDIC_STATEFUL, LATIN_1 - * @param converter a valid, opened converter - * @return the type of the converter - * @stable ICU 2.0 - */ -U_STABLE UConverterType U_EXPORT2 -ucnv_getType(const UConverter * converter); - -/** - * Gets the "starter" (lead) bytes for converters of type MBCS. - * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in - * is not MBCS. Fills in an array of type UBool, with the value of the byte - * as offset to the array. For example, if (starters[0x20] == TRUE) at return, - * it means that the byte 0x20 is a starter byte in this converter. - * Context pointers are always owned by the caller. 
- * - * @param converter a valid, opened converter of type MBCS - * @param starters an array of size 256 to be filled in - * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the - * converter is not a type which can return starters. - * @see ucnv_getType - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getStarters(const UConverter* converter, - UBool starters[256], - UErrorCode* err); - - -/** - * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet(). - * @see ucnv_getUnicodeSet - * @stable ICU 2.6 - */ -typedef enum UConverterUnicodeSet { - /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */ - UCNV_ROUNDTRIP_SET, - /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */ - UCNV_ROUNDTRIP_AND_FALLBACK_SET, - /** Number of UConverterUnicodeSet selectors. @stable ICU 2.6 */ - UCNV_SET_COUNT -} UConverterUnicodeSet; - - -/** - * Returns the set of Unicode code points that can be converted by an ICU converter. - * - * Returns one of several kinds of set: - * - * 1. UCNV_ROUNDTRIP_SET - * - * The set of all Unicode code points that can be roundtrip-converted - * (converted without any data loss) with the converter (ucnv_fromUnicode()). - * This set will not include code points that have fallback mappings - * or are only the result of reverse fallback mappings. - * This set will also not include PUA code points with fallbacks, although - * ucnv_fromUnicode() will always use those mappings despite ucnv_setFallback(). 
- * See UTR #22 "Character Mapping Markup Language" - * at http://www.unicode.org/reports/tr22/ - * - * This is useful for example for - * - checking that a string or document can be roundtrip-converted with a converter, - * without/before actually performing the conversion - * - testing if a converter can be used for typical text for a certain locale, - * by comparing its roundtrip set with the set of ExemplarCharacters from - * ICU's locale data or other sources - * - * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET - * - * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode()) - * when fallbacks are turned on (see ucnv_setFallback()). - * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks). - * - * In the future, there may be more UConverterUnicodeSet choices to select - * sets with different properties. - * - * @param cnv The converter for which a set is requested. - * @param setFillIn A valid USet *. It will be cleared by this function before - * the converter's specific set is filled into the USet. - * @param whichSet A UConverterUnicodeSet selector; - * currently UCNV_ROUNDTRIP_SET is the only supported value. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * - * @see UConverterUnicodeSet - * @see uset_open - * @see uset_close - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -ucnv_getUnicodeSet(const UConverter *cnv, - USet *setFillIn, - UConverterUnicodeSet whichSet, - UErrorCode *pErrorCode); - -/** - * Gets the current callback function used by the converter when an illegal - * or invalid codepage sequence is found. - * Context pointers are always owned by the caller. 
- * - * @param converter the unicode converter - * @param action fillin: returns the callback function pointer - * @param context fillin: returns the callback's private void* context - * @see ucnv_setToUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getToUCallBack (const UConverter * converter, - UConverterToUCallback *action, - const void **context); - -/** - * Gets the current callback function used by the converter when illegal - * or invalid Unicode sequence is found. - * Context pointers are always owned by the caller. - * - * @param converter the unicode converter - * @param action fillin: returns the callback function pointer - * @param context fillin: returns the callback's private void* context - * @see ucnv_setFromUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getFromUCallBack (const UConverter * converter, - UConverterFromUCallback *action, - const void **context); - -/** - * Changes the callback function used by the converter when - * an illegal or invalid sequence is found. - * Context pointers are always owned by the caller. - * Predefined actions and contexts can be found in the ucnv_err.h header. - * - * @param converter the unicode converter - * @param newAction the new callback function - * @param newContext the new toUnicode callback context pointer. This can be NULL. - * @param oldAction fillin: returns the old callback function pointer. This can be NULL. - * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. - * @param err The error code status - * @see ucnv_getToUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_setToUCallBack (UConverter * converter, - UConverterToUCallback newAction, - const void* newContext, - UConverterToUCallback *oldAction, - const void** oldContext, - UErrorCode * err); - -/** - * Changes the current callback function used by the converter when - * an illegal or invalid sequence is found. 
- * Context pointers are always owned by the caller. - * Predefined actions and contexts can be found in the ucnv_err.h header. - * - * @param converter the unicode converter - * @param newAction the new callback function - * @param newContext the new fromUnicode callback context pointer. This can be NULL. - * @param oldAction fillin: returns the old callback function pointer. This can be NULL. - * @param oldContext fillin: returns the old callback's private void* context. This can be NULL. - * @param err The error code status - * @see ucnv_getFromUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_setFromUCallBack (UConverter * converter, - UConverterFromUCallback newAction, - const void *newContext, - UConverterFromUCallback *oldAction, - const void **oldContext, - UErrorCode * err); - -/** - * Converts an array of unicode characters to an array of codepage - * characters. This function is optimized for converting a continuous - * stream of data in buffer-sized chunks, where the entire source and - * target does not fit in available buffers. - * - * The source pointer is an in/out parameter. It starts out pointing where the - * conversion is to begin, and ends up pointing after the last UChar consumed. - * - * Target similarly starts out pointer at the first available byte in the output - * buffer, and ends up pointing after the last byte written to the output. - * - * The converter always attempts to consume the entire source buffer, unless - * (1.) the target buffer is full, or (2.) a failing error is returned from the - * current callback function. When a successful error status has been - * returned, it means that all of the source buffer has been - * consumed. At that point, the caller should reset the source and - * sourceLimit pointers to point to the next chunk. - * - * At the end of the stream (flush==TRUE), the input is completely consumed - * when *source==sourceLimit and no error code is set. 
- * The converter object is then automatically reset by this function. - * (This means that a converter need not be reset explicitly between data - * streams if it finishes the previous stream without errors.) - * - * This is a <I>stateful</I> conversion. Additionally, even when all source data has - * been consumed, some data may be in the converters' internal state. - * Call this function repeatedly, updating the target pointers with - * the next empty chunk of target in case of a - * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers - * with the next chunk of source when a successful error status is - * returned, until there are no more chunks of source data. - * @param converter the Unicode converter - * @param target I/O parameter. Input : Points to the beginning of the buffer to copy - * codepage characters to. Output : points to after the last codepage character copied - * to <TT>target</TT>. - * @param targetLimit the pointer just after last of the <TT>target</TT> buffer - * @param source I/O parameter, pointer to pointer to the source Unicode character buffer. - * @param sourceLimit the pointer just after the last of the source buffer - * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number - * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer - * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT> - * For output data carried across calls, and other data without a specific source character - * (such as from escape sequences or callbacks) -1 will be placed for offsets. - * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available - * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned, - * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until - * the source buffer is consumed. 
- * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the - * converter is <TT>NULL</TT>. - * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is - * still data to be written to the target. - * @see ucnv_fromUChars - * @see ucnv_convert - * @see ucnv_getMinCharSize - * @see ucnv_setToUCallBack - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_fromUnicode (UConverter * converter, - char **target, - const char *targetLimit, - const UChar ** source, - const UChar * sourceLimit, - int32_t* offsets, - UBool flush, - UErrorCode * err); - -/** - * Converts a buffer of codepage bytes into an array of unicode UChars - * characters. This function is optimized for converting a continuous - * stream of data in buffer-sized chunks, where the entire source and - * target does not fit in available buffers. - * - * The source pointer is an in/out parameter. It starts out pointing where the - * conversion is to begin, and ends up pointing after the last byte of source consumed. - * - * Target similarly starts out pointer at the first available UChar in the output - * buffer, and ends up pointing after the last UChar written to the output. - * It does NOT necessarily keep UChar sequences together. - * - * The converter always attempts to consume the entire source buffer, unless - * (1.) the target buffer is full, or (2.) a failing error is returned from the - * current callback function. When a successful error status has been - * returned, it means that all of the source buffer has been - * consumed. At that point, the caller should reset the source and - * sourceLimit pointers to point to the next chunk. - * - * At the end of the stream (flush==TRUE), the input is completely consumed - * when *source==sourceLimit and no error code is set - * The converter object is then automatically reset by this function. 
- * (This means that a converter need not be reset explicitly between data - * streams if it finishes the previous stream without errors.) - * - * This is a <I>stateful</I> conversion. Additionally, even when all source data has - * been consumed, some data may be in the converters' internal state. - * Call this function repeatedly, updating the target pointers with - * the next empty chunk of target in case of a - * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers - * with the next chunk of source when a successful error status is - * returned, until there are no more chunks of source data. - * @param converter the Unicode converter - * @param target I/O parameter. Input : Points to the beginning of the buffer to copy - * UChars into. Output : points to after the last UChar copied. - * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer - * @param source I/O parameter, pointer to pointer to the source codepage buffer. - * @param sourceLimit the pointer to the byte after the end of the source buffer - * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number - * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer - * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT> - * For output data carried across calls, and other data without a specific source character - * (such as from escape sequences or callbacks) -1 will be placed for offsets. - * @param flush set to <TT>TRUE</TT> if the current source buffer is the last available - * chunk of the source, <TT>FALSE</TT> otherwise. Note that if a failing status is returned, - * this function may have to be called multiple times with flush set to <TT>TRUE</TT> until - * the source buffer is consumed. - * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the - * converter is <TT>NULL</TT>. 
- * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is - * still data to be written to the target. - * @see ucnv_fromUChars - * @see ucnv_convert - * @see ucnv_getMinCharSize - * @see ucnv_setFromUCallBack - * @see ucnv_getNextUChar - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_toUnicode(UConverter *converter, - UChar **target, - const UChar *targetLimit, - const char **source, - const char *sourceLimit, - int32_t *offsets, - UBool flush, - UErrorCode *err); - -/** - * Convert the Unicode string into a codepage string using an existing UConverter. - * The output string is NUL-terminated if possible. - * - * This function is a more convenient but less powerful version of ucnv_fromUnicode(). - * It is only useful for whole strings, not for streaming conversion. - * - * The maximum output buffer capacity required (barring output from callbacks) will be - * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)). - * - * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called) - * @param src the input Unicode string - * @param srcLength the input string length, or -1 if NUL-terminated - * @param dest destination string buffer, can be NULL if destCapacity==0 - * @param destCapacity the number of chars available at dest - * @param pErrorCode normal ICU error code; - * common error codes that may be set by this function include - * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, - * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors - * @return the length of the output string, not counting the terminating NUL; - * if the length is greater than destCapacity, then the string will not fit - * and a buffer of the indicated length would need to be passed in - * @see ucnv_fromUnicode - * @see ucnv_convert - * @see UCNV_GET_MAX_BYTES_FOR_STRING - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_fromUChars(UConverter *cnv, - char *dest, int32_t destCapacity, - const UChar *src, int32_t 
srcLength, - UErrorCode *pErrorCode); - -/** - * Convert the codepage string into a Unicode string using an existing UConverter. - * The output string is NUL-terminated if possible. - * - * This function is a more convenient but less powerful version of ucnv_toUnicode(). - * It is only useful for whole strings, not for streaming conversion. - * - * The maximum output buffer capacity required (barring output from callbacks) will be - * 2*srcLength (each char may be converted into a surrogate pair). - * - * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called) - * @param src the input codepage string - * @param srcLength the input string length, or -1 if NUL-terminated - * @param dest destination string buffer, can be NULL if destCapacity==0 - * @param destCapacity the number of UChars available at dest - * @param pErrorCode normal ICU error code; - * common error codes that may be set by this function include - * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING, - * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors - * @return the length of the output string, not counting the terminating NUL; - * if the length is greater than destCapacity, then the string will not fit - * and a buffer of the indicated length would need to be passed in - * @see ucnv_toUnicode - * @see ucnv_convert - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_toUChars(UConverter *cnv, - UChar *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a codepage buffer into Unicode one character at a time. - * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set. - * - * Advantage compared to ucnv_toUnicode() or ucnv_toUChars(): - * - Faster for small amounts of data, for most converters, e.g., - * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets. - * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants, - * it uses ucnv_toUnicode() internally.) 
- * - Convenient. - * - * Limitations compared to ucnv_toUnicode(): - * - Always assumes flush=TRUE. - * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion, - * that is, for where the input is supplied in multiple buffers, - * because ucnv_getNextUChar() will assume the end of the input at the end - * of the first buffer. - * - Does not provide offset output. - * - * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because - * ucnv_getNextUChar() uses the current state of the converter - * (unlike ucnv_toUChars() which always resets first). - * However, if ucnv_getNextUChar() is called after ucnv_toUnicode() - * stopped in the middle of a character sequence (with flush=FALSE), - * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode() - * internally until the next character boundary. - * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to - * start at a character boundary.) - * - * Instead of using ucnv_getNextUChar(), it is recommended - * to convert using ucnv_toUnicode() or ucnv_toUChars() - * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h) - * or a C++ CharacterIterator or similar. - * This allows streaming conversion and offset output, for example. - * - * <p>Handling of surrogate pairs and supplementary-plane code points:<br> - * There are two different kinds of codepages that provide mappings for surrogate characters: - * <ul> - * <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode - * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff. - * Each valid sequence will result in exactly one returned code point. - * If a sequence results in a single surrogate, then that will be returned - * by itself, even if a neighboring sequence encodes the matching surrogate.</li> - * <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points - * including surrogates. 
Code points in supplementary planes are represented with - * two sequences, each encoding a surrogate. - * For these codepages, matching pairs of surrogates will be combined into single - * code points for returning from this function. - * (Note that SCSU is actually a mix of these codepage types.)</li> - * </ul></p> - * - * @param converter an open UConverter - * @param source the address of a pointer to the codepage buffer, will be - * updated to point after the bytes consumed in the conversion call. - * @param sourceLimit points to the end of the input buffer - * @param err fills in error status (see ucnv_toUnicode) - * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input - * is empty or does not convert to any output (e.g.: pure state-change - * codes SI/SO, escape sequences for ISO 2022, - * or if the callback did not output anything, ...). - * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because - * the "buffer" is the return code. However, there might be subsequent output - * stored in the converter object - * that will be returned in following calls to this function. - * @return a UChar32 resulting from the partial conversion of source - * @see ucnv_toUnicode - * @see ucnv_toUChars - * @see ucnv_convert - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -ucnv_getNextUChar(UConverter * converter, - const char **source, - const char * sourceLimit, - UErrorCode * err); - -/** - * Convert from one external charset to another using two existing UConverters. - * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() - - * are used, "pivoting" through 16-bit Unicode. - * - * Important: For streaming conversion (multiple function calls for successive - * parts of a text stream), the caller must provide a pivot buffer explicitly, - * and must preserve the pivot buffer and associated pointers from one - * call to another. (The buffer may be moved if its contents and the relative - * pointer positions are preserved.) 
- * - * There is a similar function, ucnv_convert(), - * which has the following limitations: - * - it takes charset names, not converter objects, so that - * - two converters are opened for each call - * - only single-string conversion is possible, not streaming operation - * - it does not provide enough information to find out, - * in case of failure, whether the toUnicode or - * the fromUnicode conversion failed - * - * By contrast, ucnv_convertEx() - * - takes UConverter parameters instead of charset names - * - fully exposes the pivot buffer for streaming conversion and complete error handling - * - * ucnv_convertEx() also provides further convenience: - * - an option to reset the converters at the beginning - * (if reset==TRUE, see parameters; - * also sets *pivotTarget=*pivotSource=pivotStart) - * - allow NUL-terminated input - * (only a single NUL byte, will not work for charsets with multi-byte NULs) - * (if sourceLimit==NULL, see parameters) - * - terminate with a NUL on output - * (only a single NUL byte, not useful for charsets with multi-byte NULs), - * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills - * the target buffer - * - the pivot buffer can be provided internally; - * possible only for whole-string conversion, not streaming conversion; - * in this case, the caller will not be able to get details about where an - * error occurred - * (if pivotStart==NULL, see below) - * - * The function returns when one of the following is true: - * - the entire source text has been converted successfully to the target buffer - * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) - * - a conversion error occurred - * (other U_FAILURE(), see description of pErrorCode) - * - * Limitation compared to the direct use of - * ucnv_fromUnicode() and ucnv_toUnicode(): - * ucnv_convertEx() does not provide offset information. 
- * - * Limitation compared to ucnv_fromUChars() and ucnv_toUChars(): - * ucnv_convertEx() does not support preflighting directly. - * - * Sample code for converting a single string from - * one external charset to UTF-8, ignoring the location of errors: - * - * \code - * int32_t - * myToUTF8(UConverter *cnv, - * const char *s, int32_t length, - * char *u8, int32_t capacity, - * UErrorCode *pErrorCode) { - * UConverter *utf8Cnv; - * char *target; - * - * if(U_FAILURE(*pErrorCode)) { - * return 0; - * } - * - * utf8Cnv=myGetCachedUTF8Converter(pErrorCode); - * if(U_FAILURE(*pErrorCode)) { - * return 0; - * } - * - * if(length<0) { - * length=strlen(s); - * } - * target=u8; - * ucnv_convertEx(utf8Cnv, cnv, - * &target, u8+capacity, - * &s, s+length, - * NULL, NULL, NULL, NULL, - * TRUE, TRUE, - * pErrorCode); - * - * myReleaseCachedUTF8Converter(utf8Cnv); - * - * // return the output string length, but without preflighting - * return (int32_t)(target-u8); - * } - * \endcode - * - * @param targetCnv Output converter, used to convert from the UTF-16 pivot - * to the target using ucnv_fromUnicode(). - * @param sourceCnv Input converter, used to convert from the source to - * the UTF-16 pivot using ucnv_toUnicode(). - * @param target I/O parameter, same as for ucnv_fromUChars(). - * Input: *target points to the beginning of the target buffer. - * Output: *target points to the first unit after the last char written. - * @param targetLimit Pointer to the first unit after the target buffer. - * @param source I/O parameter, same as for ucnv_toUChars(). - * Input: *source points to the beginning of the source buffer. - * Output: *source points to the first unit after the last char read. - * @param sourceLimit Pointer to the first unit after the source buffer. - * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL, - * then an internal buffer is used and the other pivot - * arguments are ignored and can be NULL as well. 
- * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for - * conversion from the pivot buffer to the target buffer. - * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for - * conversion from the source buffer to the pivot buffer. - * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit - * and pivotStart<pivotLimit (unless pivotStart==NULL). - * @param pivotLimit Pointer to the first unit after the pivot buffer. - * @param reset If TRUE, then ucnv_resetToUnicode(sourceCnv) and - * ucnv_resetFromUnicode(targetCnv) are called, and the - * pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart). - * @param flush If true, indicates the end of the input. - * Passed directly to ucnv_toUnicode(), and carried over to - * ucnv_fromUnicode() when the source is empty as well. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * U_BUFFER_OVERFLOW_ERROR always refers to the target buffer - * because overflows into the pivot buffer are handled internally. - * Other conversion errors are from the source-to-pivot - * conversion if *pivotSource==pivotStart, otherwise from - * the pivot-to-target conversion. - * - * @see ucnv_convert - * @see ucnv_fromAlgorithmic - * @see ucnv_toAlgorithmic - * @see ucnv_fromUnicode - * @see ucnv_toUnicode - * @see ucnv_fromUChars - * @see ucnv_toUChars - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, - char **target, const char *targetLimit, - const char **source, const char *sourceLimit, - UChar *pivotStart, UChar **pivotSource, - UChar **pivotTarget, const UChar *pivotLimit, - UBool reset, UBool flush, - UErrorCode *pErrorCode); - -/** - * Convert from one external charset to another. 
- * Internally, two converters are opened according to the name arguments, - * then the text is converted to and from the 16-bit Unicode "pivot" - * using ucnv_convertEx(), then the converters are closed again. - * - * This is a convenience function, not an efficient way to convert a lot of text: - * ucnv_convert() - * - takes charset names, not converter objects, so that - * - two converters are opened for each call - * - only single-string conversion is possible, not streaming operation - * - does not provide enough information to find out, - * in case of failure, whether the toUnicode or - * the fromUnicode conversion failed - * - allows NUL-terminated input - * (only a single NUL byte, will not work for charsets with multi-byte NULs) - * (if sourceLength==-1, see parameters) - * - terminate with a NUL on output - * (only a single NUL byte, not useful for charsets with multi-byte NULs), - * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills - * the target buffer - * - a pivot buffer is provided internally - * - * The function returns when one of the following is true: - * - the entire source text has been converted successfully to the target buffer - * and either the target buffer is terminated with a single NUL byte - * or the error code is set to U_STRING_NOT_TERMINATED_WARNING - * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR) - * and the full output string length is returned ("preflighting") - * - a conversion error occurred - * (other U_FAILURE(), see description of pErrorCode) - * - * @param toConverterName The name of the converter that is used to convert - * from the UTF-16 pivot buffer to the target. - * @param fromConverterName The name of the converter that is used to convert - * from the source to the UTF-16 pivot buffer. - * @param target Pointer to the output buffer. - * @param targetCapacity Capacity of the target, in bytes. - * @param source Pointer to the input buffer. 
- * @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity - * and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see ucnv_convertEx - * @see ucnv_fromAlgorithmic - * @see ucnv_toAlgorithmic - * @see ucnv_fromUnicode - * @see ucnv_toUnicode - * @see ucnv_fromUChars - * @see ucnv_toUChars - * @see ucnv_getNextUChar - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_convert(const char *toConverterName, - const char *fromConverterName, - char *target, - int32_t targetCapacity, - const char *source, - int32_t sourceLength, - UErrorCode *pErrorCode); - -/** - * Convert from one external charset to another. - * Internally, the text is converted to and from the 16-bit Unicode "pivot" - * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert() - * except that the two converters need not be looked up and opened completely. - * - * The source-to-pivot conversion uses the cnv converter parameter. - * The pivot-to-target conversion uses a purely algorithmic converter - * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter. - * - * Internally, the algorithmic converter is opened and closed for each - * function call, which is more efficient than using the public ucnv_open() - * but somewhat less efficient than only resetting an existing converter - * and using ucnv_convertEx(). - * - * This function is more convenient than ucnv_convertEx() for single-string - * conversions, especially when "preflighting" is desired (returning the length - * of the complete output even if it does not fit into the target buffer; - * see the User Guide Strings chapter). See ucnv_convert() for details. 
- * - * @param algorithmicType UConverterType constant identifying the desired target - * charset as a purely algorithmic converter. - * Those are converters for Unicode charsets like - * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc., - * as well as US-ASCII and ISO-8859-1. - * @param cnv The converter that is used to convert - * from the source to the UTF-16 pivot buffer. - * @param target Pointer to the output buffer. - * @param targetCapacity Capacity of the target, in bytes. - * @param source Pointer to the input buffer. - * @param sourceLength Length of the input text, in bytes - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity - * and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see ucnv_fromAlgorithmic - * @see ucnv_convert - * @see ucnv_convertEx - * @see ucnv_fromUnicode - * @see ucnv_toUnicode - * @see ucnv_fromUChars - * @see ucnv_toUChars - * @stable ICU 2.6 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_toAlgorithmic(UConverterType algorithmicType, - UConverter *cnv, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode); - -/** - * Convert from one external charset to another. - * Internally, the text is converted to and from the 16-bit Unicode "pivot" - * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert() - * except that the two converters need not be looked up and opened completely. - * - * The source-to-pivot conversion uses a purely algorithmic converter - * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter. - * The pivot-to-target conversion uses the cnv converter parameter. 
- * - * Internally, the algorithmic converter is opened and closed for each - * function call, which is more efficient than using the public ucnv_open() - * but somewhat less efficient than only resetting an existing converter - * and using ucnv_convertEx(). - * - * This function is more convenient than ucnv_convertEx() for single-string - * conversions, especially when "preflighting" is desired (returning the length - * of the complete output even if it does not fit into the target buffer; - * see the User Guide Strings chapter). See ucnv_convert() for details. - * - * @param cnv The converter that is used to convert - * from the UTF-16 pivot buffer to the target. - * @param algorithmicType UConverterType constant identifying the desired source - * charset as a purely algorithmic converter. - * Those are converters for Unicode charsets like - * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc., - * as well as US-ASCII and ISO-8859-1. - * @param target Pointer to the output buffer. - * @param targetCapacity Capacity of the target, in bytes. - * @param source Pointer to the input buffer. - * @param sourceLength Length of the input text, in bytes - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity - * and a U_BUFFER_OVERFLOW_ERROR is set. - * - * @see ucnv_fromAlgorithmic - * @see ucnv_convert - * @see ucnv_convertEx - * @see ucnv_fromUnicode - * @see ucnv_toUnicode - * @see ucnv_fromUChars - * @see ucnv_toUChars - * @stable ICU 2.6 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_fromAlgorithmic(UConverter *cnv, - UConverterType algorithmicType, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode); - -/** - * Frees up memory occupied by unused, cached converter shared data. 
- * - * @return the number of cached converters successfully deleted - * @see ucnv_close - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_flushCache(void); - -/** - * Returns the number of available converters, as per the alias file. - * - * @return the number of available converters - * @see ucnv_getAvailableName - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_countAvailable(void); - -/** - * Gets the canonical converter name of the specified converter from a list of - * all available converters contained in the alias file. All converters - * in this list can be opened. - * - * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()]</TT>) - * @return a pointer to a string (library owned), or <TT>NULL</TT> if the index is out of bounds. - * @see ucnv_countAvailable - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -ucnv_getAvailableName(int32_t n); - -/** - * Returns a UEnumeration to enumerate all of the canonical converter - * names, as per the alias file, regardless of the ability to open each - * converter. - * - * @return A UEnumeration object for getting all the recognized canonical - * converter names. - * @see ucnv_getAvailableName - * @see uenum_close - * @see uenum_next - * @stable ICU 2.4 - */ -U_STABLE UEnumeration * U_EXPORT2 -ucnv_openAllNames(UErrorCode *pErrorCode); - -/** - * Gives the number of aliases for a given converter or alias name. - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * This method only enumerates the listed entries in the alias file. - * @param alias alias name - * @param pErrorCode error status - * @return number of names on alias list for given alias - * @stable ICU 2.0 - */ -U_STABLE uint16_t U_EXPORT2 -ucnv_countAliases(const char *alias, UErrorCode *pErrorCode); - -/** - * Gives the name of the alias at given index of alias list. 
- * This method only enumerates the listed entries in the alias file. - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * @param alias alias name - * @param n index in alias list - * @param pErrorCode result of operation - * @return returns the name of the alias at given index - * @see ucnv_countAliases - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode); - -/** - * Fill-up the list of alias names for the given alias. - * This method only enumerates the listed entries in the alias file. - * If the alias is ambiguous, then the preferred converter is used - * and the status is set to U_AMBIGUOUS_ALIAS_WARNING. - * @param alias alias name - * @param aliases fill-in list, aliases is a pointer to an array of - * <code>ucnv_countAliases()</code> string-pointers - * (<code>const char *</code>) that will be filled in. - * The strings themselves are owned by the library. - * @param pErrorCode result of operation - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode); - -/** - * Return a new UEnumeration object for enumerating all the - * alias names for a given converter that are recognized by a standard. - * This method only enumerates the listed entries in the alias file. - * The convrtrs.txt file can be modified to change the results of - * this function. - * The first result in this list is the same result given by - * <code>ucnv_getStandardName</code>, which is the default alias for - * the specified standard name. The returned object must be closed with - * <code>uenum_close</code> when you are done with the object. 
- * - * @param convName original converter name - * @param standard name of the standard governing the names; MIME and IANA - * are such standards - * @param pErrorCode The error code - * @return A UEnumeration object for getting all aliases that are recognized - * by a standard. If any of the parameters are invalid, NULL - * is returned. - * @see ucnv_getStandardName - * @see uenum_close - * @see uenum_next - * @stable ICU 2.2 - */ -U_STABLE UEnumeration * U_EXPORT2 -ucnv_openStandardNames(const char *convName, - const char *standard, - UErrorCode *pErrorCode); - -/** - * Gives the number of standards associated to converter names. - * @return number of standards - * @stable ICU 2.0 - */ -U_STABLE uint16_t U_EXPORT2 -ucnv_countStandards(void); - -/** - * Gives the name of the standard at given index of standard list. - * @param n index in standard list - * @param pErrorCode result of operation - * @return returns the name of the standard at given index. Owned by the library. - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode); - -/** - * Returns a standard name for a given converter name. - * <p> - * Example alias table:<br> - * conv alias1 { STANDARD1 } alias2 { STANDARD1* } - * <p> - * Result of ucnv_getStandardName("conv", "STANDARD1") from example - * alias table:<br> - * <b>"alias2"</b> - * - * @param name original converter name - * @param standard name of the standard governing the names; MIME and IANA - * are such standards - * @param pErrorCode result of operation - * @return returns the standard converter name; - * if a standard converter name cannot be determined, - * then <code>NULL</code> is returned. Owned by the library. - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode); - -/** - * This function will return the internal canonical converter name of the - * tagged alias. 
This is the opposite of ucnv_openStandardNames, which - * returns the tagged alias given the canonical name. - * <p> - * Example alias table:<br> - * conv alias1 { STANDARD1 } alias2 { STANDARD1* } - * <p> - * Result of ucnv_getCanonicalName("alias1", "STANDARD1") from example - * alias table:<br> - * <b>"conv"</b> - * - * @return returns the canonical converter name; - * if a standard or alias name cannot be determined, - * then <code>NULL</code> is returned. The returned string is - * owned by the library. - * @see ucnv_getStandardName - * @stable ICU 2.4 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode); - -/** - * Returns the current default converter name. If you want to open - * a default converter, you do not need to use this function. - * It is faster if you pass a NULL argument to ucnv_open to open the - * default converter. - * - * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function - * always returns "UTF-8". - * - * @return returns the current default converter name. - * Storage owned by the library - * @see ucnv_setDefaultName - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -ucnv_getDefaultName(void); - -#ifndef U_HIDE_SYSTEM_API -/** - * This function is not thread safe. DO NOT call this function when ANY ICU - * function is being used from more than one thread! This function sets the - * current default converter name. If this function needs to be called, it - * should be called during application initialization. Most of the time, the - * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument - * are sufficient for your application. - * - * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function - * does nothing. - * - * @param name the converter name to be the default (must be known by ICU). 
- * @see ucnv_getDefaultName - * @system - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_setDefaultName(const char *name); -#endif /* U_HIDE_SYSTEM_API */ - -/** - * Fixes the backslash character mismapping. For example, in SJIS, the backslash - * character in the ASCII portion is also used to represent the yen currency sign. - * When mapping from Unicode character 0x005C, it's unclear whether to map the - * character back to yen or backslash in SJIS. This function will take the input - * buffer and replace all the yen sign characters with backslash. This is necessary - * when the user tries to open a file with the input buffer on Windows. - * This function will test the converter to see whether such mapping is - * required. You can sometimes avoid using this function by using the correct version - * of Shift-JIS. - * - * @param cnv The converter representing the target codepage. - * @param source the input buffer to be fixed - * @param sourceLen the length of the input buffer - * @see ucnv_isAmbiguous - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen); - -/** - * Determines if the converter contains ambiguous mappings of the same - * character or not. - * @param cnv the converter to be tested - * @return TRUE if the converter contains ambiguous mapping of the same - * character, FALSE otherwise. - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -ucnv_isAmbiguous(const UConverter *cnv); - -/** - * Sets the converter to use fallback mappings or not. - * Regardless of this flag, the converter will always use - * fallbacks from Unicode Private Use code points, as well as - * reverse fallbacks (to Unicode). - * For details see ".ucm File Format" - * in the Conversion Data chapter of the ICU User Guide: - * http://www.icu-project.org/userguide/conversion-data.html#ucmformat - * - * @param cnv The converter to set the fallback mapping usage on. 
- * @param usesFallback TRUE if the user wants the converter to take advantage of the fallback - * mapping, FALSE otherwise. - * @stable ICU 2.0 - * @see ucnv_usesFallback - */ -U_STABLE void U_EXPORT2 -ucnv_setFallback(UConverter *cnv, UBool usesFallback); - -/** - * Determines if the converter uses fallback mappings or not. - * This flag has restrictions, see ucnv_setFallback(). - * - * @param cnv The converter to be tested - * @return TRUE if the converter uses fallback, FALSE otherwise. - * @stable ICU 2.0 - * @see ucnv_setFallback - */ -U_STABLE UBool U_EXPORT2 -ucnv_usesFallback(const UConverter *cnv); - -/** - * Detects Unicode signature byte sequences at the start of the byte stream - * and returns the charset name of the indicated Unicode charset. - * NULL is returned when no Unicode signature is recognized. - * The number of bytes in the signature is output as well. - * - * The caller can ucnv_open() a converter using the charset name. - * The first code unit (UChar) from the start of the stream will be U+FEFF - * (the Unicode BOM/signature character) and can usually be ignored. - * - * For most Unicode charsets it is also possible to ignore the indicated - * number of initial stream bytes and start converting after them. - * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which - * this will not work. Therefore, it is best to ignore the first output UChar - * instead of the input signature bytes. - * <p> - * Usage: - * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature - * - * @param source The source string in which the signature should be detected. - * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte. - * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature - * of the detected UTF. 0 if not detected. - * Can be a NULL pointer. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. 
- * @return The name of the encoding detected. NULL if encoding is not detected. - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -ucnv_detectUnicodeSignature(const char* source, - int32_t sourceLength, - int32_t *signatureLength, - UErrorCode *pErrorCode); - -/** - * Returns the number of UChars held in the converter's internal state - * because more input is needed for completing the conversion. This function is - * useful for mapping semantics of ICU's converter interface to those of iconv, - * and this information is not needed for normal conversion. - * @param cnv The converter in which the input is held - * @param status ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return The number of UChars in the state. -1 if an error is encountered. - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status); - -/** - * Returns the number of chars held in the converter's internal state - * because more input is needed for completing the conversion. This function is - * useful for mapping semantics of ICU's converter interface to those of iconv, - * and this information is not needed for normal conversion. - * @param cnv The converter in which the input is held as internal state - * @param status ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return The number of chars in the state. -1 if an error is encountered. - * @stable ICU 3.4 - */ -U_STABLE int32_t U_EXPORT2 -ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status); - -/** - * Returns whether or not the charset of the converter has a fixed number of bytes - * per charset character. - * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS. - * Another example is UTF-32 which is always 4 bytes per character. 
- * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit - * but a UTF-32 converter encodes each code point with 4 bytes. - * Note: This method is not intended to be used to determine whether the charset has a - * fixed ratio of bytes to Unicode code <i>units</i> for any particular Unicode encoding form. - * FALSE is returned with the UErrorCode if error occurs or cnv is NULL. - * @param cnv The converter to be tested - * @param status ICU error code in/out parameter - * @return TRUE if the converter is fixed-width - * @stable ICU 4.8 - */ -U_STABLE UBool U_EXPORT2 -ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status); - -#endif - -#endif -/*_UCNV*/ diff --git a/Source/WTF/icu/unicode/ucnv_err.h b/Source/WTF/icu/unicode/ucnv_err.h deleted file mode 100644 index e092e95f8..000000000 --- a/Source/WTF/icu/unicode/ucnv_err.h +++ /dev/null @@ -1,463 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 1999-2009, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** - * - * - * ucnv_err.h: - */ - -/** - * \file - * \brief C UConverter predefined error callbacks - * - * <h2>Error Behaviour Functions</h2> - * Defines some error behaviour functions called by ucnv_{from,to}Unicode - * These are provided as part of ICU and many are stable, but they - * can also be considered only as an example of what can be done with - * callbacks. You may of course write your own. - * - * If you want to write your own, you may also find the functions from - * ucnv_cb.h useful when writing your own callbacks. - * - * These functions, although public, should NEVER be called directly. - * They should be used as parameters to the ucnv_setFromUCallBack - * and ucnv_setToUCallBack functions, to set the behaviour of a converter - * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. 
- * - * usage example: 'STOP' doesn't need any context, but newContext - * could be set to something other than 'NULL' if needed. The available - * contexts in this header can modify the default behavior of the callback. - * - * \code - * UErrorCode err = U_ZERO_ERROR; - * UConverter *myConverter = ucnv_open("ibm-949", &err); - * const void *oldContext; - * UConverterFromUCallback oldAction; - * - * - * if (U_SUCCESS(err)) - * { - * ucnv_setFromUCallBack(myConverter, - * UCNV_FROM_U_CALLBACK_STOP, - * NULL, - * &oldAction, - * &oldContext, - * &err); - * } - * \endcode - * - * The code above tells "myConverter" to stop when it encounters - * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from - * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed, - * and ucnv_setToUCallBack would need to be called in order to change - * that behavior too. - * - * Here is an example with a context: - * - * \code - * UErrorCode err = U_ZERO_ERROR; - * UConverter *myConverter = ucnv_open("ibm-949", &err); - * const void *oldContext; - * UConverterToUCallback oldAction; - * - * - * if (U_SUCCESS(err)) - * { - * ucnv_setToUCallBack(myConverter, - * UCNV_TO_U_CALLBACK_SUBSTITUTE, - * UCNV_SUB_STOP_ON_ILLEGAL, - * &oldAction, - * &oldContext, - * &err); - * } - * \endcode - * - * The code above tells "myConverter" to stop when it encounters - * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from - * Codepage -> Unicode. Any unmapped and legal characters will be - * substituted to be the default substitution character. - */ - -#ifndef UCNV_ERR_H -#define UCNV_ERR_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -/** Forward declaring the UConverter structure. 
@stable ICU 2.0 */ -struct UConverter; - -/** @stable ICU 2.0 */ -typedef struct UConverter UConverter; - -/** - * FROM_U, TO_U context options for sub callback - * @stable ICU 2.0 - */ -#define UCNV_SUB_STOP_ON_ILLEGAL "i" - -/** - * FROM_U, TO_U context options for skip callback - * @stable ICU 2.0 - */ -#define UCNV_SKIP_STOP_ON_ILLEGAL "i" - -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_ICU NULL -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_JAVA "J" -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) - * TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\\xXXXX) - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_C "C" -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape \htmlonly(&#DDDD;)\endhtmlonly - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_XML_DEC "D" -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly - * TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape \htmlonly(&#xXXXX;)\endhtmlonly - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_XML_HEX "X" -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) - * @stable ICU 2.0 - */ -#define UCNV_ESCAPE_UNICODE "U" - -/** - * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is, - * a backslash, 1..6 hex digits, and a space) - * @stable ICU 4.0 - */ -#define UCNV_ESCAPE_CSS2 "S" - -/** - * The process condition code to be used with the 
callbacks. - * Codes which are greater than UCNV_IRREGULAR should be - * passed on to any chained callbacks. - * @stable ICU 2.0 - */ -typedef enum { - UCNV_UNASSIGNED = 0, /**< The code point is unassigned. - The error code U_INVALID_CHAR_FOUND will be set. */ - UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, - \\x81\\x2E is illegal in SJIS because \\x2E - is not a valid trail byte for the \\x81 - lead byte. - Also, starting with Unicode 3.0.1, non-shortest byte sequences - in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) - are also illegal, not just irregular. - The error code U_ILLEGAL_CHAR_FOUND will be set. */ - UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in - the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF - are irregular UTF-8 byte sequences for single surrogate - code points. - The error code U_INVALID_CHAR_FOUND will be set. */ - UCNV_RESET = 3, /**< The callback is called with this reason when a - 'reset' has occured. Callback should reset all - state. */ - UCNV_CLOSE = 4, /**< Called when the converter is closed. The - callback should release any allocated memory.*/ - UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the - converter. the pointer available as the - 'context' is an alias to the original converters' - context pointer. If the context must be owned - by the new converter, the callback must clone - the data and call ucnv_setFromUCallback - (or setToUCallback) with the correct pointer. - @stable ICU 2.2 - */ -} UConverterCallbackReason; - - -/** - * The structure for the fromUnicode callback function parameter. - * @stable ICU 2.0 - */ -typedef struct { - uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ - UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ - UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. 
@stable ICU 2.0 */ - const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ - const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ - char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ - const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ -} UConverterFromUnicodeArgs; - - -/** - * The structure for the toUnicode callback function parameter. - * @stable ICU 2.0 - */ -typedef struct { - uint16_t size; /**< The size of this struct @stable ICU 2.0 */ - UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ - UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ - const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */ - const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ - UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ - const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ - int32_t *offsets; /**< Pointer to the buffer that recieves the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ -} UConverterToUnicodeArgs; - - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. 
- * - * @param context Pointer to the callback's private data - * @param fromUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param err This should always be set to a failure status prior to calling. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err); - - - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * - * @param context Pointer to the callback's private data - * @param toUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param err This should always be set to a failure status prior to calling. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This From Unicode callback skips any ILLEGAL_SEQUENCE, or - * skips only UNASSINGED_SEQUENCE depending on the context parameter - * simply ignoring those characters. 
- * - * @param context The function currently recognizes the callback options: - * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * NULL: Skips any ILLEGAL_SEQUENCE - * @param fromUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or - * UNASSIGNED_SEQUENCE depending on context parameter, with the - * current substitution string for the converter. This is the default - * callback. - * - * @param context The function currently recognizes the callback options: - * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * NULL: Substitutes any ILLEGAL_SEQUENCE - * @param fromUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. 
- * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @see ucnv_setSubstChars - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the - * hexadecimal representation of the illegal codepoints - * - * @param context The function currently recognizes the callback options: - * <ul> - * <li>UCNV_ESCAPE_ICU: Substitues the ILLEGAL SEQUENCE with the hexadecimal - * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE"). - * In the Event the converter doesn't support the characters {%,U}[A-F][0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * %UD84D%UDC56</li> - * <li>UCNV_ESCAPE_JAVA: Substitues the ILLEGAL SEQUENCE with the hexadecimal - * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). - * In the Event the converter doesn't support the characters {\,u}[A-F][0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * \\uD84D\\uDC56</li> - * <li>UCNV_ESCAPE_C: Substitues the ILLEGAL SEQUENCE with the hexadecimal - * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE"). - * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9], - * it will substitute the illegal sequence with the substitution characters. 
- * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * \\U00023456</li> - * <li>UCNV_ESCAPE_XML_DEC: Substitues the ILLEGAL SEQUENCE with the decimal - * representation in the format \htmlonly&#DDDDDDDD;, e.g. "&#65534;&#172;&#51454;")\endhtmlonly. - * In the Event the converter doesn't support the characters {&,#}[0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * &#144470; and Zero padding is ignored.</li> - * <li>UCNV_ESCAPE_XML_HEX:Substitues the ILLEGAL SEQUENCE with the decimal - * representation in the format \htmlonly&#xXXXX; e.g. "&#xFFFE;&#x00AC;&#xC8FE;")\endhtmlonly. - * In the Event the converter doesn't support the characters {&,#,x}[0-9], - * it will substitute the illegal sequence with the substitution characters. - * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as - * \htmlonly&#x23456;\endhtmlonly</li> - * </ul> - * @param fromUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint. - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( - const void *context, - UConverterFromUnicodeArgs *fromUArgs, - const UChar* codeUnits, - int32_t length, - UChar32 codePoint, - UConverterCallbackReason reason, - UErrorCode * err); - - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! 
- * This To Unicode callback skips any ILLEGAL_SEQUENCE, or - * skips only UNASSINGED_SEQUENCE depending on the context parameter - * simply ignoring those characters. - * - * @param context The function currently recognizes the callback options: - * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * NULL: Skips any ILLEGAL_SEQUENCE - * @param toUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or - * UNASSIGNED_SEQUENCE depending on context parameter, with the - * Unicode substitution character, U+FFFD. - * - * @param context The function currently recognizes the callback options: - * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, - * returning the error code back to the caller immediately. - * NULL: Substitutes any ILLEGAL_SEQUENCE - * @param toUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. 
- * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err); - -/** - * DO NOT CALL THIS FUNCTION DIRECTLY! - * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the - * hexadecimal representation of the illegal bytes - * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). - * - * @param context This function currently recognizes the callback options: - * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, - * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. - * @param toUArgs Information about the conversion in progress - * @param codeUnits Points to 'length' bytes of the concerned codepage sequence - * @param length Size (in bytes) of the concerned codepage sequence - * @param reason Defines the reason the callback was invoked - * @param err Return value will be set to success if the callback was handled, - * otherwise this value will be set to a failure status. - * @stable ICU 2.0 - */ - -U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( - const void *context, - UConverterToUnicodeArgs *toUArgs, - const char* codeUnits, - int32_t length, - UConverterCallbackReason reason, - UErrorCode * err); - -#endif - -#endif - -/*UCNV_ERR_H*/ diff --git a/Source/WTF/icu/unicode/ucol.h b/Source/WTF/icu/unicode/ucol.h deleted file mode 100644 index 5a459b52a..000000000 --- a/Source/WTF/icu/unicode/ucol.h +++ /dev/null @@ -1,1433 +0,0 @@ -/* -******************************************************************************* -* Copyright (c) 1996-2013, International Business Machines Corporation and others. -* All Rights Reserved. 
-******************************************************************************* -*/ - -#ifndef UCOL_H -#define UCOL_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_COLLATION - -#include "unicode/unorm.h" -#include "unicode/localpointer.h" -#include "unicode/parseerr.h" -#include "unicode/uloc.h" -#include "unicode/uset.h" -#include "unicode/uscript.h" - -/** - * \file - * \brief C API: Collator - * - * <h2> Collator C API </h2> - * - * The C API for Collator performs locale-sensitive - * string comparison. You use this service to build - * searching and sorting routines for natural language text. - * <em>Important: </em>The ICU collation service has been reimplemented - * in order to achieve better performance and UCA compliance. - * For details, see the - * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> - * collation design document</a>. - * <p> - * For more information about the collation service see - * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. - * <p> - * Collation service provides correct sorting orders for most locales supported in ICU. - * If specific data for a locale is not available, the orders eventually falls back - * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. - * <p> - * Sort ordering may be customized by providing your own set of rules. For more on - * this subject see the - * <a href="http://icu-project.org/userguide/Collate_Customization.html"> - * Collation customization</a> section of the users guide. - * <p> - * @see UCollationResult - * @see UNormalizationMode - * @see UCollationStrength - * @see UCollationElements - */ - -/** A collator. -* For usage in C programs. 
-*/ -struct UCollator; -/** structure representing a collator object instance - * @stable ICU 2.0 - */ -typedef struct UCollator UCollator; - - -/** - * UCOL_LESS is returned if source string is compared to be less than target - * string in the ucol_strcoll() method. - * UCOL_EQUAL is returned if source string is compared to be equal to target - * string in the ucol_strcoll() method. - * UCOL_GREATER is returned if source string is compared to be greater than - * target string in the ucol_strcoll() method. - * @see ucol_strcoll() - * <p> - * Possible values for a comparison result - * @stable ICU 2.0 - */ -typedef enum { - /** string a == string b */ - UCOL_EQUAL = 0, - /** string a > string b */ - UCOL_GREATER = 1, - /** string a < string b */ - UCOL_LESS = -1 -} UCollationResult ; - - -/** Enum containing attribute values for controling collation behavior. - * Here are all the allowable values. Not every attribute can take every value. The only - * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined - * value for that locale - * @stable ICU 2.0 - */ -typedef enum { - /** accepted by most attributes */ - UCOL_DEFAULT = -1, - - /** Primary collation strength */ - UCOL_PRIMARY = 0, - /** Secondary collation strength */ - UCOL_SECONDARY = 1, - /** Tertiary collation strength */ - UCOL_TERTIARY = 2, - /** Default collation strength */ - UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, - UCOL_CE_STRENGTH_LIMIT, - /** Quaternary collation strength */ - UCOL_QUATERNARY=3, - /** Identical collation strength */ - UCOL_IDENTICAL=15, - UCOL_STRENGTH_LIMIT, - - /** Turn the feature off - works for UCOL_FRENCH_COLLATION, - UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE - & UCOL_DECOMPOSITION_MODE*/ - UCOL_OFF = 16, - /** Turn the feature on - works for UCOL_FRENCH_COLLATION, - UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE - & UCOL_DECOMPOSITION_MODE*/ - UCOL_ON = 17, - - /** Valid for UCOL_ALTERNATE_HANDLING. 
Alternate handling will be shifted */ - UCOL_SHIFTED = 20, - /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */ - UCOL_NON_IGNORABLE = 21, - - /** Valid for UCOL_CASE_FIRST - - lower case sorts before upper case */ - UCOL_LOWER_FIRST = 24, - /** upper case sorts before lower case */ - UCOL_UPPER_FIRST = 25, - - UCOL_ATTRIBUTE_VALUE_COUNT - -} UColAttributeValue; - -/** - * Enum containing the codes for reordering segments of the collation table that are not script - * codes. These reordering codes are to be used in conjunction with the script codes. - * @see ucol_getReorderCodes - * @see ucol_setReorderCodes - * @see ucol_getEquivalentReorderCodes - * @see UScriptCode - * @stable ICU 4.8 - */ - typedef enum { - /** - * A special reordering code that is used to specify the default - * reordering codes for a locale. - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_DEFAULT = -1, - /** - * A special reordering code that is used to specify no reordering codes. - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, - /** - * A special reordering code that is used to specify all other codes used for - * reordering except for the codes lised as UColReorderCode values and those - * listed explicitly in a reordering. - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, - /** - * Characters with the space property. - * This is equivalent to the rule value "space". - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_SPACE = 0x1000, - /** - * The first entry in the enumeration of reordering groups. This is intended for use in - * range checking and enumeration of the reorder codes. - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, - /** - * Characters with the punctuation property. - * This is equivalent to the rule value "punct". - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_PUNCTUATION = 0x1001, - /** - * Characters with the symbol property. - * This is equivalent to the rule value "symbol". 
- * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_SYMBOL = 0x1002, - /** - * Characters with the currency property. - * This is equivalent to the rule value "currency". - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_CURRENCY = 0x1003, - /** - * Characters with the digit property. - * This is equivalent to the rule value "digit". - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_DIGIT = 0x1004, - /** - * The limit of the reorder codes. This is intended for use in range checking - * and enumeration of the reorder codes. - * @stable ICU 4.8 - */ - UCOL_REORDER_CODE_LIMIT = 0x1005 -} UColReorderCode; - -/** - * Base letter represents a primary difference. Set comparison - * level to UCOL_PRIMARY to ignore secondary and tertiary differences. - * Use this to set the strength of a Collator object. - * Example of primary difference, "abc" < "abd" - * - * Diacritical differences on the same base letter represent a secondary - * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary - * differences. Use this to set the strength of a Collator object. - * Example of secondary difference, "ä" >> "a". - * - * Uppercase and lowercase versions of the same character represents a - * tertiary difference. Set comparison level to UCOL_TERTIARY to include - * all comparison differences. Use this to set the strength of a Collator - * object. - * Example of tertiary difference, "abc" <<< "ABC". - * - * Two characters are considered "identical" when they have the same - * unicode spellings. UCOL_IDENTICAL. - * For example, "ä" == "ä". - * - * UCollationStrength is also used to determine the strength of sort keys - * generated from UCollator objects - * These values can be now found in the UColAttributeValue enum. - * @stable ICU 2.0 - **/ -typedef UColAttributeValue UCollationStrength; - -/** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT - * value, as well as the values specific to each one. 
- * @stable ICU 2.0 - */ -typedef enum { - /** Attribute for direction of secondary weights - used in Canadian French. - * Acceptable values are UCOL_ON, which results in secondary weights - * being considered backwards and UCOL_OFF which treats secondary - * weights in the order they appear. - * @stable ICU 2.0 - */ - UCOL_FRENCH_COLLATION, - /** Attribute for handling variable elements. - * Acceptable values are UCOL_NON_IGNORABLE (default) - * which treats all the codepoints with non-ignorable - * primary weights in the same way, - * and UCOL_SHIFTED which causes codepoints with primary - * weights that are equal or below the variable top value - * to be ignored on primary level and moved to the quaternary - * level. - * @stable ICU 2.0 - */ - UCOL_ALTERNATE_HANDLING, - /** Controls the ordering of upper and lower case letters. - * Acceptable values are UCOL_OFF (default), which orders - * upper and lower case letters in accordance to their tertiary - * weights, UCOL_UPPER_FIRST which forces upper case letters to - * sort before lower case letters, and UCOL_LOWER_FIRST which does - * the opposite. - * @stable ICU 2.0 - */ - UCOL_CASE_FIRST, - /** Controls whether an extra case level (positioned before the third - * level) is generated or not. Acceptable values are UCOL_OFF (default), - * when case level is not generated, and UCOL_ON which causes the case - * level to be generated. Contents of the case level are affected by - * the value of UCOL_CASE_FIRST attribute. A simple way to ignore - * accent differences in a string is to set the strength to UCOL_PRIMARY - * and enable case level. - * @stable ICU 2.0 - */ - UCOL_CASE_LEVEL, - /** Controls whether the normalization check and necessary normalizations - * are performed. When set to UCOL_OFF (default) no normalization check - * is performed. The correctness of the result is guaranteed only if the - * input data is in so-called FCD form (see users manual for more info). 
- * When set to UCOL_ON, an incremental check is performed to see whether - * the input data is in the FCD form. If the data is not in the FCD form, - * incremental NFD normalization is performed. - * @stable ICU 2.0 - */ - UCOL_NORMALIZATION_MODE, - /** An alias for UCOL_NORMALIZATION_MODE attribute. - * @stable ICU 2.0 - */ - UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, - /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, - * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength - * for most locales (except Japanese) is tertiary. Quaternary strength - * is useful when combined with shifted setting for alternate handling - * attribute and for JIS x 4061 collation, when it is used to distinguish - * between Katakana and Hiragana (this is achieved by setting the - * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level - * is affected only by the number of non ignorable code points in - * the string. Identical strength is rarely useful, as it amounts - * to codepoints of the NFD form of the string. - * @stable ICU 2.0 - */ - UCOL_STRENGTH, -#ifndef U_HIDE_DEPRECATED_API - /** When turned on, this attribute positions Hiragana before all - * non-ignorables on quaternary level This is a sneaky way to produce JIS - * sort order. - * - * This attribute is an implementation detail of the CLDR Japanese tailoring. - * The implementation might change to use a different mechanism - * to achieve the same Japanese sort order. - * Since ICU 50, this attribute is not settable any more via API functions. - * @deprecated ICU 50 Implementation detail, cannot be set via API, might be removed from implementation. - */ - UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, -#endif /* U_HIDE_DEPRECATED_API */ - /** When turned on, this attribute generates a collation key - * for the numeric value of substrings of digits. - * This is a way to get '100' to sort AFTER '2'. 
Note that the longest - * digit substring that can be treated as a single collation element is - * 254 digits (not counting leading zeros). If a digit substring is - * longer than that, the digits beyond the limit will be treated as a - * separate digit substring associated with a separate collation element. - * @stable ICU 2.8 - */ - UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, - /** - * The number of UColAttribute constants. - * @stable ICU 2.0 - */ - UCOL_ATTRIBUTE_COUNT -} UColAttribute; - -/** Options for retrieving the rule string - * @stable ICU 2.0 - */ -typedef enum { - /** - * Retrieves the tailoring rules only. - * Same as calling the version of getRules() without UColRuleOption. - * @stable ICU 2.0 - */ - UCOL_TAILORING_ONLY, - /** - * Retrieves the "UCA rules" concatenated with the tailoring rules. - * The "UCA rules" are an <i>approximation</i> of the root collator's sort order. - * They are almost never used or useful at runtime and can be removed from the data. - * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales - * @stable ICU 2.0 - */ - UCOL_FULL_RULES -} UColRuleOption ; - -/** - * Open a UCollator for comparing strings. - * The UCollator pointer is used in all the calls to the Collation - * service. After finished, collator must be disposed of by calling - * {@link #ucol_close }. - * @param loc The locale containing the required collation rules. - * Special values for locales can be passed in - - * if NULL is passed for the locale, the default locale - * collation rules will be used. If empty string ("") or - * "root" are passed, UCA rules will be used. - * @param status A pointer to an UErrorCode to receive any errors - * @return A pointer to a UCollator, or 0 if an error occurred. 
- * @see ucol_openRules - * @see ucol_safeClone - * @see ucol_close - * @stable ICU 2.0 - */ -U_STABLE UCollator* U_EXPORT2 -ucol_open(const char *loc, UErrorCode *status); - -/** - * Produce an UCollator instance according to the rules supplied. - * The rules are used to change the default ordering, defined in the - * UCA in a process called tailoring. The resulting UCollator pointer - * can be used in the same way as the one obtained by {@link #ucol_strcoll }. - * @param rules A string describing the collation rules. For the syntax - * of the rules please see users guide. - * @param rulesLength The length of rules, or -1 if null-terminated. - * @param normalizationMode The normalization mode: One of - * UCOL_OFF (expect the text to not need normalization), - * UCOL_ON (normalize), or - * UCOL_DEFAULT (set the mode according to the rules) - * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, - * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules. - * @param parseError A pointer to UParseError to recieve information about errors - * occurred during parsing. This argument can currently be set - * to NULL, but at users own risk. Please provide a real structure. - * @param status A pointer to an UErrorCode to receive any errors - * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case - * of error - please use status argument to check for errors. - * @see ucol_open - * @see ucol_safeClone - * @see ucol_close - * @stable ICU 2.0 - */ -U_STABLE UCollator* U_EXPORT2 -ucol_openRules( const UChar *rules, - int32_t rulesLength, - UColAttributeValue normalizationMode, - UCollationStrength strength, - UParseError *parseError, - UErrorCode *status); - -/** - * Open a collator defined by a short form string. 
- * The structure and the syntax of the string is defined in the "Naming collators" - * section of the users guide: - * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators - * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final - * strength will be 3. 3066bis locale overrides individual locale parts. - * The call to this function is equivalent to a call to ucol_open, followed by a - * series of calls to ucol_setAttribute and ucol_setVariableTop. - * @param definition A short string containing a locale and a set of attributes. - * Attributes not explicitly mentioned are left at the default - * state for a locale. - * @param parseError if not NULL, structure that will get filled with error's pre - * and post context in case of error. - * @param forceDefaults if FALSE, the settings that are the same as the collator - * default settings will not be applied (for example, setting - * French secondary on a French collator would not be executed). - * If TRUE, all the settings will be applied regardless of the - * collator default value. If the definition - * strings are to be cached, should be set to FALSE. - * @param status Error code. Apart from regular error conditions connected to - * instantiating collators (like out of memory or similar), this - * API will return an error if an invalid attribute or attribute/value - * combination is specified. - * @return A pointer to a UCollator or 0 if an error occured (including an - * invalid attribute). - * @see ucol_open - * @see ucol_setAttribute - * @see ucol_setVariableTop - * @see ucol_getShortDefinitionString - * @see ucol_normalizeShortDefinitionString - * @stable ICU 3.0 - * - */ -U_STABLE UCollator* U_EXPORT2 -ucol_openFromShortString( const char *definition, - UBool forceDefaults, - UParseError *parseError, - UErrorCode *status); - -#ifndef U_HIDE_DEPRECATED_API -/** - * Get a set containing the contractions defined by the collator. 
The set includes - * both the UCA contractions and the contractions defined by the collator. This set - * will contain only strings. If a tailoring explicitly suppresses contractions from - * the UCA (like Russian), removed contractions will not be in the resulting set. - * @param coll collator - * @param conts the set to hold the result. It gets emptied before - * contractions are added. - * @param status to hold the error code - * @return the size of the contraction set - * - * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead - */ -U_DEPRECATED int32_t U_EXPORT2 -ucol_getContractions( const UCollator *coll, - USet *conts, - UErrorCode *status); -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Get a set containing the expansions defined by the collator. The set includes - * both the UCA expansions and the expansions defined by the tailoring - * @param coll collator - * @param contractions if not NULL, the set to hold the contractions - * @param expansions if not NULL, the set to hold the expansions - * @param addPrefixes add the prefix contextual elements to contractions - * @param status to hold the error code - * - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -ucol_getContractionsAndExpansions( const UCollator *coll, - USet *contractions, USet *expansions, - UBool addPrefixes, UErrorCode *status); - -/** - * Close a UCollator. - * Once closed, a UCollator should not be used. Every open collator should - * be closed. Otherwise, a memory leak will result. - * @param coll The UCollator to close. - * @see ucol_open - * @see ucol_openRules - * @see ucol_safeClone - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucol_close(UCollator *coll); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUCollatorPointer - * "Smart pointer" class, closes a UCollator via ucol_close(). - * For most methods see the LocalPointerBase base class. 
- * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); - -U_NAMESPACE_END - -#endif - -/** - * Compare two strings. - * The strings will be compared using the options already specified. - * @param coll The UCollator containing the comparison rules. - * @param source The source string. - * @param sourceLength The length of source, or -1 if null-terminated. - * @param target The target string. - * @param targetLength The length of target, or -1 if null-terminated. - * @return The result of comparing the strings; one of UCOL_EQUAL, - * UCOL_GREATER, UCOL_LESS - * @see ucol_greater - * @see ucol_greaterOrEqual - * @see ucol_equal - * @stable ICU 2.0 - */ -U_STABLE UCollationResult U_EXPORT2 -ucol_strcoll( const UCollator *coll, - const UChar *source, - int32_t sourceLength, - const UChar *target, - int32_t targetLength); - -/** -* Compare two strings in UTF-8. -* The strings will be compared using the options already specified. -* Note: When input string contains a malformed UTF-8 byte sequence, -* this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). -* @param coll The UCollator containing the comparison rules. -* @param source The source UTF-8 string. -* @param sourceLength The length of source, or -1 if null-terminated. -* @param target The target UTF-8 string. -* @param targetLength The length of target, or -1 if null-terminated. -* @param status A pointer to an UErrorCode to receive any errors -* @return The result of comparing the strings; one of UCOL_EQUAL, -* UCOL_GREATER, UCOL_LESS -* @see ucol_greater -* @see ucol_greaterOrEqual -* @see ucol_equal -* @stable ICU 50 -*/ -U_STABLE UCollationResult U_EXPORT2 -ucol_strcollUTF8( - const UCollator *coll, - const char *source, - int32_t sourceLength, - const char *target, - int32_t targetLength, - UErrorCode *status); - -/** - * Determine if one string is greater than another. 
- * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER - * @param coll The UCollator containing the comparison rules. - * @param source The source string. - * @param sourceLength The length of source, or -1 if null-terminated. - * @param target The target string. - * @param targetLength The length of target, or -1 if null-terminated. - * @return TRUE if source is greater than target, FALSE otherwise. - * @see ucol_strcoll - * @see ucol_greaterOrEqual - * @see ucol_equal - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -ucol_greater(const UCollator *coll, - const UChar *source, int32_t sourceLength, - const UChar *target, int32_t targetLength); - -/** - * Determine if one string is greater than or equal to another. - * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS - * @param coll The UCollator containing the comparison rules. - * @param source The source string. - * @param sourceLength The length of source, or -1 if null-terminated. - * @param target The target string. - * @param targetLength The length of target, or -1 if null-terminated. - * @return TRUE if source is greater than or equal to target, FALSE otherwise. - * @see ucol_strcoll - * @see ucol_greater - * @see ucol_equal - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -ucol_greaterOrEqual(const UCollator *coll, - const UChar *source, int32_t sourceLength, - const UChar *target, int32_t targetLength); - -/** - * Compare two strings for equality. - * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL - * @param coll The UCollator containing the comparison rules. - * @param source The source string. - * @param sourceLength The length of source, or -1 if null-terminated. - * @param target The target string. - * @param targetLength The length of target, or -1 if null-terminated. 
- * @return TRUE if source is equal to target, FALSE otherwise - * @see ucol_strcoll - * @see ucol_greater - * @see ucol_greaterOrEqual - * @stable ICU 2.0 - */ -U_STABLE UBool U_EXPORT2 -ucol_equal(const UCollator *coll, - const UChar *source, int32_t sourceLength, - const UChar *target, int32_t targetLength); - -/** - * Compare two UTF-8 encoded strings. - * The strings will be compared using the options already specified. - * @param coll The UCollator containing the comparison rules. - * @param sIter The source string iterator. - * @param tIter The target string iterator. - * @return The result of comparing the strings; one of UCOL_EQUAL, - * UCOL_GREATER, UCOL_LESS - * @param status A pointer to an UErrorCode to receive any errors - * @see ucol_strcoll - * @stable ICU 2.6 - */ -U_STABLE UCollationResult U_EXPORT2 -ucol_strcollIter( const UCollator *coll, - UCharIterator *sIter, - UCharIterator *tIter, - UErrorCode *status); - -/** - * Get the collation strength used in a UCollator. - * The strength influences how strings are compared. - * @param coll The UCollator to query. - * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, - * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL - * @see ucol_setStrength - * @stable ICU 2.0 - */ -U_STABLE UCollationStrength U_EXPORT2 -ucol_getStrength(const UCollator *coll); - -/** - * Set the collation strength used in a UCollator. - * The strength influences how strings are compared. - * @param coll The UCollator to set. - * @param strength The desired collation strength; one of UCOL_PRIMARY, - * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT - * @see ucol_getStrength - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucol_setStrength(UCollator *coll, - UCollationStrength strength); - -/** - * Retrieves the reordering codes for this collator. - * These reordering codes are a combination of UScript codes and UColReorderCode entries. - * @param coll The UCollator to query. 
- * @param dest The array to fill with the script ordering. - * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function - * will only return the length of the result without writing any of the result string (pre-flighting). - * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a - * failure before the function call. - * @return The number of reordering codes written to the dest array. - * @see ucol_setReorderCodes - * @see ucol_getEquivalentReorderCodes - * @see UScriptCode - * @see UColReorderCode - * @stable ICU 4.8 - */ -U_STABLE int32_t U_EXPORT2 -ucol_getReorderCodes(const UCollator* coll, - int32_t* dest, - int32_t destCapacity, - UErrorCode *pErrorCode); -/** - * Sets the reordering codes for this collator. - * Collation reordering allows scripts and some other defined blocks of characters - * to be moved relative to each other as a block. This reordering is done on top of - * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed - * at the start and/or the end of the collation order. These groups are specified using - * UScript codes and UColReorderCode entries. - * <p>By default, reordering codes specified for the start of the order are placed in the - * order given after a group of "special" non-script blocks. These special groups of characters - * are space, punctuation, symbol, currency, and digit. These special groups are represented with - * UColReorderCode entries. Script groups can be intermingled with - * these special non-script blocks if those special blocks are explicitly specified in the reordering. - * <p>The special code OTHERS stands for any script that is not explicitly - * mentioned in the list of reordering codes given. Anything that is after OTHERS - * will go at the very end of the reordering in the order given. - * <p>The special reorder code DEFAULT will reset the reordering for this collator - * to the default for this collator. 
The default reordering may be the DUCET/CLDR order or may be a reordering that - * was specified when this collator was created from resource data or from rules. The - * DEFAULT code <b>must</b> be the sole code supplied when it is used. If not - * that will result in an U_ILLEGAL_ARGUMENT_ERROR being set. - * <p>The special reorder code NONE will remove any reordering for this collator. - * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The - * NONE code <b>must</b> be the sole code supplied when it is used. - * @param coll The UCollator to set. - * @param reorderCodes An array of script codes in the new order. This can be NULL if the - * length is also set to 0. An empty array will clear any reordering codes on the collator. - * @param reorderCodesLength The length of reorderCodes. - * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a - * failure before the function call. - * @see ucol_getReorderCodes - * @see ucol_getEquivalentReorderCodes - * @see UScriptCode - * @see UColReorderCode - * @stable ICU 4.8 - */ -U_STABLE void U_EXPORT2 -ucol_setReorderCodes(UCollator* coll, - const int32_t* reorderCodes, - int32_t reorderCodesLength, - UErrorCode *pErrorCode); - -/** - * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder - * codes will be grouped and must reorder together. - * @param reorderCode The reorder code to determine equivalence for. - * @param dest The array to fill with the script ordering. - * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function - * will only return the length of the result without writing any of the result string (pre-flighting). - * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate - * a failure before the function call. - * @return The number of reordering codes written to the dest array. 
- * @see ucol_setReorderCodes - * @see ucol_getReorderCodes - * @see UScriptCode - * @see UColReorderCode - * @stable ICU 4.8 - */ -U_STABLE int32_t U_EXPORT2 -ucol_getEquivalentReorderCodes(int32_t reorderCode, - int32_t* dest, - int32_t destCapacity, - UErrorCode *pErrorCode); - -/** - * Get the display name for a UCollator. - * The display name is suitable for presentation to a user. - * @param objLoc The locale of the collator in question. - * @param dispLoc The locale for display. - * @param result A pointer to a buffer to receive the attribute. - * @param resultLength The maximum size of result. - * @param status A pointer to an UErrorCode to receive any errors - * @return The total buffer size needed; if greater than resultLength, - * the output was truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucol_getDisplayName( const char *objLoc, - const char *dispLoc, - UChar *result, - int32_t resultLength, - UErrorCode *status); - -/** - * Get a locale for which collation rules are available. - * A UCollator in a locale returned by this function will perform the correct - * collation for the locale. - * @param localeIndex The index of the desired locale. - * @return A locale for which collation rules are available, or 0 if none. - * @see ucol_countAvailable - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -ucol_getAvailable(int32_t localeIndex); - -/** - * Determine how many locales have collation rules available. - * This function is most useful as determining the loop ending condition for - * calls to {@link #ucol_getAvailable }. - * @return The number of locales for which collation rules are available. - * @see ucol_getAvailable - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucol_countAvailable(void); - -#if !UCONFIG_NO_SERVICE -/** - * Create a string enumerator of all locales for which a valid - * collator may be opened. - * @param status input-output error code - * @return a string enumeration over locale strings. 
The caller is - * responsible for closing the result. - * @stable ICU 3.0 - */ -U_STABLE UEnumeration* U_EXPORT2 -ucol_openAvailableLocales(UErrorCode *status); -#endif - -/** - * Create a string enumerator of all possible keywords that are relevant to - * collation. At this point, the only recognized keyword for this - * service is "collation". - * @param status input-output error code - * @return a string enumeration over locale strings. The caller is - * responsible for closing the result. - * @stable ICU 3.0 - */ -U_STABLE UEnumeration* U_EXPORT2 -ucol_getKeywords(UErrorCode *status); - -/** - * Given a keyword, create a string enumeration of all values - * for that keyword that are currently in use. - * @param keyword a particular keyword as enumerated by - * ucol_getKeywords. If any other keyword is passed in, *status is set - * to U_ILLEGAL_ARGUMENT_ERROR. - * @param status input-output error code - * @return a string enumeration over collation keyword values, or NULL - * upon error. The caller is responsible for closing the result. - * @stable ICU 3.0 - */ -U_STABLE UEnumeration* U_EXPORT2 -ucol_getKeywordValues(const char *keyword, UErrorCode *status); - -/** - * Given a key and a locale, returns an array of string values in a preferred - * order that would make a difference. These are all and only those values where - * the open (creation) of the service with the locale formed from the input locale - * plus input keyword and that value has different behavior than creation with the - * input locale alone. - * @param key one of the keys supported by this service. For now, only - * "collation" is supported. - * @param locale the locale - * @param commonlyUsed if set to true it will return only commonly used values - * with the given locale in preferred order. Otherwise, - * it will return all the available values for the locale. - * @param status error status - * @return a string enumeration over keyword values for the given key and the locale. 
- * @stable ICU 4.2 - */ -U_STABLE UEnumeration* U_EXPORT2 -ucol_getKeywordValuesForLocale(const char* key, - const char* locale, - UBool commonlyUsed, - UErrorCode* status); - -/** - * Return the functionally equivalent locale for the given - * requested locale, with respect to given keyword, for the - * collation service. If two locales return the same result, then - * collators instantiated for these locales will behave - * equivalently. The converse is not always true; two collators - * may in fact be equivalent, but return different results, due to - * internal details. The return result has no other meaning than - * that stated above, and implies nothing as to the relationship - * between the two locales. This is intended for use by - * applications who wish to cache collators, or otherwise reuse - * collators when possible. The functional equivalent may change - * over time. For more information, please see the <a - * href="http://icu-project.org/userguide/locale.html#services"> - * Locales and Services</a> section of the ICU User Guide. - * @param result fillin for the functionally equivalent locale - * @param resultCapacity capacity of the fillin buffer - * @param keyword a particular keyword as enumerated by - * ucol_getKeywords. - * @param locale the requested locale - * @param isAvailable if non-NULL, pointer to a fillin parameter that - * indicates whether the requested locale was 'available' to the - * collation service. A locale is defined as 'available' if it - * physically exists within the collation locale data. - * @param status pointer to input-output error code - * @return the actual buffer size needed for the locale. If greater - * than resultCapacity, the returned full name will be truncated and - * an error code will be returned. 
- * @stable ICU 3.0 - */ -U_STABLE int32_t U_EXPORT2 -ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, - const char* keyword, const char* locale, - UBool* isAvailable, UErrorCode* status); - -/** - * Get the collation tailoring rules from a UCollator. - * The rules will follow the rule syntax. - * @param coll The UCollator to query. - * @param length - * @return The collation tailoring rules. - * @stable ICU 2.0 - */ -U_STABLE const UChar* U_EXPORT2 -ucol_getRules( const UCollator *coll, - int32_t *length); - -/** Get the short definition string for a collator. This API harvests the collator's - * locale and the attribute set and produces a string that can be used for opening - * a collator with the same properties using the ucol_openFromShortString API. - * This string will be normalized. - * The structure and the syntax of the string is defined in the "Naming collators" - * section of the users guide: - * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators - * This API supports preflighting. - * @param coll a collator - * @param locale a locale that will appear as a collators locale in the resulting - * short string definition. If NULL, the locale will be harvested - * from the collator. - * @param buffer space to hold the resulting string - * @param capacity capacity of the buffer - * @param status for returning errors. All the preflighting errors are featured - * @return length of the resulting string - * @see ucol_openFromShortString - * @see ucol_normalizeShortDefinitionString - * @stable ICU 3.0 - */ -U_STABLE int32_t U_EXPORT2 -ucol_getShortDefinitionString(const UCollator *coll, - const char *locale, - char *buffer, - int32_t capacity, - UErrorCode *status); - -/** Verifies and normalizes short definition string. - * Normalized short definition string has all the option sorted by the argument name, - * so that equivalent definition strings are the same. - * This API supports preflighting. 
- * @param source definition string - * @param destination space to hold the resulting string - * @param capacity capacity of the buffer - * @param parseError if not NULL, structure that will get filled with error's pre - * and post context in case of error. - * @param status Error code. This API will return an error if an invalid attribute - * or attribute/value combination is specified. All the preflighting - * errors are also featured - * @return length of the resulting normalized string. - * - * @see ucol_openFromShortString - * @see ucol_getShortDefinitionString - * - * @stable ICU 3.0 - */ - -U_STABLE int32_t U_EXPORT2 -ucol_normalizeShortDefinitionString(const char *source, - char *destination, - int32_t capacity, - UParseError *parseError, - UErrorCode *status); - - -/** - * Get a sort key for a string from a UCollator. - * Sort keys may be compared using <TT>strcmp</TT>. - * - * Like ICU functions that write to an output buffer, the buffer contents - * is undefined if the buffer capacity (resultLength parameter) is too small. - * Unlike ICU functions that write a string to an output buffer, - * the terminating zero byte is counted in the sort key length. - * @param coll The UCollator containing the collation rules. - * @param source The string to transform. - * @param sourceLength The length of source, or -1 if null-terminated. - * @param result A pointer to a buffer to receive the attribute. - * @param resultLength The maximum size of result. - * @return The size needed to fully store the sort key. - * If there was an internal error generating the sort key, - * a zero value is returned. - * @see ucol_keyHashCode - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucol_getSortKey(const UCollator *coll, - const UChar *source, - int32_t sourceLength, - uint8_t *result, - int32_t resultLength); - - -/** Gets the next count bytes of a sort key. 
Caller needs - * to preserve state array between calls and to provide - * the same type of UCharIterator set with the same string. - * The destination buffer provided must be big enough to store - * the number of requested bytes. - * - * The generated sort key may or may not be compatible with - * sort keys generated using ucol_getSortKey(). - * @param coll The UCollator containing the collation rules. - * @param iter UCharIterator containing the string we need - * the sort key to be calculated for. - * @param state Opaque state of sortkey iteration. - * @param dest Buffer to hold the resulting sortkey part - * @param count number of sort key bytes required. - * @param status error code indicator. - * @return the actual number of bytes of a sortkey. It can be - * smaller than count if we have reached the end of - * the sort key. - * @stable ICU 2.6 - */ -U_STABLE int32_t U_EXPORT2 -ucol_nextSortKeyPart(const UCollator *coll, - UCharIterator *iter, - uint32_t state[2], - uint8_t *dest, int32_t count, - UErrorCode *status); - -/** enum that is taken by ucol_getBound API - * See below for explanation - * do not change the values assigned to the - * members of this enum. Underlying code - * depends on them having these numbers - * @stable ICU 2.0 - */ -typedef enum { - /** lower bound */ - UCOL_BOUND_LOWER = 0, - /** upper bound that will match strings of exact size */ - UCOL_BOUND_UPPER = 1, - /** upper bound that will match all the strings that have the same initial substring as the given string */ - UCOL_BOUND_UPPER_LONG = 2, - UCOL_BOUND_VALUE_COUNT -} UColBoundMode; - -/** - * Produce a bound for a given sortkey and a number of levels. - * Return value is always the number of bytes needed, regardless of - * whether the result buffer was big enough or even valid.<br> - * Resulting bounds can be used to produce a range of strings that are - * between upper and lower bounds. 
For example, if bounds are produced - * for a sortkey of string "smith", strings between upper and lower - * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> - * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER - * is produced, strings matched would be as above. However, if bound - * produced using UCOL_BOUND_UPPER_LONG is used, the above example will - * also match "Smithsonian" and similar.<br> - * For more on usage, see example in cintltst/capitst.c in procedure - * TestBounds. - * Sort keys may be compared using <TT>strcmp</TT>. - * @param source The source sortkey. - * @param sourceLength The length of source, or -1 if null-terminated. - * (If an unmodified sortkey is passed, it is always null - * terminated). - * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which - * produces a lower inclusive bound, UCOL_BOUND_UPPER, that - * produces upper bound that matches strings of the same length - * or UCOL_BOUND_UPPER_LONG that matches strings that have the - * same starting substring as the source string. - * @param noOfLevels Number of levels required in the resulting bound (for most - * uses, the recommended value is 1). See users guide for - * explanation on number of levels a sortkey can have. - * @param result A pointer to a buffer to receive the resulting sortkey. - * @param resultLength The maximum size of result. - * @param status Used for returning error code if something went wrong. If the - * number of levels requested is higher than the number of levels - * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is - * issued. - * @return The size needed to fully store the bound. - * @see ucol_keyHashCode - * @stable ICU 2.1 - */ -U_STABLE int32_t U_EXPORT2 -ucol_getBound(const uint8_t *source, - int32_t sourceLength, - UColBoundMode boundType, - uint32_t noOfLevels, - uint8_t *result, - int32_t resultLength, - UErrorCode *status); - -/** - * Gets the version information for a Collator. 
Version is currently - * an opaque 32-bit number which depends, among other things, on major - * versions of the collator tailoring and UCA. - * @param coll The UCollator to query. - * @param info the version # information, the result will be filled in - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucol_getVersion(const UCollator* coll, UVersionInfo info); - -/** - * Gets the UCA version information for a Collator. Version is the - * UCA version number (3.1.1, 4.0). - * @param coll The UCollator to query. - * @param info the version # information, the result will be filled in - * @stable ICU 2.8 - */ -U_STABLE void U_EXPORT2 -ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); - -/** - * Merges two sort keys. The levels are merged with their corresponding counterparts - * (primaries with primaries, secondaries with secondaries etc.). Between the values - * from the same level a separator is inserted. - * - * This is useful, for example, for combining sort keys from first and last names - * to sort such pairs. - * It is possible to merge multiple sort keys by consecutively merging - * another one with the intermediate result. - * - * The length of the merge result is the sum of the lengths of the input sort keys. - * - * Example (uncompressed): - * <pre>191B1D 01 050505 01 910505 00 - * 1F2123 01 050505 01 910505 00</pre> - * will be merged as - * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> - * - * If the destination buffer is not big enough, then its contents are undefined. - * If any of source lengths are zero or any of the source pointers are NULL/undefined, - * the result is of size zero. 
- * - * @param src1 the first sort key - * @param src1Length the length of the first sort key, including the zero byte at the end; - * can be -1 if the function is to find the length - * @param src2 the second sort key - * @param src2Length the length of the second sort key, including the zero byte at the end; - * can be -1 if the function is to find the length - * @param dest the buffer where the merged sort key is written, - * can be NULL if destCapacity==0 - * @param destCapacity the number of bytes in the dest buffer - * @return the length of the merged sort key, src1Length+src2Length; - * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), - * in which cases the contents of dest is undefined - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, - const uint8_t *src2, int32_t src2Length, - uint8_t *dest, int32_t destCapacity); - -/** - * Universal attribute setter - * @param coll collator which attributes are to be changed - * @param attr attribute type - * @param value attribute value - * @param status to indicate whether the operation went on smoothly or there were errors - * @see UColAttribute - * @see UColAttributeValue - * @see ucol_getAttribute - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); - -/** - * Universal attribute getter - * @param coll collator which attributes are to be changed - * @param attr attribute type - * @return attribute value - * @param status to indicate whether the operation went on smoothly or there were errors - * @see UColAttribute - * @see UColAttributeValue - * @see ucol_setAttribute - * @stable ICU 2.0 - */ -U_STABLE UColAttributeValue U_EXPORT2 -ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); - -/** Variable top - * is a two byte primary value which causes all the codepoints with primary values 
that - * are less or equal than the variable top to be shifted when alternate handling is set - * to UCOL_SHIFTED. - * Sets the variable top to a collation element value of a string supplied. - * @param coll collator which variable top needs to be changed - * @param varTop one or more (if contraction) UChars to which the variable top should be set - * @param len length of variable top string. If -1 it is considered to be zero terminated. - * @param status error code. If error code is set, the return value is undefined. - * Errors set by this function are: <br> - * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such - * a contraction<br> - * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes - * @return a 32 bit value containing the value of the variable top in upper 16 bits. - * Lower 16 bits are undefined - * @see ucol_getVariableTop - * @see ucol_restoreVariableTop - * @stable ICU 2.0 - */ -U_STABLE uint32_t U_EXPORT2 -ucol_setVariableTop(UCollator *coll, - const UChar *varTop, int32_t len, - UErrorCode *status); - -/** - * Gets the variable top value of a Collator. - * Lower 16 bits are undefined and should be ignored. - * @param coll collator which variable top needs to be retrieved - * @param status error code (not changed by function). If error code is set, - * the return value is undefined. - * @return the variable top value of a Collator. - * @see ucol_setVariableTop - * @see ucol_restoreVariableTop - * @stable ICU 2.0 - */ -U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); - -/** - * Sets the variable top to a collation element value supplied. Variable top is - * set to the upper 16 bits. - * Lower 16 bits are ignored. 
- * @param coll collator which variable top needs to be changed - * @param varTop CE value, as returned by ucol_setVariableTop or ucol_getVariableTop - * @param status error code (not changed by function) - * @see ucol_getVariableTop - * @see ucol_setVariableTop - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); - -/** - * Thread safe cloning operation. The result is a clone of a given collator. - * @param coll collator to be cloned - * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> - * user allocated space for the new clone. - * If NULL new memory will be allocated. - * If buffer is not large enough, new memory will be allocated. - * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. - * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> - * pointer to size of allocated space. - * If *pBufferSize == 0, a sufficient size for use in cloning will - * be returned ('pre-flighting') - * If *pBufferSize is not enough for a stack-based safe clone, - * new memory will be allocated. - * @param status to indicate whether the operation went on smoothly or there were errors - * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any - * allocations were necessary. - * @return pointer to the new clone - * @see ucol_open - * @see ucol_openRules - * @see ucol_close - * @stable ICU 2.0 - */ -U_STABLE UCollator* U_EXPORT2 -ucol_safeClone(const UCollator *coll, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status); - -#ifndef U_HIDE_DEPRECATED_API - -/** default memory size for the new clone. - * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. - */ -#define U_COL_SAFECLONE_BUFFERSIZE 1 - -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 
- * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough - * to store rules, will store up to available space. - * - * ucol_getRules() should normally be used instead. - * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales - * @param coll collator to get the rules from - * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. - * @param buffer buffer to store the result in. If NULL, you'll get no rules. - * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in. - * @return current rules - * @stable ICU 2.0 - * @see UCOL_FULL_RULES - */ -U_STABLE int32_t U_EXPORT2 -ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); - -#ifndef U_HIDE_DEPRECATED_API -/** - * gets the locale name of the collator. If the collator - * is instantiated from the rules, then this function returns - * NULL. - * @param coll The UCollator for which the locale is needed - * @param type You can choose between requested, valid and actual - * locale. For description see the definition of - * ULocDataLocaleType in uloc.h - * @param status error code of the operation - * @return real locale name from which the collation data comes. - * If the collator was instantiated from rules, returns - * NULL. - * @deprecated ICU 2.8 Use ucol_getLocaleByType instead - */ -U_DEPRECATED const char * U_EXPORT2 -ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); -#endif /* U_HIDE_DEPRECATED_API */ - -/** - * gets the locale name of the collator. If the collator - * is instantiated from the rules, then this function returns - * NULL. - * @param coll The UCollator for which the locale is needed - * @param type You can choose between requested, valid and actual - * locale. 
For description see the definition of - * ULocDataLocaleType in uloc.h - * @param status error code of the operation - * @return real locale name from which the collation data comes. - * If the collator was instantiated from rules, returns - * NULL. - * @stable ICU 2.8 - */ -U_STABLE const char * U_EXPORT2 -ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); - -/** - * Get a Unicode set that contains all the characters and sequences tailored in - * this collator. The result must be disposed of by using uset_close. - * @param coll The UCollator for which we want to get tailored chars - * @param status error code of the operation - * @return a pointer to newly created USet. Must be disposed of by using uset_close - * @see ucol_openRules - * @see uset_close - * @stable ICU 2.4 - */ -U_STABLE USet * U_EXPORT2 -ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); - -#ifndef U_HIDE_INTERNAL_API -/** - * Universal attribute getter that returns UCOL_DEFAULT if the value is default - * @param coll collator whose attributes are to be changed - * @param attr attribute type - * @return attribute value or UCOL_DEFAULT if the value is default - * @param status to indicate whether the operation went on smoothly or there were errors - * @see UColAttribute - * @see UColAttributeValue - * @see ucol_setAttribute - * @internal ICU 3.0 - */ -U_INTERNAL UColAttributeValue U_EXPORT2 -ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status); - -/** Check whether two collators are equal. Collators are considered equal if they - * will sort strings the same. This means that both the current attributes and the - * rules must be equivalent. Currently used for RuleBasedCollator::operator==. 
- * @param source first collator - * @param target second collator - * @return TRUE or FALSE - * @internal ICU 3.0 - */ -U_INTERNAL UBool U_EXPORT2 -ucol_equals(const UCollator *source, const UCollator *target); - -/** Calculates the set of unsafe code points, given a collator. - * A character is unsafe if you could append any character and cause the ordering to alter significantly. - * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. - * Thus if you have a character like a_umlaut, and you add a lower_dot to it, - * then it normalizes to a_lower_dot + umlaut, and sorts differently. - * @param coll Collator - * @param unsafe a fill-in set to receive the unsafe points - * @param status for catching errors - * @return number of elements in the set - * @internal ICU 3.0 - */ -U_INTERNAL int32_t U_EXPORT2 -ucol_getUnsafeSet( const UCollator *coll, - USet *unsafe, - UErrorCode *status); - -/** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away. - * @internal ICU 3.2.1 - */ -U_INTERNAL void U_EXPORT2 -ucol_forgetUCA(void); - -/** Touches all resources needed for instantiating a collator from a short string definition, - * thus filling up the cache. - * @param definition A short string containing a locale and a set of attributes. - * Attributes not explicitly mentioned are left at the default - * state for a locale. - * @param parseError if not NULL, structure that will get filled with error's pre - * and post context in case of error. - * @param forceDefaults if FALSE, the settings that are the same as the collator - * default settings will not be applied (for example, setting - * French secondary on a French collator would not be executed). - * If TRUE, all the settings will be applied regardless of the - * collator default value. If the definition - * strings are to be cached, should be set to FALSE. - * @param status Error code. 
Apart from regular error conditions connected to - * instantiating collators (like out of memory or similar), this - * API will return an error if an invalid attribute or attribute/value - * combination is specified. - * @see ucol_openFromShortString - * @internal ICU 3.2.1 - */ -U_INTERNAL void U_EXPORT2 -ucol_prepareShortStringOpen( const char *definition, - UBool forceDefaults, - UParseError *parseError, - UErrorCode *status); -#endif /* U_HIDE_INTERNAL_API */ - -/** Creates a binary image of a collator. This binary image can be stored and - * later used to instantiate a collator using ucol_openBinary. - * This API supports preflighting. - * @param coll Collator - * @param buffer a fill-in buffer to receive the binary image - * @param capacity capacity of the destination buffer - * @param status for catching errors - * @return size of the image - * @see ucol_openBinary - * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -ucol_cloneBinary(const UCollator *coll, - uint8_t *buffer, int32_t capacity, - UErrorCode *status); - -/** Opens a collator from a collator binary image created using - * ucol_cloneBinary. Binary image used in instantiation of the - * collator remains owned by the user and should stay around for - * the lifetime of the collator. The API also takes a base collator - * which usually should be UCA. - * @param bin binary image owned by the user and required through the - * lifetime of the collator - * @param length size of the image. If negative, the API will try to - * figure out the length of the image - * @param base fallback collator, usually UCA. Base is required to be - * present through the lifetime of the collator. Currently - * it cannot be NULL. 
- * @param status for catching errors - * @return newly created collator - * @see ucol_cloneBinary - * @stable ICU 3.2 - */ -U_STABLE UCollator* U_EXPORT2 -ucol_openBinary(const uint8_t *bin, int32_t length, - const UCollator *base, - UErrorCode *status); - - -#endif /* #if !UCONFIG_NO_COLLATION */ - -#endif diff --git a/Source/WTF/icu/unicode/uconfig.h b/Source/WTF/icu/unicode/uconfig.h deleted file mode 100644 index bfa8e77b0..000000000 --- a/Source/WTF/icu/unicode/uconfig.h +++ /dev/null @@ -1,412 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 2002-2013, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: uconfig.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002sep19 -* created by: Markus W. Scherer -*/ - -#ifndef __UCONFIG_H__ -#define __UCONFIG_H__ - - -/*! - * \file - * \brief User-configurable settings - * - * Miscellaneous switches: - * - * A number of macros affect a variety of minor aspects of ICU. - * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h) - * and moved here to make them easier to find. - * - * Switches for excluding parts of ICU library code modules: - * - * Changing these macros allows building partial, smaller libraries for special purposes. - * By default, all modules are built. - * The switches are fairly coarse, controlling large modules. - * Basic services cannot be turned off. - * - * Building with any of these options does not guarantee that the - * ICU build process will completely work. It is recommended that - * the ICU libraries and data be built using the normal build. - * At that time you should remove the data used by those services. - * After building the ICU data library, you should rebuild the ICU - * libraries with these switches customized to your needs. 
- * - * @stable ICU 2.4 - */ - -/** - * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h" - * prior to determining default settings for uconfig variables. - * - * @internal ICU 4.0 - */ -#if defined(UCONFIG_USE_LOCAL) -#include "uconfig_local.h" -#endif - -/** - * \def U_DEBUG - * Determines whether to include debugging code. - * Automatically set on Windows, but most compilers do not have - * related predefined macros. - * @internal - */ -#ifdef U_DEBUG - /* Use the predefined value. */ -#elif defined(_DEBUG) - /* - * _DEBUG is defined by Visual Studio debug compilation. - * Do *not* test for its NDEBUG macro: It is an orthogonal macro - * which disables assert(). - */ -# define U_DEBUG 1 -# else -# define U_DEBUG 0 -#endif - -/** - * Determines whether to enable auto cleanup of libraries. - * @internal - */ -#ifndef UCLN_NO_AUTO_CLEANUP -#define UCLN_NO_AUTO_CLEANUP 1 -#endif - -/** - * \def U_DISABLE_RENAMING - * Determines whether to disable renaming or not. - * @internal - */ -#ifndef U_DISABLE_RENAMING -#define U_DISABLE_RENAMING 1 -#endif - -/** - * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS - * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h. - * utypes.h includes those headers if this macro is defined to 0. - * Otherwise, each of those headers must be included explicitly when using one of their macros. - * Defaults to 0 for backward compatibility, except inside ICU. - * @stable ICU 49 - */ -#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS - /* Use the predefined value. 
*/ -#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \ - defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \ - defined(U_TOOLUTIL_IMPLEMENTATION) -# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1 -#else -# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0 -#endif - -/** - * \def U_OVERRIDE_CXX_ALLOCATION - * Determines whether to override new and delete. - * ICU is normally built such that all of its C++ classes, via their UMemory base, - * override operators new and delete to use its internal, customizable, - * non-exception-throwing memory allocation functions. (Default value 1 for this macro.) - * - * This is especially important when the application and its libraries use multiple heaps. - * For example, on Windows, this allows the ICU DLL to be used by - * applications that statically link the C Runtime library. - * - * @stable ICU 2.2 - */ -#ifndef U_OVERRIDE_CXX_ALLOCATION -#define U_OVERRIDE_CXX_ALLOCATION 1 -#endif - -/** - * \def U_ENABLE_TRACING - * Determines whether to enable tracing. - * @internal - */ -#ifndef U_ENABLE_TRACING -#define U_ENABLE_TRACING 0 -#endif - -/** - * \def U_ENABLE_DYLOAD - * Whether to enable Dynamic loading in ICU. - * @internal - */ -#ifndef U_ENABLE_DYLOAD -#define U_ENABLE_DYLOAD 1 -#endif - -/** - * \def U_CHECK_DYLOAD - * Whether to test Dynamic loading as an OS capability. - * @internal - */ -#ifndef U_CHECK_DYLOAD -#define U_CHECK_DYLOAD 1 -#endif - - -/** - * \def U_DEFAULT_SHOW_DRAFT - * Do we allow ICU users to use the draft APIs by default? - * @internal - */ -#ifndef U_DEFAULT_SHOW_DRAFT -#define U_DEFAULT_SHOW_DRAFT 1 -#endif - -/*===========================================================================*/ -/* Custom icu entry point renaming */ -/*===========================================================================*/ - -/** - * \def U_HAVE_LIB_SUFFIX - * 1 if a custom library suffix is set. 
- * @internal - */ -#ifdef U_HAVE_LIB_SUFFIX - /* Use the predefined value. */ -#elif defined(U_LIB_SUFFIX_C_NAME) -# define U_HAVE_LIB_SUFFIX 1 -#endif - -/** - * \def U_LIB_SUFFIX_C_NAME_STRING - * Defines the library suffix as a string with C syntax. - * @internal - */ -#ifdef U_LIB_SUFFIX_C_NAME_STRING - /* Use the predefined value. */ -#elif defined(U_LIB_SUFFIX_C_NAME) -# define U_LIB_SUFFIX_C_NAME_STRING #U_LIB_SUFFIX_C_NAME -#else -# define U_LIB_SUFFIX_C_NAME_STRING "" -#endif - -/* common/i18n library switches --------------------------------------------- */ - -/** - * \def UCONFIG_ONLY_COLLATION - * This switch turns off modules that are not needed for collation. - * - * It does not turn off legacy conversion because that is necessary - * for ICU to work on EBCDIC platforms (for the default converter). - * If you want "only collation" and do not build for EBCDIC, - * then you can define UCONFIG_NO_LEGACY_CONVERSION 1 as well. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_ONLY_COLLATION -# define UCONFIG_ONLY_COLLATION 0 -#endif - -#if UCONFIG_ONLY_COLLATION - /* common library */ -# define UCONFIG_NO_BREAK_ITERATION 1 -# define UCONFIG_NO_IDNA 1 - - /* i18n library */ -# if UCONFIG_NO_COLLATION -# error Contradictory collation switches in uconfig.h. -# endif -# define UCONFIG_NO_FORMATTING 1 -# define UCONFIG_NO_TRANSLITERATION 1 -# define UCONFIG_NO_REGULAR_EXPRESSIONS 1 -#endif - -/* common library switches -------------------------------------------------- */ - -/** - * \def UCONFIG_NO_FILE_IO - * This switch turns off all file access in the common library - * where file access is only used for data loading. - * ICU data must then be provided in the form of a data DLL (or with an - * equivalent way to link to the data residing in an executable, - * as in building a combined library with both the common library's code and - * the data), or via udata_setCommonData(). 
- * Application data must be provided via udata_setAppData() or by using - * "open" functions that take pointers to data, for example ucol_openBinary(). - * - * File access is not used at all in the i18n library. - * - * File access cannot be turned off for the icuio library or for the ICU - * test suites and ICU tools. - * - * @stable ICU 3.6 - */ -#ifndef UCONFIG_NO_FILE_IO -# define UCONFIG_NO_FILE_IO 0 -#endif - -/** - * \def UCONFIG_NO_CONVERSION - * ICU will not completely build with this switch turned on. - * This switch turns off all converters. - * - * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 - * in utypes.h if char* strings in your environment are always in UTF-8. - * - * @stable ICU 3.2 - * @see U_CHARSET_IS_UTF8 - */ -#ifndef UCONFIG_NO_CONVERSION -# define UCONFIG_NO_CONVERSION 0 -#endif - -#if UCONFIG_NO_CONVERSION -# define UCONFIG_NO_LEGACY_CONVERSION 1 -#endif - -/** - * \def UCONFIG_NO_LEGACY_CONVERSION - * This switch turns off all converters except for - * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) - * - US-ASCII - * - ISO-8859-1 - * - * Turning off legacy conversion is not possible on EBCDIC platforms - * because they need ibm-37 or ibm-1047 default converters. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_LEGACY_CONVERSION -# define UCONFIG_NO_LEGACY_CONVERSION 0 -#endif - -/** - * \def UCONFIG_NO_NORMALIZATION - * This switch turns off normalization. - * It implies turning off several other services as well, for example - * collation and IDNA. - * - * @stable ICU 2.6 - */ -#ifndef UCONFIG_NO_NORMALIZATION -# define UCONFIG_NO_NORMALIZATION 0 -#elif UCONFIG_NO_NORMALIZATION - /* common library */ - /* ICU 50 CJK dictionary BreakIterator uses normalization */ -# define UCONFIG_NO_BREAK_ITERATION 1 - /* IDNA (UTS #46) is implemented via normalization */ -# define UCONFIG_NO_IDNA 1 - - /* i18n library */ -# if UCONFIG_ONLY_COLLATION -# error Contradictory collation switches in uconfig.h. 
-# endif -# define UCONFIG_NO_COLLATION 1 -# define UCONFIG_NO_TRANSLITERATION 1 -#endif - -/** - * \def UCONFIG_NO_BREAK_ITERATION - * This switch turns off break iteration. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_BREAK_ITERATION -# define UCONFIG_NO_BREAK_ITERATION 0 -#endif - -/** - * \def UCONFIG_NO_IDNA - * This switch turns off IDNA. - * - * @stable ICU 2.6 - */ -#ifndef UCONFIG_NO_IDNA -# define UCONFIG_NO_IDNA 0 -#endif - -/** - * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE - * Determines the default UMessagePatternApostropheMode. - * See the documentation for that enum. - * - * @stable ICU 4.8 - */ -#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE -# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL -#endif - -/* i18n library switches ---------------------------------------------------- */ - -/** - * \def UCONFIG_NO_COLLATION - * This switch turns off collation and collation-based string search. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_COLLATION -# define UCONFIG_NO_COLLATION 0 -#endif - -/** - * \def UCONFIG_NO_FORMATTING - * This switch turns off formatting and calendar/timezone services. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_FORMATTING -# define UCONFIG_NO_FORMATTING 0 -#endif - -/** - * \def UCONFIG_NO_TRANSLITERATION - * This switch turns off transliteration. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_TRANSLITERATION -# define UCONFIG_NO_TRANSLITERATION 0 -#endif - -/** - * \def UCONFIG_NO_REGULAR_EXPRESSIONS - * This switch turns off regular expressions. - * - * @stable ICU 2.4 - */ -#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS -# define UCONFIG_NO_REGULAR_EXPRESSIONS 0 -#endif - -/** - * \def UCONFIG_NO_SERVICE - * This switch turns off service registration. - * - * @stable ICU 3.2 - */ -#ifndef UCONFIG_NO_SERVICE -# define UCONFIG_NO_SERVICE 1 -#endif - -/** - * \def UCONFIG_HAVE_PARSEALLINPUT - * This switch turns on the "parse all input" attribute. Binary incompatible. 
- * - * @internal - */ -#ifndef UCONFIG_HAVE_PARSEALLINPUT -# define UCONFIG_HAVE_PARSEALLINPUT 1 -#endif - - -/** - * \def UCONFIG_FORMAT_FASTPATHS_49 - * This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols - * - * @internal - */ -#ifndef UCONFIG_FORMAT_FASTPATHS_49 -# define UCONFIG_FORMAT_FASTPATHS_49 1 -#endif - -#endif diff --git a/Source/WTF/icu/unicode/uenum.h b/Source/WTF/icu/unicode/uenum.h deleted file mode 100644 index 5408ec5a6..000000000 --- a/Source/WTF/icu/unicode/uenum.h +++ /dev/null @@ -1,206 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2002-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uenum.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:2 -* -* created on: 2002jul08 -* created by: Vladimir Weinstein -*/ - -#ifndef __UENUM_H -#define __UENUM_H - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" - -#if U_SHOW_CPLUSPLUS_API -#include "unicode/strenum.h" -#endif - -/** - * \file - * \brief C API: String Enumeration - */ - -/** - * An enumeration object. - * For usage in C programs. - * @stable ICU 2.2 - */ -struct UEnumeration; -/** structure representing an enumeration object instance @stable ICU 2.2 */ -typedef struct UEnumeration UEnumeration; - -/** - * Disposes of resources in use by the iterator. If en is NULL, - * does nothing. After this call, any char* or UChar* pointer - * returned by uenum_unext() or uenum_next() is invalid. - * @param en UEnumeration structure pointer - * @stable ICU 2.2 - */ -U_STABLE void U_EXPORT2 -uenum_close(UEnumeration* en); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUEnumerationPointer - * "Smart pointer" class, closes a UEnumeration via uenum_close(). 
- * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close); - -U_NAMESPACE_END - -#endif - -/** - * Returns the number of elements that the iterator traverses. If - * the iterator is out-of-sync with its service, status is set to - * U_ENUM_OUT_OF_SYNC_ERROR. - * This is a convenience function. It can end up being very - * expensive as all the items might have to be pre-fetched (depending - * on the type of data being traversed). Use with caution and only - * when necessary. - * @param en UEnumeration structure pointer - * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the - * iterator is out of sync. - * @return number of elements in the iterator - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -uenum_count(UEnumeration* en, UErrorCode* status); - -/** - * Returns the next element in the iterator's list. If there are - * no more elements, returns NULL. If the iterator is out-of-sync - * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and - * NULL is returned. If the native service string is a char* string, - * it is converted to UChar* with the invariant converter. - * The result is terminated by (UChar)0. - * @param en the iterator object - * @param resultLength pointer to receive the length of the result - * (not including the terminating \\0). - * If the pointer is NULL it is ignored. - * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if - * the iterator is out of sync with its service. - * @return a pointer to the string. The string will be - * zero-terminated. The return pointer is owned by this iterator - * and must not be deleted by the caller. The pointer is valid - * until the next call to any uenum_... method, including - * uenum_next() or uenum_unext(). When all strings have been - * traversed, returns NULL. 
- * @stable ICU 2.2 - */ -U_STABLE const UChar* U_EXPORT2 -uenum_unext(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status); - -/** - * Returns the next element in the iterator's list. If there are - * no more elements, returns NULL. If the iterator is out-of-sync - * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and - * NULL is returned. If the native service string is a UChar* - * string, it is converted to char* with the invariant converter. - * The result is terminated by (char)0. If the conversion fails - * (because a character cannot be converted) then status is set to - * U_INVARIANT_CONVERSION_ERROR and the return value is undefined - * (but non-NULL). - * @param en the iterator object - * @param resultLength pointer to receive the length of the result - * (not including the terminating \\0). - * If the pointer is NULL it is ignored. - * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if - * the iterator is out of sync with its service. Set to - * U_INVARIANT_CONVERSION_ERROR if the underlying native string is - * UChar* and conversion to char* with the invariant converter - * fails. This error pertains only to current string, so iteration - * might be able to continue successfully. - * @return a pointer to the string. The string will be - * zero-terminated. The return pointer is owned by this iterator - * and must not be deleted by the caller. The pointer is valid - * until the next call to any uenum_... method, including - * uenum_next() or uenum_unext(). When all strings have been - * traversed, returns NULL. - * @stable ICU 2.2 - */ -U_STABLE const char* U_EXPORT2 -uenum_next(UEnumeration* en, - int32_t* resultLength, - UErrorCode* status); - -/** - * Resets the iterator to the current list of service IDs. This - * re-establishes sync with the service and rewinds the iterator - * to start at the first element. 
- * @param en the iterator object - * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if - * the iterator is out of sync with its service. - * @stable ICU 2.2 - */ -U_STABLE void U_EXPORT2 -uenum_reset(UEnumeration* en, UErrorCode* status); - -#if U_SHOW_CPLUSPLUS_API - -/** - * Given a StringEnumeration, wrap it in a UEnumeration. The - * StringEnumeration is adopted; after this call, the caller must not - * delete it (regardless of error status). - * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration. - * @param ec the error code. - * @return a UEnumeration wrapping the adopted StringEnumeration. - * @stable ICU 4.2 - */ -U_STABLE UEnumeration* U_EXPORT2 -uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec); - -#endif - -/** - * Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null. - * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. - * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration - * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller. - * @param count length of the array - * @param ec error code - * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory. - * @see uenum_close - * @stable ICU 50 - */ -U_STABLE UEnumeration* U_EXPORT2 -uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count, - UErrorCode* ec); - -/* Note: next function is not hidden as draft, as it is used internally (it was formerly an internal function). */ - -/** - * Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null. - * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. 
- * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration - * @param strings array of char* strings (each null terminated). All storage is owned by the caller. - * @param count length of the array - * @param ec error code - * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory - * @see uenum_close - * @stable ICU 50 - */ -U_STABLE UEnumeration* U_EXPORT2 -uenum_openCharStringsEnumeration(const char* const strings[], int32_t count, - UErrorCode* ec); - -#endif diff --git a/Source/WTF/icu/unicode/uiter.h b/Source/WTF/icu/unicode/uiter.h deleted file mode 100644 index 0cdb8ffbe..000000000 --- a/Source/WTF/icu/unicode/uiter.h +++ /dev/null @@ -1,707 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2002-2011 International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uiter.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jan18 -* created by: Markus W. Scherer -*/ - -#ifndef __UITER_H__ -#define __UITER_H__ - -/** - * \file - * \brief C API: Unicode Character Iteration - * - * @see UCharIterator - */ - -#include "unicode/utypes.h" - -#if U_SHOW_CPLUSPLUS_API - U_NAMESPACE_BEGIN - - class CharacterIterator; - class Replaceable; - - U_NAMESPACE_END -#endif - -U_CDECL_BEGIN - -struct UCharIterator; -typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ - -/** - * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). - * @see UCharIteratorMove - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef enum UCharIteratorOrigin { - UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH -} UCharIteratorOrigin; - -/** Constants for UCharIterator. 
@stable ICU 2.6 */ -enum { - /** - * Constant value that may be returned by UCharIteratorMove - * indicating that the final UTF-16 index is not known, but that the move succeeded. - * This can occur when moving relative to limit or length, or - * when moving relative to the current index after a setState() - * when the current UTF-16 index is not known. - * - * It would be very inefficient to have to count from the beginning of the text - * just to get the current/limit/length index after moving relative to it. - * The actual index can be determined with getIndex(UITER_CURRENT) - * which will count the UChars if necessary. - * - * @stable ICU 2.6 - */ - UITER_UNKNOWN_INDEX=-2 -}; - - -/** - * Constant for UCharIterator getState() indicating an error or - * an unknown state. - * Returned by uiter_getState()/UCharIteratorGetState - * when an error occurs. - * Also, some UCharIterator implementations may not be able to return - * a valid state for each position. This will be clearly documented - * for each such iterator (none of the public ones here). - * - * @stable ICU 2.6 - */ -#define UITER_NO_STATE ((uint32_t)0xffffffff) - -/** - * Function type declaration for UCharIterator.getIndex(). - * - * Gets the current position, or the start or limit of the - * iteration range. - * - * This function may perform slowly for UITER_CURRENT after setState() was called, - * or for UITER_LENGTH, because an iterator implementation may have to count - * UChars if the underlying storage is not UTF-16. - * - * @param iter the UCharIterator structure ("this pointer") - * @param origin get the 0, start, limit, length, or current index - * @return the requested index, or U_SENTINEL in an error condition - * - * @see UCharIteratorOrigin - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef int32_t U_CALLCONV -UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); - -/** - * Function type declaration for UCharIterator.move(). 
- * - * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). - * - * Moves the current position relative to the start or limit of the - * iteration range, or relative to the current position itself. - * The movement is expressed in numbers of code units forward - * or backward by specifying a positive or negative delta. - * Out of bounds movement will be pinned to the start or limit. - * - * This function may perform slowly for moving relative to UITER_LENGTH - * because an iterator implementation may have to count the rest of the - * UChars if the native storage is not UTF-16. - * - * When moving relative to the limit or length, or - * relative to the current position after setState() was called, - * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient - * determination of the actual UTF-16 index. - * The actual index can be determined with getIndex(UITER_CURRENT) - * which will count the UChars if necessary. - * See UITER_UNKNOWN_INDEX for details. - * - * @param iter the UCharIterator structure ("this pointer") - * @param delta can be positive, zero, or negative - * @param origin move relative to the 0, start, limit, length, or current index - * @return the new index, or U_SENTINEL on an error condition, - * or UITER_UNKNOWN_INDEX when the index is not known. - * - * @see UCharIteratorOrigin - * @see UCharIterator - * @see UITER_UNKNOWN_INDEX - * @stable ICU 2.1 - */ -typedef int32_t U_CALLCONV -UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); - -/** - * Function type declaration for UCharIterator.hasNext(). - * - * Check if current() and next() can still - * return another code unit. 
- * - * @param iter the UCharIterator structure ("this pointer") - * @return boolean value for whether current() and next() can still return another code unit - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UBool U_CALLCONV -UCharIteratorHasNext(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.hasPrevious(). - * - * Check if previous() can still return another code unit. - * - * @param iter the UCharIterator structure ("this pointer") - * @return boolean value for whether previous() can still return another code unit - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UBool U_CALLCONV -UCharIteratorHasPrevious(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.current(). - * - * Return the code unit at the current position, - * or U_SENTINEL if there is none (index is at the limit). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the current code unit - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UChar32 U_CALLCONV -UCharIteratorCurrent(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.next(). - * - * Return the code unit at the current index and increment - * the index (post-increment, like s[i++]), - * or return U_SENTINEL if there is none (index is at the limit). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the current code unit (and post-increment the current index) - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UChar32 U_CALLCONV -UCharIteratorNext(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.previous(). - * - * Decrement the index and return the code unit from there - * (pre-decrement, like s[--i]), - * or return U_SENTINEL if there is none (index is at the start). 
- * - * @param iter the UCharIterator structure ("this pointer") - * @return the previous code unit (after pre-decrementing the current index) - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef UChar32 U_CALLCONV -UCharIteratorPrevious(UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.reservedFn(). - * Reserved for future use. - * - * @param iter the UCharIterator structure ("this pointer") - * @param something some integer argument - * @return some integer - * - * @see UCharIterator - * @stable ICU 2.1 - */ -typedef int32_t U_CALLCONV -UCharIteratorReserved(UCharIterator *iter, int32_t something); - -/** - * Function type declaration for UCharIterator.getState(). - * - * Get the "state" of the iterator in the form of a single 32-bit word. - * It is recommended that the state value be calculated to be as small as - * is feasible. For strings with limited lengths, fewer than 32 bits may - * be sufficient. - * - * This is used together with setState()/UCharIteratorSetState - * to save and restore the iterator position more efficiently than with - * getIndex()/move(). - * - * The iterator state is defined as a uint32_t value because it is designed - * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state - * of the character iterator. - * - * With some UCharIterator implementations (e.g., UTF-8), - * getting and setting the UTF-16 index with existing functions - * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but - * relatively slow because the iterator has to "walk" from a known index - * to the requested one. - * This takes more time the farther it needs to go. - * - * An opaque state value allows an iterator implementation to provide - * an internal index (UTF-8: the source byte array index) for - * fast, constant-time restoration. 
- * - * After calling setState(), a getIndex(UITER_CURRENT) may be slow because - * the UTF-16 index may not be restored as well, but the iterator can deliver - * the correct text contents and move relative to the current position - * without performance degradation. - * - * Some UCharIterator implementations may not be able to return - * a valid state for each position, in which case they return UITER_NO_STATE instead. - * This will be clearly documented for each such iterator (none of the public ones here). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the state word - * - * @see UCharIterator - * @see UCharIteratorSetState - * @see UITER_NO_STATE - * @stable ICU 2.6 - */ -typedef uint32_t U_CALLCONV -UCharIteratorGetState(const UCharIterator *iter); - -/** - * Function type declaration for UCharIterator.setState(). - * - * Restore the "state" of the iterator using a state word from a getState() call. - * The iterator object need not be the same one as for which getState() was called, - * but it must be of the same type (set up using the same uiter_setXYZ function) - * and it must iterate over the same string - * (binary identical regardless of memory address). - * For more about the state word see UCharIteratorGetState. - * - * After calling setState(), a getIndex(UITER_CURRENT) may be slow because - * the UTF-16 index may not be restored as well, but the iterator can deliver - * the correct text contents and move relative to the current position - * without performance degradation. - * - * @param iter the UCharIterator structure ("this pointer") - * @param state the state word from a getState() call - * on a same-type, same-string iterator - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. 
- * - * @see UCharIterator - * @see UCharIteratorGetState - * @stable ICU 2.6 - */ -typedef void U_CALLCONV -UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); - - -/** - * C API for code unit iteration. - * This can be used as a C wrapper around - * CharacterIterator, Replaceable, or implemented using simple strings, etc. - * - * There are two roles for using UCharIterator: - * - * A "provider" sets the necessary function pointers and controls the "protected" - * fields of the UCharIterator structure. A "provider" passes a UCharIterator - * into C APIs that need a UCharIterator as an abstract, flexible string interface. - * - * Implementations of such C APIs are "callers" of UCharIterator functions; - * they only use the "public" function pointers and never access the "protected" - * fields directly. - * - * The current() and next() functions only check the current index against the - * limit, and previous() only checks the current index against the start, - * to see if the iterator already reached the end of the iteration range. - * - * The assumption - in all iterators - is that the index is moved via the API, - * which means it won't go out of bounds, or the index is modified by - * user code that knows enough about the iterator implementation to set valid - * index values. - * - * UCharIterator functions return code unit values 0..0xffff, - * or U_SENTINEL if the iteration bounds are reached. - * - * @stable ICU 2.1 - */ -struct UCharIterator { - /** - * (protected) Pointer to string or wrapped object or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - const void *context; - - /** - * (protected) Length of string or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - int32_t length; - - /** - * (protected) Start index or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - int32_t start; - - /** - * (protected) Current index or similar. - * Not used by caller. 
- * @stable ICU 2.1 - */ - int32_t index; - - /** - * (protected) Limit index or similar. - * Not used by caller. - * @stable ICU 2.1 - */ - int32_t limit; - - /** - * (protected) Used by UTF-8 iterators and possibly others. - * @stable ICU 2.1 - */ - int32_t reservedField; - - /** - * (public) Returns the current position or the - * start or limit index of the iteration range. - * - * @see UCharIteratorGetIndex - * @stable ICU 2.1 - */ - UCharIteratorGetIndex *getIndex; - - /** - * (public) Moves the current position relative to the start or limit of the - * iteration range, or relative to the current position itself. - * The movement is expressed in numbers of code units forward - * or backward by specifying a positive or negative delta. - * - * @see UCharIteratorMove - * @stable ICU 2.1 - */ - UCharIteratorMove *move; - - /** - * (public) Check if current() and next() can still - * return another code unit. - * - * @see UCharIteratorHasNext - * @stable ICU 2.1 - */ - UCharIteratorHasNext *hasNext; - - /** - * (public) Check if previous() can still return another code unit. - * - * @see UCharIteratorHasPrevious - * @stable ICU 2.1 - */ - UCharIteratorHasPrevious *hasPrevious; - - /** - * (public) Return the code unit at the current position, - * or U_SENTINEL if there is none (index is at the limit). - * - * @see UCharIteratorCurrent - * @stable ICU 2.1 - */ - UCharIteratorCurrent *current; - - /** - * (public) Return the code unit at the current index and increment - * the index (post-increment, like s[i++]), - * or return U_SENTINEL if there is none (index is at the limit). - * - * @see UCharIteratorNext - * @stable ICU 2.1 - */ - UCharIteratorNext *next; - - /** - * (public) Decrement the index and return the code unit from there - * (pre-decrement, like s[--i]), - * or return U_SENTINEL if there is none (index is at the start). 
- * - * @see UCharIteratorPrevious - * @stable ICU 2.1 - */ - UCharIteratorPrevious *previous; - - /** - * (public) Reserved for future use. Currently NULL. - * - * @see UCharIteratorReserved - * @stable ICU 2.1 - */ - UCharIteratorReserved *reservedFn; - - /** - * (public) Return the state of the iterator, to be restored later with setState(). - * This function pointer is NULL if the iterator does not implement it. - * - * @see UCharIteratorGet - * @stable ICU 2.6 - */ - UCharIteratorGetState *getState; - - /** - * (public) Restore the iterator state from the state word from a call - * to getState(). - * This function pointer is NULL if the iterator does not implement it. - * - * @see UCharIteratorSet - * @stable ICU 2.6 - */ - UCharIteratorSetState *setState; -}; - -/** - * Helper function for UCharIterator to get the code point - * at the current index. - * - * Return the code point that includes the code unit at the current position, - * or U_SENTINEL if there is none (index is at the limit). - * If the current code unit is a lead or trail surrogate, - * then the following or preceding surrogate is used to form - * the code point value. - * - * @param iter the UCharIterator structure ("this pointer") - * @return the current code point - * - * @see UCharIterator - * @see U16_GET - * @see UnicodeString::char32At() - * @stable ICU 2.1 - */ -U_STABLE UChar32 U_EXPORT2 -uiter_current32(UCharIterator *iter); - -/** - * Helper function for UCharIterator to get the next code point. - * - * Return the code point at the current index and increment - * the index (post-increment, like s[i++]), - * or return U_SENTINEL if there is none (index is at the limit). 
- * - * @param iter the UCharIterator structure ("this pointer") - * @return the current code point (and post-increment the current index) - * - * @see UCharIterator - * @see U16_NEXT - * @stable ICU 2.1 - */ -U_STABLE UChar32 U_EXPORT2 -uiter_next32(UCharIterator *iter); - -/** - * Helper function for UCharIterator to get the previous code point. - * - * Decrement the index and return the code point from there - * (pre-decrement, like s[--i]), - * or return U_SENTINEL if there is none (index is at the start). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the previous code point (after pre-decrementing the current index) - * - * @see UCharIterator - * @see U16_PREV - * @stable ICU 2.1 - */ -U_STABLE UChar32 U_EXPORT2 -uiter_previous32(UCharIterator *iter); - -/** - * Get the "state" of the iterator in the form of a single 32-bit word. - * This is a convenience function that calls iter->getState(iter) - * if iter->getState is not NULL; - * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. - * - * Some UCharIterator implementations may not be able to return - * a valid state for each position, in which case they return UITER_NO_STATE instead. - * This will be clearly documented for each such iterator (none of the public ones here). - * - * @param iter the UCharIterator structure ("this pointer") - * @return the state word - * - * @see UCharIterator - * @see UCharIteratorGetState - * @see UITER_NO_STATE - * @stable ICU 2.6 - */ -U_STABLE uint32_t U_EXPORT2 -uiter_getState(const UCharIterator *iter); - -/** - * Restore the "state" of the iterator using a state word from a getState() call. - * This is a convenience function that calls iter->setState(iter, state, pErrorCode) - * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. 
- * - * @param iter the UCharIterator structure ("this pointer") - * @param state the state word from a getState() call - * on a same-type, same-string iterator - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @see UCharIterator - * @see UCharIteratorSetState - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); - -/** - * Set up a UCharIterator to iterate over a string. - * - * Sets the UCharIterator function pointers for iteration over the string s - * with iteration boundaries start=index=0 and length=limit=string length. - * The "provider" may set the start, index, and limit values at any time - * within the range 0..length. - * The length field will be ignored. - * - * The string pointer s is set into UCharIterator.context without copying - * or reallocating the string contents. - * - * getState() simply returns the current index. - * move() will always return the final index. - * - * @param iter UCharIterator structure to be set for iteration - * @param s String to iterate over - * @param length Length of s, or -1 if NUL-terminated - * - * @see UCharIterator - * @stable ICU 2.1 - */ -U_STABLE void U_EXPORT2 -uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); - -/** - * Set up a UCharIterator to iterate over a UTF-16BE string - * (byte vector with a big-endian pair of bytes per UChar). - * - * Everything works just like with a normal UChar iterator (uiter_setString), - * except that UChars are assembled from byte pairs, - * and that the length argument here indicates an even number of bytes. - * - * getState() simply returns the current index. - * move() will always return the final index. 
- * - * @param iter UCharIterator structure to be set for iteration - * @param s UTF-16BE string to iterate over - * @param length Length of s as an even number of bytes, or -1 if NUL-terminated - * (NUL means pair of 0 bytes at even index from s) - * - * @see UCharIterator - * @see uiter_setString - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); - -/** - * Set up a UCharIterator to iterate over a UTF-8 string. - * - * Sets the UCharIterator function pointers for iteration over the UTF-8 string s - * with UTF-8 iteration boundaries 0 and length. - * The implementation counts the UTF-16 index on the fly and - * lazily evaluates the UTF-16 length of the text. - * - * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. - * When the reservedField is not 0, then it contains a supplementary code point - * and the UTF-16 index is between the two corresponding surrogates. - * At that point, the UTF-8 index is behind that code point. - * - * The UTF-8 string pointer s is set into UCharIterator.context without copying - * or reallocating the string contents. - * - * getState() returns a state value consisting of - * - the current UTF-8 source byte index (bits 31..1) - * - a flag (bit 0) that indicates whether the UChar position is in the middle - * of a surrogate pair - * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) - * - * getState() cannot also encode the UTF-16 index in the state value. - * move(relative to limit or length), or - * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. 
- * - * @param iter UCharIterator structure to be set for iteration - * @param s UTF-8 string to iterate over - * @param length Length of s in bytes, or -1 if NUL-terminated - * - * @see UCharIterator - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); - -#if U_SHOW_CPLUSPLUS_API - -/** - * Set up a UCharIterator to wrap around a C++ CharacterIterator. - * - * Sets the UCharIterator function pointers for iteration using the - * CharacterIterator charIter. - * - * The CharacterIterator pointer charIter is set into UCharIterator.context - * without copying or cloning the CharacterIterator object. - * The other "protected" UCharIterator fields are set to 0 and will be ignored. - * The iteration index and boundaries are controlled by the CharacterIterator. - * - * getState() simply returns the current index. - * move() will always return the final index. - * - * @param iter UCharIterator structure to be set for iteration - * @param charIter CharacterIterator to wrap - * - * @see UCharIterator - * @stable ICU 2.1 - */ -U_STABLE void U_EXPORT2 -uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); - -/** - * Set up a UCharIterator to iterate over a C++ Replaceable. - * - * Sets the UCharIterator function pointers for iteration over the - * Replaceable rep with iteration boundaries start=index=0 and - * length=limit=rep->length(). - * The "provider" may set the start, index, and limit values at any time - * within the range 0..length=rep->length(). - * The length field will be ignored. - * - * The Replaceable pointer rep is set into UCharIterator.context without copying - * or cloning/reallocating the Replaceable object. - * - * getState() simply returns the current index. - * move() will always return the final index. 
- * - * @param iter UCharIterator structure to be set for iteration - * @param rep Replaceable to iterate over - * - * @see UCharIterator - * @stable ICU 2.1 - */ -U_STABLE void U_EXPORT2 -uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); - -#endif - -U_CDECL_END - -#endif diff --git a/Source/WTF/icu/unicode/uloc.h b/Source/WTF/icu/unicode/uloc.h deleted file mode 100644 index 28ab902b5..000000000 --- a/Source/WTF/icu/unicode/uloc.h +++ /dev/null @@ -1,1135 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 1997-2013, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File ULOC.H -* -* Modification History: -* -* Date Name Description -* 04/01/97 aliu Creation. -* 08/22/98 stephen JDK 1.2 sync. -* 12/08/98 rtg New C API for Locale -* 03/30/99 damiba overhaul -* 03/31/99 helena Javadoc for uloc functions. -* 04/15/99 Madhu Updated Javadoc -******************************************************************************** -*/ - -#ifndef ULOC_H -#define ULOC_H - -#include "unicode/utypes.h" -#include "unicode/uenum.h" - -/** - * \file - * \brief C API: Locale - * - * <h2> ULoc C API for Locale </h2> - * A <code>Locale</code> represents a specific geographical, political, - * or cultural region. An operation that requires a <code>Locale</code> to perform - * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code> - * to tailor information for the user. For example, displaying a number - * is a locale-sensitive operation--the number should be formatted - * according to the customs/conventions of the user's native country, - * region, or culture. In the C APIs, a locales is simply a const char string. - * - * <P> - * You create a <code>Locale</code> with one of the three options listed below. - * Each of the component is separated by '_' in the locale string. 
- * \htmlonly<blockquote>\endhtmlonly - * <pre> - * \code - * newLanguage - * - * newLanguage + newCountry - * - * newLanguage + newCountry + newVariant - * \endcode - * </pre> - * \htmlonly</blockquote>\endhtmlonly - * The first option is a valid <STRONG>ISO - * Language Code.</STRONG> These codes are the lower-case two-letter - * codes as defined by ISO-639. - * You can find a full list of these codes at a number of sites, such as: - * <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt"> - * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</a> - * - * <P> - * The second option includes an additonal <STRONG>ISO Country - * Code.</STRONG> These codes are the upper-case two-letter codes - * as defined by ISO-3166. - * You can find a full list of these codes at a number of sites, such as: - * <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html"> - * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</a> - * - * <P> - * The third option requires another additonal information--the - * <STRONG>Variant.</STRONG> - * The Variant codes are vendor and browser-specific. - * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX. - * Where there are two variants, separate them with an underscore, and - * put the most important one first. For - * example, a Traditional Spanish collation might be referenced, with - * "ES", "ES", "Traditional_WIN". - * - * <P> - * Because a <code>Locale</code> is just an identifier for a region, - * no validity check is performed when you specify a <code>Locale</code>. - * If you want to see whether particular resources are available for the - * <code>Locale</code> you asked for, you must query those resources. For - * example, ask the <code>UNumberFormat</code> for the locales it supports - * using its <code>getAvailable</code> method. 
- * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular - * locale, you get back the best available match, not necessarily - * precisely what you asked for. For more information, look at - * <code>UResourceBundle</code>. - * - * <P> - * The <code>Locale</code> provides a number of convenient constants - * that you can use to specify the commonly used - * locales. For example, the following refers to a locale - * for the United States: - * \htmlonly<blockquote>\endhtmlonly - * <pre> - * \code - * ULOC_US - * \endcode - * </pre> - * \htmlonly</blockquote>\endhtmlonly - * - * <P> - * Once you've specified a locale you can query it for information about - * itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and - * <code>uloc_getLanguage</code> to get the ISO Language Code. You can - * use <code>uloc_getDisplayCountry</code> to get the - * name of the country suitable for displaying to the user. Similarly, - * you can use <code>uloc_getDisplayLanguage</code> to get the name of - * the language suitable for displaying to the user. Interestingly, - * the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive - * and have two versions: one that uses the default locale and one - * that takes a locale as an argument and displays the name or country in - * a language appropriate to that locale. - * - * <P> - * The ICU provides a number of services that perform locale-sensitive - * operations. For example, the <code>unum_xxx</code> functions format - * numbers, currency, or percentages in a locale-sensitive manner. 
- * </P> - * \htmlonly<blockquote>\endhtmlonly - * <pre> - * \code - * UErrorCode success = U_ZERO_ERROR; - * UNumberFormat *nf; - * const char* myLocale = "fr_FR"; - * - * nf = unum_open( UNUM_DEFAULT, NULL, success ); - * unum_close(nf); - * nf = unum_open( UNUM_CURRENCY, NULL, success ); - * unum_close(nf); - * nf = unum_open( UNUM_PERCENT, NULL, success ); - * unum_close(nf); - * \endcode - * </pre> - * \htmlonly</blockquote>\endhtmlonly - * Each of these methods has two variants; one with an explicit locale - * and one without; the latter using the default locale. - * \htmlonly<blockquote>\endhtmlonly - * <pre> - * \code - * - * nf = unum_open( UNUM_DEFAULT, myLocale, success ); - * unum_close(nf); - * nf = unum_open( UNUM_CURRENCY, myLocale, success ); - * unum_close(nf); - * nf = unum_open( UNUM_PERCENT, myLocale, success ); - * unum_close(nf); - * \endcode - * </pre> - * \htmlonly</blockquote>\endhtmlonly - * A <code>Locale</code> is the mechanism for identifying the kind of services - * (<code>UNumberFormat</code>) that you would like to get. The locale is - * <STRONG>just</STRONG> a mechanism for identifying these services. - * - * <P> - * Each international serivce that performs locale-sensitive operations - * allows you - * to get all the available objects of that type. You can sift - * through these objects by language, country, or variant, - * and use the display names to present a menu to the user. - * For example, you can create a menu of all the collation objects - * suitable for a given language. 
Such classes implement these - * three class methods: - * \htmlonly<blockquote>\endhtmlonly - * <pre> - * \code - * const char* uloc_getAvailable(int32_t index); - * int32_t uloc_countAvailable(); - * int32_t - * uloc_getDisplayName(const char* localeID, - * const char* inLocaleID, - * UChar* result, - * int32_t maxResultSize, - * UErrorCode* err); - * - * \endcode - * </pre> - * \htmlonly</blockquote>\endhtmlonly - * <P> - * Concerning POSIX/RFC1766 Locale IDs, - * the getLanguage/getCountry/getVariant/getName functions do understand - * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT - * and if there is not an ICU-stype variant, uloc_getVariant() for example - * will return the one listed after the \@at sign. As well, the hyphen - * "-" is recognized as a country/variant separator similarly to RFC1766. - * So for example, "en-us" will be interpreted as en_US. - * As a result, uloc_getName() is far from a no-op, and will have the - * effect of converting POSIX/RFC1766 IDs into ICU form, although it does - * NOT map any of the actual codes (i.e. russian->ru) in any way. - * Applications should call uloc_getName() at the point where a locale ID - * is coming from an external source (user entry, OS, web browser) - * and pass the resulting string to other ICU functions. For example, - * don't use de-de\@EURO as an argument to resourcebundle. - * - * @see UResourceBundle - */ - -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_CHINESE "zh" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_ENGLISH "en" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_FRENCH "fr" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_GERMAN "de" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_ITALIAN "it" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_JAPANESE "ja" -/** Useful constant for this language. 
@stable ICU 2.0 */ -#define ULOC_KOREAN "ko" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_SIMPLIFIED_CHINESE "zh_CN" -/** Useful constant for this language. @stable ICU 2.0 */ -#define ULOC_TRADITIONAL_CHINESE "zh_TW" - -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_CANADA "en_CA" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_CANADA_FRENCH "fr_CA" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_CHINA "zh_CN" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_PRC "zh_CN" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_FRANCE "fr_FR" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_GERMANY "de_DE" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_ITALY "it_IT" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_JAPAN "ja_JP" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_KOREA "ko_KR" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_TAIWAN "zh_TW" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_UK "en_GB" -/** Useful constant for this country/region. @stable ICU 2.0 */ -#define ULOC_US "en_US" - -/** - * Useful constant for the maximum size of the language part of a locale ID. - * (including the terminating NULL). - * @stable ICU 2.0 - */ -#define ULOC_LANG_CAPACITY 12 - -/** - * Useful constant for the maximum size of the country part of a locale ID - * (including the terminating NULL). - * @stable ICU 2.0 - */ -#define ULOC_COUNTRY_CAPACITY 4 -/** - * Useful constant for the maximum size of the whole locale ID - * (including the terminating NULL and all keywords). 
- * @stable ICU 2.0 - */ -#define ULOC_FULLNAME_CAPACITY 157 - -/** - * Useful constant for the maximum size of the script part of a locale ID - * (including the terminating NULL). - * @stable ICU 2.8 - */ -#define ULOC_SCRIPT_CAPACITY 6 - -/** - * Useful constant for the maximum size of keywords in a locale - * @stable ICU 2.8 - */ -#define ULOC_KEYWORDS_CAPACITY 50 - -/** - * Useful constant for the maximum total size of keywords and their values in a locale - * @stable ICU 2.8 - */ -#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100 - -/** - * Invariant character separating keywords from the locale string - * @stable ICU 2.8 - */ -#define ULOC_KEYWORD_SEPARATOR '@' - -/** - * Unicode code point for '@' separating keywords from the locale string. - * @see ULOC_KEYWORD_SEPARATOR - * @stable ICU 4.6 - */ -#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40 - -/** - * Invariant character for assigning value to a keyword - * @stable ICU 2.8 - */ -#define ULOC_KEYWORD_ASSIGN '=' - -/** - * Unicode code point for '=' for assigning value to a keyword. - * @see ULOC_KEYWORD_ASSIGN - * @stable ICU 4.6 - */ -#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D - -/** - * Invariant character separating keywords - * @stable ICU 2.8 - */ -#define ULOC_KEYWORD_ITEM_SEPARATOR ';' - -/** - * Unicode code point for ';' separating keywords - * @see ULOC_KEYWORD_ITEM_SEPARATOR - * @stable ICU 4.6 - */ -#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B - -/** - * Constants for *_getLocale() - * Allow user to select whether she wants information on - * requested, valid or actual locale. - * For example, a collator for "en_US_CALIFORNIA" was - * requested. 
In the current state of ICU (2.0), - * the requested locale is "en_US_CALIFORNIA", - * the valid locale is "en_US" (most specific locale supported by ICU) - * and the actual locale is "root" (the collation data comes unmodified - * from the UCA) - * The locale is considered supported by ICU if there is a core ICU bundle - * for that locale (although it may be empty). - * @stable ICU 2.1 - */ -typedef enum { - /** This is the locale the data actually comes from - * @stable ICU 2.1 - */ - ULOC_ACTUAL_LOCALE = 0, - /** This is the most specific locale supported by ICU - * @stable ICU 2.1 - */ - ULOC_VALID_LOCALE = 1, - -#ifndef U_HIDE_DEPRECATED_API - /** This is the requested locale - * @deprecated ICU 2.8 - */ - ULOC_REQUESTED_LOCALE = 2, -#endif /* U_HIDE_DEPRECATED_API */ - - ULOC_DATA_LOCALE_TYPE_LIMIT = 3 -} ULocDataLocaleType ; - -#ifndef U_HIDE_SYSTEM_API -/** - * Gets ICU's default locale. - * The returned string is a snapshot in time, and will remain valid - * and unchanged even when uloc_setDefault() is called. - * The returned storage is owned by ICU, and must not be altered or deleted - * by the caller. - * - * @return the ICU default locale - * @system - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -uloc_getDefault(void); - -/** - * Sets ICU's default locale. - * By default (without calling this function), ICU's default locale will be based - * on information obtained from the underlying system environment. - * <p> - * Changes to ICU's default locale do not propagate back to the - * system environment. - * <p> - * Changes to ICU's default locale do not affect any ICU services that - * may already be open based on the previous default locale value. - * - * @param localeID the new ICU default locale. A value of NULL will try to get - * the system's default locale. 
- * @param status the error information if the setting of default locale fails - * @system - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -uloc_setDefault(const char* localeID, - UErrorCode* status); -#endif /* U_HIDE_SYSTEM_API */ - -/** - * Gets the language code for the specified locale. - * - * @param localeID the locale to get the ISO language code with - * @param language the language code for localeID - * @param languageCapacity the size of the language buffer to store the - * language code with - * @param err error information if retrieving the language code failed - * @return the actual buffer size needed for the language code. If it's greater - * than languageCapacity, the returned language code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getLanguage(const char* localeID, - char* language, - int32_t languageCapacity, - UErrorCode* err); - -/** - * Gets the script code for the specified locale. - * - * @param localeID the locale to get the ISO script code with - * @param script the script code for localeID - * @param scriptCapacity the size of the script buffer to store the - * script code with - * @param err error information if retrieving the script code failed - * @return the actual buffer size needed for the script code. If it's greater - * than scriptCapacity, the returned script code will be truncated. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getScript(const char* localeID, - char* script, - int32_t scriptCapacity, - UErrorCode* err); - -/** - * Gets the country code for the specified locale. - * - * @param localeID the locale to get the country code with - * @param country the country code for localeID - * @param countryCapacity the size of the country buffer to store the - * country code with - * @param err error information if retrieving the country code failed - * @return the actual buffer size needed for the country code. 
If it's greater - * than countryCapacity, the returned country code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getCountry(const char* localeID, - char* country, - int32_t countryCapacity, - UErrorCode* err); - -/** - * Gets the variant code for the specified locale. - * - * @param localeID the locale to get the variant code with - * @param variant the variant code for localeID - * @param variantCapacity the size of the variant buffer to store the - * variant code with - * @param err error information if retrieving the variant code failed - * @return the actual buffer size needed for the variant code. If it's greater - * than variantCapacity, the returned variant code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getVariant(const char* localeID, - char* variant, - int32_t variantCapacity, - UErrorCode* err); - - -/** - * Gets the full name for the specified locale. - * Note: This has the effect of 'canonicalizing' the ICU locale ID to - * a certain extent. Upper and lower case are set as needed. - * It does NOT map aliased names in any way. - * See the top of this header file. - * This API supports preflighting. - * - * @param localeID the locale to get the full name with - * @param name fill in buffer for the name without keywords. - * @param nameCapacity capacity of the fill in buffer. - * @param err error information if retrieving the full name failed - * @return the actual buffer size needed for the full name. If it's greater - * than nameCapacity, the returned full name will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getName(const char* localeID, - char* name, - int32_t nameCapacity, - UErrorCode* err); - -/** - * Gets the full name for the specified locale. - * Note: This has the effect of 'canonicalizing' the string to - * a certain extent. Upper and lower case are set as needed, - * and if the components were in 'POSIX' format they are changed to - * ICU format. 
It does NOT map aliased names in any way. - * See the top of this header file. - * - * @param localeID the locale to get the full name with - * @param name the full name for localeID - * @param nameCapacity the size of the name buffer to store the - * full name with - * @param err error information if retrieving the full name failed - * @return the actual buffer size needed for the full name. If it's greater - * than nameCapacity, the returned full name will be truncated. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_canonicalize(const char* localeID, - char* name, - int32_t nameCapacity, - UErrorCode* err); - -/** - * Gets the ISO language code for the specified locale. - * - * @param localeID the locale to get the ISO language code with - * @return language the ISO language code for localeID - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -uloc_getISO3Language(const char* localeID); - - -/** - * Gets the ISO country code for the specified locale. - * - * @param localeID the locale to get the ISO country code with - * @return country the ISO country code for localeID - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -uloc_getISO3Country(const char* localeID); - -/** - * Gets the Win32 LCID value for the specified locale. - * If the ICU locale is not recognized by Windows, 0 will be returned. - * - * @param localeID the locale to get the Win32 LCID value with - * @return country the Win32 LCID for localeID - * @stable ICU 2.0 - */ -U_STABLE uint32_t U_EXPORT2 -uloc_getLCID(const char* localeID); - -/** - * Gets the language name suitable for display for the specified locale. - * - * @param locale the locale to get the ISO language code with - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". 
- * @param language the displayable language code for localeID - * @param languageCapacity the size of the language buffer to store the - * displayable language code with - * @param status error information if retrieving the displayable language code failed - * @return the actual buffer size needed for the displayable language code. If it's greater - * than languageCapacity, the returned language code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayLanguage(const char* locale, - const char* displayLocale, - UChar* language, - int32_t languageCapacity, - UErrorCode* status); - -/** - * Gets the script name suitable for display for the specified locale. - * - * @param locale the locale to get the displayable script code with. NULL may be used to specify the default. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "", while passing Locale::getGerman() - * for inLocale would result in "". NULL may be used to specify the default. - * @param script the displayable country code for localeID - * @param scriptCapacity the size of the script buffer to store the - * displayable script code with - * @param status error information if retrieving the displayable script code failed - * @return the actual buffer size needed for the displayable script code. If it's greater - * than scriptCapacity, the returned displayable script code will be truncated. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayScript(const char* locale, - const char* displayLocale, - UChar* script, - int32_t scriptCapacity, - UErrorCode* status); - -/** - * Gets the country name suitable for display for the specified locale. - * - * @param locale the locale to get the displayable country code with. NULL may be used to specify the default. 
- * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param country the displayable country code for localeID - * @param countryCapacity the size of the country buffer to store the - * displayable country code with - * @param status error information if retrieving the displayable country code failed - * @return the actual buffer size needed for the displayable country code. If it's greater - * than countryCapacity, the returned displayable country code will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayCountry(const char* locale, - const char* displayLocale, - UChar* country, - int32_t countryCapacity, - UErrorCode* status); - - -/** - * Gets the variant name suitable for display for the specified locale. - * - * @param locale the locale to get the displayable variant code with. NULL may be used to specify the default. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param variant the displayable variant code for localeID - * @param variantCapacity the size of the variant buffer to store the - * displayable variant code with - * @param status error information if retrieving the displayable variant code failed - * @return the actual buffer size needed for the displayable variant code. If it's greater - * than variantCapacity, the returned displayable variant code will be truncated. 
- * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayVariant(const char* locale, - const char* displayLocale, - UChar* variant, - int32_t variantCapacity, - UErrorCode* status); - -/** - * Gets the keyword name suitable for display for the specified locale. - * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display - * string for the keyword collation. - * Usage: - * <code> - * UErrorCode status = U_ZERO_ERROR; - * const char* keyword =NULL; - * int32_t keywordLen = 0; - * int32_t keywordCount = 0; - * UChar displayKeyword[256]; - * int32_t displayKeywordLen = 0; - * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status); - * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){ - * if(U_FAILURE(status)){ - * ...something went wrong so handle the error... - * break; - * } - * // the uenum_next returns NUL terminated string - * keyword = uenum_next(keywordEnum, &keywordLen, &status); - * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256); - * ... do something interesting ..... - * } - * uenum_close(keywordEnum); - * </code> - * @param keyword The keyword whose display string needs to be returned. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param dest the buffer to which the displayable keyword should be written. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param status error information if retrieving the displayable string failed. 
- * Should not be NULL and should not indicate failure on entry. - * @return the actual buffer size needed for the displayable variant code. - * @see #uloc_openKeywords - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayKeyword(const char* keyword, - const char* displayLocale, - UChar* dest, - int32_t destCapacity, - UErrorCode* status); -/** - * Gets the value of the keyword suitable for display for the specified locale. - * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display - * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword. - * - * @param locale The locale to get the displayable variant code with. NULL may be used to specify the default. - * @param keyword The keyword for whose value should be used. - * @param displayLocale Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param dest the buffer to which the displayable keyword should be written. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param status error information if retrieving the displayable string failed. - * Should not be NULL and must not indicate failure on entry. - * @return the actual buffer size needed for the displayable variant code. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayKeywordValue( const char* locale, - const char* keyword, - const char* displayLocale, - UChar* dest, - int32_t destCapacity, - UErrorCode* status); -/** - * Gets the full name suitable for display for the specified locale. 
- * - * @param localeID the locale to get the displayable name with. NULL may be used to specify the default. - * @param inLocaleID Specifies the locale to be used to display the name. In other words, - * if the locale's language code is "en", passing Locale::getFrench() for - * inLocale would result in "Anglais", while passing Locale::getGerman() - * for inLocale would result in "Englisch". NULL may be used to specify the default. - * @param result the displayable name for localeID - * @param maxResultSize the size of the name buffer to store the - * displayable full name with - * @param err error information if retrieving the displayable name failed - * @return the actual buffer size needed for the displayable name. If it's greater - * than maxResultSize, the returned displayable name will be truncated. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getDisplayName(const char* localeID, - const char* inLocaleID, - UChar* result, - int32_t maxResultSize, - UErrorCode* err); - - -/** - * Gets the specified locale from a list of all available locales. - * The return value is a pointer to an item of - * a locale name array. Both this array and the pointers - * it contains are owned by ICU and should not be deleted or written through - * by the caller. The locale name is terminated by a null pointer. - * @param n the specific locale name index of the available locale list - * @return a specified locale name of all available locales - * @stable ICU 2.0 - */ -U_STABLE const char* U_EXPORT2 -uloc_getAvailable(int32_t n); - -/** - * Gets the size of the all available locale list. - * - * @return the size of the locale list - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 uloc_countAvailable(void); - -/** - * - * Gets a list of all available 2-letter language codes defined in ISO 639, - * plus additional 3-letter codes determined to be useful for locale generation as - * defined by Unicode CLDR. 
This is a pointer - * to an array of pointers to arrays of char. All of these pointers are owned - * by ICU-- do not delete them, and do not write through them. The array is - * terminated with a null pointer. - * @return a list of all available language codes - * @stable ICU 2.0 - */ -U_STABLE const char* const* U_EXPORT2 -uloc_getISOLanguages(void); - -/** - * - * Gets a list of all available 2-letter country codes defined in ISO 3166. This is a - * pointer to an array of pointers to arrays of char. All of these pointers are - * owned by ICU-- do not delete them, and do not write through them. The array is - * terminated with a null pointer. - * @return a list of all available country codes - * @stable ICU 2.0 - */ -U_STABLE const char* const* U_EXPORT2 -uloc_getISOCountries(void); - -/** - * Truncate the locale ID string to get the parent locale ID. - * Copies the part of the string before the last underscore. - * The parent locale ID will be an empty string if there is no - * underscore, or if there is only one underscore at localeID[0]. - * - * @param localeID Input locale ID string. - * @param parent Output string buffer for the parent locale ID. - * @param parentCapacity Size of the output buffer. - * @param err A UErrorCode value. - * @return The length of the parent locale ID. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getParent(const char* localeID, - char* parent, - int32_t parentCapacity, - UErrorCode* err); - - - - -/** - * Gets the full name for the specified locale, like uloc_getName(), - * but without keywords. - * - * Note: This has the effect of 'canonicalizing' the string to - * a certain extent. Upper and lower case are set as needed, - * and if the components were in 'POSIX' format they are changed to - * ICU format. It does NOT map aliased names in any way. - * See the top of this header file. - * - * This API strips off the keyword part, so "de_DE\@collation=phonebook" - * will become "de_DE". 
- * This API supports preflighting. - * - * @param localeID the locale to get the full name with - * @param name fill in buffer for the name without keywords. - * @param nameCapacity capacity of the fill in buffer. - * @param err error information if retrieving the full name failed - * @return the actual buffer size needed for the full name. If it's greater - * than nameCapacity, the returned full name will be truncated. - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getBaseName(const char* localeID, - char* name, - int32_t nameCapacity, - UErrorCode* err); - -/** - * Gets an enumeration of keywords for the specified locale. Enumeration - * must get disposed of by the client using uenum_close function. - * - * @param localeID the locale to get the variant code with - * @param status error information if retrieving the keywords failed - * @return enumeration of keywords or NULL if there are no keywords. - * @stable ICU 2.8 - */ -U_STABLE UEnumeration* U_EXPORT2 -uloc_openKeywords(const char* localeID, - UErrorCode* status); - -/** - * Get the value for a keyword. Locale name does not need to be normalized. - * - * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK") - * @param keywordName name of the keyword for which we want the value. Case insensitive. - * @param buffer receiving buffer - * @param bufferCapacity capacity of receiving buffer - * @param status containing error code - buffer not big enough. - * @return the length of keyword value - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getKeywordValue(const char* localeID, - const char* keywordName, - char* buffer, int32_t bufferCapacity, - UErrorCode* status); - - -/** - * Sets or removes the value of the specified keyword. - * - * For removing all keywords, use uloc_getBaseName(). - * - * NOTE: Unlike almost every other ICU function which takes a - * buffer, this function will NOT truncate the output text. 
If a - * BUFFER_OVERFLOW_ERROR is received, it means that the original - * buffer is untouched. This is done to prevent incorrect or possibly - * even malformed locales from being generated and used. - * - * @param keywordName name of the keyword to be set. Case insensitive. - * @param keywordValue value of the keyword to be set. If 0-length or - * NULL, will result in the keyword being removed. No error is given if - * that keyword does not exist. - * @param buffer input buffer containing locale to be modified. - * @param bufferCapacity capacity of receiving buffer - * @param status containing error code - buffer not big enough. - * @return the length needed for the buffer - * @see uloc_getKeywordValue - * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_setKeywordValue(const char* keywordName, - const char* keywordValue, - char* buffer, int32_t bufferCapacity, - UErrorCode* status); - -/** - * enums for the return value for the character and line orientation - * functions. - * @stable ICU 4.0 - */ -typedef enum { - ULOC_LAYOUT_LTR = 0, /* left-to-right. */ - ULOC_LAYOUT_RTL = 1, /* right-to-left. */ - ULOC_LAYOUT_TTB = 2, /* top-to-bottom. */ - ULOC_LAYOUT_BTT = 3, /* bottom-to-top. */ - ULOC_LAYOUT_UNKNOWN -} ULayoutType; - -/** - * Get the layout character orientation for the specified locale. - * - * @param localeId locale name - * @param status Error status - * @return an enum indicating the layout orientation for characters. - * @stable ICU 4.0 - */ -U_STABLE ULayoutType U_EXPORT2 -uloc_getCharacterOrientation(const char* localeId, - UErrorCode *status); - -/** - * Get the layout line orientation for the specified locale. - * - * @param localeId locale name - * @param status Error status - * @return an enum indicating the layout orientation for lines. 
- * @stable ICU 4.0 - */ -U_STABLE ULayoutType U_EXPORT2 -uloc_getLineOrientation(const char* localeId, - UErrorCode *status); - -/** - * enums for the 'outResult' parameter return value - * @see uloc_acceptLanguageFromHTTP - * @see uloc_acceptLanguage - * @stable ICU 3.2 - */ -typedef enum { - ULOC_ACCEPT_FAILED = 0, /* No exact match was found. */ - ULOC_ACCEPT_VALID = 1, /* An exact match was found. */ - ULOC_ACCEPT_FALLBACK = 2 /* A fallback was found, for example, - Accept list contained 'ja_JP' - which matched available locale 'ja'. */ -} UAcceptResult; - - -/** - * Based on a HTTP header from a web browser and a list of available locales, - * determine an acceptable locale for the user. - * @param result - buffer to accept the result locale - * @param resultAvailable the size of the result buffer. - * @param outResult - An out parameter that contains the fallback status - * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP. - * @param availableLocales - list of available locales to match - * @param status Error status, may be BUFFER_OVERFLOW_ERROR - * @return length needed for the locale. - * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, - UAcceptResult *outResult, - const char *httpAcceptLanguage, - UEnumeration* availableLocales, - UErrorCode *status); - -/** - * Based on a list of available locales, - * determine an acceptable locale for the user. - * @param result - buffer to accept the result locale - * @param resultAvailable the size of the result buffer. - * @param outResult - An out parameter that contains the fallback status - * @param acceptList - list of acceptable languages - * @param acceptListCount - count of acceptList items - * @param availableLocales - list of available locales to match - * @param status Error status, may be BUFFER_OVERFLOW_ERROR - * @return length needed for the locale. 
- * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_acceptLanguage(char *result, int32_t resultAvailable, - UAcceptResult *outResult, const char **acceptList, - int32_t acceptListCount, - UEnumeration* availableLocales, - UErrorCode *status); - - -/** - * Gets the ICU locale ID for the specified Win32 LCID value. - * - * @param hostID the Win32 LCID to translate - * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated - * if there is room. - * @param localeCapacity the size of the output buffer - * @param status an error is returned if the LCID is unrecognized or the output buffer - * is too small - * @return actual the actual size of the locale ID, not including NUL-termination - * @stable ICU 3.8 - */ -U_STABLE int32_t U_EXPORT2 -uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity, - UErrorCode *status); - - -/** - * Add the likely subtags for a provided locale ID, per the algorithm described - * in the following CLDR technical report: - * - * http://www.unicode.org/reports/tr35/#Likely_Subtags - * - * If localeID is already in the maximal form, or there is no data available - * for maximization, it will be copied to the output buffer. For example, - * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. - * - * Examples: - * - * "en" maximizes to "en_Latn_US" - * - * "de" maximizes to "de_Latn_DE" - * - * "sr" maximizes to "sr_Cyrl_RS" - * - * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) - * - * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) - * - * @param localeID The locale to maximize - * @param maximizedLocaleID The maximized locale - * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer - * @param err Error information if maximizing the locale failed. 
If the length - * of the localeID and the null-terminator is greater than the maximum allowed size, - * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. - * @return The actual buffer size needed for the maximized locale. If it's - * greater than maximizedLocaleIDCapacity, the returned ID will be truncated. - * On error, the return value is -1. - * @stable ICU 4.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_addLikelySubtags(const char* localeID, - char* maximizedLocaleID, - int32_t maximizedLocaleIDCapacity, - UErrorCode* err); - - -/** - * Minimize the subtags for a provided locale ID, per the algorithm described - * in the following CLDR technical report: - * - * http://www.unicode.org/reports/tr35/#Likely_Subtags - * - * If localeID is already in the minimal form, or there is no data available - * for minimization, it will be copied to the output buffer. Since the - * minimization algorithm relies on proper maximization, see the comments - * for uloc_addLikelySubtags for reasons why there might not be any data. - * - * Examples: - * - * "en_Latn_US" minimizes to "en" - * - * "de_Latn_DE" minimizes to "de" - * - * "sr_Cyrl_RS" minimizes to "sr" - * - * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the - * script, and minimizing to "zh" would imply "zh_Hans_CN".) - * - * @param localeID The locale to minimize - * @param minimizedLocaleID The minimized locale - * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer - * @param err Error information if minimizing the locale failed. If the length - * of the localeID and the null-terminator is greater than the maximum allowed size, - * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. - * @return The actual buffer size needed for the minimized locale. If it's - * greater than minimizedLocaleIDCapacity, the returned ID will be truncated. - * On error, the return value is -1. 
- * @stable ICU 4.0 - */ -U_STABLE int32_t U_EXPORT2 -uloc_minimizeSubtags(const char* localeID, - char* minimizedLocaleID, - int32_t minimizedLocaleIDCapacity, - UErrorCode* err); - -/** - * Returns a locale ID for the specified BCP47 language tag string. - * If the specified language tag contains any ill-formed subtags, - * the first such subtag and all following subtags are ignored. - * <p> - * This implements the 'Language-Tag' production of BCP47, and so - * supports grandfathered (regular and irregular) as well as private - * use language tags. Private use tags are represented as 'x-whatever', - * and grandfathered tags are converted to their canonical replacements - * where they exist. Note that a few grandfathered tags have no modern - * replacement; these will be converted using the fallback described in - * the first paragraph, so some information might be lost. - * @param langtag the input BCP47 language tag. - * @param localeID the output buffer receiving a locale ID for the - * specified BCP47 language tag. - * @param localeIDCapacity the size of the locale ID output buffer. - * @param parsedLength if not NULL, successfully parsed length - * for the input language tag is set. - * @param err error information if receiving the locale ID - * failed. - * @return the length of the locale ID. - * @stable ICU 4.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_forLanguageTag(const char* langtag, - char* localeID, - int32_t localeIDCapacity, - int32_t* parsedLength, - UErrorCode* err); - -/** - * Returns a well-formed language tag for this locale ID. - * <p> - * <b>Note</b>: When <code>strict</code> is FALSE, any locale - * fields which do not satisfy the BCP47 syntax requirement will - * be omitted from the result. When <code>strict</code> is - * TRUE, this function sets <code>err</code> to - * U_ILLEGAL_ARGUMENT_ERROR if any locale fields do not satisfy the - * BCP47 syntax requirement. 
- * @param localeID the input locale ID - * @param langtag the output buffer receiving BCP47 language - * tag for the locale ID. - * @param langtagCapacity the size of the BCP47 language tag - * output buffer. - * @param strict boolean value indicating if the function returns - * an error for an ill-formed input locale ID. - * @param err error information if receiving the language - * tag failed. - * @return The length of the BCP47 language tag. - * @stable ICU 4.2 - */ -U_STABLE int32_t U_EXPORT2 -uloc_toLanguageTag(const char* localeID, - char* langtag, - int32_t langtagCapacity, - UBool strict, - UErrorCode* err); - -#endif /*_ULOC*/ diff --git a/Source/WTF/icu/unicode/umachine.h b/Source/WTF/icu/unicode/umachine.h deleted file mode 100644 index d1102f493..000000000 --- a/Source/WTF/icu/unicode/umachine.h +++ /dev/null @@ -1,322 +0,0 @@ -/* -****************************************************************************** -* -* Copyright (C) 1999-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: umachine.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999sep13 -* created by: Markus W. Scherer -* -* This file defines basic types and constants for utf.h to be -* platform-independent. umachine.h and utf.h are included into -* utypes.h to provide all the general definitions for ICU. -* All of these definitions used to be in utypes.h before -* the UTF-handling macros made this unmaintainable. -*/ - -#ifndef __UMACHINE_H__ -#define __UMACHINE_H__ - - -/** - * \file - * \brief Basic types and constants for UTF - * - * <h2> Basic types and constants for UTF </h2> - * This file defines basic types and constants for utf.h to be - * platform-independent. umachine.h and utf.h are included into - * utypes.h to provide all the general definitions for ICU. 
- * All of these definitions used to be in utypes.h before - * the UTF-handling macros made this unmaintainable. - * - */ -/*==========================================================================*/ -/* Include platform-dependent definitions */ -/* which are contained in the platform-specific file platform.h */ -/*==========================================================================*/ - -#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */ - -/* - * ANSI C headers: - * stddef.h defines wchar_t - */ -#include <stddef.h> - -/*==========================================================================*/ -/* For C wrappers, we use the symbol U_STABLE. */ -/* This works properly if the includer is C or C++. */ -/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */ -/*==========================================================================*/ - -/** - * \def U_CFUNC - * This is used in a declaration of a library private ICU C function. - * @stable ICU 2.4 - */ - -/** - * \def U_CDECL_BEGIN - * This is used to begin a declaration of a library private ICU C API. 
- * @stable ICU 2.4 - */ - -/** - * \def U_CDECL_END - * This is used to end a declaration of a library private ICU C API - * @stable ICU 2.4 - */ - -#ifdef __cplusplus -# define U_CFUNC extern "C" -# define U_CDECL_BEGIN extern "C" { -# define U_CDECL_END } -#else -# define U_CFUNC extern -# define U_CDECL_BEGIN -# define U_CDECL_END -#endif - -#ifndef U_ATTRIBUTE_DEPRECATED -/** - * \def U_ATTRIBUTE_DEPRECATED - * This is used for GCC specific attributes - * @internal - */ -#if U_GCC_MAJOR_MINOR >= 302 -# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated)) -/** - * \def U_ATTRIBUTE_DEPRECATED - * This is used for Visual C++ specific attributes - * @internal - */ -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated) -#else -# define U_ATTRIBUTE_DEPRECATED -#endif -#endif - -/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ -#define U_CAPI U_CFUNC U_EXPORT -/** This is used to declare a function as a stable public ICU C API*/ -#define U_STABLE U_CAPI -/** This is used to declare a function as a draft public ICU C API */ -#define U_DRAFT U_CAPI -/** This is used to declare a function as a deprecated public ICU C API */ -#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED -/** This is used to declare a function as an obsolete public ICU C API */ -#define U_OBSOLETE U_CAPI -/** This is used to declare a function as an internal ICU C API */ -#define U_INTERNAL U_CAPI - -/*==========================================================================*/ -/* limits for int32_t etc., like in POSIX inttypes.h */ -/*==========================================================================*/ - -#ifndef INT8_MIN -/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ -# define INT8_MIN ((int8_t)(-128)) -#endif -#ifndef INT16_MIN -/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ -# define INT16_MIN ((int16_t)(-32767-1)) -#endif -#ifndef INT32_MIN -/** 
The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ -# define INT32_MIN ((int32_t)(-2147483647-1)) -#endif - -#ifndef INT8_MAX -/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ -# define INT8_MAX ((int8_t)(127)) -#endif -#ifndef INT16_MAX -/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ -# define INT16_MAX ((int16_t)(32767)) -#endif -#ifndef INT32_MAX -/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ -# define INT32_MAX ((int32_t)(2147483647)) -#endif - -#ifndef UINT8_MAX -/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ -# define UINT8_MAX ((uint8_t)(255U)) -#endif -#ifndef UINT16_MAX -/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ -# define UINT16_MAX ((uint16_t)(65535U)) -#endif -#ifndef UINT32_MAX -/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ -# define UINT32_MAX ((uint32_t)(4294967295U)) -#endif - -#if defined(U_INT64_T_UNAVAILABLE) -# error int64_t is required for decimal format and rule-based number format. -#else -# ifndef INT64_C -/** - * Provides a platform independent way to specify a signed 64-bit integer constant. - * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C - * @stable ICU 2.8 - */ -# define INT64_C(c) c ## LL -# endif -# ifndef UINT64_C -/** - * Provides a platform independent way to specify an unsigned 64-bit integer constant. 
- * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C - * @stable ICU 2.8 - */ -# define UINT64_C(c) c ## ULL -# endif -# ifndef U_INT64_MIN -/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ -# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) -# endif -# ifndef U_INT64_MAX -/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ -# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) -# endif -# ifndef U_UINT64_MAX -/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ -# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) -# endif -#endif - -/*==========================================================================*/ -/* Boolean data type */ -/*==========================================================================*/ - -/** The ICU boolean type @stable ICU 2.0 */ -typedef int8_t UBool; - -#ifndef TRUE -/** The TRUE value of a UBool @stable ICU 2.0 */ -# define TRUE 1 -#endif -#ifndef FALSE -/** The FALSE value of a UBool @stable ICU 2.0 */ -# define FALSE 0 -#endif - - -/*==========================================================================*/ -/* Unicode data types */ -/*==========================================================================*/ - -/* wchar_t-related definitions -------------------------------------------- */ - -/* - * \def U_WCHAR_IS_UTF16 - * Defined if wchar_t uses UTF-16. - * - * @stable ICU 2.0 - */ -/* - * \def U_WCHAR_IS_UTF32 - * Defined if wchar_t uses UTF-32. 
- * - * @stable ICU 2.0 - */ -#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) -# ifdef __STDC_ISO_10646__ -# if (U_SIZEOF_WCHAR_T==2) -# define U_WCHAR_IS_UTF16 -# elif (U_SIZEOF_WCHAR_T==4) -# define U_WCHAR_IS_UTF32 -# endif -# elif defined __UCS2__ -# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2) -# define U_WCHAR_IS_UTF16 -# endif -# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__)) -# if (U_SIZEOF_WCHAR_T==4) -# define U_WCHAR_IS_UTF32 -# endif -# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED) -# define U_WCHAR_IS_UTF32 -# elif U_PLATFORM_HAS_WIN32_API -# define U_WCHAR_IS_UTF16 -# endif -#endif - -/* UChar and UChar32 definitions -------------------------------------------- */ - -/** Number of bytes in a UChar. @stable ICU 2.0 */ -#define U_SIZEOF_UCHAR 2 - -/** - * \var UChar - * Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), - * or wchar_t if that is 16 bits wide; always assumed to be unsigned. - * If neither is available, then define UChar to be uint16_t. - * - * This makes the definition of UChar platform-dependent - * but allows direct string type compatibility with platforms with - * 16-bit wchar_t types. - * - * @stable ICU 4.4 - */ -#if defined(UCHAR_TYPE) - typedef UCHAR_TYPE UChar; -/* Not #elif U_HAVE_CHAR16_T -- because that is type-incompatible with pre-C++11 callers - typedef char16_t UChar; */ -#elif U_SIZEOF_WCHAR_T==2 - typedef wchar_t UChar; -#elif defined(__CHAR16_TYPE__) - typedef __CHAR16_TYPE__ UChar; -#else - typedef uint16_t UChar; -#endif - -/** - * Define UChar32 as a type for single Unicode code points. - * UChar32 is a signed 32-bit integer (same as int32_t). - * - * The Unicode code point range is 0..0x10ffff. - * All other values (negative or >=0x110000) are illegal as Unicode code points. 
- * They may be used as sentinel values to indicate "done", "error" - * or similar non-code point conditions. - * - * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined - * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) - * or else to be uint32_t. - * That is, the definition of UChar32 was platform-dependent. - * - * @see U_SENTINEL - * @stable ICU 2.4 - */ -typedef int32_t UChar32; - -/** - * This value is intended for sentinel values for APIs that - * (take or) return single code points (UChar32). - * It is outside of the Unicode code point range 0..0x10ffff. - * - * For example, a "done" or "error" value in a new API - * could be indicated with U_SENTINEL. - * - * ICU APIs designed before ICU 2.4 usually define service-specific "done" - * values, mostly 0xffff. - * Those may need to be distinguished from - * actual U+ffff text contents by calling functions like - * CharacterIterator::hasNext() or UnicodeString::length(). - * - * @return -1 - * @see UChar32 - * @stable ICU 2.4 - */ -#define U_SENTINEL (-1) - -#include "unicode/urename.h" - -#endif diff --git a/Source/WTF/icu/unicode/unistr.h b/Source/WTF/icu/unicode/unistr.h deleted file mode 100644 index c6e8b4466..000000000 --- a/Source/WTF/icu/unicode/unistr.h +++ /dev/null @@ -1,4470 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 1998-2013, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File unistr.h -* -* Modification History: -* -* Date Name Description -* 09/25/98 stephen Creation. -* 11/11/98 stephen Changed per 11/9 code review. -* 04/20/99 stephen Overhauled per 4/16 code review. -* 11/18/99 aliu Made to inherit from Replaceable. Added method -* handleReplaceBetween(); other methods unchanged. -* 06/25/01 grhoten Remove dependency on iostream. 
-****************************************************************************** -*/ - -#ifndef UNISTR_H -#define UNISTR_H - -/** - * \file - * \brief C++ API: Unicode String - */ - -#include "unicode/utypes.h" -#include "unicode/rep.h" -#include "unicode/std_string.h" -#include "unicode/stringpiece.h" -#include "unicode/bytestream.h" -#include "unicode/ucasemap.h" - -struct UConverter; // unicode/ucnv.h -class StringThreadTest; - -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also ustring.h and unorm.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - -#ifndef USTRING_H -/** - * \ingroup ustring_ustrlen - */ -U_STABLE int32_t U_EXPORT2 -u_strlen(const UChar *s); -#endif - -/** - * \def U_STRING_CASE_MAPPER_DEFINED - * @internal - */ -#ifndef U_STRING_CASE_MAPPER_DEFINED -#define U_STRING_CASE_MAPPER_DEFINED - -/** - * Internal string case mapping function type. - * @internal - */ -typedef int32_t U_CALLCONV -UStringCaseMapper(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - -#endif - -U_NAMESPACE_BEGIN - -class BreakIterator; // unicode/brkiter.h -class Locale; // unicode/locid.h -class StringCharacterIterator; -class UnicodeStringAppendable; // unicode/appendable.h - -/* The <iostream> include has been moved to unicode/ustream.h */ - -/** - * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor - * which constructs a Unicode string from an invariant-character char * string. - * About invariant characters see utypes.h. - * This constructor has no runtime dependency on conversion code and is - * therefore recommended over ones taking a charset name string - * (where the empty string "" indicates invariant-character conversion). 
- * - * @stable ICU 3.2 - */ -#define US_INV icu::UnicodeString::kInvariant - -/** - * Unicode String literals in C++. - * Dependent on the platform properties, different UnicodeString - * constructors should be used to create a UnicodeString object from - * a string literal. - * The macros are defined for maximum performance. - * They work only for strings that contain "invariant characters", i.e., - * only latin letters, digits, and some punctuation. - * See utypes.h for details. - * - * The string parameter must be a C string literal. - * The length of the string, not including the terminating - * <code>NUL</code>, must be specified as a constant. - * The U_STRING_DECL macro should be invoked exactly once for one - * such string variable before it is used. - * @stable ICU 2.0 - */ -#if defined(U_DECLARE_UTF16) -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) -#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) -#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) -#else -# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) -#endif - -/** - * Unicode String literals in C++. - * Dependent on the platform properties, different UnicodeString - * constructors should be used to create a UnicodeString object from - * a string literal. - * The macros are defined for improved performance. - * They work only for strings that contain "invariant characters", i.e., - * only latin letters, digits, and some punctuation. - * See utypes.h for details. - * - * The string parameter must be a C string literal. 
- * @stable ICU 2.0 - */ -#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) - -/** - * \def UNISTR_FROM_CHAR_EXPLICIT - * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) - * constructors are marked as explicit, preventing their inadvertent use. - * @stable ICU 49 - */ -#ifndef UNISTR_FROM_CHAR_EXPLICIT -# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) - // Auto-"explicit" in ICU library code. -# define UNISTR_FROM_CHAR_EXPLICIT explicit -# else - // Empty by default for source code compatibility. -# define UNISTR_FROM_CHAR_EXPLICIT -# endif -#endif - -/** - * \def UNISTR_FROM_STRING_EXPLICIT - * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) - * constructors are marked as explicit, preventing their inadvertent use. - * - * In particular, this helps prevent accidentally depending on ICU conversion code - * by passing a string literal into an API with a const UnicodeString & parameter. - * @stable ICU 49 - */ -#ifndef UNISTR_FROM_STRING_EXPLICIT -# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) - // Auto-"explicit" in ICU library code. -# define UNISTR_FROM_STRING_EXPLICIT explicit -# else - // Empty by default for source code compatibility. -# define UNISTR_FROM_STRING_EXPLICIT -# endif -#endif - -/** - * UnicodeString is a string class that stores Unicode characters directly and provides - * similar functionality as the Java String and StringBuffer classes. - * It is a concrete implementation of the abstract class Replaceable (for transliteration). - * - * The UnicodeString class is not suitable for subclassing. 
- * - * <p>For an overview of Unicode strings in C and C++ see the - * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> - * - * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. - * A Unicode character may be stored with either one code unit - * (the most common case) or with a matched pair of special code units - * ("surrogates"). The data type for code units is UChar. - * For single-character handling, a Unicode character code <em>point</em> is a value - * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p> - * - * <p>Indexes and offsets into and lengths of strings always count code units, not code points. - * This is the same as with multi-byte char* strings in traditional string handling. - * Operations on partial strings typically do not test for code point boundaries. - * If necessary, the user needs to take care of such boundaries by testing for the code unit - * values or by using functions like - * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() - * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> - * - * UnicodeString methods are more lenient with regard to input parameter values - * than other ICU APIs. In particular: - * - If indexes are out of bounds for a UnicodeString object - * (<0 or >length()) then they are "pinned" to the nearest boundary. - * - If primitive string pointer values (e.g., const UChar * or char *) - * for input strings are NULL, then those input string parameters are treated - * as if they pointed to an empty string. - * However, this is <em>not</em> the case for char * parameters for charset names - * or other IDs. 
- * - Most UnicodeString methods do not take a UErrorCode parameter because - * there are usually very few opportunities for failure other than a shortage - * of memory, error codes in low-level C++ string methods would be inconvenient, - * and the error code as the last parameter (ICU convention) would prevent - * the use of default parameter values. - * Instead, such methods set the UnicodeString into a "bogus" state - * (see isBogus()) if an error occurs. - * - * In string comparisons, two UnicodeString objects that are both "bogus" - * compare equal (to be transitive and prevent endless loops in sorting), - * and a "bogus" string compares less than any non-"bogus" one. - * - * Const UnicodeString methods are thread-safe. Multiple threads can use - * const methods on the same UnicodeString object simultaneously, - * but non-const methods must not be called concurrently (in multiple threads) - * with any other (const or non-const) methods. - * - * Similarly, const UnicodeString & parameters are thread-safe. - * One object may be passed in as such a parameter concurrently in multiple threads. - * This includes the const UnicodeString & parameters for - * copy construction, assignment, and cloning. - * - * <p>UnicodeString uses several storage methods. - * String contents can be stored inside the UnicodeString object itself, - * in an allocated and shared buffer, or in an outside buffer that is "aliased". - * Most of this is done transparently, but careful aliasing in particular provides - * significant performance improvements. - * Also, the internal buffer is accessible via special functions. 
- * For details see the - * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p> - * - * @see utf.h - * @see CharacterIterator - * @stable ICU 2.0 - */ -class U_COMMON_API UnicodeString : public Replaceable -{ -public: - - /** - * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor - * which constructs a Unicode string from an invariant-character char * string. - * Use the macro US_INV instead of the full qualification for this value. - * - * @see US_INV - * @stable ICU 3.2 - */ - enum EInvariant { - /** - * @see EInvariant - * @stable ICU 3.2 - */ - kInvariant - }; - - //======================================== - // Read-only operations - //======================================== - - /* Comparison - bitwise only - for international comparison use collation */ - - /** - * Equality operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return TRUE if <TT>text</TT> contains the same characters as this one, - * FALSE otherwise. - * @stable ICU 2.0 - */ - inline UBool operator== (const UnicodeString& text) const; - - /** - * Inequality operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return FALSE if <TT>text</TT> contains the same characters as this one, - * TRUE otherwise. - * @stable ICU 2.0 - */ - inline UBool operator!= (const UnicodeString& text) const; - - /** - * Greater than operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return TRUE if the characters in this are bitwise - * greater than the characters in <code>text</code>, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool operator> (const UnicodeString& text) const; - - /** - * Less than operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. 
- * @return TRUE if the characters in this are bitwise - * less than the characters in <code>text</code>, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool operator< (const UnicodeString& text) const; - - /** - * Greater than or equal operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return TRUE if the characters in this are bitwise - * greater than or equal to the characters in <code>text</code>, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool operator>= (const UnicodeString& text) const; - - /** - * Less than or equal operator. Performs only bitwise comparison. - * @param text The UnicodeString to compare to this one. - * @return TRUE if the characters in this are bitwise - * less than or equal to the characters in <code>text</code>, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool operator<= (const UnicodeString& text) const; - - /** - * Compare the characters bitwise in this UnicodeString to - * the characters in <code>text</code>. - * @param text The UnicodeString to compare to this one. - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as <code>text</code>, -1 if the characters in - * this are bitwise less than the characters in <code>text</code>, +1 if the - * characters in this are bitwise greater than the characters - * in <code>text</code>. - * @stable ICU 2.0 - */ - inline int8_t compare(const UnicodeString& text) const; - - /** - * Compare the characters bitwise in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the characters - * in the <b>entire string</b> <TT>text</TT>. - * (The parameters "start" and "length" are not applied to the other text "text".) - * @param start the offset at which the compare operation begins - * @param length the number of characters of text to compare. - * @param text the other text to be compared against this string. 
- * @return The result of bitwise character comparison: 0 if this - * contains the same characters as <code>text</code>, -1 if the characters in - * this are bitwise less than the characters in <code>text</code>, +1 if the - * characters in this are bitwise greater than the characters - * in <code>text</code>. - * @stable ICU 2.0 - */ - inline int8_t compare(int32_t start, - int32_t length, - const UnicodeString& text) const; - - /** - * Compare the characters bitwise in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the characters - * in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). - * @param start the offset at which the compare operation begins - * @param length the number of characters in this to compare. - * @param srcText the text to be compared - * @param srcStart the offset into <TT>srcText</TT> to start comparison - * @param srcLength the number of characters in <TT>src</TT> to compare - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as <code>srcText</code>, -1 if the characters in - * this are bitwise less than the characters in <code>srcText</code>, +1 if the - * characters in this are bitwise greater than the characters - * in <code>srcText</code>. - * @stable ICU 2.0 - */ - inline int8_t compare(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Compare the characters bitwise in this UnicodeString with the first - * <TT>srcLength</TT> characters in <TT>srcChars</TT>. - * @param srcChars The characters to compare to this UnicodeString. 
- * @param srcLength the number of characters in <TT>srcChars</TT> to compare - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as <code>srcChars</code>, -1 if the characters in - * this are bitwise less than the characters in <code>srcChars</code>, +1 if the - * characters in this are bitwise greater than the characters - * in <code>srcChars</code>. - * @stable ICU 2.0 - */ - inline int8_t compare(const UChar *srcChars, - int32_t srcLength) const; - - /** - * Compare the characters bitwise in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the first - * <TT>length</TT> characters in <TT>srcChars</TT> - * @param start the offset at which the compare operation begins - * @param length the number of characters to compare. - * @param srcChars the characters to be compared - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as <code>srcChars</code>, -1 if the characters in - * this are bitwise less than the characters in <code>srcChars</code>, +1 if the - * characters in this are bitwise greater than the characters - * in <code>srcChars</code>. - * @stable ICU 2.0 - */ - inline int8_t compare(int32_t start, - int32_t length, - const UChar *srcChars) const; - - /** - * Compare the characters bitwise in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the characters - * in <TT>srcChars</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 
- * @param start the offset at which the compare operation begins - * @param length the number of characters in this to compare - * @param srcChars the characters to be compared - * @param srcStart the offset into <TT>srcChars</TT> to start comparison - * @param srcLength the number of characters in <TT>srcChars</TT> to compare - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as <code>srcChars</code>, -1 if the characters in - * this are bitwise less than the characters in <code>srcChars</code>, +1 if the - * characters in this are bitwise greater than the characters - * in <code>srcChars</code>. - * @stable ICU 2.0 - */ - inline int8_t compare(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Compare the characters bitwise in the range - * [<TT>start</TT>, <TT>limit</TT>) with the characters - * in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcLimit</TT>). - * @param start the offset at which the compare operation begins - * @param limit the offset immediately following the compare operation - * @param srcText the text to be compared - * @param srcStart the offset into <TT>srcText</TT> to start comparison - * @param srcLimit the offset into <TT>srcText</TT> to limit comparison - * @return The result of bitwise character comparison: 0 if this - * contains the same characters as <code>srcText</code>, -1 if the characters in - * this are bitwise less than the characters in <code>srcText</code>, +1 if the - * characters in this are bitwise greater than the characters - * in <code>srcText</code>. - * @stable ICU 2.0 - */ - inline int8_t compareBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. 
- * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param text Another string to compare this one to. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(const UnicodeString& text) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcText Another string to compare this one to. 
- * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(int32_t start, - int32_t length, - const UnicodeString& srcText) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcText Another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLength The number of code units from that string to compare. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param srcChars A pointer to another string to compare this one to. - * @param srcLength The number of code units from that string to compare. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(const UChar *srcChars, - int32_t srcLength) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcChars A pointer to another string to compare this one to. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(int32_t start, - int32_t length, - const UChar *srcChars) const; - - /** - * Compare two Unicode strings in code point order. 
- * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcChars A pointer to another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLength The number of code units from that string to compare. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrder(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Compare two Unicode strings in code point order. - * The result may be different from the results of compare(), operator<, etc. - * if supplementary characters are present: - * - * In UTF-16, supplementary characters (with code points U+10000 and above) are - * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, - * which means that they compare as less than some other BMP characters like U+feff. - * This function compares Unicode strings in code point order. - * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 
- * - * @param start The start offset in this string at which the compare operation begins. - * @param limit The offset after the last code unit from this string to compare. - * @param srcText Another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLimit The offset after the last code unit from that string to compare. - * @return a negative/zero/positive integer corresponding to whether - * this string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ - inline int8_t compareCodePointOrderBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). - * - * @param text Another string to compare this one to. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcText Another string to compare this one to. 
- * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(int32_t start, - int32_t length, - const UnicodeString& srcText, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcText Another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLength The number of code units from that string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). - * - * @param srcChars A pointer to another string to compare this one to. 
- * @param srcLength The number of code units from that string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(const UChar *srcChars, - int32_t srcLength, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcChars A pointer to another string to compare this one to. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(int32_t start, - int32_t length, - const UChar *srcChars, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param length The number of code units from this string to compare. - * @param srcChars A pointer to another string to compare this one to. 
- * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLength The number of code units from that string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ - inline int8_t caseCompare(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const; - - /** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)). - * - * @param start The start offset in this string at which the compare operation begins. - * @param limit The offset after the last code unit from this string to compare. - * @param srcText Another string to compare this one to. - * @param srcStart The start offset in that string at which the compare operation begins. - * @param srcLimit The offset after the last code unit from that string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. 
- * @stable ICU 2.0 - */ - inline int8_t caseCompareBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit, - uint32_t options) const; - - /** - * Determine if this starts with the characters in <TT>text</TT> - * @param text The text to match. - * @return TRUE if this starts with the characters in <TT>text</TT>, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool startsWith(const UnicodeString& text) const; - - /** - * Determine if this starts with the characters in <TT>srcText</TT> - * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). - * @param srcText The text to match. - * @param srcStart the offset into <TT>srcText</TT> to start matching - * @param srcLength the number of characters in <TT>srcText</TT> to match - * @return TRUE if this starts with the characters in <TT>text</TT>, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool startsWith(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Determine if this starts with the characters in <TT>srcChars</TT> - * @param srcChars The characters to match. - * @param srcLength the number of characters in <TT>srcChars</TT> - * @return TRUE if this starts with the characters in <TT>srcChars</TT>, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool startsWith(const UChar *srcChars, - int32_t srcLength) const; - - /** - * Determine if this starts with the characters in <TT>srcChars</TT> - * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). - * @param srcChars The characters to match. 
- * @param srcStart the offset into <TT>srcChars</TT> to start matching - * @param srcLength the number of characters in <TT>srcChars</TT> to match - * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool startsWith(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Determine if this ends with the characters in <TT>text</TT> - * @param text The text to match. - * @return TRUE if this ends with the characters in <TT>text</TT>, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool endsWith(const UnicodeString& text) const; - - /** - * Determine if this ends with the characters in <TT>srcText</TT> - * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). - * @param srcText The text to match. - * @param srcStart the offset into <TT>srcText</TT> to start matching - * @param srcLength the number of characters in <TT>srcText</TT> to match - * @return TRUE if this ends with the characters in <TT>text</TT>, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool endsWith(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - /** - * Determine if this ends with the characters in <TT>srcChars</TT> - * @param srcChars The characters to match. - * @param srcLength the number of characters in <TT>srcChars</TT> - * @return TRUE if this ends with the characters in <TT>srcChars</TT>, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool endsWith(const UChar *srcChars, - int32_t srcLength) const; - - /** - * Determine if this ends with the characters in <TT>srcChars</TT> - * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). - * @param srcChars The characters to match. 
- * @param srcStart the offset into <TT>srcChars</TT> to start matching - * @param srcLength the number of characters in <TT>srcChars</TT> to match - * @return TRUE if this ends with the characters in <TT>srcChars</TT>, - * FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool endsWith(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - - /* Searching - bitwise only */ - - /** - * Locate in this the first occurrence of the characters in <TT>text</TT>, - * using bitwise comparison. - * @param text The text to search for. - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UnicodeString& text) const; - - /** - * Locate in this the first occurrence of the characters in <TT>text</TT> - * starting at offset <TT>start</TT>, using bitwise comparison. - * @param text The text to search for. - * @param start The offset at which searching will start. - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UnicodeString& text, - int32_t start) const; - - /** - * Locate in this the first occurrence in the range - * [<TT>start</TT>, <TT>start + length</TT>) of the characters - * in <TT>text</TT>, using bitwise comparison. - * @param text The text to search for. - * @param start The offset at which searching will start. - * @param length The number of characters to search - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UnicodeString& text, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence in the range - * [<TT>start</TT>, <TT>start + length</TT>) of the characters - * in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), - * using bitwise comparison. - * @param srcText The text to search for. 
- * @param srcStart the offset into <TT>srcText</TT> at which - * to start matching - * @param srcLength the number of characters in <TT>srcText</TT> to match - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence of the characters in - * <TT>srcChars</TT> - * starting at offset <TT>start</TT>, using bitwise comparison. - * @param srcChars The text to search for. - * @param srcLength the number of characters in <TT>srcChars</TT> to match - * @param start the offset into this at which to start matching - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UChar *srcChars, - int32_t srcLength, - int32_t start) const; - - /** - * Locate in this the first occurrence in the range - * [<TT>start</TT>, <TT>start + length</TT>) of the characters - * in <TT>srcChars</TT>, using bitwise comparison. - * @param srcChars The text to search for. - * @param srcLength the number of characters in <TT>srcChars</TT> - * @param start The offset at which searching will start. - * @param length The number of characters to search - * @return The offset into this of the start of <TT>srcChars</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(const UChar *srcChars, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence in the range - * [<TT>start</TT>, <TT>start + length</TT>) of the characters - * in <TT>srcChars</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), - * using bitwise comparison. 
- * @param srcChars The text to search for. - * @param srcStart the offset into <TT>srcChars</TT> at which - * to start matching - * @param srcLength the number of characters in <TT>srcChars</TT> to match - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - int32_t indexOf(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence of the BMP code point <code>c</code>, - * using bitwise comparison. - * @param c The code unit to search for. - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar c) const; - - /** - * Locate in this the first occurrence of the code point <TT>c</TT>, - * using bitwise comparison. - * - * @param c The code point to search for. - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar32 c) const; - - /** - * Locate in this the first occurrence of the BMP code point <code>c</code>, - * starting at offset <TT>start</TT>, using bitwise comparison. - * @param c The code unit to search for. - * @param start The offset at which searching will start. - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar c, - int32_t start) const; - - /** - * Locate in this the first occurrence of the code point <TT>c</TT> - * starting at offset <TT>start</TT>, using bitwise comparison. - * - * @param c The code point to search for. - * @param start The offset at which searching will start. - * @return The offset into this of <TT>c</TT>, or -1 if not found. 
- * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar32 c, - int32_t start) const; - - /** - * Locate in this the first occurrence of the BMP code point <code>c</code> - * in the range [<TT>start</TT>, <TT>start + length</TT>), - * using bitwise comparison. - * @param c The code unit to search for. - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar c, - int32_t start, - int32_t length) const; - - /** - * Locate in this the first occurrence of the code point <TT>c</TT> - * in the range [<TT>start</TT>, <TT>start + length</TT>), - * using bitwise comparison. - * - * @param c The code point to search for. - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t indexOf(UChar32 c, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence of the characters in <TT>text</TT>, - * using bitwise comparison. - * @param text The text to search for. - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UnicodeString& text) const; - - /** - * Locate in this the last occurrence of the characters in <TT>text</TT> - * starting at offset <TT>start</TT>, using bitwise comparison. - * @param text The text to search for. - * @param start The offset at which searching will start. - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. 
- * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UnicodeString& text, - int32_t start) const; - - /** - * Locate in this the last occurrence in the range - * [<TT>start</TT>, <TT>start + length</TT>) of the characters - * in <TT>text</TT>, using bitwise comparison. - * @param text The text to search for. - * @param start The offset at which searching will start. - * @param length The number of characters to search - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UnicodeString& text, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence in the range - * [<TT>start</TT>, <TT>start + length</TT>) of the characters - * in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), - * using bitwise comparison. - * @param srcText The text to search for. - * @param srcStart the offset into <TT>srcText</TT> at which - * to start matching - * @param srcLength the number of characters in <TT>srcText</TT> to match - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence of the characters in <TT>srcChars</TT> - * starting at offset <TT>start</TT>, using bitwise comparison. - * @param srcChars The text to search for. - * @param srcLength the number of characters in <TT>srcChars</TT> to match - * @param start the offset into this at which to start matching - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. 
- * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UChar *srcChars, - int32_t srcLength, - int32_t start) const; - - /** - * Locate in this the last occurrence in the range - * [<TT>start</TT>, <TT>start + length</TT>) of the characters - * in <TT>srcChars</TT>, using bitwise comparison. - * @param srcChars The text to search for. - * @param srcLength the number of characters in <TT>srcChars</TT> - * @param start The offset at which searching will start. - * @param length The number of characters to search - * @return The offset into this of the start of <TT>srcChars</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(const UChar *srcChars, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence in the range - * [<TT>start</TT>, <TT>start + length</TT>) of the characters - * in <TT>srcChars</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), - * using bitwise comparison. - * @param srcChars The text to search for. - * @param srcStart the offset into <TT>srcChars</TT> at which - * to start matching - * @param srcLength the number of characters in <TT>srcChars</TT> to match - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of the start of <TT>text</TT>, - * or -1 if not found. - * @stable ICU 2.0 - */ - int32_t lastIndexOf(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence of the BMP code point <code>c</code>, - * using bitwise comparison. - * @param c The code unit to search for. - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar c) const; - - /** - * Locate in this the last occurrence of the code point <TT>c</TT>, - * using bitwise comparison. 
- * - * @param c The code point to search for. - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar32 c) const; - - /** - * Locate in this the last occurrence of the BMP code point <code>c</code> - * starting at offset <TT>start</TT>, using bitwise comparison. - * @param c The code unit to search for. - * @param start The offset at which searching will start. - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar c, - int32_t start) const; - - /** - * Locate in this the last occurrence of the code point <TT>c</TT> - * starting at offset <TT>start</TT>, using bitwise comparison. - * - * @param c The code point to search for. - * @param start The offset at which searching will start. - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar32 c, - int32_t start) const; - - /** - * Locate in this the last occurrence of the BMP code point <code>c</code> - * in the range [<TT>start</TT>, <TT>start + length</TT>), - * using bitwise comparison. - * @param c The code unit to search for. - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of <TT>c</TT>, or -1 if not found. - * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar c, - int32_t start, - int32_t length) const; - - /** - * Locate in this the last occurrence of the code point <TT>c</TT> - * in the range [<TT>start</TT>, <TT>start + length</TT>), - * using bitwise comparison. - * - * @param c The code point to search for. - * @param start the offset into this at which to start matching - * @param length the number of characters in this to search - * @return The offset into this of <TT>c</TT>, or -1 if not found. 
- * @stable ICU 2.0 - */ - inline int32_t lastIndexOf(UChar32 c, - int32_t start, - int32_t length) const; - - - /* Character access */ - - /** - * Return the code unit at offset <tt>offset</tt>. - * If the offset is not valid (0..length()-1) then U+ffff is returned. - * @param offset a valid offset into the text - * @return the code unit at offset <tt>offset</tt> - * or 0xffff if the offset is not valid for this string - * @stable ICU 2.0 - */ - inline UChar charAt(int32_t offset) const; - - /** - * Return the code unit at offset <tt>offset</tt>. - * If the offset is not valid (0..length()-1) then U+ffff is returned. - * @param offset a valid offset into the text - * @return the code unit at offset <tt>offset</tt> - * @stable ICU 2.0 - */ - inline UChar operator[] (int32_t offset) const; - - /** - * Return the code point that contains the code unit - * at offset <tt>offset</tt>. - * If the offset is not valid (0..length()-1) then U+ffff is returned. - * @param offset a valid offset into the text - * that indicates the text offset of any of the code units - * that will be assembled into a code point (21-bit value) and returned - * @return the code point of text at <tt>offset</tt> - * or 0xffff if the offset is not valid for this string - * @stable ICU 2.0 - */ - UChar32 char32At(int32_t offset) const; - - /** - * Adjust a random-access offset so that - * it points to the beginning of a Unicode character. - * The offset that is passed in points to - * any code unit of a code point, - * while the returned offset will point to the first code unit - * of the same code point. - * In UTF-16, if the input offset points to a second surrogate - * of a surrogate pair, then the returned offset will point - * to the first surrogate. 
- * @param offset a valid offset into one code point of the text - * @return offset of the first code unit of the same code point - * @see U16_SET_CP_START - * @stable ICU 2.0 - */ - int32_t getChar32Start(int32_t offset) const; - - /** - * Adjust a random-access offset so that - * it points behind a Unicode character. - * The offset that is passed in points behind - * any code unit of a code point, - * while the returned offset will point behind the last code unit - * of the same code point. - * In UTF-16, if the input offset points behind the first surrogate - * (i.e., to the second surrogate) - * of a surrogate pair, then the returned offset will point - * behind the second surrogate (i.e., to the first code unit after the pair). - * @param offset a valid offset after any code unit of a code point of the text - * @return offset of the first code unit after the same code point - * @see U16_SET_CP_LIMIT - * @stable ICU 2.0 - */ - int32_t getChar32Limit(int32_t offset) const; - - /** - * Move the code unit index along the string by delta code points. - * Interpret the input index as a code unit-based offset into the string, - * move the index forward or backward by delta code points, and - * return the resulting index. - * The input index should point to the first code unit of a code point, - * if there is more than one. - * - * Both input and output indexes are code unit-based as for all - * string indexes/offsets in ICU (and other libraries, like MBCS char*). - * If delta<0 then the index is moved backward (toward the start of the string). - * If delta>0 then the index is moved forward (toward the end of the string). - * - * This behaves like CharacterIterator::move32(delta, kCurrent). - * - * Behavior for out-of-bounds indexes: - * <code>moveIndex32</code> pins the input index to 0..length(), i.e., - * if the input index<0 then it is pinned to 0; - * if it is index>length() then it is pinned to length(). 
- * Afterwards, the index is moved by <code>delta</code> code points - * forward or backward, - * but no further backward than to 0 and no further forward than to length(). - * The resulting index return value will be in between 0 and length(), inclusively. - * - * Examples: - * <pre> - * // s has code points 'a' U+10000 'b' U+10ffff U+2029 - * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); - * - * // initial index: position of U+10000 - * int32_t index=1; - * - * // the following examples will all result in index==4, position of U+10ffff - * - * // skip 2 code points from some position in the string - * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' - * - * // go to the 3rd code point from the start of s (0-based) - * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' - * - * // go to the next-to-last code point of s - * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff - * </pre> - * - * @param index input code unit index - * @param delta (signed) code point count to move the index forward or backward - * in the string - * @return the resulting code unit index - * @stable ICU 2.0 - */ - int32_t moveIndex32(int32_t index, int32_t delta) const; - - /* Substring extraction */ - - /** - * Copy the characters in the range - * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, - * beginning at <tt>dstStart</tt>. - * If the string aliases to <code>dst</code> itself as an external buffer, - * then extract() will not copy the contents. - * - * @param start offset of first character which will be copied into the array - * @param length the number of characters to extract - * @param dst array in which to copy characters. The length of <tt>dst</tt> - * must be at least (<tt>dstStart + length</tt>). 
- * @param dstStart the offset in <TT>dst</TT> where the first character - * will be extracted - * @stable ICU 2.0 - */ - inline void extract(int32_t start, - int32_t length, - UChar *dst, - int32_t dstStart = 0) const; - - /** - * Copy the contents of the string into dest. - * This is a convenience function that - * checks if there is enough space in dest, - * extracts the entire string if possible, - * and NUL-terminates dest if possible. - * - * If the string fits into dest but cannot be NUL-terminated - * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. - * If the string itself does not fit into dest - * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. - * - * If the string aliases to <code>dest</code> itself as an external buffer, - * then extract() will not copy the contents. - * - * @param dest Destination string buffer. - * @param destCapacity Number of UChars available at dest. - * @param errorCode ICU error code. - * @return length() - * @stable ICU 2.0 - */ - int32_t - extract(UChar *dest, int32_t destCapacity, - UErrorCode &errorCode) const; - - /** - * Copy the characters in the range - * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString - * <tt>target</tt>. - * @param start offset of first character which will be copied - * @param length the number of characters to extract - * @param target UnicodeString into which to copy characters. - * @return A reference to <TT>target</TT> - * @stable ICU 2.0 - */ - inline void extract(int32_t start, - int32_t length, - UnicodeString& target) const; - - /** - * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) - * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. - * @param start offset of first character which will be copied into the array - * @param limit offset immediately following the last character to be copied - * @param dst array in which to copy characters. 
The length of <tt>dst</tt> - * must be at least (<tt>dstStart + (limit - start)</tt>). - * @param dstStart the offset in <TT>dst</TT> where the first character - * will be extracted - * @stable ICU 2.0 - */ - inline void extractBetween(int32_t start, - int32_t limit, - UChar *dst, - int32_t dstStart = 0) const; - - /** - * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) - * into the UnicodeString <tt>target</tt>. Replaceable API. - * @param start offset of first character which will be copied - * @param limit offset immediately following the last character to be copied - * @param target UnicodeString into which to copy characters. - * @return A reference to <TT>target</TT> - * @stable ICU 2.0 - */ - virtual void extractBetween(int32_t start, - int32_t limit, - UnicodeString& target) const; - - /** - * Copy the characters in the range - * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters. - * All characters must be invariant (see utypes.h). - * Use US_INV as the last, signature-distinguishing parameter. - * - * This function does not write any more than <code>targetCapacity</code> - * characters but returns the length of the entire output string - * so that one can allocate a larger buffer and call the function again - * if necessary. - * The output string is NUL-terminated if possible. - * - * @param start offset of first character which will be copied - * @param startLength the number of characters to extract - * @param target the target buffer for extraction, can be NULL - * if targetCapacity is 0 - * @param targetCapacity the length of the target buffer - * @param inv Signature-distinguishing parameter, use US_INV. 
- * @return the output string length, not including the terminating NUL - * @stable ICU 3.2 - */ - int32_t extract(int32_t start, - int32_t startLength, - char *target, - int32_t targetCapacity, - enum EInvariant inv) const; - -#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION - - /** - * Copy the characters in the range - * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters - * in the platform's default codepage. - * This function does not write any more than <code>targetLength</code> - * characters but returns the length of the entire output string - * so that one can allocate a larger buffer and call the function again - * if necessary. - * The output string is NUL-terminated if possible. - * - * @param start offset of first character which will be copied - * @param startLength the number of characters to extract - * @param target the target buffer for extraction - * @param targetLength the length of the target buffer - * If <TT>target</TT> is NULL, then the number of bytes required for - * <TT>target</TT> is returned. - * @return the output string length, not including the terminating NUL - * @stable ICU 2.0 - */ - int32_t extract(int32_t start, - int32_t startLength, - char *target, - uint32_t targetLength) const; - -#endif - -#if !UCONFIG_NO_CONVERSION - - /** - * Copy the characters in the range - * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters - * in a specified codepage. - * The output string is NUL-terminated. - * - * Recommendation: For invariant-character strings use - * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const - * because it avoids object code dependencies of UnicodeString on - * the conversion code. - * - * @param start offset of first character which will be copied - * @param startLength the number of characters to extract - * @param target the target buffer for extraction - * @param codepage the desired codepage for the characters. 
0 has - * the special meaning of the default codepage - * If <code>codepage</code> is an empty string (<code>""</code>), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. - * If <TT>target</TT> is NULL, then the number of bytes required for - * <TT>target</TT> is returned. It is assumed that the target is big enough - * to fit all of the characters. - * @return the output string length, not including the terminating NUL - * @stable ICU 2.0 - */ - inline int32_t extract(int32_t start, - int32_t startLength, - char *target, - const char *codepage = 0) const; - - /** - * Copy the characters in the range - * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters - * in a specified codepage. - * This function does not write any more than <code>targetLength</code> - * characters but returns the length of the entire output string - * so that one can allocate a larger buffer and call the function again - * if necessary. - * The output string is NUL-terminated if possible. - * - * Recommendation: For invariant-character strings use - * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const - * because it avoids object code dependencies of UnicodeString on - * the conversion code. - * - * @param start offset of first character which will be copied - * @param startLength the number of characters to extract - * @param target the target buffer for extraction - * @param targetLength the length of the target buffer - * @param codepage the desired codepage for the characters. 0 has - * the special meaning of the default codepage - * If <code>codepage</code> is an empty string (<code>""</code>), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. 
- * If <TT>target</TT> is NULL, then the number of bytes required for - * <TT>target</TT> is returned. - * @return the output string length, not including the terminating NUL - * @stable ICU 2.0 - */ - int32_t extract(int32_t start, - int32_t startLength, - char *target, - uint32_t targetLength, - const char *codepage) const; - - /** - * Convert the UnicodeString into a codepage string using an existing UConverter. - * The output string is NUL-terminated if possible. - * - * This function avoids the overhead of opening and closing a converter if - * multiple strings are extracted. - * - * @param dest destination string buffer, can be NULL if destCapacity==0 - * @param destCapacity the number of chars available at dest - * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), - * or NULL for the default converter - * @param errorCode normal ICU error code - * @return the length of the output string, not counting the terminating NUL; - * if the length is greater than destCapacity, then the string will not fit - * and a buffer of the indicated length would need to be passed in - * @stable ICU 2.0 - */ - int32_t extract(char *dest, int32_t destCapacity, - UConverter *cnv, - UErrorCode &errorCode) const; - -#endif - - /** - * Create a temporary substring for the specified range. - * Unlike the substring constructor and setTo() functions, - * the object returned here will be a read-only alias (using getBuffer()) - * rather than copying the text. - * As a result, this substring operation is much faster but requires - * that the original string not be modified or deleted during the lifetime - * of the returned substring object. 
- * @param start offset of the first character visible in the substring - * @param length length of the substring - * @return a read-only alias UnicodeString object for the substring - * @stable ICU 4.4 - */ - UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; - - /** - * Create a temporary substring for the specified range. - * Same as tempSubString(start, length) except that the substring range - * is specified as a (start, limit) pair (with an exclusive limit index) - * rather than a (start, length) pair. - * @param start offset of the first character visible in the substring - * @param limit offset immediately following the last character visible in the substring - * @return a read-only alias UnicodeString object for the substring - * @stable ICU 4.4 - */ - inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; - - /** - * Convert the UnicodeString to UTF-8 and write the result - * to a ByteSink. This is called by toUTF8String(). - * Unpaired surrogates are replaced with U+FFFD. - * Calls u_strToUTF8WithSub(). - * - * @param sink A ByteSink to which the UTF-8 version of the string is written. - * sink.Flush() is called at the end. - * @stable ICU 4.2 - * @see toUTF8String - */ - void toUTF8(ByteSink &sink) const; - -#if U_HAVE_STD_STRING - - /** - * Convert the UnicodeString to UTF-8 and append the result - * to a standard string. - * Unpaired surrogates are replaced with U+FFFD. - * Calls toUTF8(). - * - * @param result A standard string (or a compatible object) - * to which the UTF-8 version of the string is appended. - * @return A reference to <code>result</code>, i.e. the same string object that was passed in. - * @stable ICU 4.2 - * @see toUTF8 - */ - template<typename StringClass> - StringClass &toUTF8String(StringClass &result) const { - StringByteSink<StringClass> sbs(&result); - toUTF8(sbs); - return result; - } - -#endif - - /** - * Convert the UnicodeString to UTF-32. - * Unpaired surrogates are replaced with U+FFFD. - * Calls u_strToUTF32WithSub(). 
- * - * @param utf32 destination string buffer, can be NULL if capacity==0 - * @param capacity the number of UChar32s available at utf32 - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The length of the UTF-32 string. - * @see fromUTF32 - * @stable ICU 4.2 - */ - int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; - - /* Length operations */ - - /** - * Return the length of the UnicodeString object. - * The length is the number of UChar code units in the UnicodeString. - * If you want the number of code points, please use countChar32(). - * @return the length of the UnicodeString object - * @see countChar32 - * @stable ICU 2.0 - */ - inline int32_t length(void) const; - - /** - * Count Unicode code points in the length UChar code units of the string. - * A code point may occupy either one or two UChar code units. - * Counting code points involves reading all code units. - * - * This function is basically the inverse of moveIndex32(). - * - * @param start the index of the first code unit to check - * @param length the number of UChar code units to check - * @return the number of code points in the specified code units - * @see length - * @stable ICU 2.0 - */ - int32_t - countChar32(int32_t start=0, int32_t length=INT32_MAX) const; - - /** - * Check if the length UChar code units of the string - * contain more Unicode code points than a certain number. - * This is more efficient than counting all code points in this part of the string - * and comparing that number with a threshold. - * This function may not need to scan the string at all if the length - * falls within a certain range, and - * never needs to count more than 'number+1' code points. - * Logically equivalent to (countChar32(start, length)>number). 
- * A Unicode code point may occupy either one or two UChar code units. - * - * @param start the index of the first code unit to check (0 for the entire string) - * @param length the number of UChar code units to check - * (use INT32_MAX for the entire string; remember that start/length - * values are pinned) - * @param number The number of code points in the (sub)string is compared against - * the 'number' parameter. - * @return Boolean value for whether the string contains more Unicode code points - * than 'number'. Same as (u_countChar32(s, length)>number). - * @see countChar32 - * @see u_strHasMoreChar32Than - * @stable ICU 2.4 - */ - UBool - hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; - - /** - * Determine if this string is empty. - * @return TRUE if this string contains 0 characters, FALSE otherwise. - * @stable ICU 2.0 - */ - inline UBool isEmpty(void) const; - - /** - * Return the capacity of the internal buffer of the UnicodeString object. - * This is useful together with the getBuffer functions. - * See there for details. - * - * @return the number of UChars available in the internal buffer - * @see getBuffer - * @stable ICU 2.0 - */ - inline int32_t getCapacity(void) const; - - /* Other operations */ - - /** - * Generate a hash code for this object. - * @return The hash code of this UnicodeString. - * @stable ICU 2.0 - */ - inline int32_t hashCode(void) const; - - /** - * Determine if this object contains a valid string. - * A bogus string has no value. It is different from an empty string, - * although in both cases isEmpty() returns TRUE and length() returns 0. - * setToBogus() and isBogus() can be used to indicate that no string value is available. - * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and - * length() returns 0. 
- * - * @return TRUE if the string is bogus/invalid, FALSE otherwise - * @see setToBogus() - * @stable ICU 2.0 - */ - inline UBool isBogus(void) const; - - - //======================================== - // Write operations - //======================================== - - /* Assignment operations */ - - /** - * Assignment operator. Replace the characters in this UnicodeString - * with the characters from <TT>srcText</TT>. - * @param srcText The text containing the characters to replace - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString &operator=(const UnicodeString &srcText); - - /** - * Almost the same as the assignment operator. - * Replace the characters in this UnicodeString - * with the characters from <code>srcText</code>. - * - * This function works the same as the assignment operator - * for all strings except for ones that are readonly aliases. - * - * Starting with ICU 2.4, the assignment operator and the copy constructor - * allocate a new buffer and copy the buffer contents even for readonly aliases. - * This function implements the old, more efficient but less safe behavior - * of making this string also a readonly alias to the same buffer. - * - * The fastCopyFrom function must be used only if it is known that the lifetime of - * this UnicodeString does not exceed the lifetime of the aliased buffer - * including its contents, for example for strings from resource bundles - * or aliases to string constants. - * - * @param src The text containing the characters to replace. - * @return a reference to this - * @stable ICU 2.4 - */ - UnicodeString &fastCopyFrom(const UnicodeString &src); - - /** - * Assignment operator. Replace the characters in this UnicodeString - * with the code unit <TT>ch</TT>. - * @param ch the code unit to replace - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator= (UChar ch); - - /** - * Assignment operator. 
Replace the characters in this UnicodeString - * with the code point <TT>ch</TT>. - * @param ch the code point to replace - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator= (UChar32 ch); - - /** - * Set the text in the UnicodeString object to the characters - * in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). - * <TT>srcText</TT> is not modified. - * @param srcText the source for the new characters - * @param srcStart the offset into <TT>srcText</TT> where new characters - * will be obtained - * @return a reference to this - * @stable ICU 2.2 - */ - inline UnicodeString& setTo(const UnicodeString& srcText, - int32_t srcStart); - - /** - * Set the text in the UnicodeString object to the characters - * in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). - * <TT>srcText</TT> is not modified. - * @param srcText the source for the new characters - * @param srcStart the offset into <TT>srcText</TT> where new characters - * will be obtained - * @param srcLength the number of characters in <TT>srcText</TT> in the - * replace string. - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& setTo(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - /** - * Set the text in the UnicodeString object to the characters in - * <TT>srcText</TT>. - * <TT>srcText</TT> is not modified. - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& setTo(const UnicodeString& srcText); - - /** - * Set the characters in the UnicodeString object to the characters - * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. - * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in srcChars. 
- * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& setTo(const UChar *srcChars, - int32_t srcLength); - - /** - * Set the characters in the UnicodeString object to the code unit - * <TT>srcChar</TT>. - * @param srcChar the code unit which becomes the UnicodeString's character - * content - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& setTo(UChar srcChar); - - /** - * Set the characters in the UnicodeString object to the code point - * <TT>srcChar</TT>. - * @param srcChar the code point which becomes the UnicodeString's character - * content - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& setTo(UChar32 srcChar); - - /** - * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. - * The text will be used for the UnicodeString object, but - * it will not be released when the UnicodeString is destroyed. - * This has copy-on-write semantics: - * When the string is modified, then the buffer is first copied into - * newly allocated memory. - * The aliased buffer is never modified. - * - * In an assignment to another UnicodeString, when using the copy constructor - * or the assignment operator, the text will be copied. - * When using fastCopyFrom(), the text will be aliased again, - * so that both strings then alias the same readonly-text. - * - * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. - * This must be true if <code>textLength==-1</code>. - * @param text The characters to alias for the UnicodeString. - * @param textLength The number of Unicode characters in <code>text</code> to alias. - * If -1, then this constructor will determine the length - * by calling <code>u_strlen()</code>. - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString &setTo(UBool isTerminated, - const UChar *text, - int32_t textLength); - - /** - * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. 
- * The text will be used for the UnicodeString object, but - * it will not be released when the UnicodeString is destroyed. - * This has write-through semantics: - * For as long as the capacity of the buffer is sufficient, write operations - * will directly affect the buffer. When more capacity is necessary, then - * a new buffer will be allocated and the contents copied as with regularly - * constructed strings. - * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same - * as the string buffer itself and will in this case not copy the contents. - * - * @param buffer The characters to alias for the UnicodeString. - * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. - * @param buffCapacity The size of <code>buffer</code> in UChars. - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString &setTo(UChar *buffer, - int32_t buffLength, - int32_t buffCapacity); - - /** - * Make this UnicodeString object invalid. - * The string will test TRUE with isBogus(). - * - * A bogus string has no value. It is different from an empty string. - * It can be used to indicate that no string value is available. - * getBuffer() and getTerminatedBuffer() return NULL, and - * length() returns 0. - * - * This utility function is used throughout the UnicodeString - * implementation to indicate that a UnicodeString operation failed, - * and may be used in other functions, - * especially but not exclusively when such functions do not - * take a UErrorCode for simplicity. - * - * The following methods, and no others, will clear a string object's bogus flag: - * - remove() - * - remove(0, INT32_MAX) - * - truncate(0) - * - operator=() (assignment operator) - * - setTo(...) - * - * The simplest way to turn a bogus string into an empty one - * is to use the remove() function. 
- * Examples for other functions that are equivalent to "set to empty string": - * \code - * if(s.isBogus()) { - * s.remove(); // set to an empty string (remove all), or - * s.remove(0, INT32_MAX); // set to an empty string (remove all), or - * s.truncate(0); // set to an empty string (complete truncation), or - * s=UnicodeString(); // assign an empty string, or - * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or - * static const UChar nul=0; - * s.setTo(&nul, 0); // set to an empty C Unicode string - * } - * \endcode - * - * @see isBogus() - * @stable ICU 2.0 - */ - void setToBogus(); - - /** - * Set the character at the specified offset to the specified character. - * @param offset A valid offset into the text of the character to set - * @param ch The new character - * @return A reference to this - * @stable ICU 2.0 - */ - UnicodeString& setCharAt(int32_t offset, - UChar ch); - - - /* Append operations */ - - /** - * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString - * object. - * @param ch the code unit to be appended - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator+= (UChar ch); - - /** - * Append operator. Append the code point <TT>ch</TT> to the UnicodeString - * object. - * @param ch the code point to be appended - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator+= (UChar32 ch); - - /** - * Append operator. Append the characters in <TT>srcText</TT> to the - * UnicodeString object. <TT>srcText</TT> is not modified. - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& operator+= (const UnicodeString& srcText); - - /** - * Append the characters - * in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the - * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT> - * is not modified. 
- * @param srcText the source for the new characters - * @param srcStart the offset into <TT>srcText</TT> where new characters - * will be obtained - * @param srcLength the number of characters in <TT>srcText</TT> in - * the append string - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - /** - * Append the characters in <TT>srcText</TT> to the UnicodeString object. - * <TT>srcText</TT> is not modified. - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(const UnicodeString& srcText); - - /** - * Append the characters in <TT>srcChars</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString - * object at offset - * <TT>start</TT>. <TT>srcChars</TT> is not modified. - * @param srcChars the source for the new characters - * @param srcStart the offset into <TT>srcChars</TT> where new characters - * will be obtained - * @param srcLength the number of characters in <TT>srcChars</TT> in - * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength); - - /** - * Append the characters in <TT>srcChars</TT> to the UnicodeString object - * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. - * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; - * can be -1 if <TT>srcChars</TT> is NUL-terminated - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(const UChar *srcChars, - int32_t srcLength); - - /** - * Append the code unit <TT>srcChar</TT> to the UnicodeString object. 
- * @param srcChar the code unit to append - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& append(UChar srcChar); - - /** - * Append the code point <TT>srcChar</TT> to the UnicodeString object. - * @param srcChar the code point to append - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& append(UChar32 srcChar); - - - /* Insert operations */ - - /** - * Insert the characters in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString - * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. - * @param start the offset where the insertion begins - * @param srcText the source for the new characters - * @param srcStart the offset into <TT>srcText</TT> where new characters - * will be obtained - * @param srcLength the number of characters in <TT>srcText</TT> in - * the insert string - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - /** - * Insert the characters in <TT>srcText</TT> into the UnicodeString object - * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. - * @param start the offset where the insertion begins - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - const UnicodeString& srcText); - - /** - * Insert the characters in <TT>srcChars</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString - * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 
- * @param start the offset at which the insertion begins - * @param srcChars the source for the new characters - * @param srcStart the offset into <TT>srcChars</TT> where new characters - * will be obtained - * @param srcLength the number of characters in <TT>srcChars</TT> - * in the insert string - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength); - - /** - * Insert the characters in <TT>srcChars</TT> into the UnicodeString object - * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. - * @param start the offset where the insertion begins - * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in srcChars. - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - const UChar *srcChars, - int32_t srcLength); - - /** - * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at - * offset <TT>start</TT>. - * @param start the offset at which the insertion occurs - * @param srcChar the code unit to insert - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - UChar srcChar); - - /** - * Insert the code point <TT>srcChar</TT> into the UnicodeString object at - * offset <TT>start</TT>. - * @param start the offset at which the insertion occurs - * @param srcChar the code point to insert - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& insert(int32_t start, - UChar32 srcChar); - - - /* Replace operations */ - - /** - * Replace the characters in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the characters in - * <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). - * <TT>srcText</TT> is not modified. 
- * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * <TT>start + length</TT> is not modified. - * @param srcText the source for the new characters - * @param srcStart the offset into <TT>srcText</TT> where new characters - * will be obtained - * @param srcLength the number of characters in <TT>srcText</TT> in - * the replace string - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& replace(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - /** - * Replace the characters in the range - * [<TT>start</TT>, <TT>start + length</TT>) - * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is - * not modified. - * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * <TT>start + length</TT> is not modified. - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& replace(int32_t start, - int32_t length, - const UnicodeString& srcText); - - /** - * Replace the characters in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the characters in - * <TT>srcChars</TT> in the range - * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> - * is not modified. - * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * <TT>start + length</TT> is not modified. 
- * @param srcChars the source for the new characters - * @param srcStart the offset into <TT>srcChars</TT> where new characters - * will be obtained - * @param srcLength the number of characters in <TT>srcChars</TT> - * in the replace string - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& replace(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength); - - /** - * Replace the characters in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the characters in - * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. - * @param start the offset at which the replace operation begins - * @param length number of characters to replace. The character at - * <TT>start + length</TT> is not modified. - * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in srcChars - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& replace(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcLength); - - /** - * Replace the characters in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the code unit - * <TT>srcChar</TT>. - * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * <TT>start + length</TT> is not modified. - * @param srcChar the new code unit - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& replace(int32_t start, - int32_t length, - UChar srcChar); - - /** - * Replace the characters in the range - * [<TT>start</TT>, <TT>start + length</TT>) with the code point - * <TT>srcChar</TT>. - * @param start the offset at which the replace operation begins - * @param length the number of characters to replace. The character at - * <TT>start + length</TT> is not modified. 
- * @param srcChar the new code point - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); - - /** - * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) - * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. - * @param start the offset at which the replace operation begins - * @param limit the offset immediately following the replace range - * @param srcText the source for the new characters - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& replaceBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText); - - /** - * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) - * with the characters in <TT>srcText</TT> in the range - * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified. - * @param start the offset at which the replace operation begins - * @param limit the offset immediately following the replace range - * @param srcText the source for the new characters - * @param srcStart the offset into <TT>srcText</TT> where new characters - * will be obtained - * @param srcLimit the offset immediately following the range to copy - * in <TT>srcText</TT> - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& replaceBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit); - - /** - * Replace a substring of this object with the given text. - * @param start the beginning index, inclusive; <code>0 <= start - * <= limit</code>. - * @param limit the ending index, exclusive; <code>start <= limit - * <= length()</code>. 
- * @param text the text to replace characters <code>start</code> - * to <code>limit - 1</code> - * @stable ICU 2.0 - */ - virtual void handleReplaceBetween(int32_t start, - int32_t limit, - const UnicodeString& text); - - /** - * Replaceable API - * @return TRUE if it has MetaData - * @stable ICU 2.4 - */ - virtual UBool hasMetaData() const; - - /** - * Copy a substring of this object, retaining attribute (out-of-band) - * information. This method is used to duplicate or reorder substrings. - * The destination index must not overlap the source range. - * - * @param start the beginning index, inclusive; <code>0 <= start <= - * limit</code>. - * @param limit the ending index, exclusive; <code>start <= limit <= - * length()</code>. - * @param dest the destination index. The characters from - * <code>start..limit-1</code> will be copied to <code>dest</code>. - * Implementations of this method may assume that <code>dest <= start || - * dest >= limit</code>. - * @stable ICU 2.0 - */ - virtual void copy(int32_t start, int32_t limit, int32_t dest); - - /* Search and replace operations */ - - /** - * Replace all occurrences of characters in oldText with the characters - * in newText - * @param oldText the text containing the search text - * @param newText the text containing the replacement text - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& findAndReplace(const UnicodeString& oldText, - const UnicodeString& newText); - - /** - * Replace all occurrences of characters in oldText with characters - * in newText - * in the range [<TT>start</TT>, <TT>start + length</TT>). 
- * @param start the start of the range in which replace will be performed - * @param length the length of the range in which replace will be performed - * @param oldText the text containing the search text - * @param newText the text containing the replacement text - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& findAndReplace(int32_t start, - int32_t length, - const UnicodeString& oldText, - const UnicodeString& newText); - - /** - * Replace all occurrences of characters in oldText in the range - * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters - * in newText in the range - * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) - * in the range [<TT>start</TT>, <TT>start + length</TT>). - * @param start the start of the range in which replace will be performed - * @param length the length of the range in which replace will be performed - * @param oldText the text containing the search text - * @param oldStart the start of the search range in <TT>oldText</TT> - * @param oldLength the length of the search range in <TT>oldText</TT> - * @param newText the text containing the replacement text - * @param newStart the start of the replacement range in <TT>newText</TT> - * @param newLength the length of the replacement range in <TT>newText</TT> - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& findAndReplace(int32_t start, - int32_t length, - const UnicodeString& oldText, - int32_t oldStart, - int32_t oldLength, - const UnicodeString& newText, - int32_t newStart, - int32_t newLength); - - - /* Remove operations */ - - /** - * Remove all characters from the UnicodeString object. - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& remove(void); - - /** - * Remove the characters in the range - * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object. 
- * @param start the offset of the first character to remove - * @param length the number of characters to remove - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& remove(int32_t start, - int32_t length = (int32_t)INT32_MAX); - - /** - * Remove the characters in the range - * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object. - * @param start the offset of the first character to remove - * @param limit the offset immediately following the range to remove - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& removeBetween(int32_t start, - int32_t limit = (int32_t)INT32_MAX); - - /** - * Retain only the characters in the range - * [<code>start</code>, <code>limit</code>) from the UnicodeString object. - * Removes characters before <code>start</code> and at and after <code>limit</code>. - * @param start the offset of the first character to retain - * @param limit the offset immediately following the range to retain - * @return a reference to this - * @stable ICU 4.4 - */ - inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); - - /* Length operations */ - - /** - * Pad the start of this UnicodeString with the character <TT>padChar</TT>. - * If the length of this UnicodeString is less than targetLength, - * targetLength - length() copies of padChar will be added to the - * beginning of this UnicodeString. - * @param targetLength the desired length of the string - * @param padChar the character to use for padding. Defaults to - * space (U+0020) - * @return TRUE if the text was padded, FALSE otherwise. - * @stable ICU 2.0 - */ - UBool padLeading(int32_t targetLength, - UChar padChar = 0x0020); - - /** - * Pad the end of this UnicodeString with the character <TT>padChar</TT>. - * If the length of this UnicodeString is less than targetLength, - * targetLength - length() copies of padChar will be added to the - * end of this UnicodeString. 
- * @param targetLength the desired length of the string - * @param padChar the character to use for padding. Defaults to - * space (U+0020) - * @return TRUE if the text was padded, FALSE otherwise. - * @stable ICU 2.0 - */ - UBool padTrailing(int32_t targetLength, - UChar padChar = 0x0020); - - /** - * Truncate this UnicodeString to the <TT>targetLength</TT>. - * @param targetLength the desired length of this UnicodeString. - * @return TRUE if the text was truncated, FALSE otherwise - * @stable ICU 2.0 - */ - inline UBool truncate(int32_t targetLength); - - /** - * Trims leading and trailing whitespace from this UnicodeString. - * @return a reference to this - * @stable ICU 2.0 - */ - UnicodeString& trim(void); - - - /* Miscellaneous operations */ - - /** - * Reverse this UnicodeString in place. - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& reverse(void); - - /** - * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in - * this UnicodeString. - * @param start the start of the range to reverse - * @param length the number of characters to reverse - * @return a reference to this - * @stable ICU 2.0 - */ - inline UnicodeString& reverse(int32_t start, - int32_t length); - - /** - * Convert the characters in this to UPPER CASE following the conventions of - * the default locale. - * @return A reference to this. - * @stable ICU 2.0 - */ - UnicodeString& toUpper(void); - - /** - * Convert the characters in this to UPPER CASE following the conventions of - * a specific locale. - * @param locale The locale containing the conventions to use. - * @return A reference to this. - * @stable ICU 2.0 - */ - UnicodeString& toUpper(const Locale& locale); - - /** - * Convert the characters in this to lower case following the conventions of - * the default locale. - * @return A reference to this. 
- * @stable ICU 2.0 - */ - UnicodeString& toLower(void); - - /** - * Convert the characters in this to lower case following the conventions of - * a specific locale. - * @param locale The locale containing the conventions to use. - * @return A reference to this. - * @stable ICU 2.0 - */ - UnicodeString& toLower(const Locale& locale); - -#if !UCONFIG_NO_BREAK_ITERATION - - /** - * Titlecase this string, convenience function using the default locale. - * - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * It may be more efficient to always provide an iterator to avoid - * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * @param titleIter A break iterator to find the first characters of words - * that are to be titlecased. - * If none is provided (0), then a standard titlecase - * break iterator is opened. - * Otherwise the provided iterator is set to the string's text. - * @return A reference to this. - * @stable ICU 2.1 - */ - UnicodeString &toTitle(BreakIterator *titleIter); - - /** - * Titlecase this string. - * - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. 
- * It may be more efficient to always provide an iterator to avoid - * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * @param titleIter A break iterator to find the first characters of words - * that are to be titlecased. - * If none is provided (0), then a standard titlecase - * break iterator is opened. - * Otherwise the provided iterator is set to the string's text. - * @param locale The locale to consider. - * @return A reference to this. - * @stable ICU 2.1 - */ - UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); - - /** - * Titlecase this string, with options. - * - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. (This can be modified with options.) - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * It may be more efficient to always provide an iterator to avoid - * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * @param titleIter A break iterator to find the first characters of words - * that are to be titlecased. - * If none is provided (0), then a standard titlecase - * break iterator is opened. - * Otherwise the provided iterator is set to the string's text. - * @param locale The locale to consider. - * @param options Options bit set, see ucasemap_open(). - * @return A reference to this. 
- * @see U_TITLECASE_NO_LOWERCASE - * @see U_TITLECASE_NO_BREAK_ADJUSTMENT - * @see ucasemap_open - * @stable ICU 3.8 - */ - UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); - -#endif - - /** - * Case-folds the characters in this string. - * - * Case-folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. - * - * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I - * @return A reference to this. - * @stable ICU 2.0 - */ - UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); - - //======================================== - // Access to the internal buffer - //======================================== - - /** - * Get a read/write pointer to the internal buffer. - * The buffer is guaranteed to be large enough for at least minCapacity UChars, - * writable, and is still owned by the UnicodeString object. - * Calls to getBuffer(minCapacity) must not be nested, and - * must be matched with calls to releaseBuffer(newLength). - * If the string buffer was read-only or shared, - * then it will be reallocated and copied. - * - * An attempted nested call will return 0, and will not further modify the - * state of the UnicodeString object. - * It also returns 0 if the string is bogus. - * - * The actual capacity of the string buffer may be larger than minCapacity. - * getCapacity() returns the actual capacity. - * For many operations, the full capacity should be used to avoid reallocations. - * - * While the buffer is "open" between getBuffer(minCapacity) - * and releaseBuffer(newLength), the following applies: - * - The string length is set to 0. - * - Any read API call on the UnicodeString object will behave like on a 0-length string. 
- * - Any write API call on the UnicodeString object is disallowed and will have no effect. - * - You can read from and write to the returned buffer. - * - The previous string contents will still be in the buffer; - * if you want to use it, then you need to call length() before getBuffer(minCapacity). - * If the length() was greater than minCapacity, then any contents after minCapacity - * may be lost. - * The buffer contents is not NUL-terminated by getBuffer(). - * If length()<getCapacity() then you can terminate it by writing a NUL - * at index length(). - * - You must call releaseBuffer(newLength) before and in order to - * return to normal UnicodeString operation. - * - * @param minCapacity the minimum number of UChars that are to be available - * in the buffer, starting at the returned pointer; - * default to the current string capacity if minCapacity==-1 - * @return a writable pointer to the internal string buffer, - * or 0 if an error occurs (nested calls, out of memory) - * - * @see releaseBuffer - * @see getTerminatedBuffer() - * @stable ICU 2.0 - */ - UChar *getBuffer(int32_t minCapacity); - - /** - * Release a read/write buffer on a UnicodeString object with an - * "open" getBuffer(minCapacity). - * This function must be called in a matched pair with getBuffer(minCapacity). - * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". - * - * It will set the string length to newLength, at most to the current capacity. - * If newLength==-1 then it will set the length according to the - * first NUL in the buffer, or to the capacity if there is no NUL. - * - * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation. 
- * - * @param newLength the new length of the UnicodeString object; - * defaults to the current capacity if newLength is greater than that; - * if newLength==-1, it defaults to u_strlen(buffer) but not more than - * the current capacity of the string - * - * @see getBuffer(int32_t minCapacity) - * @stable ICU 2.0 - */ - void releaseBuffer(int32_t newLength=-1); - - /** - * Get a read-only pointer to the internal buffer. - * This can be called at any time on a valid UnicodeString. - * - * It returns 0 if the string is bogus, or - * during an "open" getBuffer(minCapacity). - * - * It can be called as many times as desired. - * The pointer that it returns will remain valid until the UnicodeString object is modified, - * at which time the pointer is semantically invalidated and must not be used any more. - * - * The capacity of the buffer can be determined with getCapacity(). - * The part after length() may or may not be initialized and valid, - * depending on the history of the UnicodeString object. - * - * The buffer contents is (probably) not NUL-terminated. - * You can check if it is with - * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. - * (See getTerminatedBuffer().) - * - * The buffer may reside in read-only memory. Its contents must not - * be modified. - * - * @return a read-only pointer to the internal string buffer, - * or 0 if the string is empty or bogus - * - * @see getBuffer(int32_t minCapacity) - * @see getTerminatedBuffer() - * @stable ICU 2.0 - */ - inline const UChar *getBuffer() const; - - /** - * Get a read-only pointer to the internal buffer, - * making sure that it is NUL-terminated. - * This can be called at any time on a valid UnicodeString. - * - * It returns 0 if the string is bogus, or - * during an "open" getBuffer(minCapacity), or if the buffer cannot - * be NUL-terminated (because memory allocation failed). - * - * It can be called as many times as desired. 
- * The pointer that it returns will remain valid until the UnicodeString object is modified, - * at which time the pointer is semantically invalidated and must not be used any more. - * - * The capacity of the buffer can be determined with getCapacity(). - * The part after length()+1 may or may not be initialized and valid, - * depending on the history of the UnicodeString object. - * - * The buffer contents is guaranteed to be NUL-terminated. - * getTerminatedBuffer() may reallocate the buffer if a terminating NUL - * is written. - * For this reason, this function is not const, unlike getBuffer(). - * Note that a UnicodeString may also contain NUL characters as part of its contents. - * - * The buffer may reside in read-only memory. Its contents must not - * be modified. - * - * @return a read-only pointer to the internal string buffer, - * or 0 if the string is empty or bogus - * - * @see getBuffer(int32_t minCapacity) - * @see getBuffer() - * @stable ICU 2.2 - */ - const UChar *getTerminatedBuffer(); - - //======================================== - // Constructors - //======================================== - - /** Construct an empty UnicodeString. - * @stable ICU 2.0 - */ - inline UnicodeString(); - - /** - * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars - * @param capacity the number of UChars this UnicodeString should hold - * before a resize is necessary; if count is greater than 0 and count - * code points c take up more space than capacity, then capacity is adjusted - * accordingly. - * @param c is used to initially fill the string - * @param count specifies how many code points c are to be written in the - * string - * @stable ICU 2.0 - */ - UnicodeString(int32_t capacity, UChar32 c, int32_t count); - - /** - * Single UChar (code unit) constructor. - * - * It is recommended to mark this constructor "explicit" by - * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> - * on the compiler command line or similar. 
- * @param ch the character to place in the UnicodeString - * @stable ICU 2.0 - */ - UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); - - /** - * Single UChar32 (code point) constructor. - * - * It is recommended to mark this constructor "explicit" by - * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> - * on the compiler command line or similar. - * @param ch the character to place in the UnicodeString - * @stable ICU 2.0 - */ - UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); - - /** - * UChar* constructor. - * - * It is recommended to mark this constructor "explicit" by - * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> - * on the compiler command line or similar. - * @param text The characters to place in the UnicodeString. <TT>text</TT> - * must be NUL (U+0000) terminated. - * @stable ICU 2.0 - */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); - - /** - * UChar* constructor. - * @param text The characters to place in the UnicodeString. - * @param textLength The number of Unicode characters in <TT>text</TT> - * to copy. - * @stable ICU 2.0 - */ - UnicodeString(const UChar *text, - int32_t textLength); - - /** - * Readonly-aliasing UChar* constructor. - * The text will be used for the UnicodeString object, but - * it will not be released when the UnicodeString is destroyed. - * This has copy-on-write semantics: - * When the string is modified, then the buffer is first copied into - * newly allocated memory. - * The aliased buffer is never modified. - * - * In an assignment to another UnicodeString, when using the copy constructor - * or the assignment operator, the text will be copied. - * When using fastCopyFrom(), the text will be aliased again, - * so that both strings then alias the same readonly-text. - * - * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. - * This must be true if <code>textLength==-1</code>. - * @param text The characters to alias for the UnicodeString. 
- * @param textLength The number of Unicode characters in <code>text</code> to alias. - * If -1, then this constructor will determine the length - * by calling <code>u_strlen()</code>. - * @stable ICU 2.0 - */ - UnicodeString(UBool isTerminated, - const UChar *text, - int32_t textLength); - - /** - * Writable-aliasing UChar* constructor. - * The text will be used for the UnicodeString object, but - * it will not be released when the UnicodeString is destroyed. - * This has write-through semantics: - * For as long as the capacity of the buffer is sufficient, write operations - * will directly affect the buffer. When more capacity is necessary, then - * a new buffer will be allocated and the contents copied as with regularly - * constructed strings. - * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same - * as the string buffer itself and will in this case not copy the contents. - * - * @param buffer The characters to alias for the UnicodeString. - * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. - * @param buffCapacity The size of <code>buffer</code> in UChars. - * @stable ICU 2.0 - */ - UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); - -#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION - - /** - * char* constructor. - * Uses the default converter (and thus depends on the ICU conversion code) - * unless U_CHARSET_IS_UTF8 is set to 1. - * - * For ASCII (really "invariant character") strings it is more efficient to use - * the constructor that takes a US_INV (for its enum EInvariant). - * For ASCII (invariant-character) string literals, see UNICODE_STRING and - * UNICODE_STRING_SIMPLE. - * - * It is recommended to mark this constructor "explicit" by - * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> - * on the compiler command line or similar. 
- * @param codepageData an array of bytes, null-terminated, - * in the platform's default codepage. - * @stable ICU 2.0 - * @see UNICODE_STRING - * @see UNICODE_STRING_SIMPLE - */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); - - /** - * char* constructor. - * Uses the default converter (and thus depends on the ICU conversion code) - * unless U_CHARSET_IS_UTF8 is set to 1. - * @param codepageData an array of bytes in the platform's default codepage. - * @param dataLength The number of bytes in <TT>codepageData</TT>. - * @stable ICU 2.0 - */ - UnicodeString(const char *codepageData, int32_t dataLength); - -#endif - -#if !UCONFIG_NO_CONVERSION - - /** - * char* constructor. - * @param codepageData an array of bytes, null-terminated - * @param codepage the encoding of <TT>codepageData</TT>. The special - * value 0 for <TT>codepage</TT> indicates that the text is in the - * platform's default codepage. - * - * If <code>codepage</code> is an empty string (<code>""</code>), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. - * Recommendation: For invariant-character strings use the constructor - * UnicodeString(const char *src, int32_t length, enum EInvariant inv) - * because it avoids object code dependencies of UnicodeString on - * the conversion code. - * - * @stable ICU 2.0 - */ - UnicodeString(const char *codepageData, const char *codepage); - - /** - * char* constructor. - * @param codepageData an array of bytes. - * @param dataLength The number of bytes in <TT>codepageData</TT>. - * @param codepage the encoding of <TT>codepageData</TT>. The special - * value 0 for <TT>codepage</TT> indicates that the text is in the - * platform's default codepage. - * If <code>codepage</code> is an empty string (<code>""</code>), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. 
See utypes.h. - * Recommendation: For invariant-character strings use the constructor - * UnicodeString(const char *src, int32_t length, enum EInvariant inv) - * because it avoids object code dependencies of UnicodeString on - * the conversion code. - * - * @stable ICU 2.0 - */ - UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); - - /** - * char * / UConverter constructor. - * This constructor uses an existing UConverter object to - * convert the codepage string to Unicode and construct a UnicodeString - * from that. - * - * The converter is reset at first. - * If the error code indicates a failure before this constructor is called, - * or if an error occurs during conversion or construction, - * then the string will be bogus. - * - * This function avoids the overhead of opening and closing a converter if - * multiple strings are constructed. - * - * @param src input codepage string - * @param srcLength length of the input string, can be -1 for NUL-terminated strings - * @param cnv converter object (ucnv_resetToUnicode() will be called), - * can be NULL for the default converter - * @param errorCode normal ICU error code - * @stable ICU 2.0 - */ - UnicodeString( - const char *src, int32_t srcLength, - UConverter *cnv, - UErrorCode &errorCode); - -#endif - - /** - * Constructs a Unicode string from an invariant-character char * string. - * About invariant characters see utypes.h. - * This constructor has no runtime dependency on conversion code and is - * therefore recommended over ones taking a charset name string - * (where the empty string "" indicates invariant-character conversion). - * - * Use the macro US_INV as the third, signature-distinguishing parameter. - * - * For example: - * \code - * void fn(const char *s) { - * UnicodeString ustr(s, -1, US_INV); - * // use ustr ... - * } - * \endcode - * - * @param src String using only invariant characters. - * @param length Length of src, or -1 if NUL-terminated. 
- * @param inv Signature-distinguishing parameter, use US_INV. - * - * @see US_INV - * @stable ICU 3.2 - */ - UnicodeString(const char *src, int32_t length, enum EInvariant inv); - - - /** - * Copy constructor. - * @param that The UnicodeString object to copy. - * @stable ICU 2.0 - */ - UnicodeString(const UnicodeString& that); - - /** - * 'Substring' constructor from tail of source string. - * @param src The UnicodeString object to copy. - * @param srcStart The offset into <tt>src</tt> at which to start copying. - * @stable ICU 2.2 - */ - UnicodeString(const UnicodeString& src, int32_t srcStart); - - /** - * 'Substring' constructor from subrange of source string. - * @param src The UnicodeString object to copy. - * @param srcStart The offset into <tt>src</tt> at which to start copying. - * @param srcLength The number of characters from <tt>src</tt> to copy. - * @stable ICU 2.2 - */ - UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); - - /** - * Clone this object, an instance of a subclass of Replaceable. - * Clones can be used concurrently in multiple threads. - * If a subclass does not implement clone(), or if an error occurs, - * then NULL is returned. - * The clone functions in all subclasses return a pointer to a Replaceable - * because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. - * The caller must delete the clone. - * - * @return a clone of this object - * - * @see Replaceable::clone - * @see getDynamicClassID - * @stable ICU 2.6 - */ - virtual Replaceable *clone() const; - - /** Destructor. - * @stable ICU 2.0 - */ - virtual ~UnicodeString(); - - /** - * Create a UnicodeString from a UTF-8 string. - * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. - * Calls u_strFromUTF8WithSub(). - * - * @param utf8 UTF-8 input string. 
- * Note that a StringPiece can be implicitly constructed - * from a std::string or a NUL-terminated const char * string. - * @return A UnicodeString with equivalent UTF-16 contents. - * @see toUTF8 - * @see toUTF8String - * @stable ICU 4.2 - */ - static UnicodeString fromUTF8(const StringPiece &utf8); - - /** - * Create a UnicodeString from a UTF-32 string. - * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. - * Calls u_strFromUTF32WithSub(). - * - * @param utf32 UTF-32 input string. Must not be NULL. - * @param length Length of the input string, or -1 if NUL-terminated. - * @return A UnicodeString with equivalent UTF-16 contents. - * @see toUTF32 - * @stable ICU 4.2 - */ - static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); - - /* Miscellaneous operations */ - - /** - * Unescape a string of characters and return a string containing - * the result. The following escape sequences are recognized: - * - * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] - * \\Uhhhhhhhh 8 hex digits - * \\xhh 1-2 hex digits - * \\ooo 1-3 octal digits; o in [0-7] - * \\cX control-X; X is masked with 0x1F - * - * as well as the standard ANSI C escapes: - * - * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, - * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, - * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C - * - * Anything else following a backslash is generically escaped. For - * example, "[a\\-z]" returns "[a-z]". - * - * If an escape sequence is ill-formed, this method returns an empty - * string. An example of an ill-formed sequence is "\\u" followed by - * fewer than 4 hex digits. - * - * This function is similar to u_unescape() but not identical to it. - * The latter takes a source char*, so it does escape recognition - * and also invariant conversion. - * - * @return a string with backslash escapes interpreted, or an - * empty string on error. 
- * @see UnicodeString#unescapeAt() - * @see u_unescape() - * @see u_unescapeAt() - * @stable ICU 2.0 - */ - UnicodeString unescape() const; - - /** - * Unescape a single escape sequence and return the represented - * character. See unescape() for a listing of the recognized escape - * sequences. The character at offset-1 is assumed (without - * checking) to be a backslash. If the escape sequence is - * ill-formed, or the offset is out of range, U_SENTINEL=-1 is - * returned. - * - * @param offset an input output parameter. On input, it is the - * offset into this string where the escape sequence is located, - * after the initial backslash. On output, it is advanced after the - * last character parsed. On error, it is not advanced at all. - * @return the character represented by the escape sequence at - * offset, or U_SENTINEL=-1 on error. - * @see UnicodeString#unescape() - * @see u_unescape() - * @see u_unescapeAt() - * @stable ICU 2.0 - */ - UChar32 unescapeAt(int32_t &offset) const; - - /** - * ICU "poor man's RTTI", returns a UClassID for this class. - * - * @stable ICU 2.2 - */ - static UClassID U_EXPORT2 getStaticClassID(); - - /** - * ICU "poor man's RTTI", returns a UClassID for the actual class. - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - - //======================================== - // Implementation methods - //======================================== - -protected: - /** - * Implement Replaceable::getLength() (see jitterbug 1027). - * @stable ICU 2.4 - */ - virtual int32_t getLength() const; - - /** - * The change in Replaceable to use virtual getCharAt() allows - * UnicodeString::charAt() to be inline again (see jitterbug 709). - * @stable ICU 2.4 - */ - virtual UChar getCharAt(int32_t offset) const; - - /** - * The change in Replaceable to use virtual getChar32At() allows - * UnicodeString::char32At() to be inline again (see jitterbug 709). 
- * @stable ICU 2.4 - */ - virtual UChar32 getChar32At(int32_t offset) const; - -private: - // For char* constructors. Could be made public. - UnicodeString &setToUTF8(const StringPiece &utf8); - // For extract(char*). - // We could make a toUTF8(target, capacity, errorCode) public but not - // this version: New API will be cleaner if we make callers create substrings - // rather than having start+length on every method, - // and it should take a UErrorCode&. - int32_t - toUTF8(int32_t start, int32_t len, - char *target, int32_t capacity) const; - - /** - * Internal string contents comparison, called by operator==. - * Requires: this & text not bogus and have same lengths. - */ - UBool doEquals(const UnicodeString &text, int32_t len) const; - - inline int8_t - doCompare(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - int8_t doCompare(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - inline int8_t - doCompareCodePointOrder(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const; - - int8_t doCompareCodePointOrder(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const; - - inline int8_t - doCaseCompare(int32_t start, - int32_t length, - const UnicodeString &srcText, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const; - - int8_t - doCaseCompare(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const; - - int32_t doIndexOf(UChar c, - int32_t start, - int32_t length) const; - - int32_t doIndexOf(UChar32 c, - int32_t start, - int32_t length) const; - - int32_t doLastIndexOf(UChar c, - int32_t start, - int32_t length) const; - - int32_t doLastIndexOf(UChar32 c, - int32_t start, - int32_t length) const; - - void doExtract(int32_t start, - int32_t length, - 
UChar *dst, - int32_t dstStart) const; - - inline void doExtract(int32_t start, - int32_t length, - UnicodeString& target) const; - - inline UChar doCharAt(int32_t offset) const; - - UnicodeString& doReplace(int32_t start, - int32_t length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength); - - UnicodeString& doReplace(int32_t start, - int32_t length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength); - - UnicodeString& doReverse(int32_t start, - int32_t length); - - // calculate hash code - int32_t doHashCode(void) const; - - // get pointer to start of array - // these do not check for kOpenGetBuffer, unlike the public getBuffer() function - inline UChar* getArrayStart(void); - inline const UChar* getArrayStart(void) const; - - // A UnicodeString object (not necessarily its current buffer) - // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). - inline UBool isWritable() const; - - // Is the current buffer writable? - inline UBool isBufferWritable() const; - - // None of the following does releaseArray(). - inline void setLength(int32_t len); // sets only fShortLength and fLength - inline void setToEmpty(); // sets fFlags=kShortString - inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags - - // allocate the array; result may be fStackBuffer - // sets refCount to 1 if appropriate - // sets fArray, fCapacity, and fFlags - // returns boolean for success or failure - UBool allocate(int32_t capacity); - - // release the array if owned - void releaseArray(void); - - // turn a bogus string into an empty one - void unBogus(); - - // implements assignment operator, copy constructor, and fastCopyFrom() - UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); - - // Pin start and limit to acceptable values. 
- inline void pinIndex(int32_t& start) const; - inline void pinIndices(int32_t& start, - int32_t& length) const; - -#if !UCONFIG_NO_CONVERSION - - /* Internal extract() using UConverter. */ - int32_t doExtract(int32_t start, int32_t length, - char *dest, int32_t destCapacity, - UConverter *cnv, - UErrorCode &errorCode) const; - - /* - * Real constructor for converting from codepage data. - * It assumes that it is called with !fRefCounted. - * - * If <code>codepage==0</code>, then the default converter - * is used for the platform encoding. - * If <code>codepage</code> is an empty string (<code>""</code>), - * then a simple conversion is performed on the codepage-invariant - * subset ("invariant characters") of the platform encoding. See utypes.h. - */ - void doCodepageCreate(const char *codepageData, - int32_t dataLength, - const char *codepage); - - /* - * Worker function for creating a UnicodeString from - * a codepage string using a UConverter. - */ - void - doCodepageCreate(const char *codepageData, - int32_t dataLength, - UConverter *converter, - UErrorCode &status); - -#endif - - /* - * This function is called when write access to the array - * is necessary. - * - * We need to make a copy of the array if - * the buffer is read-only, or - * the buffer is refCounted (shared), and refCount>1, or - * the buffer is too small. - * - * Return FALSE if memory could not be allocated. - */ - UBool cloneArrayIfNeeded(int32_t newCapacity = -1, - int32_t growCapacity = -1, - UBool doCopyArray = TRUE, - int32_t **pBufferToDelete = 0, - UBool forceClone = FALSE); - - /** - * Common function for UnicodeString case mappings. - * The stringCaseMapper has the same type UStringCaseMapper - * as in ustr_imp.h for ustrcase_map(). 
- */ - UnicodeString & - caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); - - // ref counting - void addRef(void); - int32_t removeRef(void); - int32_t refCount(void) const; - - // constants - enum { - // Set the stack buffer size so that sizeof(UnicodeString) is, - // naturally (without padding), a multiple of sizeof(pointer). - US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings - kInvalidUChar=0xffff, // invalid UChar index - kGrowSize=128, // grow size for this buffer - kInvalidHashCode=0, // invalid hash code - kEmptyHashCode=1, // hash code for empty string - - // bit flag values for fFlags - kIsBogus=1, // this string is bogus, i.e., not valid or NULL - kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields - kRefCounted=4, // there is a refCount field before the characters in fArray - kBufferIsReadonly=8,// do not write to this buffer - kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), - // and releaseBuffer(newLength) must be called - - // combined values for convenience - kShortString=kUsingStackBuffer, - kLongString=kRefCounted, - kReadonlyAlias=kBufferIsReadonly, - kWritableAlias=0 - }; - - friend class StringThreadTest; - friend class UnicodeStringAppendable; - - union StackBufferOrFields; // forward declaration necessary before friend declaration - friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion - - /* - * The following are all the class fields that are stored - * in each UnicodeString object. - * Note that UnicodeString has virtual functions, - * therefore there is an implicit vtable pointer - * as the first real field. - * The fields should be aligned such that no padding is necessary. - * On 32-bit machines, the size should be 32 bytes, - * on 64-bit machines (8-byte pointers), it should be 40 bytes. - * - * We use a hack to achieve this. 
- * - * With at least some compilers, each of the following is forced to - * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], - * rounded up with additional padding if the fields do not already fit that requirement: - * - sizeof(class UnicodeString) - * - offsetof(UnicodeString, fUnion) - * - sizeof(fUnion) - * - sizeof(fFields) - * - * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) - * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. - * (Padding at the end of fFields is ok: - * As long as there is no padding after fStackBuffer, it is not wasted space.) - * - * We further assume that the compiler does not reorder the fields, - * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, - * with at most some padding (but no other field) in between. - * (Padding there would be wasted space, but functionally harmless.) - * - * We use a few more sizeof(pointer)'s chunks of space with - * fRestOfStackBuffer, fShortLength and fFlags, - * to get up exactly to the intended sizeof(UnicodeString). - */ - // (implicit) *vtable; - union StackBufferOrFields { - // fStackBuffer is used iff (fFlags&kUsingStackBuffer) - // else fFields is used - UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer - struct { - UChar *fArray; // the Unicode data - int32_t fCapacity; // capacity of fArray (in UChars) - int32_t fLength; // number of characters in fArray if >127; else undefined - } fFields; - } fUnion; - UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; - int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength - uint8_t fFlags; // bit flags: see constants above -}; - -/** - * Create a new UnicodeString with the concatenation of two others. - * - * @param s1 The first string to be copied to the new one. - * @param s2 The second string to be copied to the new one, after s1. 
- * @return UnicodeString(s1).append(s2) - * @stable ICU 2.8 - */ -U_COMMON_API UnicodeString U_EXPORT2 -operator+ (const UnicodeString &s1, const UnicodeString &s2); - -//======================================== -// Inline members -//======================================== - -//======================================== -// Privates -//======================================== - -inline void -UnicodeString::pinIndex(int32_t& start) const -{ - // pin index - if(start < 0) { - start = 0; - } else if(start > length()) { - start = length(); - } -} - -inline void -UnicodeString::pinIndices(int32_t& start, - int32_t& _length) const -{ - // pin indices - int32_t len = length(); - if(start < 0) { - start = 0; - } else if(start > len) { - start = len; - } - if(_length < 0) { - _length = 0; - } else if(_length > (len - start)) { - _length = (len - start); - } -} - -inline UChar* -UnicodeString::getArrayStart() -{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } - -inline const UChar* -UnicodeString::getArrayStart() const -{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } - -//======================================== -// Default constructor -//======================================== - -inline -UnicodeString::UnicodeString() - : fShortLength(0), - fFlags(kShortString) -{} - -//======================================== -// Read-only implementation methods -//======================================== -inline int32_t -UnicodeString::length() const -{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } - -inline int32_t -UnicodeString::getCapacity() const -{ return (fFlags&kUsingStackBuffer) ? 
US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } - -inline int32_t -UnicodeString::hashCode() const -{ return doHashCode(); } - -inline UBool -UnicodeString::isBogus() const -{ return (UBool)(fFlags & kIsBogus); } - -inline UBool -UnicodeString::isWritable() const -{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } - -inline UBool -UnicodeString::isBufferWritable() const -{ - return (UBool)( - !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && - (!(fFlags&kRefCounted) || refCount()==1)); -} - -inline const UChar * -UnicodeString::getBuffer() const { - if(fFlags&(kIsBogus|kOpenGetBuffer)) { - return 0; - } else if(fFlags&kUsingStackBuffer) { - return fUnion.fStackBuffer; - } else { - return fUnion.fFields.fArray; - } -} - -//======================================== -// Read-only alias methods -//======================================== -inline int8_t -UnicodeString::doCompare(int32_t start, - int32_t thisLength, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ - if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise - } else { - srcText.pinIndices(srcStart, srcLength); - return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); - } -} - -inline UBool -UnicodeString::operator== (const UnicodeString& text) const -{ - if(isBogus()) { - return text.isBogus(); - } else { - int32_t len = length(), textLength = text.length(); - return !text.isBogus() && len == textLength && doEquals(text, len); - } -} - -inline UBool -UnicodeString::operator!= (const UnicodeString& text) const -{ return (! 
operator==(text)); } - -inline UBool -UnicodeString::operator> (const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()) == 1; } - -inline UBool -UnicodeString::operator< (const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()) == -1; } - -inline UBool -UnicodeString::operator>= (const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()) != -1; } - -inline UBool -UnicodeString::operator<= (const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()) != 1; } - -inline int8_t -UnicodeString::compare(const UnicodeString& text) const -{ return doCompare(0, length(), text, 0, text.length()); } - -inline int8_t -UnicodeString::compare(int32_t start, - int32_t _length, - const UnicodeString& srcText) const -{ return doCompare(start, _length, srcText, 0, srcText.length()); } - -inline int8_t -UnicodeString::compare(const UChar *srcChars, - int32_t srcLength) const -{ return doCompare(0, length(), srcChars, 0, srcLength); } - -inline int8_t -UnicodeString::compare(int32_t start, - int32_t _length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ return doCompare(start, _length, srcText, srcStart, srcLength); } - -inline int8_t -UnicodeString::compare(int32_t start, - int32_t _length, - const UChar *srcChars) const -{ return doCompare(start, _length, srcChars, 0, _length); } - -inline int8_t -UnicodeString::compare(int32_t start, - int32_t _length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const -{ return doCompare(start, _length, srcChars, srcStart, srcLength); } - -inline int8_t -UnicodeString::compareBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) const -{ return doCompare(start, limit - start, - srcText, srcStart, srcLimit - srcStart); } - -inline int8_t -UnicodeString::doCompareCodePointOrder(int32_t start, - int32_t 
thisLength, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ - if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise - } else { - srcText.pinIndices(srcStart, srcLength); - return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); - } -} - -inline int8_t -UnicodeString::compareCodePointOrder(const UnicodeString& text) const -{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); } - -inline int8_t -UnicodeString::compareCodePointOrder(int32_t start, - int32_t _length, - const UnicodeString& srcText) const -{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } - -inline int8_t -UnicodeString::compareCodePointOrder(const UChar *srcChars, - int32_t srcLength) const -{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } - -inline int8_t -UnicodeString::compareCodePointOrder(int32_t start, - int32_t _length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } - -inline int8_t -UnicodeString::compareCodePointOrder(int32_t start, - int32_t _length, - const UChar *srcChars) const -{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } - -inline int8_t -UnicodeString::compareCodePointOrder(int32_t start, - int32_t _length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const -{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } - -inline int8_t -UnicodeString::compareCodePointOrderBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) const -{ return doCompareCodePointOrder(start, limit - start, - srcText, srcStart, srcLimit - srcStart); } - -inline int8_t -UnicodeString::doCaseCompare(int32_t start, - int32_t thisLength, - const UnicodeString &srcText, - int32_t srcStart, - 
int32_t srcLength, - uint32_t options) const -{ - if(srcText.isBogus()) { - return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise - } else { - srcText.pinIndices(srcStart, srcLength); - return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); - } -} - -inline int8_t -UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { - return doCaseCompare(0, length(), text, 0, text.length(), options); -} - -inline int8_t -UnicodeString::caseCompare(int32_t start, - int32_t _length, - const UnicodeString &srcText, - uint32_t options) const { - return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); -} - -inline int8_t -UnicodeString::caseCompare(const UChar *srcChars, - int32_t srcLength, - uint32_t options) const { - return doCaseCompare(0, length(), srcChars, 0, srcLength, options); -} - -inline int8_t -UnicodeString::caseCompare(int32_t start, - int32_t _length, - const UnicodeString &srcText, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const { - return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); -} - -inline int8_t -UnicodeString::caseCompare(int32_t start, - int32_t _length, - const UChar *srcChars, - uint32_t options) const { - return doCaseCompare(start, _length, srcChars, 0, _length, options); -} - -inline int8_t -UnicodeString::caseCompare(int32_t start, - int32_t _length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength, - uint32_t options) const { - return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); -} - -inline int8_t -UnicodeString::caseCompareBetween(int32_t start, - int32_t limit, - const UnicodeString &srcText, - int32_t srcStart, - int32_t srcLimit, - uint32_t options) const { - return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); -} - -inline int32_t -UnicodeString::indexOf(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, 
- int32_t start, - int32_t _length) const -{ - if(!srcText.isBogus()) { - srcText.pinIndices(srcStart, srcLength); - if(srcLength > 0) { - return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); - } - } - return -1; -} - -inline int32_t -UnicodeString::indexOf(const UnicodeString& text) const -{ return indexOf(text, 0, text.length(), 0, length()); } - -inline int32_t -UnicodeString::indexOf(const UnicodeString& text, - int32_t start) const { - pinIndex(start); - return indexOf(text, 0, text.length(), start, length() - start); -} - -inline int32_t -UnicodeString::indexOf(const UnicodeString& text, - int32_t start, - int32_t _length) const -{ return indexOf(text, 0, text.length(), start, _length); } - -inline int32_t -UnicodeString::indexOf(const UChar *srcChars, - int32_t srcLength, - int32_t start) const { - pinIndex(start); - return indexOf(srcChars, 0, srcLength, start, length() - start); -} - -inline int32_t -UnicodeString::indexOf(const UChar *srcChars, - int32_t srcLength, - int32_t start, - int32_t _length) const -{ return indexOf(srcChars, 0, srcLength, start, _length); } - -inline int32_t -UnicodeString::indexOf(UChar c, - int32_t start, - int32_t _length) const -{ return doIndexOf(c, start, _length); } - -inline int32_t -UnicodeString::indexOf(UChar32 c, - int32_t start, - int32_t _length) const -{ return doIndexOf(c, start, _length); } - -inline int32_t -UnicodeString::indexOf(UChar c) const -{ return doIndexOf(c, 0, length()); } - -inline int32_t -UnicodeString::indexOf(UChar32 c) const -{ return indexOf(c, 0, length()); } - -inline int32_t -UnicodeString::indexOf(UChar c, - int32_t start) const { - pinIndex(start); - return doIndexOf(c, start, length() - start); -} - -inline int32_t -UnicodeString::indexOf(UChar32 c, - int32_t start) const { - pinIndex(start); - return indexOf(c, start, length() - start); -} - -inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, - int32_t srcLength, - int32_t start, - int32_t _length) 
const -{ return lastIndexOf(srcChars, 0, srcLength, start, _length); } - -inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, - int32_t srcLength, - int32_t start) const { - pinIndex(start); - return lastIndexOf(srcChars, 0, srcLength, start, length() - start); -} - -inline int32_t -UnicodeString::lastIndexOf(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength, - int32_t start, - int32_t _length) const -{ - if(!srcText.isBogus()) { - srcText.pinIndices(srcStart, srcLength); - if(srcLength > 0) { - return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); - } - } - return -1; -} - -inline int32_t -UnicodeString::lastIndexOf(const UnicodeString& text, - int32_t start, - int32_t _length) const -{ return lastIndexOf(text, 0, text.length(), start, _length); } - -inline int32_t -UnicodeString::lastIndexOf(const UnicodeString& text, - int32_t start) const { - pinIndex(start); - return lastIndexOf(text, 0, text.length(), start, length() - start); -} - -inline int32_t -UnicodeString::lastIndexOf(const UnicodeString& text) const -{ return lastIndexOf(text, 0, text.length(), 0, length()); } - -inline int32_t -UnicodeString::lastIndexOf(UChar c, - int32_t start, - int32_t _length) const -{ return doLastIndexOf(c, start, _length); } - -inline int32_t -UnicodeString::lastIndexOf(UChar32 c, - int32_t start, - int32_t _length) const { - return doLastIndexOf(c, start, _length); -} - -inline int32_t -UnicodeString::lastIndexOf(UChar c) const -{ return doLastIndexOf(c, 0, length()); } - -inline int32_t -UnicodeString::lastIndexOf(UChar32 c) const { - return lastIndexOf(c, 0, length()); -} - -inline int32_t -UnicodeString::lastIndexOf(UChar c, - int32_t start) const { - pinIndex(start); - return doLastIndexOf(c, start, length() - start); -} - -inline int32_t -UnicodeString::lastIndexOf(UChar32 c, - int32_t start) const { - pinIndex(start); - return lastIndexOf(c, start, length() - start); -} - -inline UBool 
-UnicodeString::startsWith(const UnicodeString& text) const -{ return compare(0, text.length(), text, 0, text.length()) == 0; } - -inline UBool -UnicodeString::startsWith(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const -{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } - -inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { - if(srcLength < 0) { - srcLength = u_strlen(srcChars); - } - return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; -} - -inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { - if(srcLength < 0) { - // NUL-terminated input: measure from srcStart, the first character that is compared, - // so the length does not extend past the terminator (same as endsWith() below). - srcLength = u_strlen(srcChars + srcStart); - } - return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; -} - -inline UBool -UnicodeString::endsWith(const UnicodeString& text) const -{ return doCompare(length() - text.length(), text.length(), - text, 0, text.length()) == 0; } - -inline UBool -UnicodeString::endsWith(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) const { - srcText.pinIndices(srcStart, srcLength); - return doCompare(length() - srcLength, srcLength, - srcText, srcStart, srcLength) == 0; -} - -inline UBool -UnicodeString::endsWith(const UChar *srcChars, - int32_t srcLength) const { - if(srcLength < 0) { - srcLength = u_strlen(srcChars); - } - return doCompare(length() - srcLength, srcLength, - srcChars, 0, srcLength) == 0; -} - -inline UBool -UnicodeString::endsWith(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) const { - if(srcLength < 0) { - srcLength = u_strlen(srcChars + srcStart); - } - return doCompare(length() - srcLength, srcLength, - srcChars, srcStart, srcLength) == 0; -} - -//======================================== -// replace -//======================================== -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - const UnicodeString& srcText) -{ return doReplace(start, _length, srcText, 0, 
srcText.length()); } - -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(start, _length, srcText, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - const UChar *srcChars, - int32_t srcLength) -{ return doReplace(start, _length, srcChars, 0, srcLength); } - -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(start, _length, srcChars, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::replace(int32_t start, - int32_t _length, - UChar srcChar) -{ return doReplace(start, _length, &srcChar, 0, 1); } - -inline UnicodeString& -UnicodeString::replaceBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText) -{ return doReplace(start, limit - start, srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::replaceBetween(int32_t start, - int32_t limit, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLimit) -{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } - -inline UnicodeString& -UnicodeString::findAndReplace(const UnicodeString& oldText, - const UnicodeString& newText) -{ return findAndReplace(0, length(), oldText, 0, oldText.length(), - newText, 0, newText.length()); } - -inline UnicodeString& -UnicodeString::findAndReplace(int32_t start, - int32_t _length, - const UnicodeString& oldText, - const UnicodeString& newText) -{ return findAndReplace(start, _length, oldText, 0, oldText.length(), - newText, 0, newText.length()); } - -// ============================ -// extract -// ============================ -inline void -UnicodeString::doExtract(int32_t start, - int32_t _length, - UnicodeString& target) const -{ target.replace(0, target.length(), *this, start, _length); } - -inline void 
-UnicodeString::extract(int32_t start, - int32_t _length, - UChar *target, - int32_t targetStart) const -{ doExtract(start, _length, target, targetStart); } - -inline void -UnicodeString::extract(int32_t start, - int32_t _length, - UnicodeString& target) const -{ doExtract(start, _length, target); } - -#if !UCONFIG_NO_CONVERSION - -inline int32_t -UnicodeString::extract(int32_t start, - int32_t _length, - char *dst, - const char *codepage) const - -{ - // This dstSize value will be checked explicitly - return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); -} - -#endif - -inline void -UnicodeString::extractBetween(int32_t start, - int32_t limit, - UChar *dst, - int32_t dstStart) const { - pinIndex(start); - pinIndex(limit); - doExtract(start, limit - start, dst, dstStart); -} - -inline UnicodeString -UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { - return tempSubString(start, limit - start); -} - -inline UChar -UnicodeString::doCharAt(int32_t offset) const -{ - if((uint32_t)offset < (uint32_t)length()) { - return getArrayStart()[offset]; - } else { - return kInvalidUChar; - } -} - -inline UChar -UnicodeString::charAt(int32_t offset) const -{ return doCharAt(offset); } - -inline UChar -UnicodeString::operator[] (int32_t offset) const -{ return doCharAt(offset); } - -inline UBool -UnicodeString::isEmpty() const { - return fShortLength == 0; -} - -//======================================== -// Write implementation methods -//======================================== -inline void -UnicodeString::setLength(int32_t len) { - if(len <= 127) { - fShortLength = (int8_t)len; - } else { - fShortLength = (int8_t)-1; - fUnion.fFields.fLength = len; - } -} - -inline void -UnicodeString::setToEmpty() { - fShortLength = 0; - fFlags = kShortString; -} - -inline void -UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { - setLength(len); - fUnion.fFields.fArray = array; - fUnion.fFields.fCapacity = capacity; -} - -inline 
UnicodeString& -UnicodeString::operator= (UChar ch) -{ return doReplace(0, length(), &ch, 0, 1); } - -inline UnicodeString& -UnicodeString::operator= (UChar32 ch) -{ return replace(0, length(), ch); } - -inline UnicodeString& -UnicodeString::setTo(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) -{ - unBogus(); - return doReplace(0, length(), srcText, srcStart, srcLength); -} - -inline UnicodeString& -UnicodeString::setTo(const UnicodeString& srcText, - int32_t srcStart) -{ - unBogus(); - srcText.pinIndex(srcStart); - return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); -} - -inline UnicodeString& -UnicodeString::setTo(const UnicodeString& srcText) -{ - return copyFrom(srcText); -} - -inline UnicodeString& -UnicodeString::setTo(const UChar *srcChars, - int32_t srcLength) -{ - unBogus(); - return doReplace(0, length(), srcChars, 0, srcLength); -} - -inline UnicodeString& -UnicodeString::setTo(UChar srcChar) -{ - unBogus(); - return doReplace(0, length(), &srcChar, 0, 1); -} - -inline UnicodeString& -UnicodeString::setTo(UChar32 srcChar) -{ - unBogus(); - return replace(0, length(), srcChar); -} - -inline UnicodeString& -UnicodeString::append(const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(length(), 0, srcText, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::append(const UnicodeString& srcText) -{ return doReplace(length(), 0, srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::append(const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(length(), 0, srcChars, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::append(const UChar *srcChars, - int32_t srcLength) -{ return doReplace(length(), 0, srcChars, 0, srcLength); } - -inline UnicodeString& -UnicodeString::append(UChar srcChar) -{ return doReplace(length(), 0, &srcChar, 0, 1); } - -inline UnicodeString& -UnicodeString::operator+= (UChar ch) -{ 
return doReplace(length(), 0, &ch, 0, 1); } - -inline UnicodeString& -UnicodeString::operator+= (UChar32 ch) { - return append(ch); -} - -inline UnicodeString& -UnicodeString::operator+= (const UnicodeString& srcText) -{ return doReplace(length(), 0, srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - const UnicodeString& srcText, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(start, 0, srcText, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - const UnicodeString& srcText) -{ return doReplace(start, 0, srcText, 0, srcText.length()); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - const UChar *srcChars, - int32_t srcStart, - int32_t srcLength) -{ return doReplace(start, 0, srcChars, srcStart, srcLength); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - const UChar *srcChars, - int32_t srcLength) -{ return doReplace(start, 0, srcChars, 0, srcLength); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - UChar srcChar) -{ return doReplace(start, 0, &srcChar, 0, 1); } - -inline UnicodeString& -UnicodeString::insert(int32_t start, - UChar32 srcChar) -{ return replace(start, 0, srcChar); } - - -inline UnicodeString& -UnicodeString::remove() -{ - // remove() of a bogus string makes the string empty and non-bogus - if(isBogus()) { - setToEmpty(); - } else { - fShortLength = 0; - } - return *this; -} - -inline UnicodeString& -UnicodeString::remove(int32_t start, - int32_t _length) -{ - if(start <= 0 && _length == INT32_MAX) { - // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus - return remove(); - } - return doReplace(start, _length, NULL, 0, 0); -} - -inline UnicodeString& -UnicodeString::removeBetween(int32_t start, - int32_t limit) -{ return doReplace(start, limit - start, NULL, 0, 0); } - -inline UnicodeString & -UnicodeString::retainBetween(int32_t start, int32_t limit) { - 
truncate(limit); - return doReplace(0, start, NULL, 0, 0); -} - -inline UBool -UnicodeString::truncate(int32_t targetLength) -{ - if(isBogus() && targetLength == 0) { - // truncate(0) of a bogus string makes the string empty and non-bogus - unBogus(); - return FALSE; - } else if((uint32_t)targetLength < (uint32_t)length()) { - setLength(targetLength); - return TRUE; - } else { - return FALSE; - } -} - -inline UnicodeString& -UnicodeString::reverse() -{ return doReverse(0, length()); } - -inline UnicodeString& -UnicodeString::reverse(int32_t start, - int32_t _length) -{ return doReverse(start, _length); } - -U_NAMESPACE_END - -#endif diff --git a/Source/WTF/icu/unicode/unorm.h b/Source/WTF/icu/unicode/unorm.h deleted file mode 100644 index fbb7b49b3..000000000 --- a/Source/WTF/icu/unicode/unorm.h +++ /dev/null @@ -1,561 +0,0 @@ -/* -******************************************************************************* -* Copyright (c) 1996-2010, International Business Machines Corporation -* and others. All Rights Reserved. -******************************************************************************* -* File unorm.h -* -* Created by: Vladimir Weinstein 12052000 -* -* Modification history : -* -* Date Name Description -* 02/01/01 synwee Added normalization quickcheck enum and method. -*/ -#ifndef UNORM_H -#define UNORM_H - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/uiter.h" -#include "unicode/unorm2.h" - -/** - * \file - * \brief C API: Unicode Normalization - * - * <h2>Unicode normalization API</h2> - * - * Note: This API has been replaced by the unorm2.h API and is only available - * for backward compatibility. The functions here simply delegate to the - * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize(). - * There is one exception: The new API does not provide a replacement for unorm_compare(). 
- * - * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or - * decomposed form, allowing for easier sorting and searching of text. - * <code>unorm_normalize</code> supports the standard normalization forms described in - * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode"> - * Unicode Standard Annex #15: Unicode Normalization Forms</a>. - * - * Characters with accents or other adornments can be encoded in - * several different ways in Unicode. For example, take the character A-acute. - * In Unicode, this can be encoded as a single character (the - * "composed" form): - * - * \code - * 00C1 LATIN CAPITAL LETTER A WITH ACUTE - * \endcode - * - * or as two separate characters (the "decomposed" form): - * - * \code - * 0041 LATIN CAPITAL LETTER A - * 0301 COMBINING ACUTE ACCENT - * \endcode - * - * To a user of your program, however, both of these sequences should be - * treated as the same "user-level" character "A with acute accent". When you are searching or - * comparing text, you must ensure that these two sequences are treated - * equivalently. In addition, you must handle characters with more than one - * accent. Sometimes the order of a character's combining accents is - * significant, while in other cases accent sequences in different orders are - * really equivalent. - * - * Similarly, the string "ffi" can be encoded as three separate letters: - * - * \code - * 0066 LATIN SMALL LETTER F - * 0066 LATIN SMALL LETTER F - * 0069 LATIN SMALL LETTER I - * \endcode - * - * or as the single character - * - * \code - * FB03 LATIN SMALL LIGATURE FFI - * \endcode - * - * The ffi ligature is not a distinct semantic character, and strictly speaking - * it shouldn't be in Unicode at all, but it was included for compatibility - * with existing character sets that already provided it. 
The Unicode standard - * identifies such characters by giving them "compatibility" decompositions - * into the corresponding semantic characters. When sorting and searching, you - * will often want to use these mappings. - * - * <code>unorm_normalize</code> helps solve these problems by transforming text into the - * canonical composed and decomposed forms as shown in the first example above. - * In addition, you can have it perform compatibility decompositions so that - * you can treat compatibility characters the same as their equivalents. - * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical - * order, so that you do not have to worry about accent rearrangement on your - * own. - * - * Form FCD, "Fast C or D", is also designed for collation. - * It allows to work on strings that are not necessarily normalized - * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed - * characters and their decomposed equivalents the same. - * - * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings - * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical - * themselves. - * - * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character, - * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long - * as their decompositions do not need canonical reordering. - * - * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts - - * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will - * return UNORM_YES for most strings in practice. - * - * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD. 
- * - * For more details on FCD see the collation design document: - * http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm - * - * ICU collation performs either NFD or FCD normalization automatically if normalization - * is turned on for the collator object. - * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons, - * transliteration/transcription, unique representations, etc. - * - * The W3C generally recommends to exchange texts in NFC. - * Note also that most legacy character encodings use only precomposed forms and often do not - * encode any combining marks by themselves. For conversion to such character encodings the - * Unicode text needs to be normalized to NFC. - * For more usage examples, see the Unicode Standard Annex. - */ - -/** - * Constants for normalization modes. - * @stable ICU 2.0 - */ -typedef enum { - /** No decomposition/composition. @stable ICU 2.0 */ - UNORM_NONE = 1, - /** Canonical decomposition. @stable ICU 2.0 */ - UNORM_NFD = 2, - /** Compatibility decomposition. @stable ICU 2.0 */ - UNORM_NFKD = 3, - /** Canonical decomposition followed by canonical composition. @stable ICU 2.0 */ - UNORM_NFC = 4, - /** Default normalization. @stable ICU 2.0 */ - UNORM_DEFAULT = UNORM_NFC, - /** Compatibility decomposition followed by canonical composition. @stable ICU 2.0 */ - UNORM_NFKC =5, - /** "Fast C or D" form. @stable ICU 2.0 */ - UNORM_FCD = 6, - - /** One more than the highest normalization mode constant. @stable ICU 2.0 */ - UNORM_MODE_COUNT -} UNormalizationMode; - -/** - * Constants for options flags for normalization. - * Use 0 for default options, - * including normalization according to the Unicode version - * that is currently supported by ICU (see u_getUnicodeVersion). - * @stable ICU 2.6 - */ -enum { - /** - * Options bit set value to select Unicode 3.2 normalization - * (except NormalizationCorrections). 
- * At most one Unicode version can be selected at a time. - * @stable ICU 2.6 - */ - UNORM_UNICODE_3_2=0x20 -}; - -/** - * Lowest-order bit number of unorm_compare() options bits corresponding to - * normalization options bits. - * - * The options parameter for unorm_compare() uses most bits for - * itself and for various comparison and folding flags. - * The most significant bits, however, are shifted down and passed on - * to the normalization implementation. - * (That is, from unorm_compare(..., options, ...), - * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the - * internal normalization functions.) - * - * @see unorm_compare - * @stable ICU 2.6 - */ -#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 - -/** - * Normalize a string. - * The string will be normalized according the specified normalization mode - * and options. - * The source and result buffers must not be the same, nor overlap. - * - * @param source The string to normalize. - * @param sourceLength The length of source, or -1 if NUL-terminated. - * @param mode The normalization mode; one of UNORM_NONE, - * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT. - * @param options The normalization options, ORed together (0 for no options). - * @param result A pointer to a buffer to receive the result string. - * The result string is NUL-terminated if possible. - * @param resultLength The maximum size of result. - * @param status A pointer to a UErrorCode to receive any errors. - * @return The total buffer size needed; if greater than resultLength, - * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -unorm_normalize(const UChar *source, int32_t sourceLength, - UNormalizationMode mode, int32_t options, - UChar *result, int32_t resultLength, - UErrorCode *status); - -/** - * Performing quick check on a string, to quickly determine if the string is - * in a particular normalization format. 
- * Three types of result can be returned UNORM_YES, UNORM_NO or - * UNORM_MAYBE. Result UNORM_YES indicates that the argument - * string is in the desired normalized format, UNORM_NO determines that - * argument string is not in the desired normalized format. A - * UNORM_MAYBE result indicates that a more thorough check is required, - * the user may have to put the string in its normalized form and compare the - * results. - * - * @param source string for determining if it is in a normalized format - * @param sourcelength length of source to test, or -1 if NUL-terminated - * @param mode which normalization form to test for - * @param status a pointer to a UErrorCode to receive any errors - * @return UNORM_YES, UNORM_NO or UNORM_MAYBE - * - * @see unorm_isNormalized - * @stable ICU 2.0 - */ -U_STABLE UNormalizationCheckResult U_EXPORT2 -unorm_quickCheck(const UChar *source, int32_t sourcelength, - UNormalizationMode mode, - UErrorCode *status); - -/** - * Performing quick check on a string; same as unorm_quickCheck but - * takes an extra options parameter like most normalization functions. - * - * @param src String that is to be tested if it is in a normalization format. - * @param srcLength Length of source to test, or -1 if NUL-terminated. - * @param mode Which normalization form to test for. - * @param options The normalization options, ORed together (0 for no options). - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return UNORM_YES, UNORM_NO or UNORM_MAYBE - * - * @see unorm_quickCheck - * @see unorm_isNormalized - * @stable ICU 2.6 - */ -U_STABLE UNormalizationCheckResult U_EXPORT2 -unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode); - -/** - * Test if a string is in a given normalization form. - * This is semantically equivalent to source.equals(normalize(source, mode)) . 
- * - * Unlike unorm_quickCheck(), this function returns a definitive result, - * never a "maybe". - * For NFD, NFKD, and FCD, both functions work exactly the same. - * For NFC and NFKC where quickCheck may return "maybe", this function will - * perform further tests to arrive at a TRUE/FALSE result. - * - * @param src String that is to be tested if it is in a normalization format. - * @param srcLength Length of source to test, or -1 if NUL-terminated. - * @param mode Which normalization form to test for. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Boolean value indicating whether the source string is in the - * "mode" normalization form. - * - * @see unorm_quickCheck - * @stable ICU 2.2 - */ -U_STABLE UBool U_EXPORT2 -unorm_isNormalized(const UChar *src, int32_t srcLength, - UNormalizationMode mode, - UErrorCode *pErrorCode); - -/** - * Test if a string is in a given normalization form; same as unorm_isNormalized but - * takes an extra options parameter like most normalization functions. - * - * @param src String that is to be tested if it is in a normalization format. - * @param srcLength Length of source to test, or -1 if NUL-terminated. - * @param mode Which normalization form to test for. - * @param options The normalization options, ORed together (0 for no options). - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Boolean value indicating whether the source string is in the - * "mode/options" normalization form. - * - * @see unorm_quickCheck - * @see unorm_isNormalized - * @stable ICU 2.6 - */ -U_STABLE UBool U_EXPORT2 -unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode); - -/** - * Iterative normalization forward. 
- * This function (together with unorm_previous) is somewhat - * similar to the C++ Normalizer class (see its non-static functions). - * - * Iterative normalization is useful when only a small portion of a longer - * string/text needs to be processed. - * - * For example, the likelihood may be high that processing the first 10% of some - * text will be sufficient to find certain data. - * Another example: When one wants to concatenate two normalized strings and get a - * normalized result, it is much more efficient to normalize just a small part of - * the result around the concatenation place instead of re-normalizing everything. - * - * The input text is an instance of the C character iteration API UCharIterator. - * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any - * other kind of text object. - * - * If a buffer overflow occurs, then the caller needs to reset the iterator to the - * old index and call the function again with a larger buffer - if the caller cares - * for the actual output. - * Regardless of the output buffer, the iterator will always be moved to the next - * normalization boundary. - * - * This function (like unorm_previous) serves two purposes: - * - * 1) To find the next boundary so that the normalization of the part of the text - * from the current position to that boundary does not affect and is not affected - * by the part of the text beyond that boundary. - * - * 2) To normalize the text up to the boundary. - * - * The second step is optional, per the doNormalize parameter. - * It is omitted for operations like string concatenation, where the two adjacent - * string ends need to be normalized together. - * In such a case, the output buffer will just contain a copy of the text up to the - * boundary. - * - * pNeededToNormalize is an output-only parameter. Its output value is only defined - * if normalization was requested (doNormalize) and successful (especially, no - * buffer overflow). 
- * It is useful for operations like a normalizing transliterator, where one would - * not want to replace a piece of text if it is not modified. - * - * If doNormalize==TRUE and pNeededToNormalize!=NULL then *pNeeded... is set TRUE - * if the normalization was necessary. - * - * If doNormalize==FALSE then *pNeededToNormalize will be set to FALSE. - * - * If the buffer overflows, then *pNeededToNormalize will be undefined; - * essentially, whenever U_FAILURE is true (like in buffer overflows), this result - * will be undefined. - * - * @param src The input text in the form of a C character iterator. - * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. - * @param destCapacity The number of UChars that fit into dest. - * @param mode The normalization mode. - * @param options The normalization options, ORed together (0 for no options). - * @param doNormalize Indicates if the source text up to the next boundary - * is to be normalized (TRUE) or just copied (FALSE). - * @param pNeededToNormalize Output flag indicating if the normalization resulted in - * different text from the input. - * Not defined if an error occurs including buffer overflow. - * Always FALSE if !doNormalize. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of output (number of UChars) when successful or buffer overflow. - * - * @see unorm_previous - * @see unorm_normalize - * - * @stable ICU 2.1 - */ -U_STABLE int32_t U_EXPORT2 -unorm_next(UCharIterator *src, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UBool doNormalize, UBool *pNeededToNormalize, - UErrorCode *pErrorCode); - -/** - * Iterative normalization backward. - * This function (together with unorm_next) is somewhat - * similar to the C++ Normalizer class (see its non-static functions). - * For all details see unorm_next. 
- * - * @param src The input text in the form of a C character iterator. - * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. - * @param destCapacity The number of UChars that fit into dest. - * @param mode The normalization mode. - * @param options The normalization options, ORed together (0 for no options). - * @param doNormalize Indicates if the source text up to the next boundary - * is to be normalized (TRUE) or just copied (FALSE). - * @param pNeededToNormalize Output flag indicating if the normalization resulted in - * different text from the input. - * Not defined if an error occurs including buffer overflow. - * Always FALSE if !doNormalize. - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of output (number of UChars) when successful or buffer overflow. - * - * @see unorm_next - * @see unorm_normalize - * - * @stable ICU 2.1 - */ -U_STABLE int32_t U_EXPORT2 -unorm_previous(UCharIterator *src, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UBool doNormalize, UBool *pNeededToNormalize, - UErrorCode *pErrorCode); - -/** - * Concatenate normalized strings, making sure that the result is normalized as well. - * - * If both the left and the right strings are in - * the normalization form according to "mode/options", - * then the result will be - * - * \code - * dest=normalize(left+right, mode, options) - * \endcode - * - * With the input strings already being normalized, - * this function will use unorm_next() and unorm_previous() - * to find the adjacent end pieces of the input strings. - * Only the concatenation of these end pieces will be normalized and - * then concatenated with the remaining parts of the input strings. - * - * It is allowed to have dest==left to avoid copying the entire left string. - * - * @param left Left source string, may be same as dest. 
- * @param leftLength Length of left source string, or -1 if NUL-terminated. - * @param right Right source string. Must not be the same as dest, nor overlap. - * @param rightLength Length of right source string, or -1 if NUL-terminated. - * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting. - * @param destCapacity The number of UChars that fit into dest. - * @param mode The normalization mode. - * @param options The normalization options, ORed together (0 for no options). - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return Length of output (number of UChars) when successful or buffer overflow. - * - * @see unorm_normalize - * @see unorm_next - * @see unorm_previous - * - * @stable ICU 2.1 - */ -U_STABLE int32_t U_EXPORT2 -unorm_concatenate(const UChar *left, int32_t leftLength, - const UChar *right, int32_t rightLength, - UChar *dest, int32_t destCapacity, - UNormalizationMode mode, int32_t options, - UErrorCode *pErrorCode); - -/** - * Option bit for unorm_compare: - * Both input strings are assumed to fulfill FCD conditions. - * @stable ICU 2.2 - */ -#define UNORM_INPUT_IS_FCD 0x20000 - -/** - * Option bit for unorm_compare: - * Perform case-insensitive comparison. - * @stable ICU 2.2 - */ -#define U_COMPARE_IGNORE_CASE 0x10000 - -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also unistr.h and ustring.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - -/** - * Compare two strings for canonical equivalence. - * Further options include case-insensitive comparison and - * code point order (as opposed to code unit order). - * - * Canonical equivalence between two strings is defined as their normalized - * forms (NFD or NFC) being identical. 
- * This function compares strings incrementally instead of normalizing - * (and optionally case-folding) both strings entirely, - * improving performance significantly. - * - * Bulk normalization is only necessary if the strings do not fulfill the FCD - * conditions. Only in this case, and only if the strings are relatively long, - * is memory allocated temporarily. - * For FCD strings and short non-FCD strings there is no memory allocation. - * - * Semantically, this is equivalent to - * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) - * where code point order and foldCase are all optional. - * - * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match - * the case folding must be performed first, then the normalization. - * - * @param s1 First source string. - * @param length1 Length of first source string, or -1 if NUL-terminated. - * - * @param s2 Second source string. - * @param length2 Length of second source string, or -1 if NUL-terminated. - * - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Case-sensitive comparison in code unit order, and the input strings - * are quick-checked for FCD. - * - * - UNORM_INPUT_IS_FCD - * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. - * If not set, the function will quickCheck for FCD - * and normalize if necessary. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_COMPARE_IGNORE_CASE - * Set to compare strings case-insensitively using case folding, - * instead of case-sensitively. - * If set, then the following case folding options are used. 
- * - * - Options as used with case-insensitive comparisons, currently: - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * (see u_strCaseCompare for details) - * - * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT - * - * @param pErrorCode ICU error code in/out parameter. - * Must fulfill U_SUCCESS before the function call. - * @return <0 or 0 or >0 as usual for string comparisons - * - * @see unorm_normalize - * @see UNORM_FCD - * @see u_strCompare - * @see u_strCaseCompare - * - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -unorm_compare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode); - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ - -#endif diff --git a/Source/WTF/icu/unicode/unorm2.h b/Source/WTF/icu/unicode/unorm2.h deleted file mode 100644 index 7152fc109..000000000 --- a/Source/WTF/icu/unicode/unorm2.h +++ /dev/null @@ -1,528 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2009-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: unorm2.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2009dec15 -* created by: Markus W. Scherer -*/ - -#ifndef __UNORM2_H__ -#define __UNORM2_H__ - -/** - * \file - * \brief C API: New API for Unicode Normalization. - * - * Unicode normalization functionality for standard Unicode normalization or - * for using custom mapping tables. - * All instances of UNormalizer2 are unmodifiable/immutable. - * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. - * For more details see the Normalizer2 C++ class. - */ - -#include "unicode/utypes.h" -#include "unicode/localpointer.h" -#include "unicode/uset.h" - -/** - * Constants for normalization modes. 
- * For details about standard Unicode normalization forms - * and about the algorithms which are also used with custom mapping tables - * see http://www.unicode.org/unicode/reports/tr15/ - * @stable ICU 4.4 - */ -typedef enum { - /** - * Decomposition followed by composition. - * Same as standard NFC when using an "nfc" instance. - * Same as standard NFKC when using an "nfkc" instance. - * For details about standard Unicode normalization forms - * see http://www.unicode.org/unicode/reports/tr15/ - * @stable ICU 4.4 - */ - UNORM2_COMPOSE, - /** - * Map, and reorder canonically. - * Same as standard NFD when using an "nfc" instance. - * Same as standard NFKD when using an "nfkc" instance. - * For details about standard Unicode normalization forms - * see http://www.unicode.org/unicode/reports/tr15/ - * @stable ICU 4.4 - */ - UNORM2_DECOMPOSE, - /** - * "Fast C or D" form. - * If a string is in this form, then further decomposition <i>without reordering</i> - * would yield the same form as DECOMPOSE. - * Text in "Fast C or D" form can be processed efficiently with data tables - * that are "canonically closed", that is, that provide equivalent data for - * equivalent text, without having to be fully normalized. - * Not a standard Unicode normalization form. - * Not a unique form: Different FCD strings can be canonically equivalent. - * For details see http://www.unicode.org/notes/tn5/#FCD - * @stable ICU 4.4 - */ - UNORM2_FCD, - /** - * Compose only contiguously. - * Also known as "FCC" or "Fast C Contiguous". - * The result will often but not always be in NFC. - * The result will conform to FCD which is useful for processing. - * Not a standard Unicode normalization form. - * For details see http://www.unicode.org/notes/tn5/#FCC - * @stable ICU 4.4 - */ - UNORM2_COMPOSE_CONTIGUOUS -} UNormalization2Mode; - -/** - * Result values for normalization quick check functions. 
- * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms - * @stable ICU 2.0 - */ -typedef enum UNormalizationCheckResult { - /** - * The input string is not in the normalization form. - * @stable ICU 2.0 - */ - UNORM_NO, - /** - * The input string is in the normalization form. - * @stable ICU 2.0 - */ - UNORM_YES, - /** - * The input string may or may not be in the normalization form. - * This value is only returned for composition forms like NFC and FCC, - * when a backward-combining character is found for which the surrounding text - * would have to be analyzed further. - * @stable ICU 2.0 - */ - UNORM_MAYBE -} UNormalizationCheckResult; - -/** - * Opaque C service object type for the new normalization API. - * @stable ICU 4.4 - */ -struct UNormalizer2; -typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */ - -#if !UCONFIG_NO_NORMALIZATION - -/** - * Returns a UNormalizer2 instance for Unicode NFC normalization. - * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFCInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance for Unicode NFD normalization. - * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. 
(See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFDInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance for Unicode NFKC normalization. - * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance for Unicode NFKD normalization. - * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFKDInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. - * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). - * Returns an unmodifiable singleton instance. Do not delete it. - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return the requested Normalizer2, if successful - * @stable ICU 49 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); - -/** - * Returns a UNormalizer2 instance which uses the specified data file - * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) - * and which composes or decomposes text according to the specified mode. - * Returns an unmodifiable singleton instance. Do not delete it. - * - * Use packageName=NULL for data files that are part of ICU's own data. - * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. - * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. - * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. - * - * @param packageName NULL for ICU built-in data, otherwise application data package name - * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file - * @param mode normalization mode (compose or decompose etc.) - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested UNormalizer2, if successful - * @stable ICU 4.4 - */ -U_STABLE const UNormalizer2 * U_EXPORT2 -unorm2_getInstance(const char *packageName, - const char *name, - UNormalization2Mode mode, - UErrorCode *pErrorCode); - -/** - * Constructs a filtered normalizer wrapping any UNormalizer2 instance - * and a filter set. - * Both are aliased and must not be modified or deleted while this object - * is used. - * The filter set should be frozen; otherwise the performance will suffer greatly. - * @param norm2 wrapped UNormalizer2 instance - * @param filterSet USet which determines the characters to be normalized - * @param pErrorCode Standard ICU error code. 
Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return the requested UNormalizer2, if successful - * @stable ICU 4.4 - */ -U_STABLE UNormalizer2 * U_EXPORT2 -unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); - -/** - * Closes a UNormalizer2 instance from unorm2_openFiltered(). - * Do not close instances from unorm2_getInstance()! - * @param norm2 UNormalizer2 instance to be closed - * @stable ICU 4.4 - */ -U_STABLE void U_EXPORT2 -unorm2_close(UNormalizer2 *norm2); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUNormalizer2Pointer - * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); - -U_NAMESPACE_END - -#endif - -/** - * Writes the normalized form of the source string to the destination string - * (replacing its contents) and returns the length of the destination string. - * The source and destination strings must be different buffers. - * @param norm2 UNormalizer2 instance - * @param src source string - * @param length length of the source string, or -1 if NUL-terminated - * @param dest destination string; its contents is replaced with normalized src - * @param capacity number of UChars that can be written to dest - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return dest - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_normalize(const UNormalizer2 *norm2, - const UChar *src, int32_t length, - UChar *dest, int32_t capacity, - UErrorCode *pErrorCode); -/** - * Appends the normalized form of the second string to the first string - * (merging them at the boundary) and returns the length of the first string. - * The result is normalized if the first string was normalized. - * The first and second strings must be different buffers. - * @param norm2 UNormalizer2 instance - * @param first string, should be normalized - * @param firstLength length of the first string, or -1 if NUL-terminated - * @param firstCapacity number of UChars that can be written to first - * @param second string, will be normalized - * @param secondLength length of the source string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return first - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, - UChar *first, int32_t firstLength, int32_t firstCapacity, - const UChar *second, int32_t secondLength, - UErrorCode *pErrorCode); -/** - * Appends the second string to the first string - * (merging them at the boundary) and returns the length of the first string. - * The result is normalized if both the strings were normalized. - * The first and second strings must be different buffers. 
- * @param norm2 UNormalizer2 instance - * @param first string, should be normalized - * @param firstLength length of the first string, or -1 if NUL-terminated - * @param firstCapacity number of UChars that can be written to first - * @param second string, should be normalized - * @param secondLength length of the source string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return first - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_append(const UNormalizer2 *norm2, - UChar *first, int32_t firstLength, int32_t firstCapacity, - const UChar *second, int32_t secondLength, - UErrorCode *pErrorCode); - -/** - * Gets the decomposition mapping of c. - * Roughly equivalent to normalizing the String form of c - * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function - * returns a negative value and does not write a string - * if c does not have a decomposition mapping in this instance's data. - * This function is independent of the mode of the UNormalizer2. - * @param norm2 UNormalizer2 instance - * @param c code point - * @param decomposition String buffer which will be set to c's - * decomposition mapping, if there is one. - * @param capacity number of UChars that can be written to decomposition - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value - * @stable ICU 4.6 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_getDecomposition(const UNormalizer2 *norm2, - UChar32 c, UChar *decomposition, int32_t capacity, - UErrorCode *pErrorCode); - -/** - * Gets the raw decomposition mapping of c. - * - * This is similar to the unorm2_getDecomposition() function but returns the - * raw decomposition mapping as specified in UnicodeData.txt or - * (for custom data) in the mapping files processed by the gennorm2 tool. - * By contrast, unorm2_getDecomposition() returns the processed, - * recursively-decomposed version of this mapping. - * - * When used on a standard NFKC Normalizer2 instance, - * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. - * - * When used on a standard NFC Normalizer2 instance, - * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); - * in this case, the result contains either one or two code points (=1..4 UChars). - * - * This function is independent of the mode of the UNormalizer2. - * @param norm2 UNormalizer2 instance - * @param c code point - * @param decomposition String buffer which will be set to c's - * raw decomposition mapping, if there is one. - * @param capacity number of UChars that can be written to decomposition - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value - * @stable ICU 49 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_getRawDecomposition(const UNormalizer2 *norm2, - UChar32 c, UChar *decomposition, int32_t capacity, - UErrorCode *pErrorCode); - -/** - * Performs pairwise composition of a & b and returns the composite if there is one. - * - * Returns a composite code point c only if c has a two-way mapping to a+b. - * In standard Unicode normalization, this means that - * c has a canonical decomposition to a+b - * and c does not have the Full_Composition_Exclusion property. - * - * This function is independent of the mode of the UNormalizer2. - * @param norm2 UNormalizer2 instance - * @param a A (normalization starter) code point. - * @param b Another code point. - * @return The non-negative composite code point if there is one; otherwise a negative value. - * @stable ICU 49 - */ -U_STABLE UChar32 U_EXPORT2 -unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); - -/** - * Gets the combining class of c. - * The default implementation returns 0 - * but all standard implementations return the Unicode Canonical_Combining_Class value. - * @param norm2 UNormalizer2 instance - * @param c code point - * @return c's combining class - * @stable ICU 49 - */ -U_STABLE uint8_t U_EXPORT2 -unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); - -/** - * Tests if the string is normalized. - * Internally, in cases where the quickCheck() method would return "maybe" - * (which is only possible for the two COMPOSE modes) this method - * resolves to "yes" or "no" to provide a definitive result, - * at the cost of doing more work in those cases. - * @param norm2 UNormalizer2 instance - * @param s input string - * @param length length of the string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. 
Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return TRUE if s is normalized - * @stable ICU 4.4 - */ -U_STABLE UBool U_EXPORT2 -unorm2_isNormalized(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode); - -/** - * Tests if the string is normalized. - * For the two COMPOSE modes, the result could be "maybe" in cases that - * would take a little more work to resolve definitively. - * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster - * combination of quick check + normalization, to avoid - * re-checking the "yes" prefix. - * @param norm2 UNormalizer2 instance - * @param s input string - * @param length length of the string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return UNormalizationCheckResult - * @stable ICU 4.4 - */ -U_STABLE UNormalizationCheckResult U_EXPORT2 -unorm2_quickCheck(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode); - -/** - * Returns the end of the normalized substring of the input string. - * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> - * the substring <code>UnicodeString(s, 0, end)</code> - * will pass the quick check with a "yes" result. - * - * The returned end index is usually one or more characters before the - * "no" or "maybe" character: The end index is at a normalization boundary. - * (See the class documentation for more about normalization boundaries.) - * - * When the goal is a normalized string and most input strings are expected - * to be normalized already, then call this method, - * and if it returns a prefix shorter than the input string, - * copy that prefix and use normalizeSecondAndAppend() for the remainder. 
- * @param norm2 UNormalizer2 instance - * @param s input string - * @param length length of the string, or -1 if NUL-terminated - * @param pErrorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return "yes" span end index - * @stable ICU 4.4 - */ -U_STABLE int32_t U_EXPORT2 -unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, - const UChar *s, int32_t length, - UErrorCode *pErrorCode); - -/** - * Tests if the character always has a normalization boundary before it, - * regardless of context. - * For details see the Normalizer2 base class documentation. - * @param norm2 UNormalizer2 instance - * @param c character to test - * @return TRUE if c has a normalization boundary before it - * @stable ICU 4.4 - */ -U_STABLE UBool U_EXPORT2 -unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); - -/** - * Tests if the character always has a normalization boundary after it, - * regardless of context. - * For details see the Normalizer2 base class documentation. - * @param norm2 UNormalizer2 instance - * @param c character to test - * @return TRUE if c has a normalization boundary after it - * @stable ICU 4.4 - */ -U_STABLE UBool U_EXPORT2 -unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); - -/** - * Tests if the character is normalization-inert. - * For details see the Normalizer2 base class documentation. 
- * @param norm2 UNormalizer2 instance - * @param c character to test - * @return TRUE if c is normalization-inert - * @stable ICU 4.4 - */ -U_STABLE UBool U_EXPORT2 -unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); - -#endif /* !UCONFIG_NO_NORMALIZATION */ -#endif /* __UNORM2_H__ */ diff --git a/Source/WTF/icu/unicode/uobject.h b/Source/WTF/icu/unicode/uobject.h deleted file mode 100644 index 54ceace62..000000000 --- a/Source/WTF/icu/unicode/uobject.h +++ /dev/null @@ -1,320 +0,0 @@ -/* -****************************************************************************** -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* file name: uobject.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002jun26 -* created by: Markus W. Scherer -*/ - -#ifndef __UOBJECT_H__ -#define __UOBJECT_H__ - -#include "unicode/utypes.h" - -/** - * \file - * \brief C++ API: Common ICU base class UObject. - */ - -/** - * @{ - * \def U_NO_THROW - * Define this to define the throw() specification so - * certain functions do not throw any exceptions - * - * UMemory operator new methods should have the throw() specification - * appended to them, so that the compiler adds the additional NULL check - * before calling constructors. Without, if <code>operator new</code> returns NULL the - * constructor is still called, and if the constructor references member - * data, (which it typically does), the result is a segmentation violation. - * - * @stable ICU 4.2 - */ -#ifndef U_NO_THROW -#define U_NO_THROW throw() -#endif - -/** @} */ - -/*===========================================================================*/ -/* UClassID-based RTTI */ -/*===========================================================================*/ - -/** - * UClassID is used to identify classes without using the compiler's RTTI. 
- * This was used before C++ compilers consistently supported RTTI. - * ICU 4.6 requires compiler RTTI to be turned on. - * - * Each class hierarchy which needs - * to implement polymorphic clone() or operator==() defines two methods, - * described in detail below. UClassID values can be compared using - * operator==(). Nothing else should be done with them. - * - * \par - * In class hierarchies that implement "poor man's RTTI", - * each concrete subclass implements getDynamicClassID() in the same way: - * - * \code - * class Derived { - * public: - * virtual UClassID getDynamicClassID() const - * { return Derived::getStaticClassID(); } - * } - * \endcode - * - * Each concrete class implements getStaticClassID() as well, which allows - * clients to test for a specific type. - * - * \code - * class Derived { - * public: - * static UClassID U_EXPORT2 getStaticClassID(); - * private: - * static char fgClassID; - * } - * - * // In Derived.cpp: - * UClassID Derived::getStaticClassID() - * { return (UClassID)&Derived::fgClassID; } - * char Derived::fgClassID = 0; // Value is irrelevant - * \endcode - * @stable ICU 2.0 - */ -typedef void* UClassID; - -U_NAMESPACE_BEGIN - -/** - * UMemory is the common ICU base class. - * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4). - * - * This is primarily to make it possible and simple to override the - * C++ memory management by adding new/delete operators to this base class. - * - * To override ALL ICU memory management, including that from plain C code, - * replace the allocation functions declared in cmemory.h - * - * UMemory does not contain any virtual functions. - * Common "boilerplate" functions are defined in UObject. 
- * - * @stable ICU 2.4 - */ -class U_COMMON_API UMemory { -public: - -/* test versions for debugging shaper heap memory problems */ -#ifdef SHAPER_MEMORY_DEBUG - static void * NewArray(int size, int count); - static void * GrowArray(void * array, int newSize ); - static void FreeArray(void * array ); -#endif - -#if U_OVERRIDE_CXX_ALLOCATION - /** - * Override for ICU4C C++ memory management. - * simple, non-class types are allocated using the macros in common/cmemory.h - * (uprv_malloc(), uprv_free(), uprv_realloc()); - * they or something else could be used here to implement C++ new/delete - * for ICU4C C++ classes - * @stable ICU 2.4 - */ - static void * U_EXPORT2 operator new(size_t size) U_NO_THROW; - - /** - * Override for ICU4C C++ memory management. - * See new(). - * @stable ICU 2.4 - */ - static void * U_EXPORT2 operator new[](size_t size) U_NO_THROW; - - /** - * Override for ICU4C C++ memory management. - * simple, non-class types are allocated using the macros in common/cmemory.h - * (uprv_malloc(), uprv_free(), uprv_realloc()); - * they or something else could be used here to implement C++ new/delete - * for ICU4C C++ classes - * @stable ICU 2.4 - */ - static void U_EXPORT2 operator delete(void *p) U_NO_THROW; - - /** - * Override for ICU4C C++ memory management. - * See delete(). - * @stable ICU 2.4 - */ - static void U_EXPORT2 operator delete[](void *p) U_NO_THROW; - -#if U_HAVE_PLACEMENT_NEW - /** - * Override for ICU4C C++ memory management for STL. - * See new(). - * @stable ICU 2.6 - */ - static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NO_THROW { return ptr; } - - /** - * Override for ICU4C C++ memory management for STL. - * See delete(). 
- * @stable ICU 2.6 - */ - static inline void U_EXPORT2 operator delete(void *, void *) U_NO_THROW {} -#endif /* U_HAVE_PLACEMENT_NEW */ -#if U_HAVE_DEBUG_LOCATION_NEW - /** - * This method overrides the MFC debug version of the operator new - * - * @param size The requested memory size - * @param file The file where the allocation was requested - * @param line The line where the allocation was requested - */ - static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NO_THROW; - /** - * This method provides a matching delete for the MFC debug new - * - * @param p The pointer to the allocated memory - * @param file The file where the allocation was requested - * @param line The line where the allocation was requested - */ - static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NO_THROW; -#endif /* U_HAVE_DEBUG_LOCATION_NEW */ -#endif /* U_OVERRIDE_CXX_ALLOCATION */ - - /* - * Assignment operator not declared. The compiler will provide one - * which does nothing since this class does not contain any data members. - * API/code coverage may show the assignment operator as present and - * untested - ignore. - * Subclasses need this assignment operator if they use compiler-provided - * assignment operators of their own. An alternative to not declaring one - * here would be to declare and empty-implement a protected or public one. - UMemory &UMemory::operator=(const UMemory &); - */ -}; - -/** - * UObject is the common ICU "boilerplate" class. - * UObject inherits UMemory (starting with ICU 2.4), - * and all other public ICU C++ classes - * are derived from UObject (starting with ICU 2.2). - * - * UObject contains common virtual functions, in particular a virtual destructor. - * - * The clone() function is not available in UObject because it is not - * implemented by all ICU classes. 
- * Many ICU services provide a clone() function for their class trees, - * defined on the service's C++ base class, and all subclasses within that - * service class tree return a pointer to the service base class - * (which itself is a subclass of UObject). - * This is because some compilers do not support covariant (same-as-this) - * return types; cast to the appropriate subclass if necessary. - * - * @stable ICU 2.2 - */ -class U_COMMON_API UObject : public UMemory { -public: - /** - * Destructor. - * - * @stable ICU 2.2 - */ - virtual ~UObject(); - - /** - * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class. - * The base class implementation returns a dummy value. - * - * Use compiler RTTI rather than ICU's "poor man's RTTI". - * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI". - * - * @stable ICU 2.2 - */ - virtual UClassID getDynamicClassID() const; - -protected: - // the following functions are protected to prevent instantiation and - // direct use of UObject itself - - // default constructor - // inline UObject() {} - - // copy constructor - // inline UObject(const UObject &other) {} - -#if 0 - // TODO Sometime in the future. Implement operator==(). 
- // (This comment inserted in 2.2) - // some or all of the following "boilerplate" functions may be made public - // in a future ICU4C release when all subclasses implement them - - // assignment operator - // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74) - // commented out because the implementation is the same as a compiler's default - // UObject &operator=(const UObject &other) { return *this; } - - // comparison operators - virtual inline UBool operator==(const UObject &other) const { return this==&other; } - inline UBool operator!=(const UObject &other) const { return !operator==(other); } - - // clone() commented out from the base class: - // some compilers do not support co-variant return types - // (i.e., subclasses would have to return UObject * as well, instead of SubClass *) - // see also UObject class documentation. - // virtual UObject *clone() const; -#endif - - /* - * Assignment operator not declared. The compiler will provide one - * which does nothing since this class does not contain any data members. - * API/code coverage may show the assignment operator as present and - * untested - ignore. - * Subclasses need this assignment operator if they use compiler-provided - * assignment operators of their own. An alternative to not declaring one - * here would be to declare and empty-implement a protected or public one. - UObject &UObject::operator=(const UObject &); - */ -}; - -#ifndef U_HIDE_INTERNAL_API -/** - * This is a simple macro to add ICU RTTI to an ICU object implementation. - * This does not go into the header. This should only be used in *.cpp files. - * - * @param myClass The name of the class that needs RTTI defined. 
- * @internal - */ -#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \ - UClassID U_EXPORT2 myClass::getStaticClassID() { \ - static char classID = 0; \ - return (UClassID)&classID; \ - } \ - UClassID myClass::getDynamicClassID() const \ - { return myClass::getStaticClassID(); } - - -/** - * This macro adds ICU RTTI to an ICU abstract class implementation. - * This macro should be invoked in *.cpp files. The corresponding - * header should declare getStaticClassID. - * - * @param myClass The name of the class that needs RTTI defined. - * @internal - */ -#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \ - UClassID U_EXPORT2 myClass::getStaticClassID() { \ - static char classID = 0; \ - return (UClassID)&classID; \ - } - -#endif /* U_HIDE_INTERNAL_API */ - -U_NAMESPACE_END - -#endif diff --git a/Source/WTF/icu/unicode/urename.h b/Source/WTF/icu/unicode/urename.h deleted file mode 100644 index 6b1f49098..000000000 --- a/Source/WTF/icu/unicode/urename.h +++ /dev/null @@ -1,1825 +0,0 @@ -/* -******************************************************************************* -* Copyright (C) 2002-2013, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* -* file name: urename.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* Created by: Perl script tools/genren.pl written by Vladimir Weinstein -* -* Contains data for renaming ICU exports. -* Gets included by umachine.h -* -* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT -* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN! -*/ - -#ifndef URENAME_H -#define URENAME_H - -/* U_DISABLE_RENAMING can be defined in the following ways: - * - when running configure, e.g. 
- * runConfigureICU Linux --disable-renaming - * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h - */ - -#include "unicode/uconfig.h" - -#if !U_DISABLE_RENAMING - -/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give - the platform a chance to define it first. - Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined. - */ - -#ifndef U_ICU_ENTRY_POINT_RENAME -#include "unicode/umachine.h" -#endif - -/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */ -#ifndef U_ICU_ENTRY_POINT_RENAME -#include "unicode/uvernum.h" -#endif - -/* Error out before the following defines cause very strange and unexpected code breakage */ -#ifndef U_ICU_ENTRY_POINT_RENAME -#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used. -#endif - - -/* C exports renaming data */ - -#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString) -#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString) -#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger) -#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase) -#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase) -#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE) -#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP) -#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP) -#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE) -#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE) -#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP) -#define UCNV_TO_U_CALLBACK_STOP 
U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP) -#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE) -#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance) -#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init) -#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded) -#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer) -#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) -#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) -#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) -#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) -#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) -#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) -#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) -#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) -#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) -#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) -#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) -#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) -#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16) -#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17) -#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18) -#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19) -#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2) -#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3) -#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4) -#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5) -#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6) -#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) -#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) -#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) -#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) -#define 
_UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) -#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) -#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) -#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) -#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) -#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) -#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) -#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) -#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) -#define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) -#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone) -#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close) -#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals) -#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings) -#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID) -#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart) -#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart) -#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName) -#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart) -#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart) -#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset) -#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID) -#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo) -#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open) -#define le_close U_ICU_ENTRY_POINT_RENAME(le_close) -#define le_create U_ICU_ENTRY_POINT_RENAME(le_create) -#define le_getCharIndices U_ICU_ENTRY_POINT_RENAME(le_getCharIndices) -#define le_getCharIndicesWithBase U_ICU_ENTRY_POINT_RENAME(le_getCharIndicesWithBase) -#define le_getGlyphCount U_ICU_ENTRY_POINT_RENAME(le_getGlyphCount) -#define le_getGlyphPosition 
U_ICU_ENTRY_POINT_RENAME(le_getGlyphPosition) -#define le_getGlyphPositions U_ICU_ENTRY_POINT_RENAME(le_getGlyphPositions) -#define le_getGlyphs U_ICU_ENTRY_POINT_RENAME(le_getGlyphs) -#define le_layoutChars U_ICU_ENTRY_POINT_RENAME(le_layoutChars) -#define le_reset U_ICU_ENTRY_POINT_RENAME(le_reset) -#define locale_getKeywords U_ICU_ENTRY_POINT_RENAME(locale_getKeywords) -#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart) -#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default) -#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default) -#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun) -#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun) -#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun) -#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close) -#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns) -#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine) -#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns) -#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns) -#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns) -#define pl_create U_ICU_ENTRY_POINT_RENAME(pl_create) -#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent) -#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent) -#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount) -#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont) -#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit) -#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit) -#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading) -#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent) -#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent) -#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading) -#define pl_getLineVisualRun 
U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun) -#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth) -#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount) -#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit) -#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit) -#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale) -#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel) -#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection) -#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount) -#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit) -#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit) -#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue) -#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent) -#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent) -#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection) -#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont) -#define pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount) -#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap) -#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs) -#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading) -#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions) -#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex) -#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine) -#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns) -#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns) -#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns) 
-#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns) -#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns) -#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns) -#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow) -#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns) -#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns) -#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns) -#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems) -#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource) -#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias) -#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem) -#define res_getBinary U_ICU_ENTRY_POINT_RENAME(res_getBinary) -#define res_getIntVector U_ICU_ENTRY_POINT_RENAME(res_getIntVector) -#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType) -#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource) -#define res_getString U_ICU_ENTRY_POINT_RENAME(res_getString) -#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex) -#define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey) -#define res_load U_ICU_ENTRY_POINT_RENAME(res_load) -#define res_read U_ICU_ENTRY_POINT_RENAME(res_read) -#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload) -#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars) -#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy) -#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy) -#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose) -#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets) -#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen) -#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge) -#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue) -#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection) -#define u_charFromName 
U_ICU_ENTRY_POINT_RENAME(u_charFromName) -#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror) -#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName) -#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType) -#define u_charsToUChars U_ICU_ENTRY_POINT_RENAME(u_charsToUChars) -#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup) -#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32) -#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit) -#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames) -#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes) -#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName) -#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt) -#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose) -#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof) -#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush) -#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter) -#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat) -#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc) -#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage) -#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx) -#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile) -#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale) -#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets) -#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read) -#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write) -#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush) -#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit) -#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter) -#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase) -#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen) -#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit) -#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage) -#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError) -#define u_fprintf 
U_ICU_ENTRY_POINT_RENAME(u_fprintf) -#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u) -#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc) -#define u_fputs U_ICU_ENTRY_POINT_RENAME(u_fputs) -#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind) -#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf) -#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u) -#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage) -#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale) -#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator) -#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen) -#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc) -#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket) -#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass) -#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory) -#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion) -#define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter) -#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure) -#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment) -#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue) -#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue) -#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue) -#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties) -#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue) -#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum) -#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName) -#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum) -#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName) -#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties) -#define 
u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion) -#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion) -#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout) -#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty) -#define u_init U_ICU_ENTRY_POINT_RENAME(u_init) -#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable) -#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart) -#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart) -#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl) -#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart) -#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart) -#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar) -#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored) -#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic) -#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase) -#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase) -#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace) -#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace) -#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum) -#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX) -#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha) -#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase) -#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank) -#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl) -#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined) -#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit) -#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph) -#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX) -#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower) -#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint) -#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX) -#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct) -#define u_isspace 
U_ICU_ENTRY_POINT_RENAME(u_isspace) -#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle) -#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper) -#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit) -#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close) -#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat) -#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init) -#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp) -#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr) -#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32) -#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp) -#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder) -#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy) -#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove) -#define u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr) -#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32) -#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset) -#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage) -#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError) -#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf) -#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse) -#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u) -#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter) -#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse) -#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions) -#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory) -#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions) -#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions) -#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic) -#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf) -#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u) -#define u_sprintf 
U_ICU_ENTRY_POINT_RENAME(u_sprintf) -#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u) -#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf) -#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u) -#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare) -#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare) -#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter) -#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst) -#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast) -#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase) -#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub) -#define u_strFromPunycode U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode) -#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32) -#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub) -#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8) -#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient) -#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub) -#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS) -#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than) -#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8) -#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower) -#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode) -#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle) -#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32) -#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub) -#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8) -#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub) -#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper) -#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS) -#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp) -#define 
u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat) -#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr) -#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32) -#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp) -#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder) -#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold) -#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy) -#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn) -#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen) -#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp) -#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat) -#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp) -#define u_strncmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder) -#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy) -#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk) -#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr) -#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32) -#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr) -#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn) -#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr) -#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r) -#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars) -#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s) -#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars) -#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars) -#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower) -#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle) -#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper) -#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy) -#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy) -#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape) -#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt) -#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString) -#define u_versionFromUString 
U_ICU_ENTRY_POINT_RENAME(u_versionFromUString) -#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString) -#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage) -#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError) -#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf) -#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u) -#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf) -#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u) -#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage) -#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError) -#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf) -#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u) -#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf) -#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u) -#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf) -#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u) -#define u_writeDiff U_ICU_ENTRY_POINT_RENAME(u_writeDiff) -#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun) -#define u_writeIdenticalLevelRunTwoChars U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRunTwoChars) -#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts) -#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close) -#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs) -#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns) -#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection) -#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass) -#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback) -#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass) -#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection) -#define ubidi_getJoiningGroup 
U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup) -#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType) -#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength) -#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt) -#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels) -#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex) -#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap) -#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun) -#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue) -#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory) -#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror) -#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket) -#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType) -#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel) -#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex) -#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph) -#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex) -#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength) -#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode) -#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions) -#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength) -#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns) -#define ubidi_getSingleton U_ICU_ENTRY_POINT_RENAME(ubidi_getSingleton) -#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText) -#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex) -#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap) -#define ubidi_getVisualRun 
U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun) -#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap) -#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl) -#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse) -#define ubidi_isJoinControl U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl) -#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored) -#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR) -#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open) -#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized) -#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR) -#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical) -#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual) -#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback) -#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext) -#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse) -#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine) -#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara) -#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode) -#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions) -#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered) -#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse) -#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode) -#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close) -#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable) -#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current) -#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first) -#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following) -#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable) -#define ubrk_getLocaleByType 
U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType) -#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus) -#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec) -#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary) -#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last) -#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next) -#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open) -#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules) -#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding) -#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous) -#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText) -#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone) -#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText) -#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText) -#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap) -#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add) -#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear) -#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField) -#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone) -#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close) -#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable) -#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo) -#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get) -#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute) -#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable) -#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID) -#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings) -#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType) -#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone) -#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference) -#define 
ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange) -#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale) -#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit) -#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType) -#define ucal_getMillis U_ICU_ENTRY_POINT_RENAME(ucal_getMillis) -#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow) -#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion) -#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName) -#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID) -#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID) -#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate) -#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType) -#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition) -#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID) -#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime) -#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet) -#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend) -#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open) -#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones) -#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration) -#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones) -#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll) -#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set) -#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute) -#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate) -#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime) -#define ucal_setDefaultTimeZone 
U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone) -#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange) -#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis) -#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone) -#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure) -#define ucase_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts) -#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) -#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) -#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) -#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton) -#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) -#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) -#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) -#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive) -#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted) -#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding) -#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower) -#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle) -#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper) -#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower) -#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle) -#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper) -#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close) -#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator) -#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale) -#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions) -#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle) -#define ucasemap_mapUTF8 
U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8) -#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open) -#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator) -#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale) -#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions) -#define ucasemap_toTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle) -#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase) -#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower) -#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle) -#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper) -#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts) -#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames) -#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne) -#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup) -#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup) -#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup) -#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup) -#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup) -#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32) -#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets) -#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode) -#define ucnv_MBCSGetType U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType) -#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode) -#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte) -#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar) -#define 
ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets) -#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters) -#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter) -#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter) -#define ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes) -#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub) -#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars) -#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub) -#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars) -#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close) -#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames) -#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert) -#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx) -#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases) -#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable) -#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards) -#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter) -#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter) -#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage) -#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData) -#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature) -#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU) -#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU) -#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet) -#define ucnv_extInitialMatchFromU 
U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU) -#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU) -#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU) -#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU) -#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator) -#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache) -#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic) -#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars) -#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending) -#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes) -#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode) -#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8) -#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC) -#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias) -#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases) -#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName) -#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID) -#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName) -#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet) -#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName) -#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName) -#define ucnv_getFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack) -#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars) -#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars) -#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize) -#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize) -#define 
ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName) -#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar) -#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet) -#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform) -#define ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard) -#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName) -#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters) -#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars) -#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack) -#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType) -#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet) -#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount) -#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters) -#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName) -#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare) -#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare) -#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous) -#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth) -#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load) -#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData) -#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open) -#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames) -#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID) -#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage) -#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames) -#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU) -#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset) -#define ucnv_resetFromUnicode 
U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode) -#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode) -#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone) -#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName) -#define ucnv_setFallback U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback) -#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack) -#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars) -#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString) -#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack) -#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap) -#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases) -#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic) -#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars) -#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending) -#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint) -#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars) -#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode) -#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload) -#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady) -#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback) -#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close) -#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open) -#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized) -#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString) -#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8) -#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize) -#define ucol_allocWeights U_ICU_ENTRY_POINT_RENAME(ucol_allocWeights) -#define ucol_assembleTailoringTable 
U_ICU_ENTRY_POINT_RENAME(ucol_assembleTailoringTable) -#define ucol_buildPermutationTable U_ICU_ENTRY_POINT_RENAME(ucol_buildPermutationTable) -#define ucol_calcSortKey U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKey) -#define ucol_calcSortKeySimpleTertiary U_ICU_ENTRY_POINT_RENAME(ucol_calcSortKeySimpleTertiary) -#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary) -#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close) -#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements) -#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable) -#define ucol_createElements U_ICU_ENTRY_POINT_RENAME(ucol_createElements) -#define ucol_doCE U_ICU_ENTRY_POINT_RENAME(ucol_doCE) -#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal) -#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals) -#define ucol_findReorderingEntry U_ICU_ENTRY_POINT_RENAME(ucol_findReorderingEntry) -#define ucol_forceHanImplicit U_ICU_ENTRY_POINT_RENAME(ucol_forceHanImplicit) -#define ucol_forgetUCA U_ICU_ENTRY_POINT_RENAME(ucol_forgetUCA) -#define ucol_freeOffsetBuffer U_ICU_ENTRY_POINT_RENAME(ucol_freeOffsetBuffer) -#define ucol_getAttribute U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute) -#define ucol_getAttributeOrDefault U_ICU_ENTRY_POINT_RENAME(ucol_getAttributeOrDefault) -#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable) -#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound) -#define ucol_getCEStrengthDifference U_ICU_ENTRY_POINT_RENAME(ucol_getCEStrengthDifference) -#define ucol_getCollationKey U_ICU_ENTRY_POINT_RENAME(ucol_getCollationKey) -#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions) -#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions) -#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName) -#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes) -#define ucol_getFirstCE 
U_ICU_ENTRY_POINT_RENAME(ucol_getFirstCE) -#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent) -#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues) -#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale) -#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords) -#define ucol_getLeadBytesForReorderCode U_ICU_ENTRY_POINT_RENAME(ucol_getLeadBytesForReorderCode) -#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale) -#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType) -#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion) -#define ucol_getNextCE U_ICU_ENTRY_POINT_RENAME(ucol_getNextCE) -#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset) -#define ucol_getPrevCE U_ICU_ENTRY_POINT_RENAME(ucol_getPrevCE) -#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes) -#define ucol_getReorderCodesForLeadByte U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodesForLeadByte) -#define ucol_getRules U_ICU_ENTRY_POINT_RENAME(ucol_getRules) -#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx) -#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString) -#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey) -#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength) -#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet) -#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion) -#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet) -#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop) -#define ucol_getVersion U_ICU_ENTRY_POINT_RENAME(ucol_getVersion) -#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater) -#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual) -#define ucol_initBuffers U_ICU_ENTRY_POINT_RENAME(ucol_initBuffers) 
-#define ucol_initCollator U_ICU_ENTRY_POINT_RENAME(ucol_initCollator) -#define ucol_initInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_initInverseUCA) -#define ucol_initUCA U_ICU_ENTRY_POINT_RENAME(ucol_initUCA) -#define ucol_inv_getNextCE U_ICU_ENTRY_POINT_RENAME(ucol_inv_getNextCE) -#define ucol_inv_getPrevCE U_ICU_ENTRY_POINT_RENAME(ucol_inv_getPrevCE) -#define ucol_isTailored U_ICU_ENTRY_POINT_RENAME(ucol_isTailored) -#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode) -#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary) -#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys) -#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next) -#define ucol_nextProcessed U_ICU_ENTRY_POINT_RENAME(ucol_nextProcessed) -#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart) -#define ucol_nextWeight U_ICU_ENTRY_POINT_RENAME(ucol_nextWeight) -#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString) -#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open) -#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales) -#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary) -#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements) -#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString) -#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules) -#define ucol_openRulesForImport U_ICU_ENTRY_POINT_RENAME(ucol_openRulesForImport) -#define ucol_open_internal U_ICU_ENTRY_POINT_RENAME(ucol_open_internal) -#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen) -#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous) -#define ucol_previousProcessed U_ICU_ENTRY_POINT_RENAME(ucol_previousProcessed) -#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder) -#define ucol_prv_getSpecialCE 
U_ICU_ENTRY_POINT_RENAME(ucol_prv_getSpecialCE) -#define ucol_prv_getSpecialPrevCE U_ICU_ENTRY_POINT_RENAME(ucol_prv_getSpecialPrevCE) -#define ucol_reset U_ICU_ENTRY_POINT_RENAME(ucol_reset) -#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop) -#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone) -#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder) -#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute) -#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset) -#define ucol_setOptionsFromHeader U_ICU_ENTRY_POINT_RENAME(ucol_setOptionsFromHeader) -#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes) -#define ucol_setReqValidLocales U_ICU_ENTRY_POINT_RENAME(ucol_setReqValidLocales) -#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength) -#define ucol_setText U_ICU_ENTRY_POINT_RENAME(ucol_setText) -#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop) -#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll) -#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter) -#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8) -#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap) -#define ucol_swapBinary U_ICU_ENTRY_POINT_RENAME(ucol_swapBinary) -#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA) -#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder) -#define ucol_tok_assembleTokenList U_ICU_ENTRY_POINT_RENAME(ucol_tok_assembleTokenList) -#define ucol_tok_closeTokenList U_ICU_ENTRY_POINT_RENAME(ucol_tok_closeTokenList) -#define ucol_tok_getNextArgument U_ICU_ENTRY_POINT_RENAME(ucol_tok_getNextArgument) -#define ucol_tok_getRulesFromBundle U_ICU_ENTRY_POINT_RENAME(ucol_tok_getRulesFromBundle) -#define ucol_tok_initTokenList U_ICU_ENTRY_POINT_RENAME(ucol_tok_initTokenList) -#define ucol_tok_parseNextToken U_ICU_ENTRY_POINT_RENAME(ucol_tok_parseNextToken) -#define 
ucol_updateInternalState U_ICU_ENTRY_POINT_RENAME(ucol_updateInternalState) -#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close) -#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect) -#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll) -#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter) -#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets) -#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence) -#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets) -#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage) -#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName) -#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars) -#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled) -#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open) -#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding) -#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset) -#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText) -#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies) -#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale) -#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate) -#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits) -#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale) -#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName) -#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode) -#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName) -#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement) -#define ucurr_isAvailable 
U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable) -#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies) -#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register) -#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister) -#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern) -#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative) -#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone) -#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close) -#define udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable) -#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols) -#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format) -#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart) -#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable) -#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar) -#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext) -#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType) -#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat) -#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols) -#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient) -#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open) -#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse) -#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar) -#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener) -#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart) -#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar) -#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext) -#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient) -#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat) -#define udat_setSymbols 
U_ICU_ENTRY_POINT_RENAME(udat_setSymbols) -#define udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField) -#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern) -#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate) -#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime) -#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener) -#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData) -#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close) -#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper) -#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize) -#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo) -#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize) -#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength) -#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory) -#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory) -#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open) -#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice) -#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper) -#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData) -#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError) -#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16) -#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32) -#define udata_setAppData U_ICU_ENTRY_POINT_RENAME(udata_setAppData) -#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData) -#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess) -#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader) -#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock) -#define udatpg_addPattern 
U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern) -#define udatpg_clone U_ICU_ENTRY_POINT_RENAME(udatpg_clone) -#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close) -#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat) -#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName) -#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton) -#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern) -#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) -#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) -#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) -#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton) -#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton) -#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open) -#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons) -#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty) -#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons) -#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes) -#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions) -#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat) -#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName) -#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat) -#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal) -#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap) -#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close) -#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format) -#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open) 
-#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close) -#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count) -#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next) -#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault) -#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration) -#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration) -#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration) -#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset) -#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext) -#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault) -#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit) -#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer) -#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io) -#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit) -#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch) -#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32) -#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou) -#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close) -#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode) -#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue) -#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex) -#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength) -#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate) -#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars) -#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble) -#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64) -#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong) -#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject) -#define ufmt_getType 
U_ICU_ENTRY_POINT_RENAME(ufmt_getType) -#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars) -#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric) -#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit) -#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open) -#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou) -#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64) -#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop) -#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance) -#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender) -#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close) -#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString) -#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars) -#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars) -#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong) -#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet) -#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars) -#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString) -#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count) -#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable) -#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet) -#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals) -#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet) -#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find) -#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get) -#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti) -#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString) -#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars) -#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars) 
-#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong) -#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet) -#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars) -#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString) -#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget) -#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti) -#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init) -#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput) -#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi) -#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove) -#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei) -#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement) -#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open) -#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize) -#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put) -#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti) -#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove) -#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll) -#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement) -#define uhash_removei U_ICU_ENTRY_POINT_RENAME(uhash_removei) -#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator) -#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter) -#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher) -#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy) -#define uhash_setValueComparator U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator) -#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter) -#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII) -#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode) -#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close) -#define uidna_compare 
U_ICU_ENTRY_POINT_RENAME(uidna_compare) -#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII) -#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8) -#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode) -#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8) -#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII) -#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8) -#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode) -#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8) -#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46) -#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII) -#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode) -#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32) -#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState) -#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32) -#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32) -#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator) -#define uiter_setReplaceable U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable) -#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState) -#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString) -#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE) -#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8) -#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close) -#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext) -#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling) -#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale) -#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName) -#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName) -#define 
uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName) -#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName) -#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open) -#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext) -#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName) -#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName) -#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName) -#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName) -#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList) -#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList) -#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator) -#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString) -#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values) -#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList) -#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList) -#define ulist_getListFromEnum U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum) -#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize) -#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext) -#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value) -#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList) -#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator) -#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage) -#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP) -#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags) -#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize) -#define 
uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable) -#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag) -#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable) -#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName) -#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation) -#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry) -#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID) -#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID) -#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault) -#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry) -#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword) -#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue) -#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage) -#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName) -#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript) -#define uloc_getDisplayScriptInContext U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScriptInContext) -#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant) -#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country) -#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language) -#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries) -#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages) -#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue) -#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID) -#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage) -#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation) -#define uloc_getLocaleForLCID 
U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID) -#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName) -#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent) -#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript) -#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback) -#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant) -#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags) -#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList) -#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords) -#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault) -#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue) -#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag) -#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close) -#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion) -#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter) -#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet) -#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern) -#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator) -#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem) -#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute) -#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize) -#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open) -#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute) -#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry) -#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage) -#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript) -#define 
umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern) -#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe) -#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone) -#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close) -#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format) -#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale) -#define umsg_open U_ICU_ENTRY_POINT_RENAME(umsg_open) -#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse) -#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale) -#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern) -#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat) -#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse) -#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock) -#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock) -#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance) -#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append) -#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close) -#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair) -#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass) -#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition) -#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance) -#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance) -#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance) -#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance) -#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance) -#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance) -#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition) -#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter) -#define 
unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore) -#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert) -#define unorm2_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized) -#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize) -#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend) -#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered) -#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck) -#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes) -#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap) -#define unorm_closeIter U_ICU_ENTRY_POINT_RENAME(unorm_closeIter) -#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare) -#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate) -#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16) -#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck) -#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized) -#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions) -#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next) -#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize) -#define unorm_openIter U_ICU_ENTRY_POINT_RENAME(unorm_openIter) -#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous) -#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck) -#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions) -#define unorm_setIter U_ICU_ENTRY_POINT_RENAME(unorm_setIter) -#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern) -#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone) -#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close) -#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable) -#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format) -#define 
unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal) -#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble) -#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency) -#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64) -#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable) -#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute) -#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable) -#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute) -#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType) -#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol) -#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute) -#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open) -#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse) -#define unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal) -#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble) -#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency) -#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64) -#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable) -#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute) -#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute) -#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol) -#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute) -#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern) -#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close) -#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription) -#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName) -#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix) -#define unumsys_isAlgorithmic 
U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic) -#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open) -#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames) -#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName) -#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close) -#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open) -#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType) -#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select) -#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary) -#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary) -#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration) -#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext) -#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel) -#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary) -#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName) -#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal) -#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel) -#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus) -#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName) -#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile) -#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName) -#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init) -#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint) -#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary) -#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug) -#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary) -#define uplug_removePlug U_ICU_ENTRY_POINT_RENAME(uplug_removePlug) -#define uplug_setContext 
U_ICU_ENTRY_POINT_RENAME(uplug_setContext) -#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel) -#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName) -#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload) -#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource) -#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts) -#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy) -#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic) -#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower) -#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc) -#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil) -#define uprv_cnttab_addContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_addContraction) -#define uprv_cnttab_changeContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_changeContraction) -#define uprv_cnttab_changeLastCE U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_changeLastCE) -#define uprv_cnttab_clone U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_clone) -#define uprv_cnttab_close U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_close) -#define uprv_cnttab_constructTable U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_constructTable) -#define uprv_cnttab_findCE U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_findCE) -#define uprv_cnttab_findCP U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_findCP) -#define uprv_cnttab_getCE U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_getCE) -#define uprv_cnttab_insertContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_insertContraction) -#define uprv_cnttab_isTailored U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_isTailored) -#define uprv_cnttab_open U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_open) -#define uprv_cnttab_setContraction U_ICU_ENTRY_POINT_RENAME(uprv_cnttab_setContraction) -#define uprv_collIterateAtEnd U_ICU_ENTRY_POINT_RENAME(uprv_collIterateAtEnd) -#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames) -#define 
uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames) -#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii) -#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic) -#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii) -#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID) -#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix) -#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii) -#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic) -#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus) -#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault) -#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding) -#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus) -#define uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus) -#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus) -#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding) -#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus) -#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString) -#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet) -#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet) -#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString) -#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus) -#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus) -#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus) 
-#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs) -#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd) -#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd) -#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass) -#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString) -#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare) -#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal) -#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal) -#define uprv_decNumberCompareTotalMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag) -#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy) -#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs) -#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate) -#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign) -#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide) -#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger) -#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp) -#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA) -#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32) -#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString) -#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32) -#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD) -#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert) -#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal) -#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal) -#define uprv_decNumberLn 
U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn) -#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10) -#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB) -#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax) -#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag) -#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin) -#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag) -#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus) -#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply) -#define uprv_decNumberNextMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus) -#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus) -#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward) -#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize) -#define uprv_decNumberOr U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr) -#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus) -#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower) -#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize) -#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce) -#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder) -#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear) -#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale) -#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate) -#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum) -#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB) -#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD) -#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift) 
-#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot) -#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract) -#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString) -#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32) -#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact) -#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue) -#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString) -#define uprv_decNumberToUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32) -#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim) -#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion) -#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor) -#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero) -#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject) -#define uprv_delete_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_delete_collIterate) -#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close) -#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open) -#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func) -#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy) -#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii) -#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii) -#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower) -#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs) -#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor) -#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax) -#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin) -#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod) -#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free) -#define 
uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters) -#define uprv_getDefaultCodepage U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultCodepage) -#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID) -#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity) -#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength) -#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues) -#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN) -#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime) -#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName) -#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) -#define uprv_haveProperties U_ICU_ENTRY_POINT_RENAME(uprv_haveProperties) -#define uprv_init_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_init_collIterate) -#define uprv_init_pce U_ICU_ENTRY_POINT_RENAME(uprv_init_pce) -#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) -#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) -#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) -#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString) -#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString) -#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN) -#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity) -#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity) -#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou) -#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log) -#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc) -#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile) -#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max) -#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa) -#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr) 
-#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min) -#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf) -#define uprv_new_collIterate U_ICU_ENTRY_POINT_RENAME(uprv_new_collIterate) -#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency) -#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute) -#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow) -#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10) -#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc) -#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round) -#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray) -#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch) -#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare) -#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup) -#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp) -#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup) -#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp) -#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError) -#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone) -#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper) -#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc) -#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname) -#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset) -#define uprv_uca_addAnElement U_ICU_ENTRY_POINT_RENAME(uprv_uca_addAnElement) -#define uprv_uca_assembleTable U_ICU_ENTRY_POINT_RENAME(uprv_uca_assembleTable) -#define uprv_uca_canonicalClosure U_ICU_ENTRY_POINT_RENAME(uprv_uca_canonicalClosure) -#define uprv_uca_closeTempTable U_ICU_ENTRY_POINT_RENAME(uprv_uca_closeTempTable) -#define uprv_uca_getCodePointFromRaw U_ICU_ENTRY_POINT_RENAME(uprv_uca_getCodePointFromRaw) -#define uprv_uca_getImplicitFromRaw U_ICU_ENTRY_POINT_RENAME(uprv_uca_getImplicitFromRaw) -#define uprv_uca_getRawFromCodePoint U_ICU_ENTRY_POINT_RENAME(uprv_uca_getRawFromCodePoint) 
-#define uprv_uca_getRawFromImplicit U_ICU_ENTRY_POINT_RENAME(uprv_uca_getRawFromImplicit) -#define uprv_uca_initImplicitConstants U_ICU_ENTRY_POINT_RENAME(uprv_uca_initImplicitConstants) -#define uprv_uca_initTempTable U_ICU_ENTRY_POINT_RENAME(uprv_uca_initTempTable) -#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator) -#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator) -#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile) -#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray) -#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close) -#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact) -#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler) -#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes) -#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray) -#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow) -#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue) -#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open) -#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue) -#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement) -#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText) -#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail) -#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText) -#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone) -#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close) -#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end) -#define uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64) -#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find) -#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64) -#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext) -#define uregex_flags 
U_ICU_ENTRY_POINT_RENAME(uregex_flags) -#define uregex_getFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback) -#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback) -#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit) -#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText) -#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit) -#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText) -#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group) -#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount) -#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText) -#define uregex_groupUTextDeep U_ICU_ENTRY_POINT_RENAME(uregex_groupUTextDeep) -#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds) -#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds) -#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd) -#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt) -#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64) -#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches) -#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64) -#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open) -#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC) -#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText) -#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern) -#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText) -#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText) -#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd) -#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64) -#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart) -#define uregex_regionStart64 U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64) 
-#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll) -#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText) -#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst) -#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText) -#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd) -#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset) -#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64) -#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback) -#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback) -#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion) -#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64) -#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart) -#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit) -#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText) -#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit) -#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText) -#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split) -#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText) -#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start) -#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64) -#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt) -#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds) -#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds) -#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt) -#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual) -#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains) -#define uregion_getAvailable 
U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable) -#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions) -#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType) -#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion) -#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType) -#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode) -#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues) -#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode) -#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode) -#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode) -#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType) -#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close) -#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb) -#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems) -#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource) -#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource) -#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary) -#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex) -#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey) -#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback) -#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent) -#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt) -#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector) -#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey) -#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues) -#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale) 
-#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType) -#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal) -#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName) -#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource) -#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString) -#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize) -#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString) -#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex) -#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey) -#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback) -#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType) -#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt) -#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String) -#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex) -#define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey) -#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion) -#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey) -#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber) -#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal) -#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext) -#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject) -#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open) -#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales) -#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect) -#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn) -#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU) -#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator) -#define ures_swap 
U_ICU_ENTRY_POINT_RENAME(ures_swap) -#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters) -#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun) -#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode) -#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName) -#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString) -#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString) -#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript) -#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions) -#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName) -#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage) -#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript) -#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased) -#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft) -#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun) -#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun) -#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun) -#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText) -#define usearch_close U_ICU_ENTRY_POINT_RENAME(usearch_close) -#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first) -#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following) -#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute) -#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator) -#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator) -#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength) -#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart) -#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText) 
-#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset) -#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern) -#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText) -#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical) -#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact) -#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical) -#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact) -#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last) -#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next) -#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open) -#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator) -#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding) -#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous) -#define usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset) -#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search) -#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards) -#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute) -#define usearch_setBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator) -#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator) -#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset) -#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern) -#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText) -#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add) -#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll) -#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints) -#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange) -#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString) -#define uset_applyIntPropertyValue 
U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue) -#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern) -#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias) -#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt) -#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear) -#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone) -#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed) -#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close) -#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver) -#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact) -#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement) -#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll) -#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains) -#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll) -#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints) -#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone) -#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange) -#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome) -#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString) -#define uset_equals U_ICU_ENTRY_POINT_RENAME(uset_equals) -#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze) -#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem) -#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount) -#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange) -#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount) -#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet) -#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf) -#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty) -#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen) -#define uset_open 
U_ICU_ENTRY_POINT_RENAME(uset_open) -#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty) -#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern) -#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions) -#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove) -#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll) -#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings) -#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange) -#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString) -#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern) -#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain) -#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll) -#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize) -#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains) -#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set) -#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne) -#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size) -#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span) -#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack) -#define uset_spanBackUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8) -#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8) -#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern) -#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable) -#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8) -#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString) -#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check) -#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8) -#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString) -#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone) 
-#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close) -#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars) -#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales) -#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet) -#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks) -#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet) -#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet) -#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet) -#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet) -#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel) -#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton) -#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8) -#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString) -#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open) -#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized) -#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource) -#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize) -#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars) -#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales) -#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet) -#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks) -#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel) -#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap) -#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close) -#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open) -#define 
usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType) -#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare) -#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap) -#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN) -#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN) -#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN) -#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold) -#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower) -#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle) -#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper) -#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map) -#define ustrcase_setTempCaseMapLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_setTempCaseMapLocale) -#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At) -#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone) -#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close) -#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy) -#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32) -#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals) -#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract) -#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze) -#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex) -#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex) -#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData) -#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive) -#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable) -#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32) -#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength) -#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32) -#define utext_next32From 
U_ICU_ENTRY_POINT_RENAME(utext_next32From) -#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator) -#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString) -#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable) -#define utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars) -#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8) -#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString) -#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32) -#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From) -#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace) -#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex) -#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup) -#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody) -#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody) -#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes) -#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody) -#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody) -#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64) -#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue) -#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64) -#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup) -#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data) -#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry) -#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit) -#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format) -#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName) -#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions) -#define utrace_getLevel 
U_ICU_ENTRY_POINT_RENAME(utrace_getLevel) -#define utrace_level U_ICU_ENTRY_POINT_RENAME(utrace_level) -#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions) -#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel) -#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat) -#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone) -#define utrans_close U_ICU_ENTRY_POINT_RENAME(utrans_close) -#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs) -#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID) -#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID) -#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID) -#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open) -#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs) -#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse) -#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU) -#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register) -#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator) -#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter) -#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules) -#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans) -#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental) -#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars) -#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars) -#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup) -#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister) -#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID) -#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone) -#define utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed) -#define 
utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close) -#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum) -#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate) -#define utrie2_freeze U_ICU_ENTRY_POINT_RENAME(utrie2_freeze) -#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie) -#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32) -#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit) -#define utrie2_getVersion U_ICU_ENTRY_POINT_RENAME(utrie2_getVersion) -#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex) -#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex) -#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen) -#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open) -#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy) -#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized) -#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize) -#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32) -#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit) -#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32) -#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap) -#define utrie2_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie2_swapAnyVersion) -#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone) -#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close) -#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset) -#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum) -#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32) -#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData) -#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open) -#define utrie_serialize 
U_ICU_ENTRY_POINT_RENAME(utrie_serialize) -#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32) -#define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32) -#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap) -#define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize) -#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy) -#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone) -#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close) -#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules) -#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals) -#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID) -#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified) -#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition) -#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset) -#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2) -#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3) -#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition) -#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset) -#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID) -#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL) -#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules) -#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime) -#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData) -#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID) -#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified) -#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset) -#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL) -#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime) -#define vzone_write 
U_ICU_ENTRY_POINT_RENAME(vzone_write) -#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart) -#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple) -#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close) -#define zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals) -#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings) -#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName) -#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset) -#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo) -#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom) -#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo) -#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone) -#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close) -#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals) -#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID) -#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom) -#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID) -#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime) -#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo) -#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open) -#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty) -#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom) -#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime) -#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo) - -#endif - -#endif diff --git a/Source/WTF/icu/unicode/uscript.h b/Source/WTF/icu/unicode/uscript.h deleted file mode 100644 index 57255c4f9..000000000 --- a/Source/WTF/icu/unicode/uscript.h +++ /dev/null @@ -1,627 +0,0 @@ -/* - ********************************************************************** - * Copyright (C) 1997-2013, International Business Machines - * Corporation and others. 
All Rights Reserved. - ********************************************************************** - * - * File USCRIPT.H - * - * Modification History: - * - * Date Name Description - * 07/06/2001 Ram Creation. - ****************************************************************************** - */ - -#ifndef USCRIPT_H -#define USCRIPT_H -#include "unicode/utypes.h" - -/** - * \file - * \brief C API: Unicode Script Information - */ - -/** - * Constants for ISO 15924 script codes. - * - * Many of these script codes - those from Unicode's ScriptNames.txt - - * are character property values for Unicode's Script property. - * See UAX #24 Script Names (http://www.unicode.org/reports/tr24/). - * - * Starting with ICU 3.6, constants for most ISO 15924 script codes - * are included (currently excluding private-use codes Qaaa..Qabx). - * For scripts for which there are codes in ISO 15924 but which are not - * used in the Unicode Character Database (UCD), there are no Unicode characters - * associated with those scripts. - * - * For example, there are no characters that have a UCD script code of - * Hans or Hant. All Han ideographs have the Hani script code. - * The Hans and Hant script codes are used with CLDR data. - * - * ISO 15924 script codes are included for use with CLDR and similar. - * - * @stable ICU 2.2 - */ -typedef enum UScriptCode { - /* - * Note: UScriptCode constants and their ISO script code comments - * are parsed by preparseucd.py. 
- * It matches lines like - * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * / - */ - - /** @stable ICU 2.2 */ - USCRIPT_INVALID_CODE = -1, - /** @stable ICU 2.2 */ - USCRIPT_COMMON = 0, /* Zyyy */ - /** @stable ICU 2.2 */ - USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ - /** @stable ICU 2.2 */ - USCRIPT_ARABIC = 2, /* Arab */ - /** @stable ICU 2.2 */ - USCRIPT_ARMENIAN = 3, /* Armn */ - /** @stable ICU 2.2 */ - USCRIPT_BENGALI = 4, /* Beng */ - /** @stable ICU 2.2 */ - USCRIPT_BOPOMOFO = 5, /* Bopo */ - /** @stable ICU 2.2 */ - USCRIPT_CHEROKEE = 6, /* Cher */ - /** @stable ICU 2.2 */ - USCRIPT_COPTIC = 7, /* Copt */ - /** @stable ICU 2.2 */ - USCRIPT_CYRILLIC = 8, /* Cyrl */ - /** @stable ICU 2.2 */ - USCRIPT_DESERET = 9, /* Dsrt */ - /** @stable ICU 2.2 */ - USCRIPT_DEVANAGARI = 10, /* Deva */ - /** @stable ICU 2.2 */ - USCRIPT_ETHIOPIC = 11, /* Ethi */ - /** @stable ICU 2.2 */ - USCRIPT_GEORGIAN = 12, /* Geor */ - /** @stable ICU 2.2 */ - USCRIPT_GOTHIC = 13, /* Goth */ - /** @stable ICU 2.2 */ - USCRIPT_GREEK = 14, /* Grek */ - /** @stable ICU 2.2 */ - USCRIPT_GUJARATI = 15, /* Gujr */ - /** @stable ICU 2.2 */ - USCRIPT_GURMUKHI = 16, /* Guru */ - /** @stable ICU 2.2 */ - USCRIPT_HAN = 17, /* Hani */ - /** @stable ICU 2.2 */ - USCRIPT_HANGUL = 18, /* Hang */ - /** @stable ICU 2.2 */ - USCRIPT_HEBREW = 19, /* Hebr */ - /** @stable ICU 2.2 */ - USCRIPT_HIRAGANA = 20, /* Hira */ - /** @stable ICU 2.2 */ - USCRIPT_KANNADA = 21, /* Knda */ - /** @stable ICU 2.2 */ - USCRIPT_KATAKANA = 22, /* Kana */ - /** @stable ICU 2.2 */ - USCRIPT_KHMER = 23, /* Khmr */ - /** @stable ICU 2.2 */ - USCRIPT_LAO = 24, /* Laoo */ - /** @stable ICU 2.2 */ - USCRIPT_LATIN = 25, /* Latn */ - /** @stable ICU 2.2 */ - USCRIPT_MALAYALAM = 26, /* Mlym */ - /** @stable ICU 2.2 */ - USCRIPT_MONGOLIAN = 27, /* Mong */ - /** @stable ICU 2.2 */ - USCRIPT_MYANMAR = 28, /* Mymr */ - /** @stable ICU 2.2 */ - 
USCRIPT_OGHAM = 29, /* Ogam */ - /** @stable ICU 2.2 */ - USCRIPT_OLD_ITALIC = 30, /* Ital */ - /** @stable ICU 2.2 */ - USCRIPT_ORIYA = 31, /* Orya */ - /** @stable ICU 2.2 */ - USCRIPT_RUNIC = 32, /* Runr */ - /** @stable ICU 2.2 */ - USCRIPT_SINHALA = 33, /* Sinh */ - /** @stable ICU 2.2 */ - USCRIPT_SYRIAC = 34, /* Syrc */ - /** @stable ICU 2.2 */ - USCRIPT_TAMIL = 35, /* Taml */ - /** @stable ICU 2.2 */ - USCRIPT_TELUGU = 36, /* Telu */ - /** @stable ICU 2.2 */ - USCRIPT_THAANA = 37, /* Thaa */ - /** @stable ICU 2.2 */ - USCRIPT_THAI = 38, /* Thai */ - /** @stable ICU 2.2 */ - USCRIPT_TIBETAN = 39, /* Tibt */ - /** Canadian_Aboriginal script. @stable ICU 2.6 */ - USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */ - /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ - USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, - /** @stable ICU 2.2 */ - USCRIPT_YI = 41, /* Yiii */ - /* New scripts in Unicode 3.2 */ - /** @stable ICU 2.2 */ - USCRIPT_TAGALOG = 42, /* Tglg */ - /** @stable ICU 2.2 */ - USCRIPT_HANUNOO = 43, /* Hano */ - /** @stable ICU 2.2 */ - USCRIPT_BUHID = 44, /* Buhd */ - /** @stable ICU 2.2 */ - USCRIPT_TAGBANWA = 45, /* Tagb */ - - /* New scripts in Unicode 4 */ - /** @stable ICU 2.6 */ - USCRIPT_BRAILLE = 46, /* Brai */ - /** @stable ICU 2.6 */ - USCRIPT_CYPRIOT = 47, /* Cprt */ - /** @stable ICU 2.6 */ - USCRIPT_LIMBU = 48, /* Limb */ - /** @stable ICU 2.6 */ - USCRIPT_LINEAR_B = 49, /* Linb */ - /** @stable ICU 2.6 */ - USCRIPT_OSMANYA = 50, /* Osma */ - /** @stable ICU 2.6 */ - USCRIPT_SHAVIAN = 51, /* Shaw */ - /** @stable ICU 2.6 */ - USCRIPT_TAI_LE = 52, /* Tale */ - /** @stable ICU 2.6 */ - USCRIPT_UGARITIC = 53, /* Ugar */ - - /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ - USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */ - - /* New scripts in Unicode 4.1 */ - /** @stable ICU 3.4 */ - USCRIPT_BUGINESE = 55, /* Bugi */ - /** @stable ICU 3.4 */ - USCRIPT_GLAGOLITIC = 56, /* Glag */ - /** @stable ICU 3.4 */ - USCRIPT_KHAROSHTHI = 57, /* Khar 
*/ - /** @stable ICU 3.4 */ - USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */ - /** @stable ICU 3.4 */ - USCRIPT_NEW_TAI_LUE = 59, /* Talu */ - /** @stable ICU 3.4 */ - USCRIPT_TIFINAGH = 60, /* Tfng */ - /** @stable ICU 3.4 */ - USCRIPT_OLD_PERSIAN = 61, /* Xpeo */ - - /* New script codes from ISO 15924 */ - /** @stable ICU 3.6 */ - USCRIPT_BALINESE = 62, /* Bali */ - /** @stable ICU 3.6 */ - USCRIPT_BATAK = 63, /* Batk */ - /** @stable ICU 3.6 */ - USCRIPT_BLISSYMBOLS = 64, /* Blis */ - /** @stable ICU 3.6 */ - USCRIPT_BRAHMI = 65, /* Brah */ - /** @stable ICU 3.6 */ - USCRIPT_CHAM = 66, /* Cham */ - /** @stable ICU 3.6 */ - USCRIPT_CIRTH = 67, /* Cirt */ - /** @stable ICU 3.6 */ - USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */ - /** @stable ICU 3.6 */ - USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */ - /** @stable ICU 3.6 */ - USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */ - /** @stable ICU 3.6 */ - USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */ - /** @stable ICU 3.6 */ - USCRIPT_KHUTSURI = 72, /* Geok */ - /** @stable ICU 3.6 */ - USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */ - /** @stable ICU 3.6 */ - USCRIPT_TRADITIONAL_HAN = 74, /* Hant */ - /** @stable ICU 3.6 */ - USCRIPT_PAHAWH_HMONG = 75, /* Hmng */ - /** @stable ICU 3.6 */ - USCRIPT_OLD_HUNGARIAN = 76, /* Hung */ - /** @stable ICU 3.6 */ - USCRIPT_HARAPPAN_INDUS = 77, /* Inds */ - /** @stable ICU 3.6 */ - USCRIPT_JAVANESE = 78, /* Java */ - /** @stable ICU 3.6 */ - USCRIPT_KAYAH_LI = 79, /* Kali */ - /** @stable ICU 3.6 */ - USCRIPT_LATIN_FRAKTUR = 80, /* Latf */ - /** @stable ICU 3.6 */ - USCRIPT_LATIN_GAELIC = 81, /* Latg */ - /** @stable ICU 3.6 */ - USCRIPT_LEPCHA = 82, /* Lepc */ - /** @stable ICU 3.6 */ - USCRIPT_LINEAR_A = 83, /* Lina */ - /** @stable ICU 4.6 */ - USCRIPT_MANDAIC = 84, /* Mand */ - /** @stable ICU 3.6 */ - USCRIPT_MANDAEAN = USCRIPT_MANDAIC, - /** @stable ICU 3.6 */ - USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */ - /** @stable ICU 4.6 */ - USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */ - /** @stable ICU 
3.6 */ - USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, - /** @stable ICU 3.6 */ - USCRIPT_NKO = 87, /* Nkoo */ - /** @stable ICU 3.6 */ - USCRIPT_ORKHON = 88, /* Orkh */ - /** @stable ICU 3.6 */ - USCRIPT_OLD_PERMIC = 89, /* Perm */ - /** @stable ICU 3.6 */ - USCRIPT_PHAGS_PA = 90, /* Phag */ - /** @stable ICU 3.6 */ - USCRIPT_PHOENICIAN = 91, /* Phnx */ - /** @stable ICU 52 */ - USCRIPT_MIAO = 92, /* Plrd */ - /** @stable ICU 3.6 */ - USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, - /** @stable ICU 3.6 */ - USCRIPT_RONGORONGO = 93, /* Roro */ - /** @stable ICU 3.6 */ - USCRIPT_SARATI = 94, /* Sara */ - /** @stable ICU 3.6 */ - USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */ - /** @stable ICU 3.6 */ - USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */ - /** @stable ICU 3.6 */ - USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */ - /** @stable ICU 3.6 */ - USCRIPT_TENGWAR = 98, /* Teng */ - /** @stable ICU 3.6 */ - USCRIPT_VAI = 99, /* Vaii */ - /** @stable ICU 3.6 */ - USCRIPT_VISIBLE_SPEECH = 100,/* Visp */ - /** @stable ICU 3.6 */ - USCRIPT_CUNEIFORM = 101,/* Xsux */ - /** @stable ICU 3.6 */ - USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */ - /** @stable ICU 3.6 */ - USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ - - /* New script codes from ISO 15924 */ - /** @stable ICU 3.8 */ - USCRIPT_CARIAN = 104,/* Cari */ - /** @stable ICU 3.8 */ - USCRIPT_JAPANESE = 105,/* Jpan */ - /** @stable ICU 3.8 */ - USCRIPT_LANNA = 106,/* Lana */ - /** @stable ICU 3.8 */ - USCRIPT_LYCIAN = 107,/* Lyci */ - /** @stable ICU 3.8 */ - USCRIPT_LYDIAN = 108,/* Lydi */ - /** @stable ICU 3.8 */ - USCRIPT_OL_CHIKI = 109,/* Olck */ - /** @stable ICU 3.8 */ - USCRIPT_REJANG = 110,/* Rjng */ - /** @stable ICU 3.8 */ - USCRIPT_SAURASHTRA = 111,/* Saur */ - /** @stable ICU 3.8 */ - USCRIPT_SIGN_WRITING = 112,/* Sgnw */ - /** @stable ICU 3.8 */ - USCRIPT_SUNDANESE = 113,/* Sund */ - /** @stable ICU 3.8 */ - USCRIPT_MOON = 114,/* Moon */ - /** @stable ICU 3.8 */ - 
USCRIPT_MEITEI_MAYEK = 115,/* Mtei */ - - /* New script codes from ISO 15924 */ - /** @stable ICU 4.0 */ - USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */ - /** @stable ICU 4.0 */ - USCRIPT_AVESTAN = 117,/* Avst */ - /** @stable ICU 4.0 */ - USCRIPT_CHAKMA = 118,/* Cakm */ - /** @stable ICU 4.0 */ - USCRIPT_KOREAN = 119,/* Kore */ - /** @stable ICU 4.0 */ - USCRIPT_KAITHI = 120,/* Kthi */ - /** @stable ICU 4.0 */ - USCRIPT_MANICHAEAN = 121,/* Mani */ - /** @stable ICU 4.0 */ - USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */ - /** @stable ICU 4.0 */ - USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */ - /** @stable ICU 4.0 */ - USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */ - /** @stable ICU 4.0 */ - USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */ - /** @stable ICU 4.0 */ - USCRIPT_SAMARITAN = 126,/* Samr */ - /** @stable ICU 4.0 */ - USCRIPT_TAI_VIET = 127,/* Tavt */ - /** @stable ICU 4.0 */ - USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */ - /** @stable ICU 4.0 */ - USCRIPT_SYMBOLS = 129,/* Zsym */ - - /* New script codes from ISO 15924 */ - /** @stable ICU 4.4 */ - USCRIPT_BAMUM = 130,/* Bamu */ - /** @stable ICU 4.4 */ - USCRIPT_LISU = 131,/* Lisu */ - /** @stable ICU 4.4 */ - USCRIPT_NAKHI_GEBA = 132,/* Nkgb */ - /** @stable ICU 4.4 */ - USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */ - - /* New script codes from ISO 15924 */ - /** @stable ICU 4.6 */ - USCRIPT_BASSA_VAH = 134,/* Bass */ - /** @stable ICU 4.6 */ - USCRIPT_DUPLOYAN_SHORTAND = 135,/* Dupl */ - /** @stable ICU 4.6 */ - USCRIPT_ELBASAN = 136,/* Elba */ - /** @stable ICU 4.6 */ - USCRIPT_GRANTHA = 137,/* Gran */ - /** @stable ICU 4.6 */ - USCRIPT_KPELLE = 138,/* Kpel */ - /** @stable ICU 4.6 */ - USCRIPT_LOMA = 139,/* Loma */ - /** @stable ICU 4.6 */ - USCRIPT_MENDE = 140,/* Mend */ - /** @stable ICU 4.6 */ - USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */ - /** @stable ICU 4.6 */ - USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */ - /** @stable ICU 4.6 */ - USCRIPT_NABATAEAN = 143,/* Nbat */ - /** @stable ICU 4.6 */ - USCRIPT_PALMYRENE = 
144,/* Palm */ - /** @stable ICU 4.6 */ - USCRIPT_SINDHI = 145,/* Sind */ - /** @stable ICU 4.6 */ - USCRIPT_WARANG_CITI = 146,/* Wara */ - - /** @stable ICU 4.8 */ - USCRIPT_AFAKA = 147,/* Afak */ - /** @stable ICU 4.8 */ - USCRIPT_JURCHEN = 148,/* Jurc */ - /** @stable ICU 4.8 */ - USCRIPT_MRO = 149,/* Mroo */ - /** @stable ICU 4.8 */ - USCRIPT_NUSHU = 150,/* Nshu */ - /** @stable ICU 4.8 */ - USCRIPT_SHARADA = 151,/* Shrd */ - /** @stable ICU 4.8 */ - USCRIPT_SORA_SOMPENG = 152,/* Sora */ - /** @stable ICU 4.8 */ - USCRIPT_TAKRI = 153,/* Takr */ - /** @stable ICU 4.8 */ - USCRIPT_TANGUT = 154,/* Tang */ - /** @stable ICU 4.8 */ - USCRIPT_WOLEAI = 155,/* Wole */ - - /** @stable ICU 49 */ - USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */ - /** @stable ICU 49 */ - USCRIPT_KHOJKI = 157,/* Khoj */ - /** @stable ICU 49 */ - USCRIPT_TIRHUTA = 158,/* Tirh */ - - /** @stable ICU 52 */ - USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */ - /** @stable ICU 52 */ - USCRIPT_MAHAJANI = 160,/* Mahj */ - - /* Private use codes from Qaaa - Qabx are not supported */ - - /** @stable ICU 2.2 */ - USCRIPT_CODE_LIMIT = 161 -} UScriptCode; - -/** - * Gets script codes associated with the given locale or ISO 15924 abbreviation or name. - * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". - * Fills in USCRIPT_LATIN given "en" OR "en_US" - * If required capacity is greater than capacity of the destination buffer then the error code - * is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned - * - * <p>Note: To search by short or long script alias only, use - * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. This does - * a fast lookup with no access of the locale data. - * @param nameOrAbbrOrLocale name of the script, as given in - * PropertyValueAliases.txt, or ISO 15924 code or locale - * @param fillIn the UScriptCode buffer to fill in the script code - * @param capacity the capacity (size) of the UScriptCode buffer passed in. - * @param err the error status code. 
- * @return The number of script codes filled in the buffer passed in - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err); - -/** - * Gets a script name associated with the given script code. - * Returns "Malayalam" given USCRIPT_MALAYALAM - * @param scriptCode UScriptCode enum - * @return script long name as given in - * PropertyValueAliases.txt, or NULL if scriptCode is invalid - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -uscript_getName(UScriptCode scriptCode); - -/** - * Gets a script name associated with the given script code. - * Returns "Mlym" given USCRIPT_MALAYALAM - * @param scriptCode UScriptCode enum - * @return script abbreviated name as given in - * PropertyValueAliases.txt, or NULL if scriptCode is invalid - * @stable ICU 2.4 - */ -U_STABLE const char* U_EXPORT2 -uscript_getShortName(UScriptCode scriptCode); - -/** - * Gets the script code associated with the given codepoint. - * Returns USCRIPT_MALAYALAM given 0x0D02 - * @param codepoint UChar32 codepoint - * @param err the error status code. - * @return The UScriptCode, or 0 if codepoint is invalid - * @stable ICU 2.4 - */ -U_STABLE UScriptCode U_EXPORT2 -uscript_getScript(UChar32 codepoint, UErrorCode *err); - -/** - * Do the Script_Extensions of code point c contain script sc? - * If c does not have explicit Script_Extensions, then this tests whether - * c has the Script property value sc. - * - * Some characters are commonly used in multiple scripts. - * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. - * - * The Script_Extensions property is provisional. It may be modified or removed - * in future versions of the Unicode Standard, and thus in ICU. 
- * @param c code point - * @param sc script code - * @return TRUE if sc is in Script_Extensions(c) - * @stable ICU 49 - */ -U_STABLE UBool U_EXPORT2 -uscript_hasScript(UChar32 c, UScriptCode sc); - -/** - * Writes code point c's Script_Extensions as a list of UScriptCode values - * to the output scripts array and returns the number of script codes. - * - If c does have Script_Extensions, then the Script property value - * (normally Common or Inherited) is not included. - * - If c does not have Script_Extensions, then the one Script code is written to the output array. - * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. - * In other words, if the return value is 1, - * then the output array contains exactly c's single Script code. - * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes. - * - * Some characters are commonly used in multiple scripts. - * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. - * - * If there are more than capacity script codes to be written, then - * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. - * (Usual ICU buffer handling behavior.) - * - * The Script_Extensions property is provisional. It may be modified or removed - * in future versions of the Unicode Standard, and thus in ICU. - * @param c code point - * @param scripts output script code array - * @param capacity capacity of the scripts array - * @param errorCode Standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) 
- * @return number of script codes in c's Script_Extensions, or 1 for the single Script value, - * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity - * @stable ICU 49 - */ -U_STABLE int32_t U_EXPORT2 -uscript_getScriptExtensions(UChar32 c, - UScriptCode *scripts, int32_t capacity, - UErrorCode *errorCode); - -#ifndef U_HIDE_DRAFT_API - -/** - * Script usage constants. - * See UAX #31 Unicode Identifier and Pattern Syntax. - * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers - * - * @draft ICU 51 - */ -typedef enum UScriptUsage { - /** Not encoded in Unicode. @draft ICU 51 */ - USCRIPT_USAGE_NOT_ENCODED, - /** Unknown script usage. @draft ICU 51 */ - USCRIPT_USAGE_UNKNOWN, - /** Candidate for Exclusion from Identifiers. @draft ICU 51 */ - USCRIPT_USAGE_EXCLUDED, - /** Limited Use script. @draft ICU 51 */ - USCRIPT_USAGE_LIMITED_USE, - /** Aspirational Use script. @draft ICU 51 */ - USCRIPT_USAGE_ASPIRATIONAL, - /** Recommended script. @draft ICU 51 */ - USCRIPT_USAGE_RECOMMENDED -} UScriptUsage; - -/** - * Writes the script sample character string. - * This string normally consists of one code point but might be longer. - * The string is empty if the script is not encoded. - * - * @param script script code - * @param dest output string array - * @param capacity number of UChars in the dest array - * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input - * @return the string length, even if U_BUFFER_OVERFLOW_ERROR - * @draft ICU 51 - */ -U_DRAFT int32_t U_EXPORT2 -uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN -class UnicodeString; -U_NAMESPACE_END - -/** - * Returns the script sample character string. - * This string normally consists of one code point but might be longer. - * The string is empty if the script is not encoded. 
- * - * @param script script code - * @return the sample character string - * @draft ICU 51 - */ -U_COMMON_API icu::UnicodeString U_EXPORT2 -uscript_getSampleUnicodeString(UScriptCode script); - -#endif - -/** - * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. - * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode. - * - * @param script script code - * @return script usage - * @see UScriptUsage - * @draft ICU 51 - */ -U_DRAFT UScriptUsage U_EXPORT2 -uscript_getUsage(UScriptCode script); - -/** - * Returns TRUE if the script is written right-to-left. - * For example, Arab and Hebr. - * - * @param script script code - * @return TRUE if the script is right-to-left - * @draft ICU 51 - */ -U_DRAFT UBool U_EXPORT2 -uscript_isRightToLeft(UScriptCode script); - -/** - * Returns TRUE if the script allows line breaks between letters (excluding hyphenation). - * Such a script typically requires dictionary-based line breaking. - * For example, Hani and Thai. - * - * @param script script code - * @return TRUE if the script allows line breaks between letters - * @draft ICU 51 - */ -U_DRAFT UBool U_EXPORT2 -uscript_breaksBetweenLetters(UScriptCode script); - -/** - * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary. - * For example, Latn and Cyrl. - * - * @param script script code - * @return TRUE if the script is cased - * @draft ICU 51 - */ -U_DRAFT UBool U_EXPORT2 -uscript_isCased(UScriptCode script); - -#endif /* U_HIDE_DRAFT_API */ - -#endif diff --git a/Source/WTF/icu/unicode/uset.h b/Source/WTF/icu/unicode/uset.h deleted file mode 100644 index 40510cd41..000000000 --- a/Source/WTF/icu/unicode/uset.h +++ /dev/null @@ -1,1124 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. All Rights Reserved. 
-* -******************************************************************************* -* file name: uset.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002mar07 -* created by: Markus W. Scherer -* -* C version of UnicodeSet. -*/ - - -/** - * \file - * \brief C API: Unicode Set - * - * <p>This is a C wrapper around the C++ UnicodeSet class.</p> - */ - -#ifndef __USET_H__ -#define __USET_H__ - -#include "unicode/utypes.h" -#include "unicode/uchar.h" -#include "unicode/localpointer.h" - -#ifndef UCNV_H -struct USet; -/** - * A UnicodeSet. Use the uset_* API to manipulate. Create with - * uset_open*, and destroy with uset_close. - * @stable ICU 2.4 - */ -typedef struct USet USet; -#endif - -/** - * Bitmask values to be passed to uset_openPatternOptions() or - * uset_applyPattern() taking an option parameter. - * @stable ICU 2.4 - */ -enum { - /** - * Ignore white space within patterns unless quoted or escaped. - * @stable ICU 2.4 - */ - USET_IGNORE_SPACE = 1, - - /** - * Enable case insensitive matching. E.g., "[ab]" with this flag - * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will - * match all except 'a', 'A', 'b', and 'B'. This performs a full - * closure over case mappings, e.g. U+017F for s. - * - * The resulting set is a superset of the input for the code points but - * not for the strings. - * It performs a case mapping closure of the code points and adds - * full case folding strings for the code points, and reduces strings of - * the original set to their full case folding equivalents. - * - * This is designed for case-insensitive matches, for example - * in regular expressions. The full code point case closure allows checking of - * an input character directly against the closure set. - * Strings are matched by comparing the case-folded form from the closure - * set with an incremental case folding of the string in question. 
- * - * The closure set will also contain single code points if the original - * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). - * This is not necessary (that is, redundant) for the above matching method - * but results in the same closure sets regardless of whether the original - * set contained the code point or a string. - * - * @stable ICU 2.4 - */ - USET_CASE_INSENSITIVE = 2, - - /** - * Enable case insensitive matching. E.g., "[ab]" with this flag - * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will - * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, - * title-, and uppercase mappings as well as the case folding - * of each existing element in the set. - * @stable ICU 3.2 - */ - USET_ADD_CASE_MAPPINGS = 4 -}; - -/** - * Argument values for whether span() and similar functions continue while - * the current character is contained vs. not contained in the set. - * - * The functionality is straightforward for sets with only single code points, - * without strings (which is the common case): - * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE - * work the same. - * - span() and spanBack() partition any string the same way when - * alternating between span(USET_SPAN_NOT_CONTAINED) and - * span(either "contained" condition). - * - Using a complemented (inverted) set and the opposite span conditions - * yields the same results. - * - * When a set contains multi-code point strings, then these statements may not - * be true, depending on the strings in the set (for example, whether they - * overlap with each other) and the string that is processed. - * For a set with strings: - * - The complement of the set contains the opposite set of code points, - * but the same set of strings. - * Therefore, complementing both the set and the span conditions - * may yield different results. - * - When starting spans at different positions in a string - * (span(s, ...) vs. 
span(s+1, ...)) the ends of the spans may be different - * because a set string may start before the later position. - * - span(USET_SPAN_SIMPLE) may be shorter than - * span(USET_SPAN_CONTAINED) because it will not recursively try - * all possible paths. - * For example, with a set which contains the three strings "xy", "xya" and "ax", - * span("xyax", USET_SPAN_CONTAINED) will return 4 but - * span("xyax", USET_SPAN_SIMPLE) will return 3. - * span(USET_SPAN_SIMPLE) will never be longer than - * span(USET_SPAN_CONTAINED). - * - With either "contained" condition, span() and spanBack() may partition - * a string in different ways. - * For example, with a set which contains the two strings "ab" and "ba", - * and when processing the string "aba", - * span() will yield contained/not-contained boundaries of { 0, 2, 3 } - * while spanBack() will yield boundaries of { 0, 1, 3 }. - * - * Note: If it is important to get the same boundaries whether iterating forward - * or backward through a string, then either only span() should be used and - * the boundaries cached for backward operation, or an ICU BreakIterator - * could be used. - * - * Note: Unpaired surrogates are treated like surrogate code points. - * Similarly, set strings match only on code point boundaries, - * never in the middle of a surrogate pair. - * Illegal UTF-8 sequences are treated like U+FFFD. - * When processing UTF-8 strings, malformed set strings - * (strings with unpaired surrogates which cannot be converted to UTF-8) - * are ignored. - * - * @stable ICU 3.8 - */ -typedef enum USetSpanCondition { - /** - * Continue a span() while there is no set element at the current position. - * Stops before the first set element (character or string). - * (For code points only, this is like while contains(current)==FALSE). 
- * - * When span() returns, the substring between where it started and the position - * it returned consists only of characters that are not in the set, - * and none of its strings overlap with the span. - * - * @stable ICU 3.8 - */ - USET_SPAN_NOT_CONTAINED = 0, - /** - * Continue a span() while there is a set element at the current position. - * (For characters only, this is like while contains(current)==TRUE). - * - * When span() returns, the substring between where it started and the position - * it returned consists only of set elements (characters or strings) that are in the set. - * - * If a set contains strings, then the span will be the longest substring - * matching any of the possible concatenations of set elements (characters or strings). - * (There must be a single, non-overlapping concatenation of characters or strings.) - * This is equivalent to a POSIX regular expression for (OR of each set element)*. - * - * @stable ICU 3.8 - */ - USET_SPAN_CONTAINED = 1, - /** - * Continue a span() while there is a set element at the current position. - * (For characters only, this is like while contains(current)==TRUE). - * - * When span() returns, the substring between where it started and the position - * it returned consists only of set elements (characters or strings) that are in the set. - * - * If a set only contains single characters, then this is the same - * as USET_SPAN_CONTAINED. - * - * If a set contains strings, then the span will be the longest substring - * with a match at each position with the longest single set element (character or string). - * - * Use this span condition together with other longest-match algorithms, - * such as ICU converters (ucnv_getUnicodeSet()). - * - * @stable ICU 3.8 - */ - USET_SPAN_SIMPLE = 2, - /** - * One more than the last span condition. - * @stable ICU 3.8 - */ - USET_SPAN_CONDITION_COUNT -} USetSpanCondition; - -enum { - /** - * Capacity of USerializedSet::staticArray. - * Enough for any single-code point set. 
- * Also provides padding for nice sizeof(USerializedSet). - * @stable ICU 2.4 - */ - USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 -}; - -/** - * A serialized form of a Unicode set. Limited manipulations are - * possible directly on a serialized set. See below. - * @stable ICU 2.4 - */ -typedef struct USerializedSet { - /** - * The serialized Unicode Set. - * @stable ICU 2.4 - */ - const uint16_t *array; - /** - * The length of the array that contains BMP characters. - * @stable ICU 2.4 - */ - int32_t bmpLength; - /** - * The total length of the array. - * @stable ICU 2.4 - */ - int32_t length; - /** - * A small buffer for the array to reduce memory allocations. - * @stable ICU 2.4 - */ - uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; -} USerializedSet; - -/********************************************************************* - * USet API - *********************************************************************/ - -/** - * Create an empty USet object. - * Equivalent to uset_open(1, 0). - * @return a newly created USet. The caller must call uset_close() on - * it when done. - * @stable ICU 4.2 - */ -U_STABLE USet* U_EXPORT2 -uset_openEmpty(void); - -/** - * Creates a USet object that contains the range of characters - * start..end, inclusive. If <code>start > end</code> - * then an empty set is created (same as using uset_openEmpty()). - * @param start first character of the range, inclusive - * @param end last character of the range, inclusive - * @return a newly created USet. The caller must call uset_close() on - * it when done. - * @stable ICU 2.4 - */ -U_STABLE USet* U_EXPORT2 -uset_open(UChar32 start, UChar32 end); - -/** - * Creates a set from the given pattern. See the UnicodeSet class - * description for the syntax of the pattern language. 
- * @param pattern a string specifying what characters are in the set - * @param patternLength the length of the pattern, or -1 if null - * terminated - * @param ec the error code - * @stable ICU 2.4 - */ -U_STABLE USet* U_EXPORT2 -uset_openPattern(const UChar* pattern, int32_t patternLength, - UErrorCode* ec); - -/** - * Creates a set from the given pattern. See the UnicodeSet class - * description for the syntax of the pattern language. - * @param pattern a string specifying what characters are in the set - * @param patternLength the length of the pattern, or -1 if null - * terminated - * @param options bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. - * @param ec the error code - * @stable ICU 2.4 - */ -U_STABLE USet* U_EXPORT2 -uset_openPatternOptions(const UChar* pattern, int32_t patternLength, - uint32_t options, - UErrorCode* ec); - -/** - * Disposes of the storage used by a USet object. This function should - * be called exactly once for objects returned by uset_open(). - * @param set the object to dispose of - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_close(USet* set); - -#if U_SHOW_CPLUSPLUS_API - -U_NAMESPACE_BEGIN - -/** - * \class LocalUSetPointer - * "Smart pointer" class, closes a USet via uset_close(). - * For most methods see the LocalPointerBase base class. - * - * @see LocalPointerBase - * @see LocalPointer - * @stable ICU 4.4 - */ -U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); - -U_NAMESPACE_END - -#endif - -/** - * Returns a copy of this object. - * If this set is frozen, then the clone will be frozen as well. - * Use uset_cloneAsThawed() for a mutable clone of a frozen set. - * @param set the original set - * @return the newly allocated copy of the set - * @see uset_cloneAsThawed - * @stable ICU 3.8 - */ -U_STABLE USet * U_EXPORT2 -uset_clone(const USet *set); - -/** - * Determines whether the set has been frozen (made immutable) or not. 
- * See the ICU4J Freezable interface for details. - * @param set the set - * @return TRUE/FALSE for whether the set has been frozen - * @see uset_freeze - * @see uset_cloneAsThawed - * @stable ICU 3.8 - */ -U_STABLE UBool U_EXPORT2 -uset_isFrozen(const USet *set); - -/** - * Freeze the set (make it immutable). - * Once frozen, it cannot be unfrozen and is therefore thread-safe - * until it is deleted. - * See the ICU4J Freezable interface for details. - * Freezing the set may also make some operations faster, for example - * uset_contains() and uset_span(). - * A frozen set will not be modified. (It remains frozen.) - * @param set the set - * @return the same set, now frozen - * @see uset_isFrozen - * @see uset_cloneAsThawed - * @stable ICU 3.8 - */ -U_STABLE void U_EXPORT2 -uset_freeze(USet *set); - -/** - * Clone the set and make the clone mutable. - * See the ICU4J Freezable interface for details. - * @param set the set - * @return the mutable clone - * @see uset_freeze - * @see uset_isFrozen - * @see uset_clone - * @stable ICU 3.8 - */ -U_STABLE USet * U_EXPORT2 -uset_cloneAsThawed(const USet *set); - -/** - * Causes the USet object to represent the range <code>start - end</code>. - * If <code>start > end</code> then this USet is set to an empty range. - * A frozen set will not be modified. - * @param set the object to set to the given range - * @param start first character in the set, inclusive - * @param end last character in the set, inclusive - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_set(USet* set, - UChar32 start, UChar32 end); - -/** - * Modifies the set to represent the set specified by the given - * pattern. See the UnicodeSet class description for the syntax of - * the pattern language. See also the User Guide chapter about UnicodeSet. - * <em>Empties the set passed before applying the pattern.</em> - * A frozen set will not be modified. - * @param set The set to which the pattern is to be applied. 
- * @param pattern A pointer to UChar string specifying what characters are in the set. - * The character at pattern[0] must be a '['. - * @param patternLength The length of the UChar string. -1 if NUL terminated. - * @param options A bitmask for options to apply to the pattern. - * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. - * @param status Returns an error if the pattern cannot be parsed. - * @return Upon successful parse, the value is either - * the index of the character after the closing ']' - * of the parsed pattern. - * If the status code indicates failure, then the return value - * is the index of the error in the source. - * - * @stable ICU 2.8 - */ -U_STABLE int32_t U_EXPORT2 -uset_applyPattern(USet *set, - const UChar *pattern, int32_t patternLength, - uint32_t options, - UErrorCode *status); - -/** - * Modifies the set to contain those code points which have the given value - * for the given binary or enumerated property, as returned by - * u_getIntPropertyValue. Prior contents of this set are lost. - * A frozen set will not be modified. - * - * @param set the object to contain the code points defined by the property - * - * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 - * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 - * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. - * - * @param value a value in the range u_getIntPropertyMinValue(prop).. - * u_getIntPropertyMaxValue(prop), with one exception. If prop is - * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but - * rather a mask value produced by U_GET_GC_MASK(). This allows grouped - * categories such as [:L:] to be represented. - * - * @param ec error code input/output parameter - * - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_applyIntPropertyValue(USet* set, - UProperty prop, int32_t value, UErrorCode* ec); - -/** - * Modifies the set to contain those code points which have the - * given value for the given property. 
Prior contents of this - * set are lost. - * A frozen set will not be modified. - * - * @param set the object to contain the code points defined by the given - * property and value alias - * - * @param prop a string specifying a property alias, either short or long. - * The name is matched loosely. See PropertyAliases.txt for names and a - * description of loose matching. If the value string is empty, then this - * string is interpreted as either a General_Category value alias, a Script - * value alias, a binary property alias, or a special ID. Special IDs are - * matched loosely and correspond to the following sets: - * - * "ANY" = [\\u0000-\\U0010FFFF], - * "ASCII" = [\\u0000-\\u007F], - * "Assigned" = [:^Cn:]. - * - * @param propLength the length of the prop, or -1 if NULL - * - * @param value a string specifying a value alias, either short or long. - * The name is matched loosely. See PropertyValueAliases.txt for names - * and a description of loose matching. In addition to aliases listed, - * numeric values and canonical combining classes may be expressed - * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string - * may also be empty. - * - * @param valueLength the length of the value, or -1 if NULL - * - * @param ec error code input/output parameter - * - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_applyPropertyAlias(USet* set, - const UChar *prop, int32_t propLength, - const UChar *value, int32_t valueLength, - UErrorCode* ec); - -/** - * Return true if the given position, in the given pattern, appears - * to be the start of a UnicodeSet pattern. - * - * @param pattern a string specifying the pattern - * @param patternLength the length of the pattern, or -1 if NULL - * @param pos the given position - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_resemblesPattern(const UChar *pattern, int32_t patternLength, - int32_t pos); - -/** - * Returns a string representation of this set. 
If the result of - * calling this function is passed to uset_openPattern(), it - * will produce another set that is equal to this one. - * @param set the set - * @param result the string to receive the rules, may be NULL - * @param resultCapacity the capacity of result, may be 0 if result is NULL - * @param escapeUnprintable if TRUE then convert unprintable - * characters to their hex escape representations, \\uxxxx or - * \\Uxxxxxxxx. Unprintable characters are those other than - * U+000A, U+0020..U+007E. - * @param ec error code. - * @return length of string, possibly larger than resultCapacity - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_toPattern(const USet* set, - UChar* result, int32_t resultCapacity, - UBool escapeUnprintable, - UErrorCode* ec); - -/** - * Adds the given character to the given USet. After this call, - * uset_contains(set, c) will return TRUE. - * A frozen set will not be modified. - * @param set the object to which to add the character - * @param c the character to add - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_add(USet* set, UChar32 c); - -/** - * Adds all of the elements in the specified set to this set if - * they're not already present. This operation effectively - * modifies this set so that its value is the <i>union</i> of the two - * sets. The behavior of this operation is unspecified if the specified - * collection is modified while the operation is in progress. - * A frozen set will not be modified. - * - * @param set the object to which to add the set - * @param additionalSet the source set whose elements are to be added to this set. - * @stable ICU 2.6 - */ -U_STABLE void U_EXPORT2 -uset_addAll(USet* set, const USet *additionalSet); - -/** - * Adds the given range of characters to the given USet. After this call, - * uset_containsRange(set, start, end) will return TRUE. - * A frozen set will not be modified. 
- * @param set the object to which to add the range - * @param start the first character of the range to add, inclusive - * @param end the last character of the range to add, inclusive - * @stable ICU 2.2 - */ -U_STABLE void U_EXPORT2 -uset_addRange(USet* set, UChar32 start, UChar32 end); - -/** - * Adds the given string to the given USet. After this call, - * uset_containsString(set, str, strLen) will return TRUE. - * A frozen set will not be modified. - * @param set the object to which to add the string - * @param str the string to add - * @param strLen the length of the string or -1 if null terminated. - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_addString(USet* set, const UChar* str, int32_t strLen); - -/** - * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} - * If this set already contains any particular character, it has no effect on that character. - * A frozen set will not be modified. - * @param set the object to which to add the characters - * @param str the source string - * @param strLen the length of the string or -1 if null terminated. - * @stable ICU 3.4 - */ -U_STABLE void U_EXPORT2 -uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen); - -/** - * Removes the given character from the given USet. After this call, - * uset_contains(set, c) will return FALSE. - * A frozen set will not be modified. - * @param set the object from which to remove the character - * @param c the character to remove - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_remove(USet* set, UChar32 c); - -/** - * Removes the given range of characters from the given USet. After this call, - * uset_containsRange(set, start, end) will return FALSE. - * A frozen set will not be modified. 
- * @param set the object to which to add the character - * @param start the first character of the range to remove, inclusive - * @param end the last character of the range to remove, inclusive - * @stable ICU 2.2 - */ -U_STABLE void U_EXPORT2 -uset_removeRange(USet* set, UChar32 start, UChar32 end); - -/** - * Removes the given string to the given USet. After this call, - * uset_containsString(set, str, strLen) will return FALSE. - * A frozen set will not be modified. - * @param set the object to which to add the character - * @param str the string to remove - * @param strLen the length of the string or -1 if null terminated. - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_removeString(USet* set, const UChar* str, int32_t strLen); - -/** - * Removes from this set all of its elements that are contained in the - * specified set. This operation effectively modifies this - * set so that its value is the <i>asymmetric set difference</i> of - * the two sets. - * A frozen set will not be modified. - * @param set the object from which the elements are to be removed - * @param removeSet the object that defines which elements will be - * removed from this set - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_removeAll(USet* set, const USet* removeSet); - -/** - * Retain only the elements in this set that are contained in the - * specified range. If <code>start > end</code> then an empty range is - * retained, leaving the set empty. This is equivalent to - * a boolean logic AND, or a set INTERSECTION. - * A frozen set will not be modified. - * - * @param set the object for which to retain only the specified range - * @param start first character, inclusive, of range to be retained - * to this set. - * @param end last character, inclusive, of range to be retained - * to this set. 
- * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_retain(USet* set, UChar32 start, UChar32 end); - -/** - * Retains only the elements in this set that are contained in the - * specified set. In other words, removes from this set all of - * its elements that are not contained in the specified set. This - * operation effectively modifies this set so that its value is - * the <i>intersection</i> of the two sets. - * A frozen set will not be modified. - * - * @param set the object on which to perform the retain - * @param retain set that defines which elements this set will retain - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_retainAll(USet* set, const USet* retain); - -/** - * Reallocate this objects internal structures to take up the least - * possible space, without changing this object's value. - * A frozen set will not be modified. - * - * @param set the object on which to perfrom the compact - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_compact(USet* set); - -/** - * Inverts this set. This operation modifies this set so that - * its value is its complement. This operation does not affect - * the multicharacter strings, if any. - * A frozen set will not be modified. - * @param set the set - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_complement(USet* set); - -/** - * Complements in this set all elements contained in the specified - * set. Any character in the other set will be removed if it is - * in this set, or will be added if it is not in this set. - * A frozen set will not be modified. - * - * @param set the set with which to complement - * @param complement set that defines which elements will be xor'ed - * from this set. - * @stable ICU 3.2 - */ -U_STABLE void U_EXPORT2 -uset_complementAll(USet* set, const USet* complement); - -/** - * Removes all of the elements from this set. This set will be - * empty after this call returns. - * A frozen set will not be modified. 
- * @param set the set - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_clear(USet* set); - -/** - * Close this set over the given attribute. For the attribute - * USET_CASE, the result is to modify this set so that: - * - * 1. For each character or string 'a' in this set, all strings or - * characters 'b' such that foldCase(a) == foldCase(b) are added - * to this set. - * - * 2. For each string 'e' in the resulting set, if e != - * foldCase(e), 'e' will be removed. - * - * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] - * - * (Here foldCase(x) refers to the operation u_strFoldCase, and a - * == b denotes that the contents are the same, not pointer - * comparison.) - * - * A frozen set will not be modified. - * - * @param set the set - * - * @param attributes bitmask for attributes to close over. - * Currently only the USET_CASE bit is supported. Any undefined bits - * are ignored. - * @stable ICU 4.2 - */ -U_STABLE void U_EXPORT2 -uset_closeOver(USet* set, int32_t attributes); - -/** - * Remove all strings from this set. - * - * @param set the set - * @stable ICU 4.2 - */ -U_STABLE void U_EXPORT2 -uset_removeAllStrings(USet* set); - -/** - * Returns TRUE if the given USet contains no characters and no - * strings. - * @param set the set - * @return true if set is empty - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_isEmpty(const USet* set); - -/** - * Returns TRUE if the given USet contains the given character. - * This function works faster with a frozen set. - * @param set the set - * @param c The codepoint to check for within the set - * @return true if set contains c - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_contains(const USet* set, UChar32 c); - -/** - * Returns TRUE if the given USet contains all characters c - * where start <= c && c <= end. 
- * @param set the set - * @param start the first character of the range to test, inclusive - * @param end the last character of the range to test, inclusive - * @return TRUE if set contains the range - * @stable ICU 2.2 - */ -U_STABLE UBool U_EXPORT2 -uset_containsRange(const USet* set, UChar32 start, UChar32 end); - -/** - * Returns TRUE if the given USet contains the given string. - * @param set the set - * @param str the string - * @param strLen the length of the string or -1 if null terminated. - * @return true if set contains str - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_containsString(const USet* set, const UChar* str, int32_t strLen); - -/** - * Returns the index of the given character within this set, where - * the set is ordered by ascending code point. If the character - * is not in this set, return -1. The inverse of this method is - * <code>charAt()</code>. - * @param set the set - * @param c the character to obtain the index for - * @return an index from 0..size()-1, or -1 - * @stable ICU 3.2 - */ -U_STABLE int32_t U_EXPORT2 -uset_indexOf(const USet* set, UChar32 c); - -/** - * Returns the character at the given index within this set, where - * the set is ordered by ascending code point. If the index is - * out of range, return (UChar32)-1. The inverse of this method is - * <code>indexOf()</code>. - * @param set the set - * @param charIndex an index from 0..size()-1 to obtain the char for - * @return the character at the given index, or (UChar32)-1. - * @stable ICU 3.2 - */ -U_STABLE UChar32 U_EXPORT2 -uset_charAt(const USet* set, int32_t charIndex); - -/** - * Returns the number of characters and strings contained in the given - * USet. - * @param set the set - * @return a non-negative integer counting the characters and strings - * contained in set - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_size(const USet* set); - -/** - * Returns the number of items in this set. 
An item is either a range - * of characters or a single multicharacter string. - * @param set the set - * @return a non-negative integer counting the character ranges - * and/or strings contained in set - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_getItemCount(const USet* set); - -/** - * Returns an item of this set. An item is either a range of - * characters or a single multicharacter string. - * @param set the set - * @param itemIndex a non-negative integer in the range 0.. - * uset_getItemCount(set)-1 - * @param start pointer to variable to receive first character - * in range, inclusive - * @param end pointer to variable to receive last character in range, - * inclusive - * @param str buffer to receive the string, may be NULL - * @param strCapacity capacity of str, or 0 if str is NULL - * @param ec error code - * @return the length of the string (>= 2), or 0 if the item is a - * range, in which case it is the range *start..*end, or -1 if - * itemIndex is out of range - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_getItem(const USet* set, int32_t itemIndex, - UChar32* start, UChar32* end, - UChar* str, int32_t strCapacity, - UErrorCode* ec); - -/** - * Returns true if set1 contains all the characters and strings - * of set2. It answers the question, 'Is set1 a superset of set2?' - * @param set1 set to be checked for containment - * @param set2 set to be checked for containment - * @return true if the test condition is met - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_containsAll(const USet* set1, const USet* set2); - -/** - * Returns true if this set contains all the characters - * of the given string. This is does not check containment of grapheme - * clusters, like uset_containsString. - * @param set set of characters to be checked for containment - * @param str string containing codepoints to be checked for containment - * @param strLen the length of the string or -1 if null terminated. 
- * @return true if the test condition is met - * @stable ICU 3.4 - */ -U_STABLE UBool U_EXPORT2 -uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen); - -/** - * Returns true if set1 contains none of the characters and strings - * of set2. It answers the question, 'Is set1 a disjoint set of set2?' - * @param set1 set to be checked for containment - * @param set2 set to be checked for containment - * @return true if the test condition is met - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_containsNone(const USet* set1, const USet* set2); - -/** - * Returns true if set1 contains some of the characters and strings - * of set2. It answers the question, 'Does set1 and set2 have an intersection?' - * @param set1 set to be checked for containment - * @param set2 set to be checked for containment - * @return true if the test condition is met - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_containsSome(const USet* set1, const USet* set2); - -/** - * Returns the length of the initial substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Similar to the strspn() C library function. - * Unpaired surrogates are treated according to contains() of their surrogate code points. - * This function works faster with a frozen set and with a non-negative string length argument. 
- * @param set the set - * @param s start of the string - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the length of the initial substring according to the spanCondition; - * 0 if the start of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ -U_STABLE int32_t U_EXPORT2 -uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); - -/** - * Returns the start of the trailing substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Unpaired surrogates are treated according to contains() of their surrogate code points. - * This function works faster with a frozen set and with a non-negative string length argument. - * @param set the set - * @param s start of the string - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the start of the trailing substring according to the spanCondition; - * the string length if the end of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ -U_STABLE int32_t U_EXPORT2 -uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); - -/** - * Returns the length of the initial substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Similar to the strspn() C library function. 
- * Malformed byte sequences are treated according to contains(0xfffd). - * This function works faster with a frozen set and with a non-negative string length argument. - * @param set the set - * @param s start of the string (UTF-8) - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the length of the initial substring according to the spanCondition; - * 0 if the start of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ -U_STABLE int32_t U_EXPORT2 -uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); - -/** - * Returns the start of the trailing substring of the input string which - * consists only of characters and strings that are contained in this set - * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), - * or only of characters and strings that are not contained - * in this set (USET_SPAN_NOT_CONTAINED). - * See USetSpanCondition for details. - * Malformed byte sequences are treated according to contains(0xfffd). - * This function works faster with a frozen set and with a non-negative string length argument. - * @param set the set - * @param s start of the string (UTF-8) - * @param length of the string; can be -1 for NUL-terminated - * @param spanCondition specifies the containment condition - * @return the start of the trailing substring according to the spanCondition; - * the string length if the end of the string does not fit the spanCondition - * @stable ICU 3.8 - * @see USetSpanCondition - */ -U_STABLE int32_t U_EXPORT2 -uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); - -/** - * Returns true if set1 contains all of the characters and strings - * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?' 
- * @param set1 set to be checked for containment - * @param set2 set to be checked for containment - * @return true if the test condition is met - * @stable ICU 3.2 - */ -U_STABLE UBool U_EXPORT2 -uset_equals(const USet* set1, const USet* set2); - -/********************************************************************* - * Serialized set API - *********************************************************************/ - -/** - * Serializes this set into an array of 16-bit integers. Serialization - * (currently) only records the characters in the set; multicharacter - * strings are ignored. - * - * The array - * has following format (each line is one 16-bit integer): - * - * length = (n+2*m) | (m!=0?0x8000:0) - * bmpLength = n; present if m!=0 - * bmp[0] - * bmp[1] - * ... - * bmp[n-1] - * supp-high[0] - * supp-low[0] - * supp-high[1] - * supp-low[1] - * ... - * supp-high[m-1] - * supp-low[m-1] - * - * The array starts with a header. After the header are n bmp - * code points, then m supplementary code points. Either n or m - * or both may be zero. n+2*m is always <= 0x7FFF. - * - * If there are no supplementary characters (if m==0) then the - * header is one 16-bit integer, 'length', with value n. - * - * If there are supplementary characters (if m!=0) then the header - * is two 16-bit integers. The first, 'length', has value - * (n+2*m)|0x8000. The second, 'bmpLength', has value n. - * - * After the header the code points are stored in ascending order. - * Supplementary code points are stored as most significant 16 - * bits followed by least significant 16 bits. - * - * @param set the set - * @param dest pointer to buffer of destCapacity 16-bit integers. - * May be NULL only if destCapacity is zero. - * @param destCapacity size of dest, or zero. Must not be negative. - * @param pErrorCode pointer to the error code. Will be set to - * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to - * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity. 
- * @return the total length of the serialized format, including - * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other - * than U_BUFFER_OVERFLOW_ERROR. - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode); - -/** - * Given a serialized array, fill in the given serialized set object. - * @param fillSet pointer to result - * @param src pointer to start of array - * @param srcLength length of array - * @return true if the given array is valid, otherwise false - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength); - -/** - * Set the USerializedSet to contain the given character (and nothing - * else). - * @param fillSet pointer to result - * @param c The codepoint to set - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c); - -/** - * Returns TRUE if the given USerializedSet contains the given - * character. - * @param set the serialized set - * @param c The codepoint to check for within the set - * @return true if set contains c - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_serializedContains(const USerializedSet* set, UChar32 c); - -/** - * Returns the number of disjoint ranges of characters contained in - * the given serialized set. Ignores any strings contained in the - * set. - * @param set the serialized set - * @return a non-negative integer counting the character ranges - * contained in set - * @stable ICU 2.4 - */ -U_STABLE int32_t U_EXPORT2 -uset_getSerializedRangeCount(const USerializedSet* set); - -/** - * Returns a range of characters contained in the given serialized - * set. - * @param set the serialized set - * @param rangeIndex a non-negative integer in the range 0.. 
- * uset_getSerializedRangeCount(set)-1 - * @param pStart pointer to variable to receive first character - * in range, inclusive - * @param pEnd pointer to variable to receive last character in range, - * inclusive - * @return true if rangeIndex is valid, otherwise false - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, - UChar32* pStart, UChar32* pEnd); - -#endif diff --git a/Source/WTF/icu/unicode/ustring.h b/Source/WTF/icu/unicode/ustring.h deleted file mode 100644 index d2ea31c67..000000000 --- a/Source/WTF/icu/unicode/ustring.h +++ /dev/null @@ -1,1703 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 1998-2012, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* File ustring.h -* -* Modification History: -* -* Date Name Description -* 12/07/98 bertrand Creation. -****************************************************************************** -*/ - -#ifndef USTRING_H -#define USTRING_H - -#include "unicode/utypes.h" -#include "unicode/putil.h" -#include "unicode/uiter.h" - -/** - * \def UBRK_TYPEDEF_UBREAK_ITERATOR - * @internal - */ - -#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR -# define UBRK_TYPEDEF_UBREAK_ITERATOR -/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/ - typedef struct UBreakIterator UBreakIterator; -#endif - -/** - * \file - * \brief C API: Unicode string handling functions - * - * These C API functions provide general Unicode string handling. - * - * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h> - * functions. (For example, they do not check for bad arguments like NULL string pointers.) - * In some cases, only the thread-safe variant of such a function is implemented here - * (see u_strtok_r()). 
- * - * Other functions provide more Unicode-specific functionality like locale-specific - * upper/lower-casing and string comparison in code point order. - * - * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. - * UTF-16 encodes each Unicode code point with either one or two UChar code units. - * (This is the default form of Unicode, and a forward-compatible extension of the original, - * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 - * in 1996.) - * - * Some APIs accept a 32-bit UChar32 value for a single code point. - * - * ICU also handles 16-bit Unicode text with unpaired surrogates. - * Such text is not well-formed UTF-16. - * Code-point-related functions treat unpaired surrogates as surrogate code points, - * i.e., as separate units. - * - * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), - * it is much more efficient even for random access because the code unit values - * for single-unit characters vs. lead units vs. trail units are completely disjoint. - * This means that it is easy to determine character (code point) boundaries from - * random offsets in the string. - * - * Unicode (UTF-16) string processing is optimized for the single-unit case. - * Although it is important to support supplementary characters - * (which use pairs of lead/trail code units called "surrogates"), - * their occurrence is rare. Almost all characters in modern use require only - * a single UChar code unit (i.e., their code point values are <=0xffff). - * - * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html). - * For a discussion of the handling of unpaired surrogates see also - * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18. - */ - -/** - * \defgroup ustring_ustrlen String Length - * \ingroup ustring_strlen - */ -/*@{*/ -/** - * Determine the length of an array of UChar. 
- * - * @param s The array of UChars, NULL (U+0000) terminated. - * @return The number of UChars in <code>chars</code>, minus the terminator. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strlen(const UChar *s); -/*@}*/ - -/** - * Count Unicode code points in the length UChar code units of the string. - * A code point may occupy either one or two UChar code units. - * Counting code points involves reading all code units. - * - * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h). - * - * @param s The input string. - * @param length The number of UChar code units to be checked, or -1 to count all - * code points before the first NUL (U+0000). - * @return The number of code points in the specified code units. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_countChar32(const UChar *s, int32_t length); - -/** - * Check if the string contains more Unicode code points than a certain number. - * This is more efficient than counting all code points in the entire string - * and comparing that number with a threshold. - * This function may not need to scan the string at all if the length is known - * (not -1 for NUL-termination) and falls within a certain range, and - * never needs to count more than 'number+1' code points. - * Logically equivalent to (u_countChar32(s, length)>number). - * A Unicode code point may occupy either one or two UChar code units. - * - * @param s The input string. - * @param length The length of the string, or -1 if it is NUL-terminated. - * @param number The number of code points in the string is compared against - * the 'number' parameter. - * @return Boolean value for whether the string contains more Unicode code points - * than 'number'. Same as (u_countChar32(s, length)>number). - * @stable ICU 2.4 - */ -U_STABLE UBool U_EXPORT2 -u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number); - -/** - * Concatenate two ustrings. 
Appends a copy of <code>src</code>, - * including the null terminator, to <code>dst</code>. The initial copied - * character from <code>src</code> overwrites the null terminator in <code>dst</code>. - * - * @param dst The destination string. - * @param src The source string. - * @return A pointer to <code>dst</code>. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strcat(UChar *dst, - const UChar *src); - -/** - * Concatenate two ustrings. - * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>. - * Adds a terminating NUL. - * If src is too long, then only <code>n-1</code> characters will be copied - * before the terminating NUL. - * If <code>n<=0</code> then dst is not modified. - * - * @param dst The destination string. - * @param src The source string (can be NULL/invalid if n<=0). - * @param n The maximum number of characters to append; no-op if <=0. - * @return A pointer to <code>dst</code>. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strncat(UChar *dst, - const UChar *src, - int32_t n); - -/** - * Find the first occurrence of a substring in a string. - * The substring is found at code point boundaries. - * That means that if the substring begins with - * a trail surrogate or ends with a lead surrogate, - * then it is found only if these surrogates stand alone in the text. - * Otherwise, the substring edge units would be matched against - * halves of surrogate pairs. - * - * @param s The string to search (NUL-terminated). - * @param substring The substring to find (NUL-terminated). - * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, - * or <code>s</code> itself if the <code>substring</code> is empty, - * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. 
- * @stable ICU 2.0 - * - * @see u_strrstr - * @see u_strFindFirst - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strstr(const UChar *s, const UChar *substring); - -/** - * Find the first occurrence of a substring in a string. - * The substring is found at code point boundaries. - * That means that if the substring begins with - * a trail surrogate or ends with a lead surrogate, - * then it is found only if these surrogates stand alone in the text. - * Otherwise, the substring edge units would be matched against - * halves of surrogate pairs. - * - * @param s The string to search. - * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. - * @param substring The substring to find (NUL-terminated). - * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. - * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, - * or <code>s</code> itself if the <code>substring</code> is empty, - * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. - * @stable ICU 2.4 - * - * @see u_strstr - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); - -/** - * Find the first occurrence of a BMP code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (NUL-terminated). - * @param c The BMP code point to find. - * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> - * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. - * @stable ICU 2.0 - * - * @see u_strchr32 - * @see u_memchr - * @see u_strstr - * @see u_strFindFirst - */ -U_STABLE UChar * U_EXPORT2 -u_strchr(const UChar *s, UChar c); - -/** - * Find the first occurrence of a code point in a string. 
- * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (NUL-terminated). - * @param c The code point to find. - * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> - * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. - * @stable ICU 2.0 - * - * @see u_strchr - * @see u_memchr32 - * @see u_strstr - * @see u_strFindFirst - */ -U_STABLE UChar * U_EXPORT2 -u_strchr32(const UChar *s, UChar32 c); - -/** - * Find the last occurrence of a substring in a string. - * The substring is found at code point boundaries. - * That means that if the substring begins with - * a trail surrogate or ends with a lead surrogate, - * then it is found only if these surrogates stand alone in the text. - * Otherwise, the substring edge units would be matched against - * halves of surrogate pairs. - * - * @param s The string to search (NUL-terminated). - * @param substring The substring to find (NUL-terminated). - * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, - * or <code>s</code> itself if the <code>substring</code> is empty, - * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. - * @stable ICU 2.4 - * - * @see u_strstr - * @see u_strFindFirst - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strrstr(const UChar *s, const UChar *substring); - -/** - * Find the last occurrence of a substring in a string. - * The substring is found at code point boundaries. - * That means that if the substring begins with - * a trail surrogate or ends with a lead surrogate, - * then it is found only if these surrogates stand alone in the text. - * Otherwise, the substring edge units would be matched against - * halves of surrogate pairs. - * - * @param s The string to search. 
- * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. - * @param substring The substring to find (NUL-terminated). - * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. - * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, - * or <code>s</code> itself if the <code>substring</code> is empty, - * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. - * @stable ICU 2.4 - * - * @see u_strstr - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); - -/** - * Find the last occurrence of a BMP code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (NUL-terminated). - * @param c The BMP code point to find. - * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> - * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. - * @stable ICU 2.4 - * - * @see u_strrchr32 - * @see u_memrchr - * @see u_strrstr - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strrchr(const UChar *s, UChar c); - -/** - * Find the last occurrence of a code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (NUL-terminated). - * @param c The code point to find. - * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> - * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 
- * @stable ICU 2.4 - * - * @see u_strrchr - * @see u_memchr32 - * @see u_strrstr - * @see u_strFindLast - */ -U_STABLE UChar * U_EXPORT2 -u_strrchr32(const UChar *s, UChar32 c); - -/** - * Locates the first occurrence in the string <code>string</code> of any of the characters - * in the string <code>matchSet</code>. - * Works just like C's strpbrk but with Unicode. - * - * @param string The string in which to search, NUL-terminated. - * @param matchSet A NUL-terminated string defining a set of code points - * for which to search in the text string. - * @return A pointer to the character in <code>string</code> that matches one of the - * characters in <code>matchSet</code>, or NULL if no such character is found. - * @stable ICU 2.0 - */ -U_STABLE UChar * U_EXPORT2 -u_strpbrk(const UChar *string, const UChar *matchSet); - -/** - * Returns the number of consecutive characters in <code>string</code>, - * beginning with the first, that do not occur somewhere in <code>matchSet</code>. - * Works just like C's strcspn but with Unicode. - * - * @param string The string in which to search, NUL-terminated. - * @param matchSet A NUL-terminated string defining a set of code points - * for which to search in the text string. - * @return The number of initial characters in <code>string</code> that do not - * occur in <code>matchSet</code>. - * @see u_strspn - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strcspn(const UChar *string, const UChar *matchSet); - -/** - * Returns the number of consecutive characters in <code>string</code>, - * beginning with the first, that occur somewhere in <code>matchSet</code>. - * Works just like C's strspn but with Unicode. - * - * @param string The string in which to search, NUL-terminated. - * @param matchSet A NUL-terminated string defining a set of code points - * for which to search in the text string. - * @return The number of initial characters in <code>string</code> that do - * occur in <code>matchSet</code>. 
- * @see u_strcspn - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strspn(const UChar *string, const UChar *matchSet); - -/** - * The string tokenizer API allows an application to break a string into - * tokens. Unlike strtok(), the saveState (the current pointer within the - * original string) is maintained in saveState. In the first call, the - * argument src is a pointer to the string. In subsequent calls to - * return successive tokens of that string, src must be specified as - * NULL. The value saveState is set by this function to maintain the - * function's position within the string, and on each subsequent call - * you must give this argument the same variable. This function does - * handle surrogate pairs. This function is similar to the strtok_r() - * the POSIX Threads Extension (1003.1c-1995) version. - * - * @param src String containing token(s). This string will be modified. - * After the first call to u_strtok_r(), this argument must - * be NULL to get to the next token. - * @param delim Set of delimiter characters (Unicode code points). - * @param saveState The current pointer within the original string, - * which is set by this function. The saveState - * parameter should the address of a local variable of type - * UChar *. (i.e. defined "Uhar *myLocalSaveState" and use - * &myLocalSaveState for this parameter). - * @return A pointer to the next token found in src, or NULL - * when there are no more tokens. - * @stable ICU 2.0 - */ -U_STABLE UChar * U_EXPORT2 -u_strtok_r(UChar *src, - const UChar *delim, - UChar **saveState); - -/** - * Compare two Unicode strings for bitwise equality (code unit order). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative - * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive - * value if <code>s1</code> is bitwise greater than <code>s2</code>. 
- * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strcmp(const UChar *s1, - const UChar *s2); - -/** - * Compare two Unicode strings in code point order. - * See u_strCompare for details. - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @return a negative/zero/positive integer corresponding to whether - * the first string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strcmpCodePointOrder(const UChar *s1, const UChar *s2); - -/** - * Compare two Unicode strings (binary order). - * - * The comparison can be done in code unit order or in code point order. - * They differ only in UTF-16 when - * comparing supplementary code points (U+10000..U+10ffff) - * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). - * In code unit order, high BMP code points sort after supplementary code points - * because they are stored as pairs of surrogates which are at U+d800..U+dfff. - * - * This functions works with strings of different explicitly specified lengths - * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. - * NUL-terminated strings are possible with length arguments of -1. - * - * @param s1 First source string. - * @param length1 Length of first source string, or -1 if NUL-terminated. - * - * @param s2 Second source string. - * @param length2 Length of second source string, or -1 if NUL-terminated. - * - * @param codePointOrder Choose between code unit order (FALSE) - * and code point order (TRUE). - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_strCompare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - UBool codePointOrder); - -/** - * Compare two Unicode strings (binary order) - * as presented by UCharIterator objects. - * Works otherwise just like u_strCompare(). - * - * Both iterators are reset to their start positions. 
- * When the function returns, it is undefined where the iterators - * have stopped. - * - * @param iter1 First source string iterator. - * @param iter2 Second source string iterator. - * @param codePointOrder Choose between code unit order (FALSE) - * and code point order (TRUE). - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @see u_strCompare - * - * @stable ICU 2.6 - */ -U_STABLE int32_t U_EXPORT2 -u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); - -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also unistr.h and unorm.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - -/** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to - * u_strCompare(u_strFoldCase(s1, options), - * u_strFoldCase(s2, options), - * (options&U_COMPARE_CODE_POINT_ORDER)!=0). - * - * The comparison can be done in UTF-16 code unit order or in code point order. - * They differ only when comparing supplementary code points (U+10000..U+10ffff) - * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). - * In code unit order, high BMP code points sort after supplementary code points - * because they are stored as pairs of surrogates which are at U+d800..U+dfff. - * - * This functions works with strings of different explicitly specified lengths - * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. - * NUL-terminated strings are possible with length arguments of -1. - * - * @param s1 First source string. - * @param length1 Length of first source string, or -1 if NUL-terminated. - * - * @param s2 Second source string. - * @param length2 Length of second source string, or -1 if NUL-terminated. 
- * - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @stable ICU 2.2 - */ -U_STABLE int32_t U_EXPORT2 -u_strCaseCompare(const UChar *s1, int32_t length1, - const UChar *s2, int32_t length2, - uint32_t options, - UErrorCode *pErrorCode); - -/** - * Compare two ustrings for bitwise equality. - * Compares at most <code>n</code> characters. - * - * @param ucs1 A string to compare (can be NULL/invalid if n<=0). - * @param ucs2 A string to compare (can be NULL/invalid if n<=0). - * @param n The maximum number of characters to compare; always returns 0 if n<=0. - * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative - * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive - * value if <code>s1</code> is bitwise greater than <code>s2</code>. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strncmp(const UChar *ucs1, - const UChar *ucs2, - int32_t n); - -/** - * Compare two Unicode strings in code point order. - * This is different in UTF-16 from u_strncmp() if supplementary characters are present. - * For details, see u_strCompare(). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param n The maximum number of characters to compare. 
- * @return a negative/zero/positive integer corresponding to whether - * the first string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n); - -/** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options); - -/** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), - * u_strFoldCase(s2, at most n, options)). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param n The maximum number of characters each string to case-fold and then compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. 
- * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options); - -/** - * Compare two strings case-insensitively using full case folding. - * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options), - * u_strFoldCase(s2, n, options)). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param length The number of characters in each string to case-fold and then compare. - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code unit order with default case folding. - * - * - U_COMPARE_CODE_POINT_ORDER - * Set to choose code point order instead of code unit order - * (see u_strCompare for details). - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @return A negative, zero, or positive integer indicating the comparison result. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options); - -/** - * Copy a ustring. Adds a null terminator. - * - * @param dst The destination string. - * @param src The source string. - * @return A pointer to <code>dst</code>. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strcpy(UChar *dst, - const UChar *src); - -/** - * Copy a ustring. - * Copies at most <code>n</code> characters. The result will be null terminated - * if the length of <code>src</code> is less than <code>n</code>. - * - * @param dst The destination string. - * @param src The source string (can be NULL/invalid if n<=0). - * @param n The maximum number of characters to copy; no-op if <=0. - * @return A pointer to <code>dst</code>. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strncpy(UChar *dst, - const UChar *src, - int32_t n); - -#if !UCONFIG_NO_CONVERSION - -/** - * Copy a byte string encoded in the default codepage to a ustring. - * Adds a null terminator. 
- * Performs a host byte to UChar conversion - * - * @param dst The destination string. - * @param src The source string. - * @return A pointer to <code>dst</code>. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst, - const char *src ); - -/** - * Copy a byte string encoded in the default codepage to a ustring. - * Copies at most <code>n</code> characters. The result will be null terminated - * if the length of <code>src</code> is less than <code>n</code>. - * Performs a host byte to UChar conversion - * - * @param dst The destination string. - * @param src The source string. - * @param n The maximum number of characters to copy. - * @return A pointer to <code>dst</code>. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst, - const char *src, - int32_t n); - -/** - * Copy ustring to a byte string encoded in the default codepage. - * Adds a null terminator. - * Performs a UChar to host byte conversion - * - * @param dst The destination string. - * @param src The source string. - * @return A pointer to <code>dst</code>. - * @stable ICU 2.0 - */ -U_STABLE char* U_EXPORT2 u_austrcpy(char *dst, - const UChar *src ); - -/** - * Copy ustring to a byte string encoded in the default codepage. - * Copies at most <code>n</code> characters. The result will be null terminated - * if the length of <code>src</code> is less than <code>n</code>. - * Performs a UChar to host byte conversion - * - * @param dst The destination string. - * @param src The source string. - * @param n The maximum number of characters to copy. - * @return A pointer to <code>dst</code>. - * @stable ICU 2.0 - */ -U_STABLE char* U_EXPORT2 u_austrncpy(char *dst, - const UChar *src, - int32_t n ); - -#endif - -/** - * Synonym for memcpy(), but with UChars only. 
- * @param dest The destination string - * @param src The source string (can be NULL/invalid if count<=0) - * @param count The number of characters to copy; no-op if <=0 - * @return A pointer to <code>dest</code> - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_memcpy(UChar *dest, const UChar *src, int32_t count); - -/** - * Synonym for memmove(), but with UChars only. - * @param dest The destination string - * @param src The source string (can be NULL/invalid if count<=0) - * @param count The number of characters to move; no-op if <=0 - * @return A pointer to <code>dest</code> - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_memmove(UChar *dest, const UChar *src, int32_t count); - -/** - * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>. - * - * @param dest The destination string. - * @param c The character to initialize the string. - * @param count The maximum number of characters to set. - * @return A pointer to <code>dest</code>. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_memset(UChar *dest, UChar c, int32_t count); - -/** - * Compare the first <code>count</code> UChars of each buffer. - * - * @param buf1 The first string to compare. - * @param buf2 The second string to compare. - * @param count The maximum number of UChars to compare. - * @return When buf1 < buf2, a negative number is returned. - * When buf1 == buf2, 0 is returned. - * When buf1 > buf2, a positive number is returned. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count); - -/** - * Compare two Unicode strings in code point order. - * This is different in UTF-16 from u_memcmp() if supplementary characters are present. - * For details, see u_strCompare(). - * - * @param s1 A string to compare. - * @param s2 A string to compare. - * @param count The maximum number of characters to compare. 
- * @return a negative/zero/positive integer corresponding to whether - * the first string is less than/equal to/greater than the second one - * in code point order - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count); - -/** - * Find the first occurrence of a BMP code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (contains <code>count</code> UChars). - * @param c The BMP code point to find. - * @param count The length of the string. - * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> - * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. - * @stable ICU 2.0 - * - * @see u_strchr - * @see u_memchr32 - * @see u_strFindFirst - */ -U_STABLE UChar* U_EXPORT2 -u_memchr(const UChar *s, UChar c, int32_t count); - -/** - * Find the first occurrence of a code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (contains <code>count</code> UChars). - * @param c The code point to find. - * @param count The length of the string. - * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> - * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. - * @stable ICU 2.0 - * - * @see u_strchr32 - * @see u_memchr - * @see u_strFindFirst - */ -U_STABLE UChar* U_EXPORT2 -u_memchr32(const UChar *s, UChar32 c, int32_t count); - -/** - * Find the last occurrence of a BMP code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. 
- * - * @param s The string to search (contains <code>count</code> UChars). - * @param c The BMP code point to find. - * @param count The length of the string. - * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> - * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. - * @stable ICU 2.4 - * - * @see u_strrchr - * @see u_memrchr32 - * @see u_strFindLast - */ -U_STABLE UChar* U_EXPORT2 -u_memrchr(const UChar *s, UChar c, int32_t count); - -/** - * Find the last occurrence of a code point in a string. - * A surrogate code point is found only if its match in the text is not - * part of a surrogate pair. - * A NUL character is found at the string terminator. - * - * @param s The string to search (contains <code>count</code> UChars). - * @param c The code point to find. - * @param count The length of the string. - * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> - * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. - * @stable ICU 2.4 - * - * @see u_strrchr32 - * @see u_memrchr - * @see u_strFindLast - */ -U_STABLE UChar* U_EXPORT2 -u_memrchr32(const UChar *s, UChar32 c, int32_t count); - -/** - * Unicode String literals in C. - * We need one macro to declare a variable for the string - * and to statically preinitialize it if possible, - * and a second macro to dynamically intialize such a string variable if necessary. - * - * The macros are defined for maximum performance. - * They work only for strings that contain "invariant characters", i.e., - * only latin letters, digits, and some punctuation. - * See utypes.h for details. - * - * A pair of macros for a single string must be used with the same - * parameters. - * The string parameter must be a C string literal. - * The length of the string, not including the terminating - * <code>NUL</code>, must be specified as a constant. 
- * The U_STRING_DECL macro should be invoked exactly once for one - * such string variable before it is used. - * - * Usage: - * <pre> - * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11); - * U_STRING_DECL(ustringVar2, "jumps 5%", 8); - * static UBool didInit=FALSE; - * - * int32_t function() { - * if(!didInit) { - * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11); - * U_STRING_INIT(ustringVar2, "jumps 5%", 8); - * didInit=TRUE; - * } - * return u_strcmp(ustringVar1, ustringVar2); - * } - * </pre> - * - * Note that the macros will NOT consistently work if their argument is another <code>#define</code>. - * The following will not work on all platforms, don't use it. - * - * <pre> - * #define GLUCK "Mr. Gluck" - * U_STRING_DECL(var, GLUCK, 9) - * U_STRING_INIT(var, GLUCK, 9) - * </pre> - * - * Instead, use the string literal "Mr. Gluck" as the argument to both macro - * calls. - * - * - * @stable ICU 2.0 - */ -#if defined(U_DECLARE_UTF16) -# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs) - /**@stable ICU 2.0 */ -# define U_STRING_INIT(var, cs, length) -#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) -# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs - /**@stable ICU 2.0 */ -# define U_STRING_INIT(var, cs, length) -#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY -# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs - /**@stable ICU 2.0 */ -# define U_STRING_INIT(var, cs, length) -#else -# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1] - /**@stable ICU 2.0 */ -# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1) -#endif - -/** - * Unescape a string of characters and write the resulting - * Unicode characters to the destination buffer. 
The following escape - * sequences are recognized: - * - * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] - * \\Uhhhhhhhh 8 hex digits - * \\xhh 1-2 hex digits - * \\x{h...} 1-8 hex digits - * \\ooo 1-3 octal digits; o in [0-7] - * \\cX control-X; X is masked with 0x1F - * - * as well as the standard ANSI C escapes: - * - * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, - * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, - * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C - * - * Anything else following a backslash is generically escaped. For - * example, "[a\\-z]" returns "[a-z]". - * - * If an escape sequence is ill-formed, this method returns an empty - * string. An example of an ill-formed sequence is "\\u" followed by - * fewer than 4 hex digits. - * - * The above characters are recognized in the compiler's codepage, - * that is, they are coded as 'u', '\\', etc. Characters that are - * not parts of escape sequences are converted using u_charsToUChars(). - * - * This function is similar to UnicodeString::unescape() but not - * identical to it. The latter takes a source UnicodeString, so it - * does escape recognition but no conversion. - * - * @param src a zero-terminated string of invariant characters - * @param dest pointer to buffer to receive converted and unescaped - * text and, if there is room, a zero terminator. May be NULL for - * preflighting, in which case no UChars will be written, but the - * return value will still be valid. On error, an empty string is - * stored here (if possible). - * @param destCapacity the number of UChars that may be written at - * dest. Ignored if dest == NULL. - * @return the length of unescaped string. 
- * @see u_unescapeAt - * @see UnicodeString#unescape() - * @see UnicodeString#unescapeAt() - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_unescape(const char *src, - UChar *dest, int32_t destCapacity); - -U_CDECL_BEGIN -/** - * Callback function for u_unescapeAt() that returns a character of - * the source text given an offset and a context pointer. The context - * pointer will be whatever is passed into u_unescapeAt(). - * - * @param offset pointer to the offset that will be passed to u_unescapeAt(). - * @param context an opaque pointer passed directly into u_unescapeAt() - * @return the character represented by the escape sequence at - * offset - * @see u_unescapeAt - * @stable ICU 2.0 - */ -typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context); -U_CDECL_END - -/** - * Unescape a single sequence. The character at offset-1 is assumed - * (without checking) to be a backslash. This method takes a callback - * pointer to a function that returns the UChar at a given offset. By - * varying this callback, ICU functions are able to unescape char* - * strings, UnicodeString objects, and UFILE pointers. - * - * If offset is out of range, or if the escape sequence is ill-formed, - * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape() - * for a list of recognized sequences. - * - * @param charAt callback function that returns a UChar of the source - * text given an offset and a context pointer. - * @param offset pointer to the offset that will be passed to charAt. - * The offset value will be updated upon return to point after the - * last parsed character of the escape sequence. On error the offset - * is unchanged. - * @param length the number of characters in the source text. The - * last character of the source text is considered to be at offset - * length-1. - * @param context an opaque pointer passed directly into charAt. 
- * @return the character represented by the escape sequence at - * offset, or (UChar32)0xFFFFFFFF on error. - * @see u_unescape() - * @see UnicodeString#unescape() - * @see UnicodeString#unescapeAt() - * @stable ICU 2.0 - */ -U_STABLE UChar32 U_EXPORT2 -u_unescapeAt(UNESCAPE_CHAR_AT charAt, - int32_t *offset, - int32_t length, - void *context); - -/** - * Uppercase the characters in a string. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer are allowed to overlap. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string. It may be greater than destCapacity. In that case, - * only some of the result was written to the destination buffer. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strToUpper(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode); - -/** - * Lowercase the characters in a string. - * Casing is locale-dependent and context-sensitive. - * The result may be longer or shorter than the original. - * The source string and the destination buffer are allowed to overlap. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. 
- * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string. It may be greater than destCapacity. In that case, - * only some of the result was written to the destination buffer. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strToLower(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - const char *locale, - UErrorCode *pErrorCode); - -#if !UCONFIG_NO_BREAK_ITERATION - -/** - * Titlecase a string. - * Casing is locale-dependent and context-sensitive. - * Titlecasing uses a break iterator to find the first characters of words - * that are to be titlecased. It titlecases those characters and lowercases - * all others. - * - * The titlecase break iterator can be provided to customize for arbitrary - * styles, using rules and dictionaries beyond the standard iterators. - * It may be more efficient to always provide an iterator to avoid - * opening and closing one for each string. - * The standard titlecase iterator for the root locale implements the - * algorithm of Unicode TR 21. - * - * This function uses only the setText(), first() and next() methods of the - * provided break iterator. - * - * The result may be longer or shorter than the original. - * The source string and the destination buffer are allowed to overlap. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. 
- * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param titleIter A break iterator to find the first characters of words - * that are to be titlecased. - * If none is provided (NULL), then a standard titlecase - * break iterator is opened. - * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string. It may be greater than destCapacity. In that case, - * only some of the result was written to the destination buffer. - * @stable ICU 2.1 - */ -U_STABLE int32_t U_EXPORT2 -u_strToTitle(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UBreakIterator *titleIter, - const char *locale, - UErrorCode *pErrorCode); - -#endif - -/** - * Case-folds the characters in a string. - * - * Case-folding is locale-independent and not context-sensitive, - * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'T' in CaseFolding.txt. - * - * The result may be longer or shorter than the original. - * The source string and the destination buffer are allowed to overlap. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the result - * without writing any of the result string. - * @param src The original string - * @param srcLength The length of the original string. 
If -1, then src must be zero-terminated. - * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The length of the result string. It may be greater than destCapacity. In that case, - * only some of the result was written to the destination buffer. - * @stable ICU 2.0 - */ -U_STABLE int32_t U_EXPORT2 -u_strFoldCase(UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - uint32_t options, - UErrorCode *pErrorCode); - -#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION -/** - * Convert a UTF-16 string to a wchar_t string. - * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then - * this function simply calls the fast, dedicated function for that. - * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. 
- * @stable ICU 2.0 - */ -U_STABLE wchar_t* U_EXPORT2 -u_strToWCS(wchar_t *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode); -/** - * Convert a wchar_t string to UTF-16. - * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then - * this function simply calls the fast, dedicated function for that. - * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromWCS(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const wchar_t *src, - int32_t srcLength, - UErrorCode *pErrorCode); -#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */ - -/** - * Convert a UTF-16 string to UTF-8. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. 
- * @param destCapacity The size of the buffer (number of chars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. - * @stable ICU 2.0 - * @see u_strToUTF8WithSub - * @see u_strFromUTF8 - */ -U_STABLE char* U_EXPORT2 -u_strToUTF8(char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-8 string to UTF-16. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 
- * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. - * @stable ICU 2.0 - * @see u_strFromUTF8WithSub - * @see u_strFromUTF8Lenient - */ -U_STABLE UChar* U_EXPORT2 -u_strFromUTF8(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-16 string to UTF-8. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * Same as u_strToUTF8() except for the additional subchar which is output for - * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. - * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8(). - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of chars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". 
- * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @see u_strToUTF8 - * @see u_strFromUTF8WithSub - * @stable ICU 3.6 - */ -U_STABLE char* U_EXPORT2 -u_strToUTF8WithSub(char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-8 string to UTF-16. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * Same as u_strFromUTF8() except for the additional subchar which is output for - * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. - * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8(). - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 
- * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". - * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @see u_strFromUTF8 - * @see u_strFromUTF8Lenient - * @see u_strToUTF8WithSub - * @stable ICU 3.6 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromUTF8WithSub(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-8 string to UTF-16. - * - * Same as u_strFromUTF8() except that this function is designed to be very fast, - * which it achieves by being lenient about malformed UTF-8 sequences. - * This function is intended for use in environments where UTF-8 text is - * expected to be well-formed. - * - * Its semantics are: - * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text. - * - The function will not read beyond the input string, nor write beyond - * the destCapacity. - * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not - * be well-formed UTF-16. - * The function will resynchronize to valid code point boundaries - * within a small number of code points after an illegal sequence. 
- * - Non-shortest forms are not detected and will result in "spoofing" output. - * - * For further performance improvement, if srcLength is given (>=0), - * then it must be destCapacity>=srcLength. - * - * There is no inverse u_strToUTF8Lenient() function because there is practically - * no performance gain from not checking that a UTF-16 string is well-formed. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * Unlike for other ICU functions, if srcLength>=0 then it - * must be destCapacity>=srcLength. - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * Unlike for other ICU functions, if srcLength>=0 but - * destCapacity<srcLength, then *pDestLength will be set to srcLength - * (and U_BUFFER_OVERFLOW_ERROR will be set) - * regardless of the actual result length. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. 
- * @see u_strFromUTF8 - * @see u_strFromUTF8WithSub - * @see u_strToUTF8WithSub - * @stable ICU 3.6 - */ -U_STABLE UChar * U_EXPORT2 -u_strFromUTF8Lenient(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-16 string to UTF-32. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. - * @see u_strToUTF32WithSub - * @see u_strFromUTF32 - * @stable ICU 2.0 - */ -U_STABLE UChar32* U_EXPORT2 -u_strToUTF32(UChar32 *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-32 string to UTF-16. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). 
If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * @return The pointer to destination buffer. - * @see u_strFromUTF32WithSub - * @see u_strToUTF32 - * @stable ICU 2.0 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromUTF32(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar32 *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-16 string to UTF-32. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * Same as u_strToUTF32() except for the additional subchar which is output for - * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. - * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32(). - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. 
If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". - * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @see u_strToUTF32 - * @see u_strFromUTF32WithSub - * @stable ICU 4.2 - */ -U_STABLE UChar32* U_EXPORT2 -u_strToUTF32WithSub(UChar32 *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -/** - * Convert a UTF-32 string to UTF-16. - * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * Same as u_strFromUTF32() except for the additional subchar which is output for - * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. - * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32(). - * - * @param dest A buffer for the result string. 
The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". - * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @see u_strFromUTF32 - * @see u_strToUTF32WithSub - * @stable ICU 4.2 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromUTF32WithSub(UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar32 *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -/** - * Convert a 16-bit Unicode string to Java Modified UTF-8. 
- * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8 - * - * This function behaves according to the documentation for Java DataOutput.writeUTF() - * except that it does not encode the output length in the destination buffer - * and does not have an output length restriction. - * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String) - * - * The input string need not be well-formed UTF-16. - * (Therefore there is no subchar parameter.) - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of chars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. (See User Guide for details.) - * @return The pointer to destination buffer. - * @stable ICU 4.4 - * @see u_strToUTF8WithSub - * @see u_strFromJavaModifiedUTF8WithSub - */ -U_STABLE char* U_EXPORT2 -u_strToJavaModifiedUTF8( - char *dest, - int32_t destCapacity, - int32_t *pDestLength, - const UChar *src, - int32_t srcLength, - UErrorCode *pErrorCode); - -/** - * Convert a Java Modified UTF-8 string to a 16-bit Unicode string. 
- * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. - * - * This function behaves according to the documentation for Java DataInput.readUTF() - * except that it takes a length parameter rather than - * interpreting the first two input bytes as the length. - * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF() - * - * The output string may not be well-formed UTF-16. - * - * @param dest A buffer for the result string. The result will be zero-terminated if - * the buffer is large enough. - * @param destCapacity The size of the buffer (number of UChars). If it is 0, then - * dest may be NULL and the function will only return the length of the - * result without writing any of the result string (pre-flighting). - * @param pDestLength A pointer to receive the number of units written to the destination. If - * pDestLength!=NULL then *pDestLength is always set to the - * number of output units corresponding to the transformation of - * all the input units, even in case of a buffer overflow. - * @param src The original source string - * @param srcLength The length of the original string. If -1, then src must be zero-terminated. - * @param subchar The substitution character to use in place of an illegal input sequence, - * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. - * A substitution character can be any valid Unicode code point (up to U+10FFFF) - * except for surrogate code points (U+D800..U+DFFF). - * The recommended value is U+FFFD "REPLACEMENT CHARACTER". - * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. - * Set to 0 if no substitutions occur or subchar<0. - * pNumSubstitutions can be NULL. - * @param pErrorCode Pointer to a standard ICU error code. Its input value must - * pass the U_SUCCESS() test, or else the function returns - * immediately. Check for U_FAILURE() on output or use with - * function chaining. 
(See User Guide for details.) - * @return The pointer to destination buffer. - * @see u_strFromUTF8WithSub - * @see u_strFromUTF8Lenient - * @see u_strToJavaModifiedUTF8 - * @stable ICU 4.4 - */ -U_STABLE UChar* U_EXPORT2 -u_strFromJavaModifiedUTF8WithSub( - UChar *dest, - int32_t destCapacity, - int32_t *pDestLength, - const char *src, - int32_t srcLength, - UChar32 subchar, int32_t *pNumSubstitutions, - UErrorCode *pErrorCode); - -#endif diff --git a/Source/WTF/icu/unicode/utf.h b/Source/WTF/icu/unicode/utf.h deleted file mode 100644 index f5954fe9f..000000000 --- a/Source/WTF/icu/unicode/utf.h +++ /dev/null @@ -1,223 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: utf.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999sep09 -* created by: Markus W. Scherer -*/ - -/** - * \file - * \brief C API: Code point macros - * - * This file defines macros for checking whether a code point is - * a surrogate or a non-character etc. - * - * The UChar and UChar32 data types for Unicode code units and code points - * are defined in umachine.h because they can be machine-dependent. - * - * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h - * and itself includes utf8.h and utf16.h after some - * common definitions. - * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be - * included explicitly if their definitions are used. - * - * utf8.h and utf16.h define macros for efficiently getting code points - * in and out of UTF-8/16 strings. - * utf16.h macros have "U16_" prefixes. - * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling. - * - * ICU mostly processes 16-bit Unicode strings. 
- * Most of the time, such strings are well-formed UTF-16. - * Single, unpaired surrogates must be handled as well, and are treated in ICU - * like regular code points where possible. - * (Pairs of surrogate code points are indistinguishable from supplementary - * code points encoded as pairs of supplementary code units.) - * - * In fact, almost all Unicode code points in normal text (>99%) - * are on the BMP (<=U+ffff) and even <=U+d7ff. - * ICU functions handle supplementary code points (U+10000..U+10ffff) - * but are optimized for the much more frequently occurring BMP code points. - * - * umachine.h defines UChar to be an unsigned 16-bit integer. - * Where available, UChar is defined to be a char16_t - * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t. - * - * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit - * Unicode code point (Unicode scalar value, 0..0x10ffff). - * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as - * the definition of UChar. For details see the documentation for UChar32 itself. - * - * utf.h defines a small number of C macros for single Unicode code points. - * These are simple checks for surrogates and non-characters. - * For actual Unicode character properties see uchar.h. - * - * By default, string operations must be done with error checking in case - * a string is not well-formed UTF-16. - * The macros will detect if a surrogate code unit is unpaired - * (lead unit without trail unit or vice versa) and just return the unit itself - * as the code point. - * - * The regular "safe" macros require that the initial, passed-in string index - * is within bounds. They only check the index when they read more than one - * code unit. 
This is usually done with code similar to the following loop: - * <pre>while(i<length) { - * U16_NEXT(s, i, length, c); - * // use c - * }</pre> - * - * When it is safe to assume that text is well-formed UTF-16 - * (does not contain single, unpaired surrogates), then one can use - * U16_..._UNSAFE macros. - * These do not check for proper code unit sequences or truncated text and may - * yield wrong results or even cause a crash if they are used with "malformed" - * text. - * In practice, U16_..._UNSAFE macros will produce slightly less code but - * should not be faster because the processing is only different when a - * surrogate code unit is detected, which will be rare. - * - * Similarly for UTF-8, there are "safe" macros without a suffix, - * and U8_..._UNSAFE versions. - * The performance differences are much larger here because UTF-8 provides so - * many opportunities for malformed sequences. - * The unsafe UTF-8 macros are entirely implemented inside the macro definitions - * and are fast, while the safe UTF-8 macros call functions for all but the - * trivial (ASCII) cases. - * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common - * characters inline as well.) - * - * Unlike with UTF-16, malformed sequences cannot be expressed with distinct - * code point values (0..U+10ffff). They are indicated with negative values instead. - * - * For more information see the ICU User Guide Strings chapter - * (http://userguide.icu-project.org/strings). - * - * <em>Usage:</em> - * ICU coding guidelines for if() statements should be followed when using these macros. - * Compound statements (curly braces {}) must be used for if-else-while... - * bodies and all macro statements should be terminated with semicolon. 
- * - * @stable ICU 2.4 - */ - -#ifndef __UTF_H__ -#define __UTF_H__ - -#include "unicode/umachine.h" -/* include the utfXX.h after the following definitions */ - -/* single-code point definitions -------------------------------------------- */ - -/** - * Is this code point a Unicode noncharacter? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U_IS_UNICODE_NONCHAR(c) \ - ((c)>=0xfdd0 && \ - ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ - (uint32_t)(c)<=0x10ffff) - -/** - * Is c a Unicode code point value (0..U+10ffff) - * that can be assigned a character? - * - * Code points that are not characters include: - * - single surrogate code points (U+d800..U+dfff, 2048 code points) - * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) - * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) - * - the highest Unicode code point value is U+10ffff - * - * This means that all code points below U+d800 are character code points, - * and that boundary is tested first for performance. - * - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U_IS_UNICODE_CHAR(c) \ - ((uint32_t)(c)<0xd800 || \ - ((uint32_t)(c)>0xdfff && \ - (uint32_t)(c)<=0x10ffff && \ - !U_IS_UNICODE_NONCHAR(c))) - -/** - * Is this code point a BMP code point (U+0000..U+ffff)? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.8 - */ -#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff) - -/** - * Is this code point a supplementary code point (U+10000..U+10ffff)? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.8 - */ -#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff) - -/** - * Is this code point a lead surrogate (U+d800..U+dbff)? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) - -/** - * Is this code point a trail surrogate (U+dc00..U+dfff)? 
- * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) - -/** - * Is this code point a surrogate (U+d800..U+dfff)? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) - -/** - * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), - * is it a lead surrogate? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) - -/** - * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), - * is it a trail surrogate? - * @param c 32-bit code point - * @return TRUE or FALSE - * @stable ICU 4.2 - */ -#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) - -/* include the utfXX.h ------------------------------------------------------ */ - -#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS - -#include "unicode/utf8.h" -#include "unicode/utf16.h" - -/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */ -#include "unicode/utf_old.h" - -#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */ - -#endif /* __UTF_H__ */ diff --git a/Source/WTF/icu/unicode/utf16.h b/Source/WTF/icu/unicode/utf16.h deleted file mode 100644 index bdd88a8b9..000000000 --- a/Source/WTF/icu/unicode/utf16.h +++ /dev/null @@ -1,623 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 1999-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: utf16.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999sep09 -* created by: Markus W. Scherer -*/ - -/** - * \file - * \brief C API: 16-bit Unicode handling macros - * - * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. 
- * - * For more information see utf.h and the ICU User Guide Strings chapter - * (http://userguide.icu-project.org/strings). - * - * <em>Usage:</em> - * ICU coding guidelines for if() statements should be followed when using these macros. - * Compound statements (curly braces {}) must be used for if-else-while... - * bodies and all macro statements should be terminated with semicolon. - */ - -#ifndef __UTF16_H__ -#define __UTF16_H__ - -#include "unicode/umachine.h" -#ifndef __UTF_H__ -# include "unicode/utf.h" -#endif - -/* single-code point definitions -------------------------------------------- */ - -/** - * Does this code unit alone encode a code point (BMP, not a surrogate)? - * @param c 16-bit code unit - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) - -/** - * Is this code unit a lead surrogate (U+d800..U+dbff)? - * @param c 16-bit code unit - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) - -/** - * Is this code unit a trail surrogate (U+dc00..U+dfff)? - * @param c 16-bit code unit - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) - -/** - * Is this code unit a surrogate (U+d800..U+dfff)? - * @param c 16-bit code unit - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) - -/** - * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), - * is it a lead surrogate? - * @param c 16-bit code unit - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) - -/** - * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), - * is it a trail surrogate? - * @param c 16-bit code unit - * @return TRUE or FALSE - * @stable ICU 4.2 - */ -#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) - -/** - * Helper constant for U16_GET_SUPPLEMENTARY. 
- * @internal - */ -#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) - -/** - * Get a supplementary code point value (U+10000..U+10ffff) - * from its lead and trail surrogates. - * The result is undefined if the input values are not - * lead and trail surrogates. - * - * @param lead lead surrogate (U+d800..U+dbff) - * @param trail trail surrogate (U+dc00..U+dfff) - * @return supplementary code point (U+10000..U+10ffff) - * @stable ICU 2.4 - */ -#define U16_GET_SUPPLEMENTARY(lead, trail) \ - (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) - - -/** - * Get the lead surrogate (0xd800..0xdbff) for a - * supplementary code point (0x10000..0x10ffff). - * @param supplementary 32-bit code point (U+10000..U+10ffff) - * @return lead surrogate (U+d800..U+dbff) for supplementary - * @stable ICU 2.4 - */ -#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) - -/** - * Get the trail surrogate (0xdc00..0xdfff) for a - * supplementary code point (0x10000..0x10ffff). - * @param supplementary 32-bit code point (U+10000..U+10ffff) - * @return trail surrogate (U+dc00..U+dfff) for supplementary - * @stable ICU 2.4 - */ -#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) - -/** - * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) - * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). - * @param c 32-bit code point - * @return 1 or 2 - * @stable ICU 2.4 - */ -#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) - -/** - * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). - * @return 2 - * @stable ICU 2.4 - */ -#define U16_MAX_LENGTH 2 - -/** - * Get a code point from a string at a random-access offset, - * without changing the offset. - * "Unsafe" macro, assumes well-formed UTF-16. 
- * - * The offset may point to either the lead or trail surrogate unit - * for a supplementary code point, in which case the macro will read - * the adjacent matching surrogate as well. - * The result is undefined if the offset points to a single, unpaired surrogate. - * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. - * - * @param s const UChar * string - * @param i string offset - * @param c output UChar32 variable - * @see U16_GET - * @stable ICU 2.4 - */ -#define U16_GET_UNSAFE(s, i, c) { \ - (c)=(s)[i]; \ - if(U16_IS_SURROGATE(c)) { \ - if(U16_IS_SURROGATE_LEAD(c)) { \ - (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ - } else { \ - (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ - } \ - } \ -} - -/** - * Get a code point from a string at a random-access offset, - * without changing the offset. - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - * - * The offset may point to either the lead or trail surrogate unit - * for a supplementary code point, in which case the macro will read - * the adjacent matching surrogate as well. - * - * The length can be negative for a NUL-terminated string. - * - * If the offset points to a single, unpaired surrogate, then that itself - * will be returned as the code point. - * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 
- * - * @param s const UChar * string - * @param start starting string offset (usually 0) - * @param i string offset, must be start<=i<length - * @param length string length - * @param c output UChar32 variable - * @see U16_GET_UNSAFE - * @stable ICU 2.4 - */ -#define U16_GET(s, start, i, length, c) { \ - (c)=(s)[i]; \ - if(U16_IS_SURROGATE(c)) { \ - uint16_t __c2; \ - if(U16_IS_SURROGATE_LEAD(c)) { \ - if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ - (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ - } \ - } else { \ - if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - } \ - } \ - } \ -} - -/* definitions with forward iteration --------------------------------------- */ - -/** - * Get a code point from a string at a code point boundary offset, - * and advance the offset to the next code point boundary. - * (Post-incrementing forward iteration.) - * "Unsafe" macro, assumes well-formed UTF-16. - * - * The offset may point to the lead surrogate unit - * for a supplementary code point, in which case the macro will read - * the following trail surrogate as well. - * If the offset points to a trail surrogate, then that itself - * will be returned as the code point. - * The result is undefined if the offset points to a single, unpaired lead surrogate. - * - * @param s const UChar * string - * @param i string offset - * @param c output UChar32 variable - * @see U16_NEXT - * @stable ICU 2.4 - */ -#define U16_NEXT_UNSAFE(s, i, c) { \ - (c)=(s)[(i)++]; \ - if(U16_IS_LEAD(c)) { \ - (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ - } \ -} - -/** - * Get a code point from a string at a code point boundary offset, - * and advance the offset to the next code point boundary. - * (Post-incrementing forward iteration.) - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - * - * The length can be negative for a NUL-terminated string. 
- * - * The offset may point to the lead surrogate unit - * for a supplementary code point, in which case the macro will read - * the following trail surrogate as well. - * If the offset points to a trail surrogate or - * to a single, unpaired lead surrogate, then that itself - * will be returned as the code point. - * - * @param s const UChar * string - * @param i string offset, must be i<length - * @param length string length - * @param c output UChar32 variable - * @see U16_NEXT_UNSAFE - * @stable ICU 2.4 - */ -#define U16_NEXT(s, i, length, c) { \ - (c)=(s)[(i)++]; \ - if(U16_IS_LEAD(c)) { \ - uint16_t __c2; \ - if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ - ++(i); \ - (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ - } \ - } \ -} - -/** - * Append a code point to a string, overwriting 1 or 2 code units. - * The offset points to the current end of the string contents - * and is advanced (post-increment). - * "Unsafe" macro, assumes a valid code point and sufficient space in the string. - * Otherwise, the result is undefined. - * - * @param s const UChar * string buffer - * @param i string offset - * @param c code point to append - * @see U16_APPEND - * @stable ICU 2.4 - */ -#define U16_APPEND_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint16_t)(c); \ - } else { \ - (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ - (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ - } \ -} - -/** - * Append a code point to a string, overwriting 1 or 2 code units. - * The offset points to the current end of the string contents - * and is advanced (post-increment). - * "Safe" macro, checks for a valid code point. - * If a surrogate pair is written, checks for sufficient space in the string. - * If the code point is not valid or a trail surrogate does not fit, - * then isError is set to TRUE. 
- * - * @param s const UChar * string buffer - * @param i string offset, must be i<capacity - * @param capacity size of the string buffer - * @param c code point to append - * @param isError output UBool set to TRUE if an error occurs, otherwise not modified - * @see U16_APPEND_UNSAFE - * @stable ICU 2.4 - */ -#define U16_APPEND(s, i, capacity, c, isError) { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint16_t)(c); \ - } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ - (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ - (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ - } else /* c>0x10ffff or not enough space */ { \ - (isError)=TRUE; \ - } \ -} - -/** - * Advance the string offset from one code point boundary to the next. - * (Post-incrementing iteration.) - * "Unsafe" macro, assumes well-formed UTF-16. - * - * @param s const UChar * string - * @param i string offset - * @see U16_FWD_1 - * @stable ICU 2.4 - */ -#define U16_FWD_1_UNSAFE(s, i) { \ - if(U16_IS_LEAD((s)[(i)++])) { \ - ++(i); \ - } \ -} - -/** - * Advance the string offset from one code point boundary to the next. - * (Post-incrementing iteration.) - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - * - * The length can be negative for a NUL-terminated string. - * - * @param s const UChar * string - * @param i string offset, must be i<length - * @param length string length - * @see U16_FWD_1_UNSAFE - * @stable ICU 2.4 - */ -#define U16_FWD_1(s, i, length) { \ - if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ - ++(i); \ - } \ -} - -/** - * Advance the string offset from one code point boundary to the n-th next one, - * i.e., move forward by n code points. - * (Post-incrementing iteration.) - * "Unsafe" macro, assumes well-formed UTF-16. 
- * - * @param s const UChar * string - * @param i string offset - * @param n number of code points to skip - * @see U16_FWD_N - * @stable ICU 2.4 - */ -#define U16_FWD_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - U16_FWD_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** - * Advance the string offset from one code point boundary to the n-th next one, - * i.e., move forward by n code points. - * (Post-incrementing iteration.) - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - * - * The length can be negative for a NUL-terminated string. - * - * @param s const UChar * string - * @param i int32_t string offset, must be i<length - * @param length int32_t string length - * @param n number of code points to skip - * @see U16_FWD_N_UNSAFE - * @stable ICU 2.4 - */ -#define U16_FWD_N(s, i, length, n) { \ - int32_t __N=(n); \ - while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ - U16_FWD_1(s, i, length); \ - --__N; \ - } \ -} - -/** - * Adjust a random-access offset to a code point boundary - * at the start of a code point. - * If the offset points to the trail surrogate of a surrogate pair, - * then the offset is decremented. - * Otherwise, it is not modified. - * "Unsafe" macro, assumes well-formed UTF-16. - * - * @param s const UChar * string - * @param i string offset - * @see U16_SET_CP_START - * @stable ICU 2.4 - */ -#define U16_SET_CP_START_UNSAFE(s, i) { \ - if(U16_IS_TRAIL((s)[i])) { \ - --(i); \ - } \ -} - -/** - * Adjust a random-access offset to a code point boundary - * at the start of a code point. - * If the offset points to the trail surrogate of a surrogate pair, - * then the offset is decremented. - * Otherwise, it is not modified. - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 
- * - * @param s const UChar * string - * @param start starting string offset (usually 0) - * @param i string offset, must be start<=i - * @see U16_SET_CP_START_UNSAFE - * @stable ICU 2.4 - */ -#define U16_SET_CP_START(s, start, i) { \ - if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ - --(i); \ - } \ -} - -/* definitions with backward iteration -------------------------------------- */ - -/** - * Move the string offset from one code point boundary to the previous one - * and get the code point between them. - * (Pre-decrementing backward iteration.) - * "Unsafe" macro, assumes well-formed UTF-16. - * - * The input offset may be the same as the string length. - * If the offset is behind a trail surrogate unit - * for a supplementary code point, then the macro will read - * the preceding lead surrogate as well. - * If the offset is behind a lead surrogate, then that itself - * will be returned as the code point. - * The result is undefined if the offset is behind a single, unpaired trail surrogate. - * - * @param s const UChar * string - * @param i string offset - * @param c output UChar32 variable - * @see U16_PREV - * @stable ICU 2.4 - */ -#define U16_PREV_UNSAFE(s, i, c) { \ - (c)=(s)[--(i)]; \ - if(U16_IS_TRAIL(c)) { \ - (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ - } \ -} - -/** - * Move the string offset from one code point boundary to the previous one - * and get the code point between them. - * (Pre-decrementing backward iteration.) - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - * - * The input offset may be the same as the string length. - * If the offset is behind a trail surrogate unit - * for a supplementary code point, then the macro will read - * the preceding lead surrogate as well. - * If the offset is behind a lead surrogate or behind a single, unpaired - * trail surrogate, then that itself - * will be returned as the code point. 
- * - * @param s const UChar * string - * @param start starting string offset (usually 0) - * @param i string offset, must be start<i - * @param c output UChar32 variable - * @see U16_PREV_UNSAFE - * @stable ICU 2.4 - */ -#define U16_PREV(s, start, i, c) { \ - (c)=(s)[--(i)]; \ - if(U16_IS_TRAIL(c)) { \ - uint16_t __c2; \ - if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ - --(i); \ - (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ - } \ - } \ -} - -/** - * Move the string offset from one code point boundary to the previous one. - * (Pre-decrementing backward iteration.) - * The input offset may be the same as the string length. - * "Unsafe" macro, assumes well-formed UTF-16. - * - * @param s const UChar * string - * @param i string offset - * @see U16_BACK_1 - * @stable ICU 2.4 - */ -#define U16_BACK_1_UNSAFE(s, i) { \ - if(U16_IS_TRAIL((s)[--(i)])) { \ - --(i); \ - } \ -} - -/** - * Move the string offset from one code point boundary to the previous one. - * (Pre-decrementing backward iteration.) - * The input offset may be the same as the string length. - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - * - * @param s const UChar * string - * @param start starting string offset (usually 0) - * @param i string offset, must be start<i - * @see U16_BACK_1_UNSAFE - * @stable ICU 2.4 - */ -#define U16_BACK_1(s, start, i) { \ - if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ - --(i); \ - } \ -} - -/** - * Move the string offset from one code point boundary to the n-th one before it, - * i.e., move backward by n code points. - * (Pre-decrementing backward iteration.) - * The input offset may be the same as the string length. - * "Unsafe" macro, assumes well-formed UTF-16. 
- * - * @param s const UChar * string - * @param i string offset - * @param n number of code points to skip - * @see U16_BACK_N - * @stable ICU 2.4 - */ -#define U16_BACK_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - U16_BACK_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** - * Move the string offset from one code point boundary to the n-th one before it, - * i.e., move backward by n code points. - * (Pre-decrementing backward iteration.) - * The input offset may be the same as the string length. - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - * - * @param s const UChar * string - * @param start start of string - * @param i string offset, must be start<i - * @param n number of code points to skip - * @see U16_BACK_N_UNSAFE - * @stable ICU 2.4 - */ -#define U16_BACK_N(s, start, i, n) { \ - int32_t __N=(n); \ - while(__N>0 && (i)>(start)) { \ - U16_BACK_1(s, start, i); \ - --__N; \ - } \ -} - -/** - * Adjust a random-access offset to a code point boundary after a code point. - * If the offset is behind the lead surrogate of a surrogate pair, - * then the offset is incremented. - * Otherwise, it is not modified. - * The input offset may be the same as the string length. - * "Unsafe" macro, assumes well-formed UTF-16. - * - * @param s const UChar * string - * @param i string offset - * @see U16_SET_CP_LIMIT - * @stable ICU 2.4 - */ -#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \ - if(U16_IS_LEAD((s)[(i)-1])) { \ - ++(i); \ - } \ -} - -/** - * Adjust a random-access offset to a code point boundary after a code point. - * If the offset is behind the lead surrogate of a surrogate pair, - * then the offset is incremented. - * Otherwise, it is not modified. - * The input offset may be the same as the string length. - * "Safe" macro, handles unpaired surrogates and checks for string boundaries. - * - * The length can be negative for a NUL-terminated string. 
- * - * @param s const UChar * string - * @param start int32_t starting string offset (usually 0) - * @param i int32_t string offset, start<=i<=length - * @param length int32_t string length - * @see U16_SET_CP_LIMIT_UNSAFE - * @stable ICU 2.4 - */ -#define U16_SET_CP_LIMIT(s, start, i, length) { \ - if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ - ++(i); \ - } \ -} - -#endif diff --git a/Source/WTF/icu/unicode/utf8.h b/Source/WTF/icu/unicode/utf8.h deleted file mode 100644 index 21e5f3d04..000000000 --- a/Source/WTF/icu/unicode/utf8.h +++ /dev/null @@ -1,830 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 1999-2013, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: utf8.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 1999sep13 -* created by: Markus W. Scherer -*/ - -/** - * \file - * \brief C API: 8-bit Unicode handling macros - * - * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings. - * - * For more information see utf.h and the ICU User Guide Strings chapter - * (http://userguide.icu-project.org/strings). - * - * <em>Usage:</em> - * ICU coding guidelines for if() statements should be followed when using these macros. - * Compound statements (curly braces {}) must be used for if-else-while... - * bodies and all macro statements should be terminated with semicolon. - */ - -#ifndef __UTF8_H__ -#define __UTF8_H__ - -#include "unicode/umachine.h" -#ifndef __UTF_H__ -# include "unicode/utf.h" -#endif - -/* internal definitions ----------------------------------------------------- */ - -/** - * \var utf8_countTrailBytes - * Internal array with numbers of trail bytes for any given byte used in - * lead byte position. 
- * - * This is internal since it is not meant to be called directly by external clients; - * however it is called by public macros in this file and thus must remain stable, - * and should not be hidden when other internal functions are hidden (otherwise - * public macros would fail to compile). - * @internal - */ -#ifdef U_UTF8_IMPL -U_EXPORT const uint8_t -#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) -U_CFUNC const uint8_t -#else -U_CFUNC U_IMPORT const uint8_t /* U_IMPORT2? */ /*U_IMPORT*/ -#endif -utf8_countTrailBytes[256]; - -/** - * Counts the trail bytes for a UTF-8 lead byte. - * Returns 0 for 0..0xbf as well as for 0xfe and 0xff. - * - * This is internal since it is not meant to be called directly by external clients; - * however it is called by public macros in this file and thus must remain stable. - * - * Note: Beginning with ICU 50, the implementation uses a multi-condition expression - * which was shown in 2012 (on x86-64) to compile to fast, branch-free code. - * leadByte is evaluated multiple times. - * - * The pre-ICU 50 implementation used the exported array utf8_countTrailBytes: - * #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte]) - * leadByte was evaluated exactly once. - * - * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. - * @internal - */ -#define U8_COUNT_TRAIL_BYTES(leadByte) \ - ((leadByte)<0xf0 ? \ - ((leadByte)>=0xc0)+((leadByte)>=0xe0) : \ - (leadByte)<0xfe ? 3+((leadByte)>=0xf8)+((leadByte)>=0xfc) : 0) - -/** - * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence. - * The maximum supported lead byte is 0xf4 corresponding to U+10FFFF. - * leadByte might be evaluated multiple times. - * - * This is internal since it is not meant to be called directly by external clients; - * however it is called by public macros in this file and thus must remain stable. - * - * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. 
- * @internal - */ -#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \ - (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0)) - -/** - * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. - * - * This is internal since it is not meant to be called directly by external clients; - * however it is called by public macros in this file and thus must remain stable. - * @internal - */ -#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) - -/** - * Function for handling "next code point" with error-checking. - * - * This is internal since it is not meant to be called directly by external clients; - * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this - * file and thus must remain stable, and should not be hidden when other internal - * functions are hidden (otherwise public macros would fail to compile). - * @internal - */ -U_STABLE UChar32 U_EXPORT2 -utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); - -/** - * Function for handling "append code point" with error-checking. - * - * This is internal since it is not meant to be called directly by external clients; - * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this - * file and thus must remain stable, and should not be hidden when other internal - * functions are hidden (otherwise public macros would fail to compile). - * @internal - */ -U_STABLE int32_t U_EXPORT2 -utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError); - -/** - * Function for handling "previous code point" with error-checking. 
- * - * This is internal since it is not meant to be called directly by external clients; - * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this - * file and thus must remain stable, and should not be hidden when other internal - * functions are hidden (otherwise public macros would fail to compile). - * @internal - */ -U_STABLE UChar32 U_EXPORT2 -utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); - -/** - * Function for handling "skip backward one code point" with error-checking. - * - * This is internal since it is not meant to be called directly by external clients; - * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this - * file and thus must remain stable, and should not be hidden when other internal - * functions are hidden (otherwise public macros would fail to compile). - * @internal - */ -U_STABLE int32_t U_EXPORT2 -utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); - -/* single-code point definitions -------------------------------------------- */ - -/** - * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? - * @param c 8-bit code unit (byte) - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U8_IS_SINGLE(c) (((c)&0x80)==0) - -/** - * Is this code unit (byte) a UTF-8 lead byte? - * @param c 8-bit code unit (byte) - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e) - -/** - * Is this code unit (byte) a UTF-8 trail byte? - * @param c 8-bit code unit (byte) - * @return TRUE or FALSE - * @stable ICU 2.4 - */ -#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80) - -/** - * How many code units (bytes) are used for the UTF-8 encoding - * of this Unicode code point? - * @param c 32-bit code point - * @return 1..4, or 0 if c is a surrogate or not a Unicode code point - * @stable ICU 2.4 - */ -#define U8_LENGTH(c) \ - ((uint32_t)(c)<=0x7f ? 1 : \ - ((uint32_t)(c)<=0x7ff ? 
2 : \ - ((uint32_t)(c)<=0xd7ff ? 3 : \ - ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ - ((uint32_t)(c)<=0xffff ? 3 : 4)\ - ) \ - ) \ - ) \ - ) - -/** - * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). - * @return 4 - * @stable ICU 2.4 - */ -#define U8_MAX_LENGTH 4 - -/** - * Get a code point from a string at a random-access offset, - * without changing the offset. - * The offset may point to either the lead byte or one of the trail bytes - * for a code point, in which case the macro will read all of the bytes - * for the code point. - * The result is undefined if the offset points to an illegal UTF-8 - * byte sequence. - * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. - * - * @param s const uint8_t * string - * @param i string offset - * @param c output UChar32 variable - * @see U8_GET - * @stable ICU 2.4 - */ -#define U8_GET_UNSAFE(s, i, c) { \ - int32_t _u8_get_unsafe_index=(int32_t)(i); \ - U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ - U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ -} - -/** - * Get a code point from a string at a random-access offset, - * without changing the offset. - * The offset may point to either the lead byte or one of the trail bytes - * for a code point, in which case the macro will read all of the bytes - * for the code point. - * - * The length can be negative for a NUL-terminated string. - * - * If the offset points to an illegal UTF-8 byte sequence, then - * c is set to a negative value. - * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. 
- * - * @param s const uint8_t * string - * @param start int32_t starting string offset - * @param i int32_t string offset, must be start<=i<length - * @param length int32_t string length - * @param c output UChar32 variable, set to <0 in case of an error - * @see U8_GET_UNSAFE - * @stable ICU 2.4 - */ -#define U8_GET(s, start, i, length, c) { \ - int32_t _u8_get_index=(i); \ - U8_SET_CP_START(s, start, _u8_get_index); \ - U8_NEXT(s, _u8_get_index, length, c); \ -} - -#ifndef U_HIDE_DRAFT_API -/** - * Get a code point from a string at a random-access offset, - * without changing the offset. - * The offset may point to either the lead byte or one of the trail bytes - * for a code point, in which case the macro will read all of the bytes - * for the code point. - * - * The length can be negative for a NUL-terminated string. - * - * If the offset points to an illegal UTF-8 byte sequence, then - * c is set to U+FFFD. - * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD. - * - * This macro does not distinguish between a real U+FFFD in the text - * and U+FFFD returned for an ill-formed sequence. - * Use U8_GET() if that distinction is important. - * - * @param s const uint8_t * string - * @param start int32_t starting string offset - * @param i int32_t string offset, must be start<=i<length - * @param length int32_t string length - * @param c output UChar32 variable, set to U+FFFD in case of an error - * @see U8_GET - * @draft ICU 51 - */ -#define U8_GET_OR_FFFD(s, start, i, length, c) { \ - int32_t _u8_get_index=(i); \ - U8_SET_CP_START(s, start, _u8_get_index); \ - U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \ -} -#endif /* U_HIDE_DRAFT_API */ - -/* definitions with forward iteration --------------------------------------- */ - -/** - * Get a code point from a string at a code point boundary offset, - * and advance the offset to the next code point boundary. - * (Post-incrementing forward iteration.) 
- * "Unsafe" macro, assumes well-formed UTF-8. - * - * The offset may point to the lead byte of a multi-byte sequence, - * in which case the macro will read the whole sequence. - * The result is undefined if the offset points to a trail byte - * or an illegal UTF-8 sequence. - * - * @param s const uint8_t * string - * @param i string offset - * @param c output UChar32 variable - * @see U8_NEXT - * @stable ICU 2.4 - */ -#define U8_NEXT_UNSAFE(s, i, c) { \ - (c)=(uint8_t)(s)[(i)++]; \ - if((c)>=0x80) { \ - if((c)<0xe0) { \ - (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \ - } else if((c)<0xf0) { \ - /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ - (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \ - (i)+=2; \ - } else { \ - (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \ - (i)+=3; \ - } \ - } \ -} - -/** - * Get a code point from a string at a code point boundary offset, - * and advance the offset to the next code point boundary. - * (Post-incrementing forward iteration.) - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * The length can be negative for a NUL-terminated string. - * - * The offset may point to the lead byte of a multi-byte sequence, - * in which case the macro will read the whole sequence. - * If the offset points to a trail byte or an illegal UTF-8 sequence, then - * c is set to a negative value. 
- * - * @param s const uint8_t * string - * @param i int32_t string offset, must be i<length - * @param length int32_t string length - * @param c output UChar32 variable, set to <0 in case of an error - * @see U8_NEXT_UNSAFE - * @stable ICU 2.4 - */ -#define U8_NEXT(s, i, length, c) { \ - (c)=(uint8_t)(s)[(i)++]; \ - if((c)>=0x80) { \ - uint8_t __t1, __t2; \ - if( /* handle U+1000..U+CFFF inline */ \ - (0xe0<(c) && (c)<=0xec) && \ - (((i)+1)<(length) || (length)<0) && \ - (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \ - (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \ - ) { \ - /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ - (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \ - ) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ - } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \ - } \ - } \ -} - -#ifndef U_HIDE_DRAFT_API -/** - * Get a code point from a string at a code point boundary offset, - * and advance the offset to the next code point boundary. - * (Post-incrementing forward iteration.) - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * The length can be negative for a NUL-terminated string. - * - * The offset may point to the lead byte of a multi-byte sequence, - * in which case the macro will read the whole sequence. - * If the offset points to a trail byte or an illegal UTF-8 sequence, then - * c is set to U+FFFD. - * - * This macro does not distinguish between a real U+FFFD in the text - * and U+FFFD returned for an ill-formed sequence. - * Use U8_NEXT() if that distinction is important. 
- * - * @param s const uint8_t * string - * @param i int32_t string offset, must be i<length - * @param length int32_t string length - * @param c output UChar32 variable, set to U+FFFD in case of an error - * @see U8_NEXT - * @draft ICU 51 - */ -#define U8_NEXT_OR_FFFD(s, i, length, c) { \ - (c)=(uint8_t)(s)[(i)++]; \ - if((c)>=0x80) { \ - uint8_t __t1, __t2; \ - if( /* handle U+1000..U+CFFF inline */ \ - (0xe0<(c) && (c)<=0xec) && \ - (((i)+1)<(length) || (length)<0) && \ - (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \ - (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \ - ) { \ - /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ - (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \ - (i)+=2; \ - } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \ - ) { \ - (c)=(((c)&0x1f)<<6)|__t1; \ - ++(i); \ - } else { \ - /* function call for "complicated" and error cases */ \ - (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \ - } \ - } \ -} -#endif /* U_HIDE_DRAFT_API */ - -/** - * Append a code point to a string, overwriting 1 to 4 bytes. - * The offset points to the current end of the string contents - * and is advanced (post-increment). - * "Unsafe" macro, assumes a valid code point and sufficient space in the string. - * Otherwise, the result is undefined. 
- * - * @param s const uint8_t * string buffer - * @param i string offset - * @param c code point to append - * @see U8_APPEND - * @stable ICU 2.4 - */ -#define U8_APPEND_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else { \ - if((uint32_t)(c)<=0x7ff) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ - } else { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ - } else { \ - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ - } \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ - } \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } \ -} - -/** - * Append a code point to a string, overwriting 1 to 4 bytes. - * The offset points to the current end of the string contents - * and is advanced (post-increment). - * "Safe" macro, checks for a valid code point. - * If a non-ASCII code point is written, checks for sufficient space in the string. - * If the code point is not valid or trail bytes do not fit, - * then isError is set to TRUE. - * - * @param s const uint8_t * string buffer - * @param i int32_t string offset, must be i<capacity - * @param capacity int32_t size of the string buffer - * @param c UChar32 code point to append - * @param isError output UBool set to TRUE if an error occurs, otherwise not modified - * @see U8_APPEND_UNSAFE - * @stable ICU 2.4 - */ -#define U8_APPEND(s, i, capacity, c, isError) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } else { \ - (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \ - } \ -} - -/** - * Advance the string offset from one code point boundary to the next. 
- * (Post-incrementing iteration.) - * "Unsafe" macro, assumes well-formed UTF-8. - * - * @param s const uint8_t * string - * @param i string offset - * @see U8_FWD_1 - * @stable ICU 2.4 - */ -#define U8_FWD_1_UNSAFE(s, i) { \ - (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \ -} - -/** - * Advance the string offset from one code point boundary to the next. - * (Post-incrementing iteration.) - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * The length can be negative for a NUL-terminated string. - * - * @param s const uint8_t * string - * @param i int32_t string offset, must be i<length - * @param length int32_t string length - * @see U8_FWD_1_UNSAFE - * @stable ICU 2.4 - */ -#define U8_FWD_1(s, i, length) { \ - uint8_t __b=(uint8_t)(s)[(i)++]; \ - if(U8_IS_LEAD(__b)) { \ - uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \ - if((i)+__count>(length) && (length)>=0) { \ - __count=(uint8_t)((length)-(i)); \ - } \ - while(__count>0 && U8_IS_TRAIL((s)[i])) { \ - ++(i); \ - --__count; \ - } \ - } \ -} - -/** - * Advance the string offset from one code point boundary to the n-th next one, - * i.e., move forward by n code points. - * (Post-incrementing iteration.) - * "Unsafe" macro, assumes well-formed UTF-8. - * - * @param s const uint8_t * string - * @param i string offset - * @param n number of code points to skip - * @see U8_FWD_N - * @stable ICU 2.4 - */ -#define U8_FWD_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - U8_FWD_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** - * Advance the string offset from one code point boundary to the n-th next one, - * i.e., move forward by n code points. - * (Post-incrementing iteration.) - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * The length can be negative for a NUL-terminated string. 
- * - * @param s const uint8_t * string - * @param i int32_t string offset, must be i<length - * @param length int32_t string length - * @param n number of code points to skip - * @see U8_FWD_N_UNSAFE - * @stable ICU 2.4 - */ -#define U8_FWD_N(s, i, length, n) { \ - int32_t __N=(n); \ - while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ - U8_FWD_1(s, i, length); \ - --__N; \ - } \ -} - -/** - * Adjust a random-access offset to a code point boundary - * at the start of a code point. - * If the offset points to a UTF-8 trail byte, - * then the offset is moved backward to the corresponding lead byte. - * Otherwise, it is not modified. - * "Unsafe" macro, assumes well-formed UTF-8. - * - * @param s const uint8_t * string - * @param i string offset - * @see U8_SET_CP_START - * @stable ICU 2.4 - */ -#define U8_SET_CP_START_UNSAFE(s, i) { \ - while(U8_IS_TRAIL((s)[i])) { --(i); } \ -} - -/** - * Adjust a random-access offset to a code point boundary - * at the start of a code point. - * If the offset points to a UTF-8 trail byte, - * then the offset is moved backward to the corresponding lead byte. - * Otherwise, it is not modified. - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * @param s const uint8_t * string - * @param start int32_t starting string offset (usually 0) - * @param i int32_t string offset, must be start<=i - * @see U8_SET_CP_START_UNSAFE - * @stable ICU 2.4 - */ -#define U8_SET_CP_START(s, start, i) { \ - if(U8_IS_TRAIL((s)[(i)])) { \ - (i)=utf8_back1SafeBody(s, start, (i)); \ - } \ -} - -/* definitions with backward iteration -------------------------------------- */ - -/** - * Move the string offset from one code point boundary to the previous one - * and get the code point between them. - * (Pre-decrementing backward iteration.) - * "Unsafe" macro, assumes well-formed UTF-8. - * - * The input offset may be the same as the string length. 
- * If the offset is behind a multi-byte sequence, then the macro will read - * the whole sequence. - * If the offset is behind a lead byte, then that itself - * will be returned as the code point. - * The result is undefined if the offset is behind an illegal UTF-8 sequence. - * - * @param s const uint8_t * string - * @param i string offset - * @param c output UChar32 variable - * @see U8_PREV - * @stable ICU 2.4 - */ -#define U8_PREV_UNSAFE(s, i, c) { \ - (c)=(uint8_t)(s)[--(i)]; \ - if(U8_IS_TRAIL(c)) { \ - uint8_t __b, __count=1, __shift=6; \ -\ - /* c is a trail byte */ \ - (c)&=0x3f; \ - for(;;) { \ - __b=(uint8_t)(s)[--(i)]; \ - if(__b>=0xc0) { \ - U8_MASK_LEAD_BYTE(__b, __count); \ - (c)|=(UChar32)__b<<__shift; \ - break; \ - } else { \ - (c)|=(UChar32)(__b&0x3f)<<__shift; \ - ++__count; \ - __shift+=6; \ - } \ - } \ - } \ -} - -/** - * Move the string offset from one code point boundary to the previous one - * and get the code point between them. - * (Pre-decrementing backward iteration.) - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * The input offset may be the same as the string length. - * If the offset is behind a multi-byte sequence, then the macro will read - * the whole sequence. - * If the offset is behind a lead byte, then that itself - * will be returned as the code point. - * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value. 
- * - * @param s const uint8_t * string - * @param start int32_t starting string offset (usually 0) - * @param i int32_t string offset, must be start<i - * @param c output UChar32 variable, set to <0 in case of an error - * @see U8_PREV_UNSAFE - * @stable ICU 2.4 - */ -#define U8_PREV(s, start, i, c) { \ - (c)=(uint8_t)(s)[--(i)]; \ - if((c)>=0x80) { \ - (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \ - } \ -} - -#ifndef U_HIDE_DRAFT_API -/** - * Move the string offset from one code point boundary to the previous one - * and get the code point between them. - * (Pre-decrementing backward iteration.) - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * The input offset may be the same as the string length. - * If the offset is behind a multi-byte sequence, then the macro will read - * the whole sequence. - * If the offset is behind a lead byte, then that itself - * will be returned as the code point. - * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD. - * - * This macro does not distinguish between a real U+FFFD in the text - * and U+FFFD returned for an ill-formed sequence. - * Use U8_PREV() if that distinction is important. - * - * @param s const uint8_t * string - * @param start int32_t starting string offset (usually 0) - * @param i int32_t string offset, must be start<i - * @param c output UChar32 variable, set to U+FFFD in case of an error - * @see U8_PREV - * @draft ICU 51 - */ -#define U8_PREV_OR_FFFD(s, start, i, c) { \ - (c)=(uint8_t)(s)[--(i)]; \ - if((c)>=0x80) { \ - (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \ - } \ -} -#endif /* U_HIDE_DRAFT_API */ - -/** - * Move the string offset from one code point boundary to the previous one. - * (Pre-decrementing backward iteration.) - * The input offset may be the same as the string length. - * "Unsafe" macro, assumes well-formed UTF-8. 
- * - * @param s const uint8_t * string - * @param i string offset - * @see U8_BACK_1 - * @stable ICU 2.4 - */ -#define U8_BACK_1_UNSAFE(s, i) { \ - while(U8_IS_TRAIL((s)[--(i)])) {} \ -} - -/** - * Move the string offset from one code point boundary to the previous one. - * (Pre-decrementing backward iteration.) - * The input offset may be the same as the string length. - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * @param s const uint8_t * string - * @param start int32_t starting string offset (usually 0) - * @param i int32_t string offset, must be start<i - * @see U8_BACK_1_UNSAFE - * @stable ICU 2.4 - */ -#define U8_BACK_1(s, start, i) { \ - if(U8_IS_TRAIL((s)[--(i)])) { \ - (i)=utf8_back1SafeBody(s, start, (i)); \ - } \ -} - -/** - * Move the string offset from one code point boundary to the n-th one before it, - * i.e., move backward by n code points. - * (Pre-decrementing backward iteration.) - * The input offset may be the same as the string length. - * "Unsafe" macro, assumes well-formed UTF-8. - * - * @param s const uint8_t * string - * @param i string offset - * @param n number of code points to skip - * @see U8_BACK_N - * @stable ICU 2.4 - */ -#define U8_BACK_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - U8_BACK_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** - * Move the string offset from one code point boundary to the n-th one before it, - * i.e., move backward by n code points. - * (Pre-decrementing backward iteration.) - * The input offset may be the same as the string length. - * "Safe" macro, checks for illegal sequences and for string boundaries. 
- * - * @param s const uint8_t * string - * @param start int32_t index of the start of the string - * @param i int32_t string offset, must be start<i - * @param n number of code points to skip - * @see U8_BACK_N_UNSAFE - * @stable ICU 2.4 - */ -#define U8_BACK_N(s, start, i, n) { \ - int32_t __N=(n); \ - while(__N>0 && (i)>(start)) { \ - U8_BACK_1(s, start, i); \ - --__N; \ - } \ -} - -/** - * Adjust a random-access offset to a code point boundary after a code point. - * If the offset is behind a partial multi-byte sequence, - * then the offset is incremented to behind the whole sequence. - * Otherwise, it is not modified. - * The input offset may be the same as the string length. - * "Unsafe" macro, assumes well-formed UTF-8. - * - * @param s const uint8_t * string - * @param i string offset - * @see U8_SET_CP_LIMIT - * @stable ICU 2.4 - */ -#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \ - U8_BACK_1_UNSAFE(s, i); \ - U8_FWD_1_UNSAFE(s, i); \ -} - -/** - * Adjust a random-access offset to a code point boundary after a code point. - * If the offset is behind a partial multi-byte sequence, - * then the offset is incremented to behind the whole sequence. - * Otherwise, it is not modified. - * The input offset may be the same as the string length. - * "Safe" macro, checks for illegal sequences and for string boundaries. - * - * The length can be negative for a NUL-terminated string. 
- * - * @param s const uint8_t * string - * @param start int32_t starting string offset (usually 0) - * @param i int32_t string offset, must be start<=i<=length - * @param length int32_t string length - * @see U8_SET_CP_LIMIT_UNSAFE - * @stable ICU 2.4 - */ -#define U8_SET_CP_LIMIT(s, start, i, length) { \ - if((start)<(i) && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ - U8_BACK_1(s, start, i); \ - U8_FWD_1(s, i, length); \ - } \ -} - -#endif diff --git a/Source/WTF/icu/unicode/utf_old.h b/Source/WTF/icu/unicode/utf_old.h deleted file mode 100644 index f9125b1dd..000000000 --- a/Source/WTF/icu/unicode/utf_old.h +++ /dev/null @@ -1,1169 +0,0 @@ -/* -******************************************************************************* -* -* Copyright (C) 2002-2012, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: utf_old.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002sep21 -* created by: Markus W. Scherer -*/ - -/** - * \file - * \brief C API: Deprecated macros for Unicode string handling - */ - -/** - * - * The macros in utf_old.h are all deprecated and their use discouraged. - * Some of the design principles behind the set of UTF macros - * have changed or proved impractical. - * Almost all of the old "UTF macros" are at least renamed. - * If you are looking for a new equivalent to an old macro, please see the - * comment at the old one. - * - * Brief summary of reasons for deprecation: - * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing) - * was impractical. - * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing) - * was of little use and impractical. - * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE - * selection framework: UTF32_ macros (all trivial) - * and UTF_ default and intermediate macros (all aliases). 
- * - The selection framework also caused many macro aliases. - * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2). - * - Change of language in Unicode standard: - * Growing distinction between internal x-bit Unicode strings and external UTF-x - * forms, with the former more lenient. - * Suggests renaming of UTF16_ macros to U16_. - * - The prefix "UTF_" without a width number confused some users. - * - "Safe" append macros needed the addition of an error indicator output. - * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values - * to indicate error conditions. - * - The use of the "_CHAR" infix for code point operations confused some users. - * - * More details: - * - * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32 - * for string processing, and among unsafe/safe/strict default macros for that. - * - * It proved nearly impossible to write non-trivial, high-performance code - * that is UTF-generic. - * Unsafe default macros would be dangerous for default string processing, - * and the main reason for the "strict" versions disappeared: - * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal. - * The only other conditions that "strict" checked for were non-characters, - * which are valid during processing. Only during text input/output should they - * be checked, and at that time other well-formedness checks may be - * necessary or useful as well. - * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR - * or U_IS_UNICODE_CHAR. - * - * The old UTF8_..._SAFE macros also used some normal Unicode code points - * to indicate malformed sequences. - * The new UTF8_ macros without suffix use negative values instead. - * - * The entire contents of utf32.h was moved here without replacement - * because all those macros were trivial and - * were meaningful only in the framework of choosing the UTF size. 
- * - * See Jitterbug 2150 and its discussion on the ICU mailing list - * in September 2002. - * - * <hr> - * - * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation: - * - * <p>The original concept for these files was for ICU to allow - * in principle to set which UTF (UTF-8/16/32) is used internally - * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type - * accordingly. UTF-16 was the default.</p> - * - * <p>This concept has been abandoned. - * A lot of the ICU source code assumes UChar strings are in UTF-16. - * This is especially true for low-level code like - * conversion, normalization, and collation. - * The utf.h header enforces the default of UTF-16. - * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p> - * - * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then - * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p> - * - * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit - * Unicode code point (Unicode scalar value, 0..0x10ffff). - * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as - * the definition of UChar. For details see the documentation for UChar32 itself.</p> - * - * <p>utf.h also defines a number of C macros for handling single Unicode code points and - * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual - * implementations of those macros and then aliases one set of them (for UTF-16) for general use. - * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while - * the general alias macros always begin with UTF_...</p> - * - * <p>Many string operations can be done with or without error checking. - * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe" - * ones with ..._UNSAFE and ..._SAFE suffixes. 
The unsafe macros are fast but may cause - * program failures if the strings are not well-formed. The safe macros have an additional, boolean - * parameter "strict". If strict is FALSE, then only illegal sequences are detected. - * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates). - * Safe macros return special error code points for illegal/irregular sequences: - * Typically, U+ffff, or values that would result in a code unit sequence of the same length - * as the erroneous input sequence.<br> - * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and - * they do not have start/length parameters for boundary checking.</p> - * - * <p>Here, the macros are aliased in two steps: - * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are - * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures. - * Then, in a second step, the default, general alias macros are set to use either the unsafe or - * the safe/not strict (default) or the safe/strict macro; - * these general macros do not have a strictness parameter.</p> - * - * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict. - * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for - * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p> - * - * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix. - * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias. - * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds, - * then the _UNSAFE version may be used. 
- * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p> - * - * <hr> - * - * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead. - */ - -#ifndef __UTF_OLD_H__ -#define __UTF_OLD_H__ - -#ifndef U_HIDE_DEPRECATED_API - -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" - -/* Formerly utf.h, part 1 --------------------------------------------------- */ - -#ifdef U_USE_UTF_DEPRECATES -/** - * Unicode string and array offset and index type. - * ICU always counts Unicode code units (UChars) for - * string offsets, indexes, and lengths, not Unicode code points. - * - * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release. - */ -typedef int32_t UTextOffset; -#endif - -/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF_SIZE 16 - -/** - * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations - * with strict=FALSE. - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_SAFE -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#undef UTF_UNSAFE -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#undef UTF_STRICT - -/** - * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8, - * which need 1 or 2 bytes in UTF-8: - * \code - * U+0015 = NAK = Negative Acknowledge, C0 control character - * U+009f = highest C1 control character - * \endcode - * - * These are used by UTF8_..._SAFE macros so that they can return an error value - * that needs the same number of code units (bytes) as were seen by - * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID(). - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF8_ERROR_VALUE_1 0x15 - -/** - * See documentation on UTF8_ERROR_VALUE_1 for details. - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. 
- */ -#define UTF8_ERROR_VALUE_2 0x9f - -/** - * Error value for all UTFs. This code point value will be set by macros with error - * checking if an error is detected. - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_ERROR_VALUE 0xffff - -/** - * Is a given 32-bit code an error value - * as returned by one of the macros for any UTF? - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_IS_ERROR(c) \ - (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) - -/** - * This is a combined macro: Is c a valid Unicode value _and_ not an error code? - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_IS_VALID(c) \ - (UTF_IS_UNICODE_CHAR(c) && \ - (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) - -/** - * Is this code unit or code point a surrogate (U+d800..U+dfff)? - * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h. - */ -#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) - -/** - * Is a given 32-bit code point a Unicode noncharacter? - * - * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h. - */ -#define UTF_IS_UNICODE_NONCHAR(c) \ - ((c)>=0xfdd0 && \ - ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ - (uint32_t)(c)<=0x10ffff) - -/** - * Is a given 32-bit value a Unicode code point value (0..U+10ffff) - * that can be assigned a character? - * - * Code points that are not characters include: - * - single surrogate code points (U+d800..U+dfff, 2048 code points) - * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) - * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) - * - the highest Unicode code point value is U+10ffff - * - * This means that all code points below U+d800 are character code points, - * and that boundary is tested first for performance. - * - * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h. 
- */ -#define UTF_IS_UNICODE_CHAR(c) \ - ((uint32_t)(c)<0xd800 || \ - ((uint32_t)(c)>0xdfff && \ - (uint32_t)(c)<=0x10ffff && \ - !UTF_IS_UNICODE_NONCHAR(c))) - -/* Formerly utf8.h ---------------------------------------------------------- */ - -/** - * Count the trail bytes for a UTF-8 lead byte. - * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h. - */ -#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) - -/** - * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. - * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h. - */ -#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) - -/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */ -#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) -/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */ -#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) -/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */ -#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) - -/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */ -#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) - -/** - * Given the lead character, how many bytes are taken by this code point. - * ICU does not deal with code points >0x10ffff - * unless necessary for advancing in the byte stream. - * - * These length macros take into account that for values >0x10ffff - * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff - * with 3 bytes. 
- * Code point comparisons need to be in uint32_t because UChar32 - * may be a signed type, and negative values must be recognized. - * - * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h. - */ -#if 1 -# define UTF8_CHAR_LENGTH(c) \ - ((uint32_t)(c)<=0x7f ? 1 : \ - ((uint32_t)(c)<=0x7ff ? 2 : \ - ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ - ) \ - ) -#else -# define UTF8_CHAR_LENGTH(c) \ - ((uint32_t)(c)<=0x7f ? 1 : \ - ((uint32_t)(c)<=0x7ff ? 2 : \ - ((uint32_t)(c)<=0xffff ? 3 : \ - ((uint32_t)(c)<=0x10ffff ? 4 : \ - ((uint32_t)(c)<=0x3ffffff ? 5 : \ - ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ - ) \ - ) \ - ) \ - ) \ - ) -#endif - -/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */ -#define UTF8_MAX_CHAR_LENGTH 4 - -/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF8_ARRAY_SIZE(size) ((5*(size))/2) - -/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */ -#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ - int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ - UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ - UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ -} - -/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */ -#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ - int32_t _utf8_get_char_safe_index=(int32_t)(i); \ - UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ - UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ -} - -/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. 
*/ -#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[(i)++]; \ - if((uint8_t)((c)-0xc0)<0x35) { \ - uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ - UTF8_MASK_LEAD_BYTE(c, __count); \ - switch(__count) { \ - /* each following branch falls through to the next one */ \ - case 3: \ - (c)=((c)<<6)|((s)[(i)++]&0x3f); \ - case 2: \ - (c)=((c)<<6)|((s)[(i)++]&0x3f); \ - case 1: \ - (c)=((c)<<6)|((s)[(i)++]&0x3f); \ - /* no other branches to optimize switch() */ \ - break; \ - } \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */ -#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else { \ - if((uint32_t)(c)<=0x7ff) { \ - (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ - } else { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ - } else { \ - (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ - (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ - } \ - (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ - } \ - (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */ -#define UTF8_FWD_1_UNSAFE(s, i) { \ - (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ -} - -/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */ -#define UTF8_FWD_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - UTF8_FWD_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */ -#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ - while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ -} - -/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */ -#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ - (c)=(s)[(i)++]; \ - if((c)>=0x80) { \ - if(UTF8_IS_LEAD(c)) { \ - (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ - } else { \ - (c)=UTF8_ERROR_VALUE_1; \ - } \ - } \ -} - -/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. 
*/ -#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ - if((uint32_t)(c)<=0x7f) { \ - (s)[(i)++]=(uint8_t)(c); \ - } else { \ - (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */ -#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) - -/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */ -#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) - -/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */ -#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) - -/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */ -#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[--(i)]; \ - if(UTF8_IS_TRAIL(c)) { \ - uint8_t __b, __count=1, __shift=6; \ -\ - /* c is a trail byte */ \ - (c)&=0x3f; \ - for(;;) { \ - __b=(s)[--(i)]; \ - if(__b>=0xc0) { \ - UTF8_MASK_LEAD_BYTE(__b, __count); \ - (c)|=(UChar32)__b<<__shift; \ - break; \ - } else { \ - (c)|=(UChar32)(__b&0x3f)<<__shift; \ - ++__count; \ - __shift+=6; \ - } \ - } \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */ -#define UTF8_BACK_1_UNSAFE(s, i) { \ - while(UTF8_IS_TRAIL((s)[--(i)])) {} \ -} - -/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */ -#define UTF8_BACK_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - UTF8_BACK_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ -#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ - UTF8_BACK_1_UNSAFE(s, i); \ - UTF8_FWD_1_UNSAFE(s, i); \ -} - -/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */ -#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ - (c)=(s)[--(i)]; \ - if((c)>=0x80) { \ - if((c)<=0xbf) { \ - (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ - } else { \ - (c)=UTF8_ERROR_VALUE_1; \ - } \ - } \ -} - -/** @deprecated ICU 2.4. 
Renamed to U8_BACK_1, see utf_old.h. */ -#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) - -/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */ -#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) - -/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */ -#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) - -/* Formerly utf16.h --------------------------------------------------------- */ - -/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */ -#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) - -/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */ -#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) - -/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */ -#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) - -/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */ -#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) - -/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */ -#define UTF16_GET_PAIR_VALUE(first, second) \ - (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) - -/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ -#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) - -/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ -#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) - -/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ -#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) - -/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. 
*/ -#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) - -/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */ -#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) - -/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */ -#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) - -/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */ -#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) - -/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */ -#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) - -/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */ -#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) - -/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */ -#define UTF16_MAX_CHAR_LENGTH 2 - -/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF16_ARRAY_SIZE(size) (size) - -/** - * Get a single code point from an offset that points to any - * of the code units that belong to that code point. - * Assume 0<=i<length. - * - * This could be used for iteration together with - * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(), - * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and - * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that. - * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. - */ -#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[i]; \ - if(UTF_IS_SURROGATE(c)) { \ - if(UTF_IS_SURROGATE_FIRST(c)) { \ - (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ - } else { \ - (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ - } \ - } \ -} - -/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. 
*/ -#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ - (c)=(s)[i]; \ - if(UTF_IS_SURROGATE(c)) { \ - uint16_t __c2; \ - if(UTF_IS_SURROGATE_FIRST(c)) { \ - if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ - (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ - /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ - } else if(strict) {\ - /* unmatched first surrogate */ \ - (c)=UTF_ERROR_VALUE; \ - } \ - } else { \ - if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ - (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ - /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ - } else if(strict) {\ - /* unmatched second surrogate */ \ - (c)=UTF_ERROR_VALUE; \ - } \ - } \ - } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ -#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[(i)++]; \ - if(UTF_IS_FIRST_SURROGATE(c)) { \ - (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ -#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint16_t)(c); \ - } else { \ - (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ - (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ -#define UTF16_FWD_1_UNSAFE(s, i) { \ - if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ - ++(i); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ -#define UTF16_FWD_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - UTF16_FWD_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ -#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ - if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ - --(i); \ - } \ -} - -/** @deprecated ICU 2.4. 
Use U16_NEXT instead, see utf_old.h. */ -#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ - (c)=(s)[(i)++]; \ - if(UTF_IS_FIRST_SURROGATE(c)) { \ - uint16_t __c2; \ - if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ - ++(i); \ - (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ - /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ - } else if(strict) {\ - /* unmatched first surrogate */ \ - (c)=UTF_ERROR_VALUE; \ - } \ - } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ - /* unmatched second surrogate or other non-character */ \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ -#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ - if((uint32_t)(c)<=0xffff) { \ - (s)[(i)++]=(uint16_t)(c); \ - } else if((uint32_t)(c)<=0x10ffff) { \ - if((i)+1<(length)) { \ - (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ - (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ - } else /* not enough space */ { \ - (s)[(i)++]=UTF_ERROR_VALUE; \ - } \ - } else /* c>0x10ffff, write error value */ { \ - (s)[(i)++]=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ -#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) - -/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ -#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ -#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) - -/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ -#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[--(i)]; \ - if(UTF_IS_SECOND_SURROGATE(c)) { \ - (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ -#define UTF16_BACK_1_UNSAFE(s, i) { \ - if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ - --(i); \ - } \ -} - -/** @deprecated ICU 2.4. 
Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ -#define UTF16_BACK_N_UNSAFE(s, i, n) { \ - int32_t __N=(n); \ - while(__N>0) { \ - UTF16_BACK_1_UNSAFE(s, i); \ - --__N; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ -#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ - if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ - ++(i); \ - } \ -} - -/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ -#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ - (c)=(s)[--(i)]; \ - if(UTF_IS_SECOND_SURROGATE(c)) { \ - uint16_t __c2; \ - if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ - --(i); \ - (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ - /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ - } else if(strict) {\ - /* unmatched second surrogate */ \ - (c)=UTF_ERROR_VALUE; \ - } \ - } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ - /* unmatched first surrogate or other non-character */ \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ -#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) - -/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ -#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */ -#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) - -/* Formerly utf32.h --------------------------------------------------------- */ - -/* -* Old documentation: -* -* This file defines macros to deal with UTF-32 code units and code points. -* Signatures and semantics are the same as for the similarly named macros -* in utf16.h. -* utf32.h is included by utf.h after unicode/umachine.h</p> -* and some common definitions. -* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros. 
-* Compound statements (curly braces {}) must be used for if-else-while... -* bodies and all macro statements should be terminated with semicolon.</p> -*/ - -/* internal definitions ----------------------------------------------------- */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_IS_SAFE(c, strict) \ - (!(strict) ? \ - (uint32_t)(c)<=0x10ffff : \ - UTF_IS_UNICODE_CHAR(c)) - -/* - * For the semantics of all of these macros, see utf16.h. - * The UTF-32 versions are trivial because any code point is - * encoded using exactly one code unit. - */ - -/* single-code point definitions -------------------------------------------- */ - -/* classes of code unit values */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_IS_SINGLE(uchar) 1 -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_IS_LEAD(uchar) 0 -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_IS_TRAIL(uchar) 0 - -/* number of code units per code point */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_NEED_MULTIPLE_UCHAR(c) 0 -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_CHAR_LENGTH(c) 1 -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_MAX_CHAR_LENGTH 1 - -/* average number of code units compared to UTF-16 */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_ARRAY_SIZE(size) (size) - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[i]; \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ - (c)=(s)[i]; \ - if(!UTF32_IS_SAFE(c, strict)) { \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/* definitions with forward iteration --------------------------------------- */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[(i)++]; \ -} - -/** @deprecated ICU 2.4. 
Obsolete, see utf_old.h. */ -#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \ - (s)[(i)++]=(c); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_1_UNSAFE(s, i) { \ - ++(i); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_N_UNSAFE(s, i, n) { \ - (i)+=(n); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ - (c)=(s)[(i)++]; \ - if(!UTF32_IS_SAFE(c, strict)) { \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ - if((uint32_t)(c)<=0x10ffff) { \ - (s)[(i)++]=(c); \ - } else /* c>0x10ffff, write 0xfffd */ { \ - (s)[(i)++]=0xfffd; \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_1_SAFE(s, i, length) { \ - ++(i); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_FWD_N_SAFE(s, i, length, n) { \ - if(((i)+=(n))>(length)) { \ - (i)=(length); \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ -} - -/* definitions with backward iteration -------------------------------------- */ - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ - (c)=(s)[--(i)]; \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_1_UNSAFE(s, i) { \ - --(i); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_N_UNSAFE(s, i, n) { \ - (i)-=(n); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. 
*/ -#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ - (c)=(s)[--(i)]; \ - if(!UTF32_IS_SAFE(c, strict)) { \ - (c)=UTF_ERROR_VALUE; \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_1_SAFE(s, start, i) { \ - --(i); \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_BACK_N_SAFE(s, start, i, n) { \ - (i)-=(n); \ - if((i)<(start)) { \ - (i)=(start); \ - } \ -} - -/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ -#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ -} - -/* Formerly utf.h, part 2 --------------------------------------------------- */ - -/** - * Estimate the number of code units for a string based on the number of UTF-16 code units. - * - * @deprecated ICU 2.4. Obsolete, see utf_old.h. - */ -#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) - -/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */ -#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) - -/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */ -#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) - - -/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ -#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) - -/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */ -#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) - - -/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ -#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) - -/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ -#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) - - -/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ -#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) - -/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. 
*/ -#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) - - -/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ -#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) - -/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ -#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) - - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ -#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ -#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) - - -/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ -#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) - -/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ -#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) - - -/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ -#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) - -/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ -#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) - - -/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ -#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) - -/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ -#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) - - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ -#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) - -/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. 
*/ -#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) - -/* Define default macros (UTF-16 "safe") ------------------------------------ */ - -/** - * Does this code unit alone encode a code point (BMP, not a surrogate)? - * Same as UTF16_IS_SINGLE. - * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h. - */ -#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) - -/** - * Is this code unit the first one of several (a lead surrogate)? - * Same as UTF16_IS_LEAD. - * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. - */ -#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) - -/** - * Is this code unit one of several but not the first one (a trail surrogate)? - * Same as UTF16_IS_TRAIL. - * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. - */ -#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) - -/** - * Does this code point require multiple code units (is it a supplementary code point)? - * Same as UTF16_NEED_MULTIPLE_UCHAR. - * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead. - */ -#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) - -/** - * How many code units are used to encode this code point (1 or 2)? - * Same as UTF16_CHAR_LENGTH. - * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. - */ -#define UTF_CHAR_LENGTH(c) U16_LENGTH(c) - -/** - * How many code units are used at most for any Unicode code point (2)? - * Same as UTF16_MAX_CHAR_LENGTH. - * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. - */ -#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH - -/** - * Set c to the code point that contains the code unit i. - * i could point to the lead or the trail surrogate for the code point. - * i is not modified. - * Same as UTF16_GET_CHAR. - * \pre 0<=i<length - * - * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h. 
- */ -#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) - -/** - * Set c to the code point that starts at code unit i - * and advance i to beyond the code units of this code point (post-increment). - * i must point to the first code unit of a code point. - * Otherwise c is set to the trail unit (surrogate) itself. - * Same as UTF16_NEXT_CHAR. - * \pre 0<=i<length - * \post 0<i<=length - * - * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h. - */ -#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) - -/** - * Append the code units of code point c to the string at index i - * and advance i to beyond the new code units (post-increment). - * The code units beginning at index i will be overwritten. - * Same as UTF16_APPEND_CHAR. - * \pre 0<=c<=0x10ffff - * \pre 0<=i<length - * \post 0<i<=length - * - * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. - */ -#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) - -/** - * Advance i to beyond the code units of the code point that begins at i. - * I.e., advance i by one code point. - * Same as UTF16_FWD_1. - * \pre 0<=i<length - * \post 0<i<=length - * - * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. - */ -#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) - -/** - * Advance i to beyond the code units of the n code points where the first one begins at i. - * I.e., advance i by n code points. - * Same as UT16_FWD_N. - * \pre 0<=i<length - * \post 0<i<=length - * - * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. - */ -#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) - -/** - * Take the random-access index i and adjust it so that it points to the beginning - * of a code point. - * The input index points to any code unit of a code point and is moved to point to - * the first code unit of the same code point. i is never incremented. 
- * In other words, if i points to a trail surrogate that is preceded by a matching - * lead surrogate, then i is decremented. Otherwise it is not modified. - * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index. - * Same as UTF16_SET_CHAR_START. - * \pre start<=i<length - * \post start<=i<length - * - * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. - */ -#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) - -/** - * Set c to the code point that has code units before i - * and move i backward (towards the beginning of the string) - * to the first code unit of this code point (pre-increment). - * i must point to the first code unit after the last unit of a code point (i==length is allowed). - * Same as UTF16_PREV_CHAR. - * \pre start<i<=length - * \post start<=i<length - * - * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h. - */ -#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) - -/** - * Move i backward (towards the beginning of the string) - * to the first code unit of the code point that has code units before i. - * I.e., move i backward by one code point. - * i must point to the first code unit after the last unit of a code point (i==length is allowed). - * Same as UTF16_BACK_1. - * \pre start<i<=length - * \post start<=i<length - * - * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. - */ -#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) - -/** - * Move i backward (towards the beginning of the string) - * to the first code unit of the n code points that have code units before i. - * I.e., move i backward by n code points. - * i must point to the first code unit after the last unit of a code point (i==length is allowed). - * Same as UTF16_BACK_N. - * \pre start<i<=length - * \post start<=i<length - * - * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. 
- */ -#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) - -/** - * Take the random-access index i and adjust it so that it points beyond - * a code point. The input index points beyond any code unit - * of a code point and is moved to point beyond the last code unit of the same - * code point. i is never decremented. - * In other words, if i points to a trail surrogate that is preceded by a matching - * lead surrogate, then i is incremented. Otherwise it is not modified. - * This can be used to start an iteration with UTF_PREV_CHAR() from a random index. - * Same as UTF16_SET_CHAR_LIMIT. - * \pre start<i<=length - * \post start<i<=length - * - * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. - */ -#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) - -#endif /* U_HIDE_DEPRECATED_API */ - -#endif - diff --git a/Source/WTF/icu/unicode/utypes.h b/Source/WTF/icu/unicode/utypes.h deleted file mode 100644 index 8f924c9d1..000000000 --- a/Source/WTF/icu/unicode/utypes.h +++ /dev/null @@ -1,723 +0,0 @@ -/* -********************************************************************** -* Copyright (C) 1996-2012, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* -* FILE NAME : UTYPES.H (formerly ptypes.h) -* -* Date Name Description -* 12/11/96 helena Creation. -* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32, -* uint8, uint16, and uint32. -* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as -* well as C++. -* Modified to use memcpy() for uprv_arrayCopy() fns. -* 04/14/97 aliu Added TPlatformUtilities. -* 05/07/97 aliu Added import/export specifiers (replacing the old -* broken EXT_CLASS). Added version number for our -* code. Cleaned up header. -* 6/20/97 helena Java class name change. 
-* 08/11/98 stephen UErrorCode changed from typedef to enum -* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3 -* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t -* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066) -* 04/20/99 stephen Cleaned up & reworked for autoconf. -* Renamed to utypes.h. -* 05/05/99 stephen Changed to use <inttypes.h> -* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here. -******************************************************************************* -*/ - -#ifndef UTYPES_H -#define UTYPES_H - - -#include "unicode/umachine.h" -#include "unicode/uversion.h" -#include "unicode/uconfig.h" -#include <float.h> - -#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS -# include "unicode/utf.h" -#endif - -/*! - * \file - * \brief Basic definitions for ICU, for both C and C++ APIs - * - * This file defines basic types, constants, and enumerations directly or - * indirectly by including other header files, especially utf.h for the - * basic character and string definitions and umachine.h for consistent - * integer and other types. 
- */ - - -/** - * \def U_SHOW_CPLUSPLUS_API - * @internal - */ -#ifdef __cplusplus -# ifndef U_SHOW_CPLUSPLUS_API -# define U_SHOW_CPLUSPLUS_API 0 -# endif -#else -# undef U_SHOW_CPLUSPLUS_API -# define U_SHOW_CPLUSPLUS_API 0 -#endif - -/** @{ API visibility control */ - -/** - * \def U_HIDE_DRAFT_API - * Define this to 1 to request that draft API be "hidden" - * @internal - */ -/** - * \def U_HIDE_INTERNAL_API - * Define this to 1 to request that internal API be "hidden" - * @internal - */ -#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API) -#define U_HIDE_DRAFT_API 1 -#endif -#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API) -#define U_HIDE_INTERNAL_API 1 -#endif - -/** @} */ - -/*===========================================================================*/ -/* ICUDATA naming scheme */ -/*===========================================================================*/ - -/** - * \def U_ICUDATA_TYPE_LETTER - * - * This is a platform-dependent string containing one letter: - * - b for big-endian, ASCII-family platforms - * - l for little-endian, ASCII-family platforms - * - e for big-endian, EBCDIC-family platforms - * This letter is part of the common data file name. - * @stable ICU 2.0 - */ - -/** - * \def U_ICUDATA_TYPE_LITLETTER - * The non-string form of U_ICUDATA_TYPE_LETTER - * @stable ICU 2.0 - */ -#if U_CHARSET_FAMILY -# if U_IS_BIG_ENDIAN - /* EBCDIC - should always be BE */ -# define U_ICUDATA_TYPE_LETTER "e" -# define U_ICUDATA_TYPE_LITLETTER e -# else -# error "Don't know what to do with little endian EBCDIC!" -# define U_ICUDATA_TYPE_LETTER "x" -# define U_ICUDATA_TYPE_LITLETTER x -# endif -#else -# if U_IS_BIG_ENDIAN - /* Big-endian ASCII */ -# define U_ICUDATA_TYPE_LETTER "b" -# define U_ICUDATA_TYPE_LITLETTER b -# else - /* Little-endian ASCII */ -# define U_ICUDATA_TYPE_LETTER "l" -# define U_ICUDATA_TYPE_LITLETTER l -# endif -#endif - -/** - * A single string literal containing the icudata stub name. i.e. 
'icudt18e' for - * ICU 1.8.x on EBCDIC, etc.. - * @stable ICU 2.0 - */ -#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER -#ifndef U_HIDE_INTERNAL_API -#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */ -#define U_USE_USRDATA 1 /**< @internal */ -#endif /* U_HIDE_INTERNAL_API */ - -/** - * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library. - * Defined as a literal, not a string. - * Tricky Preprocessor use - ## operator replaces macro paramters with the literal string - * from the corresponding macro invocation, _before_ other macro substitutions. - * Need a nested \#defines to get the actual version numbers rather than - * the literal text U_ICU_VERSION_MAJOR_NUM into the name. - * The net result will be something of the form - * \#define U_ICU_ENTRY_POINT icudt19_dat - * @stable ICU 2.4 - */ -#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME) - -#ifndef U_HIDE_INTERNAL_API -/** - * Do not use. Note that it's OK for the 2nd argument to be undefined (literal). - * @internal - */ -#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff) - -/** - * Do not use. - * @internal - */ -#ifndef U_DEF_ICUDATA_ENTRY_POINT -/* affected by symbol renaming. See platform.h */ -#ifndef U_LIB_SUFFIX_C_NAME -#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat -#else -#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat -#endif -#endif -#endif /* U_HIDE_INTERNAL_API */ - -/** - * \def NULL - * Define NULL if necessary, to 0 for C++ and to ((void *)0) for C. 
- * @stable ICU 2.0 - */ -#ifndef NULL -#ifdef __cplusplus -#define NULL 0 -#else -#define NULL ((void *)0) -#endif -#endif - -/*===========================================================================*/ -/* Calendar/TimeZone data types */ -/*===========================================================================*/ - -/** - * Date and Time data type. - * This is a primitive data type that holds the date and time - * as the number of milliseconds since 1970-jan-01, 00:00 UTC. - * UTC leap seconds are ignored. - * @stable ICU 2.0 - */ -typedef double UDate; - -/** The number of milliseconds per second @stable ICU 2.0 */ -#define U_MILLIS_PER_SECOND (1000) -/** The number of milliseconds per minute @stable ICU 2.0 */ -#define U_MILLIS_PER_MINUTE (60000) -/** The number of milliseconds per hour @stable ICU 2.0 */ -#define U_MILLIS_PER_HOUR (3600000) -/** The number of milliseconds per day @stable ICU 2.0 */ -#define U_MILLIS_PER_DAY (86400000) - -/** - * Maximum UDate value - * @stable ICU 4.8 - */ -#define U_DATE_MAX DBL_MAX - -/** - * Minimum UDate value - * @stable ICU 4.8 - */ -#define U_DATE_MIN -U_DATE_MAX - -/*===========================================================================*/ -/* Shared library/DLL import-export API control */ -/*===========================================================================*/ - -/* - * Control of symbol import/export. - * ICU is separated into three libraries. - */ - -/** - * \def U_COMBINED_IMPLEMENTATION - * Set to export library symbols from inside the ICU library - * when all of ICU is in a single library. - * This can be set as a compiler option while building ICU, and it - * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc. - * @stable ICU 2.0 - */ - -/** - * \def U_DATA_API - * Set to export library symbols from inside the stubdata library, - * and to import them from outside. 
- * @stable ICU 3.0 - */ - -/** - * \def U_COMMON_API - * Set to export library symbols from inside the common library, - * and to import them from outside. - * @stable ICU 2.0 - */ - -/** - * \def U_I18N_API - * Set to export library symbols from inside the i18n library, - * and to import them from outside. - * @stable ICU 2.0 - */ - -/** - * \def U_LAYOUT_API - * Set to export library symbols from inside the layout engine library, - * and to import them from outside. - * @stable ICU 2.0 - */ - -/** - * \def U_LAYOUTEX_API - * Set to export library symbols from inside the layout extensions library, - * and to import them from outside. - * @stable ICU 2.6 - */ - -/** - * \def U_IO_API - * Set to export library symbols from inside the ustdio library, - * and to import them from outside. - * @stable ICU 2.0 - */ - -/** - * \def U_TOOLUTIL_API - * Set to export library symbols from inside the toolutil library, - * and to import them from outside. - * @stable ICU 3.4 - */ - -#if defined(U_COMBINED_IMPLEMENTATION) -#define U_DATA_API U_EXPORT -#define U_COMMON_API U_EXPORT -#define U_I18N_API U_EXPORT -#define U_LAYOUT_API U_EXPORT -#define U_LAYOUTEX_API U_EXPORT -#define U_IO_API U_EXPORT -#define U_TOOLUTIL_API U_EXPORT -#elif defined(U_STATIC_IMPLEMENTATION) -#define U_DATA_API -#define U_COMMON_API -#define U_I18N_API -#define U_LAYOUT_API -#define U_LAYOUTEX_API -#define U_IO_API -#define U_TOOLUTIL_API -#elif defined(U_COMMON_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_EXPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_I18N_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_EXPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_LAYOUT_IMPLEMENTATION) -#define U_DATA_API 
U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_EXPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_LAYOUTEX_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_EXPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_IO_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_EXPORT -#define U_TOOLUTIL_API U_IMPORT -#elif defined(U_TOOLUTIL_IMPLEMENTATION) -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_EXPORT -#else -#define U_DATA_API U_IMPORT -#define U_COMMON_API U_IMPORT -#define U_I18N_API U_IMPORT -#define U_LAYOUT_API U_IMPORT -#define U_LAYOUTEX_API U_IMPORT -#define U_IO_API U_IMPORT -#define U_TOOLUTIL_API U_IMPORT -#endif - -/** - * \def U_STANDARD_CPP_NAMESPACE - * Control of C++ Namespace - * @stable ICU 2.0 - */ -#ifdef __cplusplus -#define U_STANDARD_CPP_NAMESPACE :: -#else -#define U_STANDARD_CPP_NAMESPACE -#endif - - -/*===========================================================================*/ -/* Global delete operator */ -/*===========================================================================*/ - -/* - * The ICU4C library must not use the global new and delete operators. - * These operators here are defined to enable testing for this. - * See Jitterbug 2581 for details of why this is necessary. 
- * - * Verification that ICU4C's memory usage is correct, i.e., - * that global new/delete are not used: - * - * a) Check for imports of global new/delete (see uobject.cpp for details) - * b) Verify that new is never imported. - * c) Verify that delete is only imported from object code for interface/mixin classes. - * d) Add global delete and delete[] only for the ICU4C library itself - * and define them in a way that crashes or otherwise easily shows a problem. - * - * The following implements d). - * The operator implementations crash; this is intentional and used for library debugging. - * - * Note: This is currently only done on Windows because - * some Linux/Unix compilers have problems with defining global new/delete. - * On Windows, it is _MSC_VER>=1200 for MSVC 6.0 and higher. - */ -#if defined(__cplusplus) && U_DEBUG && U_OVERRIDE_CXX_ALLOCATION && (_MSC_VER>=1200) && !defined(U_STATIC_IMPLEMENTATION) && (defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION)) - -#ifndef U_HIDE_INTERNAL_API -/** - * Global operator new, defined only inside ICU4C, must not be used. - * Crashes intentionally. - * @internal - */ -inline void * -operator new(size_t /*size*/) { - char *q=NULL; - *q=5; /* break it */ - return q; -} - -#ifdef _Ret_bytecap_ -/* This is only needed to suppress a Visual C++ 2008 warning for operator new[]. */ -_Ret_bytecap_(_Size) -#endif -/** - * Global operator new[], defined only inside ICU4C, must not be used. - * Crashes intentionally. - * @internal - */ -inline void * -operator new[](size_t /*size*/) { - char *q=NULL; - *q=5; /* break it */ - return q; -} - -/** - * Global operator delete, defined only inside ICU4C, must not be used. - * Crashes intentionally. 
- * @internal - */ -inline void -operator delete(void * /*p*/) { - char *q=NULL; - *q=5; /* break it */ -} - -/** - * Global operator delete[], defined only inside ICU4C, must not be used. - * Crashes intentionally. - * @internal - */ -inline void -operator delete[](void * /*p*/) { - char *q=NULL; - *q=5; /* break it */ -} - -#endif /* U_HIDE_INTERNAL_API */ -#endif - -/*===========================================================================*/ -/* UErrorCode */ -/*===========================================================================*/ - -/** - * Error code to replace exception handling, so that the code is compatible with all C++ compilers, - * and to use the same mechanism for C and C++. - * - * \par - * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode - * first test if(U_FAILURE(errorCode)) { return immediately; } - * so that in a chain of such functions the first one that sets an error code - * causes the following ones to not perform any operations. - * - * \par - * Error codes should be tested using U_FAILURE() and U_SUCCESS(). - * @stable ICU 2.0 - */ -typedef enum UErrorCode { - /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird - * and is that way because VC++ debugger displays first encountered constant, - * which is not the what the code is used for - */ - - U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */ - - U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */ - - U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */ - - U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */ - - U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. 
Consider upgrading */ - - U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */ - - U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */ - - U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */ - - U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */ - - U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */ - - U_ERROR_WARNING_LIMIT, /**< This must always be the last warning value to indicate the limit for UErrorCode warnings (last warning code +1) */ - - - U_ZERO_ERROR = 0, /**< No error, no warning. */ - - U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */ - U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */ - U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */ - U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */ - U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */ - U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */ - U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */ - U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */ - U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */ - U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */ - U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */ - U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. 
*/ - U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */ - U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */ - U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ - U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ - U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ - U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illlegal escape sequence */ - U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ - U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ - U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ - U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ - U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ - U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. - It is very possible that a circular alias definition has occured */ - U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ - U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ - U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ - U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */ - U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */ - U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. 
*/ - - U_STANDARD_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for standard errors */ - /* - * the error code range 0x10000 0x10100 are reserved for Transliterator - */ - U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */ - U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */ - U_MALFORMED_RULE, /**< Elements of a rule are misplaced */ - U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/ - U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */ - U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/ - U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */ - U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */ - U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */ - U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */ - U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */ - U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */ - U_MISSING_OPERATOR, /**< A rule contains no operator */ - U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */ - U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */ - U_MULTIPLE_CURSORS, /**< More than one cursor */ - U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */ - U_TRAILING_BACKSLASH, /**< A dangling backslash */ - U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */ - U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */ - U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */ - U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */ - U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */ - U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */ - U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ - 
U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */ - U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ - U_MALFORMED_PRAGMA, /**< A 'use' pragma is invlalid */ - U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ - U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ - U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ - U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */ - U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */ - U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */ - U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */ - U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */ - U_PARSE_ERROR_LIMIT, /**< The limit for Transliterator errors */ - - /* - * the error code range 0x10100 0x10200 are reserved for formatting API parsing error - */ - U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */ - U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */ - U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */ - U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. 
Use U_MULTIPLE_DECIMAL_SEPARATORS */ - U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */ - U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */ - U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */ - U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */ - U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */ - U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */ - U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */ - U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */ - U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */ - U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */ - U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */ - U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */ - U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */ - U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ - U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ - U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ - U_FMT_PARSE_ERROR_LIMIT, /**< The limit for format library errors */ - - /* - * the error code range 0x10200 0x102ff are reserved for Break Iterator related error - */ - U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */ - U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */ - U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */ - U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ - U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ - U_BRK_UNCLOSED_SET, /**< UnicodeSet witing an RBBI rule missing a closing ']'. */ - U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. 
*/ - U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ - U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ - U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */ - U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */ - U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ - U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ - U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ - U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is mal formed */ - U_BRK_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for Break Iterator failures */ - - /* - * The error codes in the range 0x10300-0x103ff are reserved for regular expression related errrs - */ - U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */ - U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */ - U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */ - U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */ - U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */ - U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */ - U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */ - U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */ - U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */ - U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */ - U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */ - U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */ - U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */ - U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. 
*/ - U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/ - U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. */ - U_REGEX_MISSING_CLOSE_BRACKET, /**< Missing closing bracket on a bracket expression. */ - U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */ - U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */ - U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */ - U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */ - U_REGEX_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for regexp errors */ - - /* - * The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes - */ - U_IDNA_PROHIBITED_ERROR=0x10400, - U_IDNA_ERROR_START=0x10400, - U_IDNA_UNASSIGNED_ERROR, - U_IDNA_CHECK_BIDI_ERROR, - U_IDNA_STD3_ASCII_RULES_ERROR, - U_IDNA_ACE_PREFIX_ERROR, - U_IDNA_VERIFICATION_ERROR, - U_IDNA_LABEL_TOO_LONG_ERROR, - U_IDNA_ZERO_LENGTH_LABEL_ERROR, - U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR, - U_IDNA_ERROR_LIMIT, - /* - * Aliases for StringPrep - */ - U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR, - U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR, - U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR, - - /* - * The error code in the range 0x10500-0x105ff are reserved for Plugin related error codes - */ - U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */ - U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. 
*/ - U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */ - U_PLUGIN_ERROR_LIMIT, /**< This must always be the last value to indicate the limit for plugin errors */ - - U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT /**< This must always be the last value to indicate the limit for UErrorCode (last error code +1) */ -} UErrorCode; - -/* Use the following to determine if an UErrorCode represents */ -/* operational success or failure. */ - -#ifdef __cplusplus - /** - * Does the error code indicate success? - * @stable ICU 2.0 - */ - static - inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); } - /** - * Does the error code indicate a failure? - * @stable ICU 2.0 - */ - static - inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); } -#else - /** - * Does the error code indicate success? - * @stable ICU 2.0 - */ -# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR) - /** - * Does the error code indicate a failure? - * @stable ICU 2.0 - */ -# define U_FAILURE(x) ((x)>U_ZERO_ERROR) -#endif - -/** - * Return a string for a UErrorCode value. - * The string will be the same as the name of the error code constant - * in the UErrorCode enum above. - * @stable ICU 2.0 - */ -U_STABLE const char * U_EXPORT2 -u_errorName(UErrorCode code); - - -#endif /* _UTYPES */ diff --git a/Source/WTF/icu/unicode/uvernum.h b/Source/WTF/icu/unicode/uvernum.h deleted file mode 100644 index bd0b0c989..000000000 --- a/Source/WTF/icu/unicode/uvernum.h +++ /dev/null @@ -1,167 +0,0 @@ -/* -******************************************************************************* -* Copyright (C) 2000-2013, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* -* file name: uvernum.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* Created by: Vladimir Weinstein -* Updated by: Steven R. 
Loomis -* -*/ - -/** - * \file - * \brief C API: definitions of ICU version numbers - * - * This file is included by uversion.h and other files. This file contains only - * macros and definitions. The actual version numbers are defined here. - */ - - /* - * IMPORTANT: When updating version, the following things need to be done: - * source/common/unicode/uvernum.h - this file: update major, minor, - * patchlevel, suffix, version, short version constants, namespace, - * renaming macro, and copyright - * - * The following files need to be updated as well, which can be done - * by running the UNIX makefile target 'update-windows-makefiles' in icu/source. - * - * - * source/common/common.vcproj - update 'Output file name' on the link tab so - * that it contains the new major/minor combination - * source/i18n/i18n.vcproj - same as for the common.vcproj - * source/layout/layout.vcproj - same as for the common.vcproj - * source/layoutex/layoutex.vcproj - same - * source/stubdata/stubdata.vcproj - same as for the common.vcproj - * source/io/io.vcproj - same as for the common.vcproj - * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains - * the new major/minor combination and the Unicode version. - */ - -#ifndef UVERNUM_H -#define UVERNUM_H - -/** The standard copyright notice that gets compiled into each library. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.4 - */ -#define U_COPYRIGHT_STRING \ - " Copyright (C) 2013, International Business Machines Corporation and others. All Rights Reserved. " - -/** The current ICU major version as an integer. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.4 - */ -#define U_ICU_VERSION_MAJOR_NUM 52 - -/** The current ICU minor version as an integer. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.6 - */ -#define U_ICU_VERSION_MINOR_NUM 1 - -/** The current ICU patchlevel version as an integer. 
- * This value will change in the subsequent releases of ICU - * @stable ICU 2.4 - */ -#define U_ICU_VERSION_PATCHLEVEL_NUM 0 - -/** The current ICU build level version as an integer. - * This value is for use by ICU clients. It defaults to 0. - * @stable ICU 4.0 - */ -#ifndef U_ICU_VERSION_BUILDLEVEL_NUM -#define U_ICU_VERSION_BUILDLEVEL_NUM 0 -#endif - -/** Glued version suffix for renamers - * This value will change in the subsequent releases of ICU - * @stable ICU 2.6 - */ -#define U_ICU_VERSION_SUFFIX _52 - -/** - * \def U_DEF2_ICU_ENTRY_POINT_RENAME - * @internal - */ -/** - * \def U_DEF_ICU_ENTRY_POINT_RENAME - * @internal - */ -/** Glued version suffix function for renamers - * This value will change in the subsequent releases of ICU. - * If a custom suffix (such as matching library suffixes) is desired, this can be modified. - * Note that if present, platform.h may contain an earlier definition of this macro. - * \def U_ICU_ENTRY_POINT_RENAME - * @stable ICU 4.2 - */ - -#ifndef U_ICU_ENTRY_POINT_RENAME -#ifdef U_HAVE_LIB_SUFFIX -#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z -#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) -#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME) -#else -#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y -#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y) -#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX) -#endif -#endif - -/** The current ICU library version as a dotted-decimal string. The patchlevel - * only appears in this string if it non-zero. - * This value will change in the subsequent releases of ICU - * @stable ICU 2.4 - */ -#define U_ICU_VERSION "52.1" - -/** The current ICU library major/minor version as a string without dots, for library name suffixes. 
- * This value will change in the subsequent releases of ICU - * @stable ICU 2.6 - */ -#define U_ICU_VERSION_SHORT "52" - -#ifndef U_HIDE_INTERNAL_API -/** Data version in ICU4C. - * @internal ICU 4.4 Internal Use Only - **/ -#define U_ICU_DATA_VERSION "52.1" -#endif /* U_HIDE_INTERNAL_API */ - -/*=========================================================================== - * ICU collation framework version information - * Version info that can be obtained from a collator is affected by these - * numbers in a secret and magic way. Please use collator version as whole - *=========================================================================== - */ - -/** - * Collation runtime version (sort key generator, strcoll). - * If the version is different, sort keys for the same string could be different. - * This value may change in subsequent releases of ICU. - * @stable ICU 2.4 - */ -#define UCOL_RUNTIME_VERSION 7 - -/** - * Collation builder code version. - * When this is different, the same tailoring might result - * in assigning different collation elements to code points. - * This value may change in subsequent releases of ICU. - * @stable ICU 2.4 - */ -#define UCOL_BUILDER_VERSION 8 - -/** - * This is the version of collation tailorings. - * This value may change in subsequent releases of ICU. - * @stable ICU 2.4 - */ -#define UCOL_TAILORINGS_VERSION 1 - -#endif diff --git a/Source/WTF/icu/unicode/uversion.h b/Source/WTF/icu/unicode/uversion.h deleted file mode 100644 index 74e309105..000000000 --- a/Source/WTF/icu/unicode/uversion.h +++ /dev/null @@ -1,193 +0,0 @@ -/* -******************************************************************************* -* Copyright (C) 2000-2011, International Business Machines -* Corporation and others. All Rights Reserved. 
-******************************************************************************* -* -* file name: uversion.h -* encoding: US-ASCII -* tab size: 8 (not used) -* indentation:4 -* -* Created by: Vladimir Weinstein -* -* Gets included by utypes.h and Windows .rc files -*/ - -/** - * \file - * \brief C API: API for accessing ICU version numbers. - */ -/*===========================================================================*/ -/* Main ICU version information */ -/*===========================================================================*/ - -#ifndef UVERSION_H -#define UVERSION_H - -#include "unicode/umachine.h" - -/* Actual version info lives in uvernum.h */ -#include "unicode/uvernum.h" - -/** Maximum length of the copyright string. - * @stable ICU 2.4 - */ -#define U_COPYRIGHT_STRING_LENGTH 128 - -/** An ICU version consists of up to 4 numbers from 0..255. - * @stable ICU 2.4 - */ -#define U_MAX_VERSION_LENGTH 4 - -/** In a string, ICU version fields are delimited by dots. - * @stable ICU 2.4 - */ -#define U_VERSION_DELIMITER '.' - -/** The maximum length of an ICU version string. - * @stable ICU 2.4 - */ -#define U_MAX_VERSION_STRING_LENGTH 20 - -/** The binary form of a version on ICU APIs is an array of 4 uint8_t. - * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)). - * @stable ICU 2.4 - */ -typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; - -/*===========================================================================*/ -/* C++ namespace if supported. Versioned unless versioning is disabled. */ -/*===========================================================================*/ - -/** - * \def U_NAMESPACE_BEGIN - * This is used to begin a declaration of a public ICU C++ API. - * When not compiling for C++, it does nothing. - * When compiling for C++, it begins an extern "C++" linkage block (to protect - * against cases in which an external client includes ICU header files inside - * an extern "C" linkage block). 
- * - * It also begins a versioned-ICU-namespace block. - * @stable ICU 2.4 - */ - -/** - * \def U_NAMESPACE_END - * This is used to end a declaration of a public ICU C++ API. - * When not compiling for C++, it does nothing. - * When compiling for C++, it ends the extern "C++" block begun by - * U_NAMESPACE_BEGIN. - * - * It also ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN. - * @stable ICU 2.4 - */ - -/** - * \def U_NAMESPACE_USE - * This is used to specify that the rest of the code uses the - * public ICU C++ API namespace. - * This is invoked by default; we recommend that you turn it off: - * See the "Recommended Build Options" section of the ICU4C readme - * (http://source.icu-project.org/repos/icu/icu/trunk/readme.html#RecBuild) - * @stable ICU 2.4 - */ - -/** - * \def U_NAMESPACE_QUALIFIER - * This is used to qualify that a function or class is part of - * the public ICU C++ API namespace. - * - * This macro is unnecessary since ICU 49 requires namespace support. - * You can just use "icu::" instead. - * @stable ICU 2.4 - */ - -/* Define namespace symbols if the compiler supports it. */ -#ifdef __cplusplus -# if U_DISABLE_RENAMING -# define U_ICU_NAMESPACE icu - namespace U_ICU_NAMESPACE { } -# else -# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu) - namespace U_ICU_NAMESPACE { } - namespace icu = U_ICU_NAMESPACE; -# endif - -# define U_NAMESPACE_BEGIN extern "C++" { namespace U_ICU_NAMESPACE { -# define U_NAMESPACE_END } } -# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE; -# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: - -# ifndef U_USING_ICU_NAMESPACE -# define U_USING_ICU_NAMESPACE 1 -# endif -# if U_USING_ICU_NAMESPACE - U_NAMESPACE_USE -# endif -#else -# define U_NAMESPACE_BEGIN -# define U_NAMESPACE_END -# define U_NAMESPACE_USE -# define U_NAMESPACE_QUALIFIER -#endif - -/*===========================================================================*/ -/* General version helper functions. 
Definitions in putil.c */ -/*===========================================================================*/ - -/** - * Parse a string with dotted-decimal version information and - * fill in a UVersionInfo structure with the result. - * Definition of this function lives in putil.c - * - * @param versionArray The destination structure for the version information. - * @param versionString A string with dotted-decimal version information, - * with up to four non-negative number fields with - * values of up to 255 each. - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -u_versionFromString(UVersionInfo versionArray, const char *versionString); - -/** - * Parse a Unicode string with dotted-decimal version information and - * fill in a UVersionInfo structure with the result. - * Definition of this function lives in putil.c - * - * @param versionArray The destination structure for the version information. - * @param versionString A Unicode string with dotted-decimal version - * information, with up to four non-negative number - * fields with values of up to 255 each. - * @stable ICU 4.2 - */ -U_STABLE void U_EXPORT2 -u_versionFromUString(UVersionInfo versionArray, const UChar *versionString); - - -/** - * Write a string with dotted-decimal version information according - * to the input UVersionInfo. - * Definition of this function lives in putil.c - * - * @param versionArray The version information to be written as a string. - * @param versionString A string buffer that will be filled in with - * a string corresponding to the numeric version - * information in versionArray. - * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH. - * @stable ICU 2.4 - */ -U_STABLE void U_EXPORT2 -u_versionToString(const UVersionInfo versionArray, char *versionString); - -/** - * Gets the ICU release version. The version array stores the version information - * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. 
- * Definition of this function lives in putil.c - * - * @param versionArray the version # information, the result will be filled in - * @stable ICU 2.0 - */ -U_STABLE void U_EXPORT2 -u_getVersion(UVersionInfo versionArray); -#endif |