diff options
Diffstat (limited to 'vendor/icu/include')
31 files changed, 26239 insertions, 0 deletions
diff --git a/vendor/icu/include/unicode/bytestream.h b/vendor/icu/include/unicode/bytestream.h new file mode 100644 index 0000000000..8c03acd638 --- /dev/null +++ b/vendor/icu/include/unicode/bytestream.h @@ -0,0 +1,272 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// Copyright (C) 2009-2012, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2007 Google Inc. All Rights Reserved. +// Author: sanjay@google.com (Sanjay Ghemawat) +// +// Abstract interface that consumes a sequence of bytes (ByteSink). +// +// Used so that we can write a single piece of code that can operate +// on a variety of output string types. +// +// Various implementations of this interface are provided: +// ByteSink: +// CheckedArrayByteSink Write to a flat array, with bounds checking +// StringByteSink Write to an STL string + +// This code is a contribution of Google code, and the style used here is +// a compromise between the original Google code and the ICU coding guidelines. +// For example, data types are ICU-ified (size_t,int->int32_t), +// and API comments doxygen-ified, but function names and behavior are +// as in the original, if possible. +// Assertion-style error handling, not available in ICU, was changed to +// parameter "pinning" similar to UnicodeString. +// +// In addition, this is only a partial port of the original Google code, +// limited to what was needed so far. The (nearly) complete original code +// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib +// (see ICU ticket 6765, r25517). + +#ifndef __BYTESTREAM_H__ +#define __BYTESTREAM_H__ + +/** + * \file + * \brief C++ API: Interface for writing bytes, and implementation classes. + */ + +#include <unicode/utypes.h> +#include <unicode/uobject.h> +#include <unicode/std_string.h> + +U_NAMESPACE_BEGIN + +/** + * A ByteSink can be filled with bytes. 
+ * @stable ICU 4.2 + */ +class U_COMMON_API ByteSink : public UMemory { +public: + /** + * Default constructor. + * @stable ICU 4.2 + */ + ByteSink() { } + /** + * Virtual destructor. + * @stable ICU 4.2 + */ + virtual ~ByteSink(); + + /** + * Append "bytes[0,n-1]" to this. + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* bytes, int32_t n) = 0; + + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *result_capacity. Guarantees *result_capacity>=min_capacity. + * May return a pointer to the caller-owned scratch buffer which must have + * scratch_capacity>=min_capacity. + * The returned buffer is only valid until the next operation + * on this ByteSink. + * + * After writing at most *result_capacity bytes, call Append() with the + * pointer returned from this function and the number of bytes written. + * Many Append() implementations will avoid copying bytes if this function + * returned an internal buffer. + * + * Partial usage example: + * int32_t capacity; + * char* buffer = sink->GetAppendBuffer(..., &capacity); + * ... Write n bytes into buffer, with n <= capacity. + * sink->Append(buffer, n); + * In many implementations, that call to Append will avoid copying bytes. + * + * If the ByteSink allocates or reallocates an internal buffer, it should use + * the desired_capacity_hint if appropriate. + * If a caller cannot provide a reasonable guess at the desired capacity, + * it should pass desired_capacity_hint=0. + * + * If a non-scratch buffer is returned, the caller may only pass + * a prefix to it to Append(). + * That is, it is not correct to pass an interior pointer to Append(). + * + * The default implementation always returns the scratch buffer. 
+ * + * @param min_capacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desired_capacity_hint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratch_capacity capacity of the scratch buffer + * @param result_capacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *result_capacity>=min_capacity + * @stable ICU 4.2 + */ + virtual char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity); + + /** + * Flush internal buffers. + * Some byte sinks use internal buffers or provide buffering + * and require calling Flush() at the end of the stream. + * The ByteSink should be ready for further Append() calls after Flush(). + * The default implementation of Flush() does nothing. + * @stable ICU 4.2 + */ + virtual void Flush(); + +private: + ByteSink(const ByteSink &) = delete; + ByteSink &operator=(const ByteSink &) = delete; +}; + +// ------------------------------------------------------------- +// Some standard implementations + +/** + * Implementation of ByteSink that writes to a flat byte array, + * with bounds-checking: + * This sink will not write more than capacity bytes to outbuf. + * If more than capacity bytes are Append()ed, then excess bytes are ignored, + * and Overflowed() will return true. + * Overflow does not cause a runtime error. + * @stable ICU 4.2 + */ +class U_COMMON_API CheckedArrayByteSink : public ByteSink { +public: + /** + * Constructs a ByteSink that will write to outbuf[0..capacity-1]. + * @param outbuf buffer to write to + * @param capacity size of the buffer + * @stable ICU 4.2 + */ + CheckedArrayByteSink(char* outbuf, int32_t capacity); + /** + * Destructor. 
+ * @stable ICU 4.2 + */ + virtual ~CheckedArrayByteSink(); + /** + * Returns the sink to its original state, without modifying the buffer. + * Useful for reusing both the buffer and the sink for multiple streams. + * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0 + * and Overflowed()=FALSE. + * @return *this + * @stable ICU 4.6 + */ + virtual CheckedArrayByteSink& Reset(); + /** + * Append "bytes[0,n-1]" to this. + * @param bytes the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* bytes, int32_t n) override; + /** + * Returns a writable buffer for appending and writes the buffer's capacity to + * *result_capacity. For details see the base class documentation. + * @param min_capacity required minimum capacity of the returned buffer; + * must be non-negative + * @param desired_capacity_hint desired capacity of the returned buffer; + * must be non-negative + * @param scratch default caller-owned buffer + * @param scratch_capacity capacity of the scratch buffer + * @param result_capacity pointer to an integer which will be set to the + * capacity of the returned buffer + * @return a buffer with *result_capacity>=min_capacity + * @stable ICU 4.2 + */ + virtual char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, int32_t scratch_capacity, + int32_t* result_capacity) override; + /** + * Returns the number of bytes actually written to the sink. + * @return number of bytes written to the buffer + * @stable ICU 4.2 + */ + int32_t NumberOfBytesWritten() const { return size_; } + /** + * Returns true if any bytes were discarded, i.e., if there was an + * attempt to write more than 'capacity' bytes. + * @return TRUE if more than 'capacity' bytes were Append()ed + * @stable ICU 4.2 + */ + UBool Overflowed() const { return overflowed_; } + /** + * Returns the number of bytes appended to the sink. 
+ * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten() + * else they return the same number. + * @return number of bytes appended (exceeds the number written if Overflowed()) + * @stable ICU 4.6 + */ + int32_t NumberOfBytesAppended() const { return appended_; } +private: + char* outbuf_; + const int32_t capacity_; + int32_t size_; + int32_t appended_; + UBool overflowed_; + + CheckedArrayByteSink() = delete; + CheckedArrayByteSink(const CheckedArrayByteSink &) = delete; + CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete; +}; + +/** + * Implementation of ByteSink that writes to a "string". + * The StringClass is usually instantiated with a std::string. + * @stable ICU 4.2 + */ +template<typename StringClass> +class StringByteSink : public ByteSink { + public: + /** + * Constructs a ByteSink that will append bytes to the dest string. + * @param dest pointer to string object to append to + * @stable ICU 4.2 + */ + StringByteSink(StringClass* dest) : dest_(dest) { } +#ifndef U_HIDE_DRAFT_API + /** + * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string. + * + * @param dest pointer to string object to append to + * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d + * @draft ICU 60 + */ + StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) { + if (initialAppendCapacity > 0 && + (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) { + dest->reserve(dest->length() + initialAppendCapacity); + } + } +#endif // U_HIDE_DRAFT_API + /** + * Append "bytes[0,n-1]" to this. 
+ * @param data the pointer to the bytes + * @param n the number of bytes; must be non-negative + * @stable ICU 4.2 + */ + virtual void Append(const char* data, int32_t n) override { dest_->append(data, n); } + private: + StringClass* dest_; + + StringByteSink() = delete; + StringByteSink(const StringByteSink &) = delete; + StringByteSink &operator=(const StringByteSink &) = delete; +}; + +U_NAMESPACE_END + +#endif // __BYTESTREAM_H__ diff --git a/vendor/icu/include/unicode/char16ptr.h b/vendor/icu/include/unicode/char16ptr.h new file mode 100644 index 0000000000..bb6b5fec31 --- /dev/null +++ b/vendor/icu/include/unicode/char16ptr.h @@ -0,0 +1,302 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// char16ptr.h +// created: 2017feb28 Markus W. Scherer + +#ifndef __CHAR16PTR_H__ +#define __CHAR16PTR_H__ + +#include <cstddef> +#include <unicode/utypes.h> + +/** + * \file + * \brief C++ API: char16_t pointer wrappers with + * implicit conversion from bit-compatible raw pointer types. + * Also conversion functions from char16_t * to UChar * and OldUChar *. + */ + +U_NAMESPACE_BEGIN + +/** + * \def U_ALIASING_BARRIER + * Barrier for pointer anti-aliasing optimizations even across function boundaries. + * @internal + */ +#ifdef U_ALIASING_BARRIER + // Use the predefined value. +#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT +# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") +#endif + +/** + * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. + * @stable ICU 59 + */ +class U_COMMON_API Char16Ptr U_FINAL { +public: + /** + * Copies the pointer. + * @param p pointer + * @stable ICU 59 + */ + inline Char16Ptr(char16_t *p); +#if !U_CHAR16_IS_TYPEDEF + /** + * Converts the pointer to char16_t *. 
+ * @param p pointer to be converted + * @stable ICU 59 + */ + inline Char16Ptr(uint16_t *p); +#endif +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @stable ICU 59 + */ + inline Char16Ptr(wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @stable ICU 59 + */ + inline Char16Ptr(std::nullptr_t p); + /** + * Destructor. + * @stable ICU 59 + */ + inline ~Char16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @stable ICU 59 + */ + inline char16_t *get() const; + /** + * char16_t pointer access via type conversion (e.g., static_cast). + * @return the wrapped pointer + * @stable ICU 59 + */ + inline operator char16_t *() const { return get(); } + +private: + Char16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template<typename T> static char16_t *cast(T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast<char16_t *>(t); + } + + char16_t *p_; +#else + union { + char16_t *cp; + uint16_t *up; + wchar_t *wp; + } u_; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {} +#if !U_CHAR16_IS_TYPEDEF +Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {} +#endif +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {} +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {} +Char16Ptr::~Char16Ptr() { + U_ALIASING_BARRIER(p_); +} + +char16_t *Char16Ptr::get() const { return p_; } + +#else + +Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; } +#if !U_CHAR16_IS_TYPEDEF +Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; } +#endif +#if U_SIZEOF_WCHAR_T==2 +Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; } +#endif +Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; } +Char16Ptr::~Char16Ptr() {} + +char16_t *Char16Ptr::get() const { return u_.cp; } + +#endif + +/** + * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer 
types. + * @stable ICU 59 + */ +class U_COMMON_API ConstChar16Ptr U_FINAL { +public: + /** + * Copies the pointer. + * @param p pointer + * @stable ICU 59 + */ + inline ConstChar16Ptr(const char16_t *p); +#if !U_CHAR16_IS_TYPEDEF + /** + * Converts the pointer to const char16_t *. + * @param p pointer to be converted + * @stable ICU 59 + */ + inline ConstChar16Ptr(const uint16_t *p); +#endif +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Converts the pointer to const char16_t *. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * @param p pointer to be converted + * @stable ICU 59 + */ + inline ConstChar16Ptr(const wchar_t *p); +#endif + /** + * nullptr constructor. + * @param p nullptr + * @stable ICU 59 + */ + inline ConstChar16Ptr(const std::nullptr_t p); + + /** + * Destructor. + * @stable ICU 59 + */ + inline ~ConstChar16Ptr(); + + /** + * Pointer access. + * @return the wrapped pointer + * @stable ICU 59 + */ + inline const char16_t *get() const; + /** + * const char16_t pointer access via type conversion (e.g., static_cast). 
+ * @return the wrapped pointer + * @stable ICU 59 + */ + inline operator const char16_t *() const { return get(); } + +private: + ConstChar16Ptr() = delete; + +#ifdef U_ALIASING_BARRIER + template<typename T> static const char16_t *cast(const T *t) { + U_ALIASING_BARRIER(t); + return reinterpret_cast<const char16_t *>(t); + } + + const char16_t *p_; +#else + union { + const char16_t *cp; + const uint16_t *up; + const wchar_t *wp; + } u_; +#endif +}; + +#ifdef U_ALIASING_BARRIER + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {} +#if !U_CHAR16_IS_TYPEDEF +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {} +#endif +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {} +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {} +ConstChar16Ptr::~ConstChar16Ptr() { + U_ALIASING_BARRIER(p_); +} + +const char16_t *ConstChar16Ptr::get() const { return p_; } + +#else + +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; } +#if !U_CHAR16_IS_TYPEDEF +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; } +#endif +#if U_SIZEOF_WCHAR_T==2 +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; } +#endif +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; } +ConstChar16Ptr::~ConstChar16Ptr() {} + +const char16_t *ConstChar16Ptr::get() const { return u_.cp; } + +#endif + +/** + * Converts from const char16_t * to const UChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const UChar * + * @stable ICU 59 + */ +inline const UChar *toUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<const UChar *>(p); +} + +/** + * Converts from char16_t * to UChar *. + * Includes an aliasing barrier if available. 
+ * @param p pointer + * @return p as UChar * + * @stable ICU 59 + */ +inline UChar *toUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<UChar *>(p); +} + +/** + * Converts from const char16_t * to const OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as const OldUChar * + * @stable ICU 59 + */ +inline const OldUChar *toOldUCharPtr(const char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<const OldUChar *>(p); +} + +/** + * Converts from char16_t * to OldUChar *. + * Includes an aliasing barrier if available. + * @param p pointer + * @return p as OldUChar * + * @stable ICU 59 + */ +inline OldUChar *toOldUCharPtr(char16_t *p) { +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast<OldUChar *>(p); +} + +U_NAMESPACE_END + +#endif // __CHAR16PTR_H__ diff --git a/vendor/icu/include/unicode/localpointer.h b/vendor/icu/include/unicode/localpointer.h new file mode 100644 index 0000000000..d981943d52 --- /dev/null +++ b/vendor/icu/include/unicode/localpointer.h @@ -0,0 +1,524 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: localpointer.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009nov13 +* created by: Markus W. Scherer +*/ + +#ifndef __LOCALPOINTER_H__ +#define __LOCALPOINTER_H__ + +/** + * \file + * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code. 
+ * + * These classes are inspired by + * - std::auto_ptr + * - boost::scoped_ptr & boost::scoped_array + * - Taligent Safe Pointers (TOnlyPointerTo) + * + * but none of those provide for all of the goals for ICU smart pointers: + * - Smart pointer owns the object and releases it when it goes out of scope. + * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust. + * - ICU-compatible: No exceptions. + * - Need to be able to orphan/release the pointer and its ownership. + * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects. + * + * For details see http://site.icu-project.org/design/cpp/scoped_ptr + */ + +#include <unicode/utypes.h> + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * "Smart pointer" base class; do not use directly: use LocalPointer etc. + * + * Base class for smart pointer classes that do not throw exceptions. + * + * Do not use this base class directly, since it does not delete its pointer. + * A subclass must implement methods that delete the pointer: + * Destructor and adoptInstead(). + * + * There is no operator T *() provided because the programmer must decide + * whether to use getAlias() (without transfer of ownership) or orphan() + * (with transfer of ownership and NULLing of the pointer). + * + * @see LocalPointer + * @see LocalArray + * @see U_DEFINE_LOCAL_OPEN_POINTER + * @stable ICU 4.4 + */ +template<typename T> +class LocalPointerBase { +public: + /** + * Constructor takes ownership. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + explicit LocalPointerBase(T *p=NULL) : ptr(p) {} + /** + * Destructor deletes the object it owns. + * Subclass must override: Base class does nothing. + * @stable ICU 4.4 + */ + ~LocalPointerBase() { /* delete ptr; */ } + /** + * NULL check. + * @return TRUE if ==NULL + * @stable ICU 4.4 + */ + UBool isNull() const { return ptr==NULL; } + /** + * NULL check. 
+ * @return TRUE if !=NULL + * @stable ICU 4.4 + */ + UBool isValid() const { return ptr!=NULL; } + /** + * Comparison with a simple pointer, so that existing code + * with ==NULL need not be changed. + * @param other simple pointer for comparison + * @return true if this pointer value equals other + * @stable ICU 4.4 + */ + bool operator==(const T *other) const { return ptr==other; } + /** + * Comparison with a simple pointer, so that existing code + * with !=NULL need not be changed. + * @param other simple pointer for comparison + * @return true if this pointer value differs from other + * @stable ICU 4.4 + */ + bool operator!=(const T *other) const { return ptr!=other; } + /** + * Access without ownership change. + * @return the pointer value + * @stable ICU 4.4 + */ + T *getAlias() const { return ptr; } + /** + * Access without ownership change. + * @return the pointer value as a reference + * @stable ICU 4.4 + */ + T &operator*() const { return *ptr; } + /** + * Access without ownership change. + * @return the pointer value + * @stable ICU 4.4 + */ + T *operator->() const { return ptr; } + /** + * Gives up ownership; the internal pointer becomes NULL. + * @return the pointer value; + * caller becomes responsible for deleting the object + * @stable ICU 4.4 + */ + T *orphan() { + T *p=ptr; + ptr=NULL; + return p; + } + /** + * Deletes the object it owns, + * and adopts (takes ownership of) the one passed in. + * Subclass must override: Base class does not delete the object. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + // delete ptr; + ptr=p; + } +protected: + /** + * Actual pointer. + * @internal + */ + T *ptr; +private: + // No comparison operators with other LocalPointerBases. + bool operator==(const LocalPointerBase<T> &other); + bool operator!=(const LocalPointerBase<T> &other); + // No ownership sharing: No copy constructor, no assignment operator. 
+ LocalPointerBase(const LocalPointerBase<T> &other); + void operator=(const LocalPointerBase<T> &other); + // No heap allocation. Use only on the stack. + static void * U_EXPORT2 operator new(size_t size); + static void * U_EXPORT2 operator new[](size_t size); +#if U_HAVE_PLACEMENT_NEW + static void * U_EXPORT2 operator new(size_t, void *ptr); +#endif +}; + +/** + * "Smart pointer" class, deletes objects via the standard C++ delete operator. + * For most methods see the LocalPointerBase base class. + * + * Usage example: + * \code + * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005)); + * int32_t length=s->length(); // 2 + * char16_t lead=s->charAt(0); // 0xd900 + * if(some condition) { return; } // no need to explicitly delete the pointer + * s.adoptInstead(new UnicodeString((char16_t)0xfffc)); + * length=s->length(); // 1 + * // no need to explicitly delete the pointer + * \endcode + * + * @see LocalPointerBase + * @stable ICU 4.4 + */ +template<typename T> +class LocalPointer : public LocalPointerBase<T> { +public: + using LocalPointerBase<T>::operator*; + using LocalPointerBase<T>::operator->; + /** + * Constructor takes ownership. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + explicit LocalPointer(T *p=NULL) : LocalPointerBase<T>(p) {} + /** + * Constructor takes ownership and reports an error if NULL. + * + * This constructor is intended to be used with other-class constructors + * that may report a failure UErrorCode, + * so that callers need to check only for U_FAILURE(errorCode) + * and not also separately for isNull(). 
+ * + * @param p simple pointer to an object that is adopted + * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR + * if p==NULL and no other failure code had been set + * @stable ICU 55 + */ + LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) { + if(p==NULL && U_SUCCESS(errorCode)) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + } + /** + * Move constructor, leaves src with isNull(). + * @param src source smart pointer + * @stable ICU 56 + */ + LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { + src.ptr=NULL; + } + /** + * Destructor deletes the object it owns. + * @stable ICU 4.4 + */ + ~LocalPointer() { + delete LocalPointerBase<T>::ptr; + } + /** + * Move assignment operator, leaves src with isNull(). + * If *this and src are the same object, the owned object is kept and nothing changes. + * @param src source smart pointer + * @return *this + * @stable ICU 56 + */ + LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT { + return moveFrom(src); + } + // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API + /** + * Move assignment, leaves src with isNull(). + * If *this and src are the same object, the owned object is kept and nothing changes. + * + * Can be called explicitly, does not need C++11 support. + * @param src source smart pointer + * @return *this + * @draft ICU 56 + */ + LocalPointer<T> &moveFrom(LocalPointer<T> &src) U_NOEXCEPT { + T *p=src.orphan(); /* detach first so that self-move keeps the object */ + delete LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + return *this; + } + /** + * Swap pointers. + * @param other other smart pointer + * @stable ICU 56 + */ + void swap(LocalPointer<T> &other) U_NOEXCEPT { + T *temp=LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=other.ptr; + other.ptr=temp; + } + /** + * Non-member LocalPointer swap function. 
+ * @param p1 will get p2's pointer + * @param p2 will get p1's pointer + * @stable ICU 56 + */ + friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) U_NOEXCEPT { + p1.swap(p2); + } + /** + * Deletes the object it owns, + * and adopts (takes ownership of) the one passed in. + * @param p simple pointer to an object that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + delete LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + } + /** + * Deletes the object it owns, + * and adopts (takes ownership of) the one passed in. + * + * If U_FAILURE(errorCode), then the current object is retained and the new one deleted. + * + * If U_SUCCESS(errorCode) but the input pointer is NULL, + * then U_MEMORY_ALLOCATION_ERROR is set, + * the current object is deleted, and NULL is set. + * + * @param p simple pointer to an object that is adopted + * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR + * if p==NULL and no other failure code had been set + * @stable ICU 55 + */ + void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) { + if(U_SUCCESS(errorCode)) { + delete LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + if(p==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + } else { + delete p; + } + } +}; + +/** + * "Smart pointer" class, deletes objects via the C++ array delete[] operator. + * For most methods see the LocalPointerBase base class. + * Adds operator[] for array item access. 
+ * + * Usage example: + * \code + * LocalArray<UnicodeString> a(new UnicodeString[2]); + * a[0].append((char16_t)0x61); + * if(some condition) { return; } // no need to explicitly delete the array + * a.adoptInstead(new UnicodeString[4]); + * a[3].append((char16_t)0x62).append((char16_t)0x63).reverse(); + * // no need to explicitly delete the array + * \endcode + * + * @see LocalPointerBase + * @stable ICU 4.4 + */ +template<typename T> +class LocalArray : public LocalPointerBase<T> { +public: + using LocalPointerBase<T>::operator*; + using LocalPointerBase<T>::operator->; + /** + * Constructor takes ownership. + * @param p simple pointer to an array of T objects that is adopted + * @stable ICU 4.4 + */ + explicit LocalArray(T *p=NULL) : LocalPointerBase<T>(p) {} + /** + * Constructor takes ownership and reports an error if NULL. + * + * This constructor is intended to be used with other-class constructors + * that may report a failure UErrorCode, + * so that callers need to check only for U_FAILURE(errorCode) + * and not also separately for isNull(). + * + * @param p simple pointer to an array of T objects that is adopted + * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR + * if p==NULL and no other failure code had been set + * @stable ICU 56 + */ + LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) { + if(p==NULL && U_SUCCESS(errorCode)) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + } + /** + * Move constructor, leaves src with isNull(). + * @param src source smart pointer + * @stable ICU 56 + */ + LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { + src.ptr=NULL; + } + /** + * Destructor deletes the array it owns. + * @stable ICU 4.4 + */ + ~LocalArray() { + delete[] LocalPointerBase<T>::ptr; + } + /** + * Move assignment operator, leaves src with isNull(). + * The behavior is undefined if *this and src are the same object. 
+ * @param src source smart pointer + * @return *this + * @stable ICU 56 + */ + LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT { + return moveFrom(src); + } + // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API + /** + * Move assignment, leaves src with isNull(). + * The behavior is undefined if *this and src are the same object. + * + * Can be called explicitly, does not need C++11 support. + * @param src source smart pointer + * @return *this + * @draft ICU 56 + */ + LocalArray<T> &moveFrom(LocalArray<T> &src) U_NOEXCEPT { + delete[] LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=src.ptr; + src.ptr=NULL; + return *this; + } + /** + * Swap pointers. + * @param other other smart pointer + * @stable ICU 56 + */ + void swap(LocalArray<T> &other) U_NOEXCEPT { + T *temp=LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=other.ptr; + other.ptr=temp; + } + /** + * Non-member LocalArray swap function. + * @param p1 will get p2's pointer + * @param p2 will get p1's pointer + * @stable ICU 56 + */ + friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) U_NOEXCEPT { + p1.swap(p2); + } + /** + * Deletes the array it owns, + * and adopts (takes ownership of) the one passed in. + * @param p simple pointer to an array of T objects that is adopted + * @stable ICU 4.4 + */ + void adoptInstead(T *p) { + delete[] LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + } + /** + * Deletes the array it owns, + * and adopts (takes ownership of) the one passed in. + * + * If U_FAILURE(errorCode), then the current array is retained and the new one deleted. + * + * If U_SUCCESS(errorCode) but the input pointer is NULL, + * then U_MEMORY_ALLOCATION_ERROR is set, + * the current array is deleted, and NULL is set. 
+ * + * @param p simple pointer to an array of T objects that is adopted + * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR + * if p==NULL and no other failure code had been set + * @stable ICU 56 + */ + void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) { + if(U_SUCCESS(errorCode)) { + delete[] LocalPointerBase<T>::ptr; + LocalPointerBase<T>::ptr=p; + if(p==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + } + } else { + delete[] p; + } + } + /** + * Array item access (writable). + * No index bounds check. + * @param i array index + * @return reference to the array item + * @stable ICU 4.4 + */ + T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; } +}; + +/** + * \def U_DEFINE_LOCAL_OPEN_POINTER + * "Smart pointer" definition macro, deletes objects via the closeFunction. + * Defines a subclass of LocalPointerBase which works just + * like LocalPointer<Type> except that this subclass will use the closeFunction + * rather than the C++ delete operator. 
+ * + * Usage example: + * \code + * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode)); + * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(), + * utf8Out, (int32_t)sizeof(utf8Out), + * utf8In, utf8InLength, &errorCode); + * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap + * \endcode + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ + class LocalPointerClassName : public LocalPointerBase<Type> { \ + public: \ + using LocalPointerBase<Type>::operator*; \ + using LocalPointerBase<Type>::operator->; \ + explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \ + LocalPointerClassName(LocalPointerClassName &&src) U_NOEXCEPT \ + : LocalPointerBase<Type>(src.ptr) { \ + src.ptr=NULL; \ + } \ + ~LocalPointerClassName() { if (ptr != NULL) { closeFunction(ptr); } } \ + LocalPointerClassName &operator=(LocalPointerClassName &&src) U_NOEXCEPT { \ + return moveFrom(src); \ + } \ + LocalPointerClassName &moveFrom(LocalPointerClassName &src) U_NOEXCEPT { \ + Type *p=src.orphan(); /* detach first so that self-move keeps the object */ \ + if (ptr != NULL) { closeFunction(ptr); } \ + LocalPointerBase<Type>::ptr=p; \ + return *this; \ + } \ + void swap(LocalPointerClassName &other) U_NOEXCEPT { \ + Type *temp=LocalPointerBase<Type>::ptr; \ + LocalPointerBase<Type>::ptr=other.ptr; \ + other.ptr=temp; \ + } \ + friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \ + p1.swap(p2); \ + } \ + void adoptInstead(Type *p) { \ + if (ptr != NULL) { closeFunction(ptr); } \ + ptr=p; \ + } \ + } + +U_NAMESPACE_END + +#endif /* U_SHOW_CPLUSPLUS_API */ +#endif /* __LOCALPOINTER_H__ */ diff --git a/vendor/icu/include/unicode/platform.h b/vendor/icu/include/unicode/platform.h new file mode 100644 index 0000000000..317d8120d1 --- /dev/null +++ b/vendor/icu/include/unicode/platform.h @@ -0,0 +1,865 @@ +// © 2016 and later: Unicode, Inc. 
and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : platform.h +* +* Date Name Description +* 05/13/98 nos Creation (content moved here from ptypes.h). +* 03/02/99 stephen Added AS400 support. +* 03/30/99 stephen Added Linux support. +* 04/13/99 stephen Reworked for autoconf. +****************************************************************************** +*/ + +#ifndef _PLATFORM_H +#define _PLATFORM_H + +#include <unicode/uconfig.h> +#include <unicode/uvernum.h> + +/** + * \file + * \brief Basic types for the platform. + * + * This file used to be generated by autoconf/configure. + * Starting with ICU 49, platform.h is a normal source file, + * to simplify cross-compiling and working with non-autoconf/make build systems. + * + * When a value in this file does not work on a platform, then please + * try to derive it from the U_PLATFORM value + * (for which we might need a new value constant in rare cases) + * and/or from other macros that are predefined by the compiler + * or defined in standard (POSIX or platform or compiler) headers. + * + * As a temporary workaround, you can add an explicit <code>#define</code> for some macros + * before it is first tested, or add an equivalent -D macro definition + * to the compiler's command line. + * + * Note: Some compilers provide ways to show the predefined macros. + * For example, with gcc you can compile an empty .c file and have the compiler + * print the predefined macros with + * \code + * gcc -E -dM -x c /dev/null | sort + * \endcode + * (You can provide an actual empty .c file rather than /dev/null. + * <code>-x c++</code> is for C++.) + */ + +/** + * Define some things so that they can be documented. 
+ * @internal + */ +#ifdef U_IN_DOXYGEN +/* + * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented. + * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented. + */ + +/* None for now. */ +#endif + +/** + * \def U_PLATFORM + * The U_PLATFORM macro defines the platform we're on. + * + * We used to define one different, value-less macro per platform. + * That made it hard to know the set of relevant platforms and macros, + * and hard to deal with variants of platforms. + * + * Starting with ICU 49, we define platforms as numeric macros, + * with ranges of values for related platforms and their variants. + * The U_PLATFORM macro is set to one of these values. + * + * Historical note from the Solaris Wikipedia article: + * AT&T and Sun collaborated on a project to merge the most popular Unix variants + * on the market at that time: BSD, System V, and Xenix. + * This became Unix System V Release 4 (SVR4). + * + * @internal + */ + +/** Unknown platform. @internal */ +#define U_PF_UNKNOWN 0 +/** Windows @internal */ +#define U_PF_WINDOWS 1000 +/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */ +#define U_PF_MINGW 1800 +/** + * Cygwin. Windows, calls to cygwin1.dll for Posix functions, + * using MSVC or GNU gcc and binutils. + * @internal + */ +#define U_PF_CYGWIN 1900 +/* Reserve 2000 for U_PF_UNIX? */ +/** HP-UX is based on UNIX System V. @internal */ +#define U_PF_HPUX 2100 +/** Solaris is a Unix operating system based on SVR4. @internal */ +#define U_PF_SOLARIS 2600 +/** BSD is a UNIX operating system derivative. @internal */ +#define U_PF_BSD 3000 +/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */ +#define U_PF_AIX 3100 +/** IRIX is based on UNIX System V with BSD extensions. 
@internal */ +#define U_PF_IRIX 3200 +/** + * Darwin is a POSIX-compliant operating system, composed of code developed by Apple, + * as well as code derived from NeXTSTEP, BSD, and other projects, + * built around the Mach kernel. + * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based. + * (Original description modified from WikiPedia.) + * @internal + */ +#define U_PF_DARWIN 3500 +/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */ +#define U_PF_IPHONE 3550 +/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */ +#define U_PF_QNX 3700 +/** Linux is a Unix-like operating system. @internal */ +#define U_PF_LINUX 4000 +/** + * Native Client is pretty close to Linux. + * See https://developer.chrome.com/native-client and + * http://www.chromium.org/nativeclient + * @internal + */ +#define U_PF_BROWSER_NATIVE_CLIENT 4020 +/** Android is based on Linux. @internal */ +#define U_PF_ANDROID 4050 +/** Fuchsia is a POSIX-ish platform. @internal */ +#define U_PF_FUCHSIA 4100 +/* Maximum value for Linux-based platform is 4499 */ +/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ +#define U_PF_OS390 9000 +/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */ +#define U_PF_OS400 9400 + +#ifdef U_PLATFORM + /* Use the predefined value. */ +#elif defined(__MINGW32__) +# define U_PLATFORM U_PF_MINGW +#elif defined(__CYGWIN__) +# define U_PLATFORM U_PF_CYGWIN +#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +# define U_PLATFORM U_PF_WINDOWS +#elif defined(__ANDROID__) +# define U_PLATFORM U_PF_ANDROID + /* Android wchar_t support depends on the API level. 
*/ +# include <android/api-level.h> +#elif defined(__pnacl__) || defined(__native_client__) +# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT +#elif defined(__Fuchsia__) +# define U_PLATFORM U_PF_FUCHSIA +#elif defined(linux) || defined(__linux__) || defined(__linux) +# define U_PLATFORM U_PF_LINUX +#elif defined(__APPLE__) && defined(__MACH__) +# include <TargetConditionals.h> +# if defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE /* variant of TARGET_OS_MAC */ +# define U_PLATFORM U_PF_IPHONE +# else +# define U_PLATFORM U_PF_DARWIN +# endif +#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__) +# if defined(__FreeBSD__) +# include <sys/endian.h> +# endif +# define U_PLATFORM U_PF_BSD +#elif defined(sun) || defined(__sun) + /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */ +# define U_PLATFORM U_PF_SOLARIS +# if defined(__GNUC__) + /* Solaris/GCC needs this header file to get the proper endianness. Normally, this + * header file is included with stddef.h but on Solaris/GCC, the GCC version of stddef.h + * is included which does not include this header file. + */ +# include <sys/isa_defs.h> +# endif +#elif defined(_AIX) || defined(__TOS_AIX__) +# define U_PLATFORM U_PF_AIX +#elif defined(_hpux) || defined(hpux) || defined(__hpux) +# define U_PLATFORM U_PF_HPUX +#elif defined(sgi) || defined(__sgi) +# define U_PLATFORM U_PF_IRIX +#elif defined(__QNX__) || defined(__QNXNTO__) +# define U_PLATFORM U_PF_QNX +#elif defined(__TOS_MVS__) +# define U_PLATFORM U_PF_OS390 +#elif defined(__OS400__) || defined(__TOS_OS400__) +# define U_PLATFORM U_PF_OS400 +#else +# define U_PLATFORM U_PF_UNKNOWN +#endif + +/** + * \def UPRV_INCOMPLETE_CPP11_SUPPORT + * This switch turns off ICU 60 NumberFormatter code. + * By default, this switch is enabled on AIX, z/OS and Solaris, + * which have poor C++11 support. + * + * NOTE: This switch is intended to be temporary; see #13393. 
+ * + * @internal + */ +#ifndef UPRV_INCOMPLETE_CPP11_SUPPORT +# define UPRV_INCOMPLETE_CPP11_SUPPORT (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_SOLARIS ) +#endif + +/** + * \def CYGWINMSVC + * Defined if this is Windows with Cygwin, but using MSVC rather than gcc. + * Otherwise undefined. + * @internal + */ +/* Commented out because this is already set in mh-cygwin-msvc +#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER) +# define CYGWINMSVC +#endif +*/ + +/** + * \def U_PLATFORM_USES_ONLY_WIN32_API + * Defines whether the platform uses only the Win32 API. + * Set to 1 for Windows/MSVC and MinGW but not Cygwin. + * @internal + */ +#ifdef U_PLATFORM_USES_ONLY_WIN32_API + /* Use the predefined value. */ +#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC) +# define U_PLATFORM_USES_ONLY_WIN32_API 1 +#else + /* Cygwin implements POSIX. */ +# define U_PLATFORM_USES_ONLY_WIN32_API 0 +#endif + +/** + * \def U_PLATFORM_HAS_WIN32_API + * Defines whether the Win32 API is available on the platform. + * Set to 1 for Windows/MSVC, MinGW and Cygwin. + * @internal + */ +#ifdef U_PLATFORM_HAS_WIN32_API + /* Use the predefined value. */ +#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +# define U_PLATFORM_HAS_WIN32_API 1 +#else +# define U_PLATFORM_HAS_WIN32_API 0 +#endif + +/** + * \def U_PLATFORM_HAS_WINUWP_API + * Defines whether target is intended for Universal Windows Platform API + * Set to 1 for Windows10 Release Solution Configuration + * @internal + */ +#ifdef U_PLATFORM_HAS_WINUWP_API + /* Use the predefined value. */ +#else +# define U_PLATFORM_HAS_WINUWP_API 0 +#endif + +/** + * \def U_PLATFORM_IMPLEMENTS_POSIX + * Defines whether the platform implements (most of) the POSIX API. + * Set to 1 for Cygwin and most other platforms. + * @internal + */ +#ifdef U_PLATFORM_IMPLEMENTS_POSIX + /* Use the predefined value. 
*/ +#elif U_PLATFORM_USES_ONLY_WIN32_API +# define U_PLATFORM_IMPLEMENTS_POSIX 0 +#else +# define U_PLATFORM_IMPLEMENTS_POSIX 1 +#endif + +/** + * \def U_PLATFORM_IS_LINUX_BASED + * Defines whether the platform is Linux or one of its derivatives. + * @internal + */ +#ifdef U_PLATFORM_IS_LINUX_BASED + /* Use the predefined value. */ +#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499 +# define U_PLATFORM_IS_LINUX_BASED 1 +#else +# define U_PLATFORM_IS_LINUX_BASED 0 +#endif + +/** + * \def U_PLATFORM_IS_DARWIN_BASED + * Defines whether the platform is Darwin or one of its derivatives. + * @internal + */ +#ifdef U_PLATFORM_IS_DARWIN_BASED + /* Use the predefined value. */ +#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE +# define U_PLATFORM_IS_DARWIN_BASED 1 +#else +# define U_PLATFORM_IS_DARWIN_BASED 0 +#endif + +/** + * \def U_HAVE_STDINT_H + * Defines whether stdint.h is available. It is a C99 standard header. + * We used to include inttypes.h which includes stdint.h but we usually do not need + * the additional definitions from inttypes.h. + * @internal + */ +#ifdef U_HAVE_STDINT_H + /* Use the predefined value. */ +#elif U_PLATFORM_USES_ONLY_WIN32_API +# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600) + /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */ +# define U_HAVE_STDINT_H 1 +# else +# define U_HAVE_STDINT_H 0 +# endif +#elif U_PLATFORM == U_PF_SOLARIS + /* Solaris has inttypes.h but not stdint.h. */ +# define U_HAVE_STDINT_H 0 +#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) + /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ +# define U_HAVE_STDINT_H 0 +#else +# define U_HAVE_STDINT_H 1 +#endif + +/** + * \def U_HAVE_INTTYPES_H + * Defines whether inttypes.h is available. It is a C99 standard header. + * We include inttypes.h where it is available but stdint.h is not. 
+ * @internal + */ +#ifdef U_HAVE_INTTYPES_H + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_SOLARIS + /* Solaris has inttypes.h but not stdint.h. */ +# define U_HAVE_INTTYPES_H 1 +#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER) + /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */ +# define U_HAVE_INTTYPES_H 1 +#else + /* Most platforms have both inttypes.h and stdint.h, or neither. */ +# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H +#endif + +/*===========================================================================*/ +/** @{ Compiler and environment features */ +/*===========================================================================*/ + +/** + * \def U_GCC_MAJOR_MINOR + * Indicates whether the compiler is gcc (test for != 0), + * and if so, contains its major (times 100) and minor version numbers. + * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0. + * + * For example, for testing for whether we have gcc, and whether it's 4.6 or higher, + * use "#if U_GCC_MAJOR_MINOR >= 406". + * @internal + */ +#ifdef __GNUC__ +# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__) +#else +# define U_GCC_MAJOR_MINOR 0 +#endif + +/** + * \def U_IS_BIG_ENDIAN + * Determines the endianness of the platform. + * @internal + */ +#ifdef U_IS_BIG_ENDIAN + /* Use the predefined value. */ +#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) +# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN) +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) + /* gcc */ +# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN) +# define U_IS_BIG_ENDIAN 1 +#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN) +# define U_IS_BIG_ENDIAN 0 +#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__) + /* These platforms do not appear to predefine any endianness macros. 
*/ +# define U_IS_BIG_ENDIAN 1 +#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0) + /* HPPA do not appear to predefine any endianness macros. */ +# define U_IS_BIG_ENDIAN 1 +#elif defined(sparc) || defined(__sparc) || defined(__sparc__) + /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */ +# define U_IS_BIG_ENDIAN 1 +#else +# define U_IS_BIG_ENDIAN 0 +#endif + +/** + * \def U_HAVE_PLACEMENT_NEW + * Determines whether to override placement new and delete for STL. + * @stable ICU 2.6 + */ +#ifdef U_HAVE_PLACEMENT_NEW + /* Use the predefined value. */ +#elif defined(__BORLANDC__) +# define U_HAVE_PLACEMENT_NEW 0 +#else +# define U_HAVE_PLACEMENT_NEW 1 +#endif + +/** + * \def U_HAVE_DEBUG_LOCATION_NEW + * Define this to define the MFC debug version of the operator new. + * + * @stable ICU 3.4 + */ +#ifdef U_HAVE_DEBUG_LOCATION_NEW + /* Use the predefined value. */ +#elif defined(_MSC_VER) +# define U_HAVE_DEBUG_LOCATION_NEW 1 +#else +# define U_HAVE_DEBUG_LOCATION_NEW 0 +#endif + +/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */ +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif +#ifndef __has_cpp_attribute +# define __has_cpp_attribute(x) 0 +#endif +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif +#ifndef __has_feature +# define __has_feature(x) 0 +#endif +#ifndef __has_extension +# define __has_extension(x) 0 +#endif +#ifndef __has_warning +# define __has_warning(x) 0 +#endif + +/** + * \def U_MALLOC_ATTR + * Attribute to mark functions as malloc-like + * @internal + */ +#if defined(__GNUC__) && __GNUC__>=3 +# define U_MALLOC_ATTR __attribute__ ((__malloc__)) +#else +# define U_MALLOC_ATTR +#endif + +/** + * \def U_ALLOC_SIZE_ATTR + * Attribute to specify the size of the allocated buffer for malloc-like functions + * @internal + */ +#if (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || 
__has_attribute(alloc_size) +# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X))) +# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y))) +#else +# define U_ALLOC_SIZE_ATTR(X) +# define U_ALLOC_SIZE_ATTR2(X,Y) +#endif + +/** + * \def U_CPLUSPLUS_VERSION + * 0 if no C++; 1, 11, 14, ... if C++. + * Support for specific features cannot always be determined by the C++ version alone. + * @internal + */ +#ifdef U_CPLUSPLUS_VERSION +# if U_CPLUSPLUS_VERSION != 0 && !defined(__cplusplus) +# undef U_CPLUSPLUS_VERSION +# define U_CPLUSPLUS_VERSION 0 +# endif + /* Otherwise use the predefined value. */ +#elif !defined(__cplusplus) +# define U_CPLUSPLUS_VERSION 0 +#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L) +# define U_CPLUSPLUS_VERSION 14 +#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L) +# define U_CPLUSPLUS_VERSION 11 +#else + // C++98 or C++03 +# define U_CPLUSPLUS_VERSION 1 +#endif + +#if (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) +// add in std::nullptr_t +namespace std { + typedef decltype(nullptr) nullptr_t; +}; +#endif + +/** + * \def U_NOEXCEPT + * "noexcept" if supported, otherwise empty. + * Some code, especially STL containers, uses move semantics of objects only + * if the move constructor and the move operator are declared as not throwing exceptions. + * @internal + */ +#ifdef U_NOEXCEPT + /* Use the predefined value. */ +#elif defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS /* Visual Studio */ +# define U_NOEXCEPT +#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_noexcept) || __has_extension(cxx_noexcept) \ + || (defined(_MSC_VER) && _MSC_VER >= 1900) /* Visual Studio 2015 */ +# define U_NOEXCEPT noexcept +#else +# define U_NOEXCEPT +#endif + +/** + * \def U_FALLTHROUGH + * Annotate intentional fall-through between switch labels. 
+ * http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough + * @internal + */ +#ifndef __cplusplus + // Not for C. +#elif defined(U_FALLTHROUGH) + // Use the predefined value. +#elif defined(__clang__) + // Test for compiler vs. feature separately. + // Other compilers might choke on the feature test. +# if __has_cpp_attribute(clang::fallthrough) || \ + (__has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")) +# define U_FALLTHROUGH [[clang::fallthrough]] +# endif +#endif + +#ifndef U_FALLTHROUGH +# define U_FALLTHROUGH +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Character data types */ +/*===========================================================================*/ + +/** + * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform. + * @stable ICU 2.0 + */ +#define U_ASCII_FAMILY 0 + +/** + * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform. + * @stable ICU 2.0 + */ +#define U_EBCDIC_FAMILY 1 + +/** + * \def U_CHARSET_FAMILY + * + * <p>These definitions allow to specify the encoding of text + * in the char data type as defined by the platform and the compiler. + * It is enough to determine the code point values of "invariant characters", + * which are the ones shared by all encodings that are in use + * on a given platform.</p> + * + * <p>Those "invariant characters" should be all the uppercase and lowercase + * latin letters, the digits, the space, and "basic punctuation". + * Also, '\\n', '\\r', '\\t' should be available.</p> + * + * <p>The list of "invariant characters" is:<br> + * \code + * A-Z a-z 0-9 SPACE " % & ' ( ) * + , - . / : ; < = > ? 
_ + * \endcode + * <br> + * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p> + * + * <p>This matches the IBM Syntactic Character Set (CS 640).</p> + * + * <p>In other words, all the graphic characters in 7-bit ASCII should + * be safely accessible except the following:</p> + * + * \code + * '\' <backslash> + * '[' <left bracket> + * ']' <right bracket> + * '{' <left brace> + * '}' <right brace> + * '^' <circumflex> + * '~' <tilde> + * '!' <exclamation mark> + * '#' <number sign> + * '|' <vertical line> + * '$' <dollar sign> + * '@' <commercial at> + * '`' <grave accent> + * \endcode + * @stable ICU 2.0 + */ +#ifdef U_CHARSET_FAMILY + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB) +# define U_CHARSET_FAMILY U_EBCDIC_FAMILY +#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__) +# define U_CHARSET_FAMILY U_EBCDIC_FAMILY +#else +# define U_CHARSET_FAMILY U_ASCII_FAMILY +#endif + +/** + * \def U_CHARSET_IS_UTF8 + * + * Hardcode the default charset to UTF-8. + * + * If this is set to 1, then + * - ICU will assume that all non-invariant char*, StringPiece, std::string etc. + * contain UTF-8 text, regardless of what the system API uses + * - some ICU code will use fast functions like u_strFromUTF8() + * rather than the more general and more heavy-weight conversion API (ucnv.h) + * - ucnv_getDefaultName() always returns "UTF-8" + * - ucnv_setDefaultName() is disabled and will not change the default charset + * - static builds of ICU are smaller + * - more functionality is available with the UCONFIG_NO_CONVERSION build-time + * configuration option (see unicode/uconfig.h) + * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable + * + * @stable ICU 4.2 + * @see UCONFIG_NO_CONVERSION + */ +#ifdef U_CHARSET_IS_UTF8 + /* Use the predefined value. 
*/ +#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED +# define U_CHARSET_IS_UTF8 1 +#else +# define U_CHARSET_IS_UTF8 0 +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Information about wchar support */ +/*===========================================================================*/ + +/** + * \def U_HAVE_WCHAR_H + * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default. + * + * @stable ICU 2.0 + */ +#ifdef U_HAVE_WCHAR_H + /* Use the predefined value. */ +#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9 + /* + * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t. + * The type and header existed, but the library functions did not work as expected. + * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway. + */ +# define U_HAVE_WCHAR_H 0 +#else +# define U_HAVE_WCHAR_H 1 +#endif + +/** + * \def U_SIZEOF_WCHAR_T + * U_SIZEOF_WCHAR_T==sizeof(wchar_t) + * + * @stable ICU 2.0 + */ +#ifdef U_SIZEOF_WCHAR_T + /* Use the predefined value. */ +#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9) + /* + * Android before Gingerbread (Android 2.3, API level 9) had a wchar_t of size 1; + * see the note at U_HAVE_WCHAR_H. + */ +# define U_SIZEOF_WCHAR_T 1 +#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN +# define U_SIZEOF_WCHAR_T 2 +#elif U_PLATFORM == U_PF_AIX + /* + * AIX 6.1 information, section "Wide character data representation": + * "... the wchar_t datatype is 32-bit in the 64-bit environment and + * 16-bit in the 32-bit environment." + * and + * "All locales use Unicode for their wide character code values (process code), + * except the IBM-eucTW codeset." 
+ */ +# ifdef __64BIT__ +# define U_SIZEOF_WCHAR_T 4 +# else +# define U_SIZEOF_WCHAR_T 2 +# endif +#elif U_PLATFORM == U_PF_OS390 + /* + * z/OS V1R11 information center, section "LP64 | ILP32": + * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes. + * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes." + */ +# ifdef _LP64 +# define U_SIZEOF_WCHAR_T 4 +# else +# define U_SIZEOF_WCHAR_T 2 +# endif +#elif U_PLATFORM == U_PF_OS400 +# if defined(__UTF32__) + /* + * LOCALETYPE(*LOCALEUTF) is specified. + * Wide-character strings are in UTF-32, + * narrow-character strings are in UTF-8. + */ +# define U_SIZEOF_WCHAR_T 4 +# elif defined(__UCS2__) + /* + * LOCALETYPE(*LOCALEUCS2) is specified. + * Wide-character strings are in UCS-2, + * narrow-character strings are in EBCDIC. + */ +# define U_SIZEOF_WCHAR_T 2 +#else + /* + * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified. + * Wide-character strings are in 16-bit EBCDIC, + * narrow-character strings are in EBCDIC. + */ +# define U_SIZEOF_WCHAR_T 2 +# endif +#else +# define U_SIZEOF_WCHAR_T 4 +#endif + +#ifndef U_HAVE_WCSCPY +#define U_HAVE_WCSCPY U_HAVE_WCHAR_H +#endif + +/** @} */ + +/** + * \def U_HAVE_CHAR16_T + * Defines whether the char16_t type is available for UTF-16 + * and u"abc" UTF-16 string literals are supported. + * This is a new standard type and standard string literal syntax in C++0x + * but has been available in some compilers before. + * @internal + */ +#ifdef U_HAVE_CHAR16_T + /* Use the predefined value. */ +#else + /* + * Notes: + * Visual Studio 2010 (_MSC_VER==1600) defines char16_t as a typedef + * and does not support u"abc" string literals. + * Visual Studio 2015 (_MSC_VER>=1900) and above adds support for + * both char16_t and u"abc" string literals. + * gcc 4.4 defines the __CHAR16_TYPE__ macro to a usable type but + * does not support u"abc" string literals. 
+ * C++11 and C11 require support for UTF-16 literals + * TODO: Fix for plain C. Doesn't work on Mac. + */ +# if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) +# define U_HAVE_CHAR16_T 1 +# else +# define U_HAVE_CHAR16_T 0 +# endif +#endif + +/** + * @{ + * \def U_DECLARE_UTF16 + * Do not use this macro because it is not defined on all platforms. + * Use the UNICODE_STRING or U_STRING_DECL macros instead. + * @internal + */ +#ifdef U_DECLARE_UTF16 + /* Use the predefined value. */ +#elif U_HAVE_CHAR16_T \ + || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \ + || (defined(__HP_aCC) && __HP_aCC >= 035000) \ + || (defined(__HP_cc) && __HP_cc >= 111106) +# define U_DECLARE_UTF16(string) u ## string +#elif U_SIZEOF_WCHAR_T == 2 \ + && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__))) +# define U_DECLARE_UTF16(string) L ## string +#else + /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */ +#endif + +/** @} */ + +/*===========================================================================*/ +/** @{ Symbol import-export control */ +/*===========================================================================*/ + +#ifdef U_EXPORT + /* Use the predefined value. */ +#elif defined(U_STATIC_IMPLEMENTATION) +# define U_EXPORT +#elif defined(__GNUC__) +# define U_EXPORT __attribute__((visibility("default"))) +#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \ + || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) +# define U_EXPORT __global +/*#elif defined(__HP_aCC) || defined(__HP_cc) +# define U_EXPORT __declspec(dllexport)*/ +#elif defined(_MSC_VER) +# define U_EXPORT __declspec(dllexport) +#else +# define U_EXPORT +#endif + +/* U_CALLCONV is related to U_EXPORT2 */ +#ifdef U_EXPORT2 + /* Use the predefined value. */ +#elif defined(_MSC_VER) +# define U_EXPORT2 __cdecl +#else +# define U_EXPORT2 +#endif + +#ifdef U_IMPORT + /* Use the predefined value. 
*/ +#elif defined(_MSC_VER) + /* Windows needs to export/import data. */ +# define U_IMPORT __declspec(dllimport) +#else +# define U_IMPORT +#endif + +/** + * \def U_CALLCONV + * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary + * in callback function typedefs to make sure that the calling convention + * is compatible. + * + * This is only used for non-ICU-API functions. + * When a function is a public ICU API, + * you must use the U_CAPI and U_EXPORT2 qualifiers. + * + * Please note, you need to use U_CALLCONV after the *. + * + * NO : "static const char U_CALLCONV *func( . . . )" + * YES: "static const char* U_CALLCONV func( . . . )" + * + * @stable ICU 2.0 + */ +#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) +# define U_CALLCONV __cdecl +#else +# define U_CALLCONV U_EXPORT2 +#endif + +/** + * \def U_CALLCONV_FPTR + * Similar to U_CALLCONV, but only used on function pointers. + * @internal + */ +#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) +# define U_CALLCONV_FPTR U_CALLCONV +#else +# define U_CALLCONV_FPTR +#endif +/* @} */ + +#endif diff --git a/vendor/icu/include/unicode/ptypes.h b/vendor/icu/include/unicode/ptypes.h new file mode 100644 index 0000000000..252da5519e --- /dev/null +++ b/vendor/icu/include/unicode/ptypes.h @@ -0,0 +1,128 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : ptypes.h +* +* Date Name Description +* 05/13/98 nos Creation (content moved here from ptypes.h). +* 03/02/99 stephen Added AS400 support. +* 03/30/99 stephen Added Linux support. +* 04/13/99 stephen Reworked for autoconf. 
+* 09/18/08 srl Moved basic types back to ptypes.h from platform.h +****************************************************************************** +*/ + +/** + * \file + * \brief C API: Definitions of integer types of various widths + */ + +#ifndef _PTYPES_H +#define _PTYPES_H + +/** + * \def __STDC_LIMIT_MACROS + * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations + * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested. + * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code + * that uses such limit macros. + * @internal + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS +#endif + +/* NULL, size_t, wchar_t */ +#include <stddef.h> + +/* + * If all compilers provided all of the C99 headers and types, + * we would just unconditionally #include <stdint.h> here + * and not need any of the stuff after including platform.h. + */ + +/* Find out if we have stdint.h etc. */ +#include <unicode/platform.h> + +/*===========================================================================*/ +/* Generic data types */ +/*===========================================================================*/ + +/* If your platform does not have the <stdint.h> header, you may + need to edit the typedefs in the #else section below. + Use #if...#else...#endif with predefined compiler macros if possible. */ +#if U_HAVE_STDINT_H + +/* + * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>. + * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc. + * which we almost never use, plus stuff like imaxabs() which we never use. + */ +#include <stdint.h> + +#if U_PLATFORM == U_PF_OS390 +/* The features header is needed to get (u)int64_t sometimes. */ +#include <features.h> +/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). 
*/ +#if !defined(__uint8_t) +#define __uint8_t 1 +typedef unsigned char uint8_t; +#endif +#endif /* U_PLATFORM == U_PF_OS390 */ + +#elif U_HAVE_INTTYPES_H + +# include <inttypes.h> + +#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */ + +#if ! U_HAVE_INT8_T +typedef signed char int8_t; +#endif + +#if ! U_HAVE_UINT8_T +typedef unsigned char uint8_t; +#endif + +#if ! U_HAVE_INT16_T +typedef signed short int16_t; +#endif + +#if ! U_HAVE_UINT16_T +typedef unsigned short uint16_t; +#endif + +#if ! U_HAVE_INT32_T +typedef signed int int32_t; +#endif + +#if ! U_HAVE_UINT32_T +typedef unsigned int uint32_t; +#endif + +#if ! U_HAVE_INT64_T +#ifdef _MSC_VER + typedef signed __int64 int64_t; +#else + typedef signed long long int64_t; +#endif +#endif + +#if ! U_HAVE_UINT64_T +#ifdef _MSC_VER + typedef unsigned __int64 uint64_t; +#else + typedef unsigned long long uint64_t; +#endif +#endif + +#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */ + +#endif /* _PTYPES_H */ diff --git a/vendor/icu/include/unicode/putil.h b/vendor/icu/include/unicode/putil.h new file mode 100644 index 0000000000..4975c4abf9 --- /dev/null +++ b/vendor/icu/include/unicode/putil.h @@ -0,0 +1,183 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1997-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* FILE NAME : putil.h +* +* Date Name Description +* 05/14/98 nos Creation (content moved here from utypes.h). +* 06/17/99 erm Added IEEE_754 +* 07/22/98 stephen Added IEEEremainder, max, min, trunc +* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity +* 08/24/98 stephen Added longBitsFromDouble +* 03/02/99 stephen Removed openFile(). Added AS400 support. 
+* 04/15/99 stephen Converted to C +* 11/15/99 helena Integrated S/390 changes for IEEE support. +* 01/11/00 helena Added u_getVersion. +****************************************************************************** +*/ + +#ifndef PUTIL_H +#define PUTIL_H + +#include <unicode/utypes.h> + /** + * \file + * \brief C API: Platform Utilities + */ + +/*==========================================================================*/ +/* Platform utilities */ +/*==========================================================================*/ + +/** + * Platform utilities isolates the platform dependencies of the + * library. For each platform which this code is ported to, these + * functions may have to be re-implemented. + */ + +/** + * Return the ICU data directory. + * The data directory is where common format ICU data files (.dat files) + * are loaded from. Note that normal use of the built-in ICU + * facilities does not require loading of an external data file; + * unless you are adding custom data to ICU, the data directory + * does not need to be set. + * + * The data directory is determined as follows: + * If u_setDataDirectory() has been called, that is it, otherwise + * if the ICU_DATA environment variable is set, use that, otherwise + * If a data directory was specified at ICU build time + * <code> + * \code + * #define ICU_DATA_DIR "path" + * \endcode + * </code> use that, + * otherwise no data directory is available. + * + * @return the data directory, or an empty string ("") if no data directory has + * been specified. + * + * @stable ICU 2.0 + */ +U_STABLE const char* U_EXPORT2 u_getDataDirectory(void); + + +/** + * Set the ICU data directory. + * The data directory is where common format ICU data files (.dat files) + * are loaded from. Note that normal use of the built-in ICU + * facilities does not require loading of an external data file; + * unless you are adding custom data to ICU, the data directory + * does not need to be set. 
+ * + * This function should be called at most once in a process, before the + * first ICU operation (e.g., u_init()) that will require the loading of an + * ICU data file. + * This function is not thread-safe. Use it before calling ICU APIs from + * multiple threads. + * + * @param directory The directory to be set. + * + * @see u_init + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 u_setDataDirectory(const char *directory); + +#ifndef U_HIDE_INTERNAL_API +/** + * Return the time zone files override directory, or an empty string if + * no directory was specified. Certain time zone resources will be preferentially + * loaded from individual files in this directory. + * + * @return the time zone data override directory. + * @internal + */ +U_INTERNAL const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status); + +/** + * Set the time zone files override directory. + * This function is not thread safe; it must not be called concurrently with + * u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions. + * This function should only be called before using any ICU service that + * will access the time zone data. + * @internal + */ +U_INTERNAL void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status); +#endif /* U_HIDE_INTERNAL_API */ + + +/** + * @{ + * Filesystem file and path separator characters. + * Example: '/' and ':' on Unix, '\\' and ';' on Windows. + * @stable ICU 2.0 + */ +#if U_PLATFORM_USES_ONLY_WIN32_API +# define U_FILE_SEP_CHAR '\\' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ';' +# define U_FILE_SEP_STRING "\\" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ";" +#else +# define U_FILE_SEP_CHAR '/' +# define U_FILE_ALT_SEP_CHAR '/' +# define U_PATH_SEP_CHAR ':' +# define U_FILE_SEP_STRING "/" +# define U_FILE_ALT_SEP_STRING "/" +# define U_PATH_SEP_STRING ":" +#endif + +/** @} */ + +/** + * Convert char characters to UChar characters. 
+ * This utility function is useful only for "invariant characters" + * that are encoded in the platform default encoding. + * They are a small, constant subset of the encoding and include + * just the latin letters, digits, and some punctuation. + * For details, see U_CHARSET_FAMILY. + * + * @param cs Input string, points to <code>length</code> + * character bytes from a subset of the platform encoding. + * @param us Output string, points to memory for <code>length</code> + * Unicode characters. + * @param length The number of characters to convert; this may + * include the terminating <code>NUL</code>. + * + * @see U_CHARSET_FAMILY + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_charsToUChars(const char *cs, UChar *us, int32_t length); + +/** + * Convert UChar characters to char characters. + * This utility function is useful only for "invariant characters" + * that can be encoded in the platform default encoding. + * They are a small, constant subset of the encoding and include + * just the latin letters, digits, and some punctuation. + * For details, see U_CHARSET_FAMILY. + * + * @param us Input string, points to <code>length</code> + * Unicode characters that can be encoded with the + * codepage-invariant subset of the platform encoding. + * @param cs Output string, points to memory for <code>length</code> + * character bytes. + * @param length The number of characters to convert; this may + * include the terminating <code>NUL</code>. + * + * @see U_CHARSET_FAMILY + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_UCharsToChars(const UChar *us, char *cs, int32_t length); + +#endif diff --git a/vendor/icu/include/unicode/rep.h b/vendor/icu/include/unicode/rep.h new file mode 100644 index 0000000000..b4fa87d32b --- /dev/null +++ b/vendor/icu/include/unicode/rep.h @@ -0,0 +1,263 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +************************************************************************** +* Copyright (C) 1999-2012, International Business Machines Corporation and +* others. All Rights Reserved. +************************************************************************** +* Date Name Description +* 11/17/99 aliu Creation. Ported from java. Modified to +* match current UnicodeString API. Forced +* to use name "handleReplaceBetween" because +* of existing methods in UnicodeString. +************************************************************************** +*/ + +#ifndef REP_H +#define REP_H + +#include <unicode/uobject.h> + +/** + * \file + * \brief C++ API: Replaceable String + */ + +U_NAMESPACE_BEGIN + +class UnicodeString; + +/** + * <code>Replaceable</code> is an abstract base class representing a + * string of characters that supports the replacement of a range of + * itself with a new string of characters. It is used by APIs that + * change a piece of text while retaining metadata. Metadata is data + * other than the Unicode characters returned by char32At(). One + * example of metadata is style attributes; another is an edit + * history, marking each character with an author and revision number. + * + * <p>An implicit aspect of the <code>Replaceable</code> API is that + * during a replace operation, new characters take on the metadata of + * the old characters. For example, if the string "the <b>bold</b> + * font" has range (4, 8) replaced with "strong", then it becomes "the + * <b>strong</b> font". + * + * <p><code>Replaceable</code> specifies ranges using a start + * offset and a limit offset. The range of characters thus specified + * includes the characters at offset start..limit-1. That is, the + * start offset is inclusive, and the limit offset is exclusive. 
+ * + * <p><code>Replaceable</code> also includes API to access characters + * in the string: <code>length()</code>, <code>charAt()</code>, + * <code>char32At()</code>, and <code>extractBetween()</code>. + * + * <p>For a subclass to support metadata, typical behavior of + * <code>replace()</code> is the following: + * <ul> + * <li>Set the metadata of the new text to the metadata of the first + * character replaced</li> + * <li>If no characters are replaced, use the metadata of the + * previous character</li> + * <li>If there is no previous character (i.e. start == 0), use the + * following character</li> + * <li>If there is no following character (i.e. the replaceable was + * empty), use default metadata.</li> + * <li>If the code point U+FFFF is seen, it should be interpreted as + * a special marker having no metadata + * </li> + * </ul> + * If this is not the behavior, the subclass should document any differences. + * @author Alan Liu + * @stable ICU 2.0 + */ +class U_COMMON_API Replaceable : public UObject { + +public: + /** + * Destructor. + * @stable ICU 2.0 + */ + virtual ~Replaceable(); + + /** + * Returns the number of 16-bit code units in the text. + * @return number of 16-bit code units in text + * @stable ICU 1.8 + */ + inline int32_t length() const; + + /** + * Returns the 16-bit code unit at the given offset into the text. + * @param offset an integer between 0 and <code>length()</code>-1 + * inclusive + * @return 16-bit code unit of text at given offset + * @stable ICU 1.8 + */ + inline char16_t charAt(int32_t offset) const; + + /** + * Returns the 32-bit code point at the given 16-bit offset into + * the text. This assumes the text is stored as 16-bit code units + * with surrogate pairs intermixed. If the offset of a leading or + * trailing code unit of a surrogate pair is given, return the + * code point of the surrogate pair. 
+ * + * @param offset an integer between 0 and <code>length()</code>-1 + * inclusive + * @return 32-bit code point of text at given offset + * @stable ICU 1.8 + */ + inline UChar32 char32At(int32_t offset) const; + + /** + * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>) + * into the UnicodeString <tt>target</tt>. + * @param start offset of first character which will be copied + * @param limit offset immediately following the last character to + * be copied + * @param target UnicodeString into which to copy characters. + * This method returns nothing; the result is in <TT>target</TT>. + * @stable ICU 2.1 + */ + virtual void extractBetween(int32_t start, + int32_t limit, + UnicodeString& target) const = 0; + + /** + * Replaces a substring of this object with the given text. If the + * characters being replaced have metadata, the new characters + * that replace them should be given the same metadata. + * + * <p>Subclasses must ensure that if the text between start and + * limit is equal to the replacement text, that replace has no + * effect. That is, any metadata + * should be unaffected. In addition, subclasses are encouraged to + * check for initial and trailing identical characters, and make a + * smaller replacement if possible. This will preserve as much + * metadata as possible. + * @param start the beginning index, inclusive; <code>0 <= start + * <= limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit + * <= length()</code>. + * @param text the text to replace characters <code>start</code> + * to <code>limit - 1</code> + * @stable ICU 2.0 + */ + virtual void handleReplaceBetween(int32_t start, + int32_t limit, + const UnicodeString& text) = 0; + // Note: All other methods in this class take the names of + // existing UnicodeString methods. This method is the exception. + // It is named differently because all replace methods of + // UnicodeString return a UnicodeString&. 
The 'between' is + // required in order to conform to the UnicodeString naming + // convention; API taking start/length are named <operation>, and + // those taking start/limit are named <operationBetween>. The + // 'handle' is added because 'replaceBetween' and + // 'doReplaceBetween' are already taken. + + /** + * Copies a substring of this object, retaining metadata. + * This method is used to duplicate or reorder substrings. + * The destination index must not overlap the source range. + * + * @param start the beginning index, inclusive; <code>0 <= start <= + * limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit <= + * length()</code>. + * @param dest the destination index. The characters from + * <code>start..limit-1</code> will be copied to <code>dest</code>. + * Implementations of this method may assume that <code>dest <= start || + * dest >= limit</code>. + * @stable ICU 2.0 + */ + virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0; + + /** + * Returns true if this object contains metadata. If a + * Replaceable object has metadata, calls to the Replaceable API + * must be made so as to preserve metadata. If it does not, calls + * to the Replaceable API may be optimized to improve performance. + * The default implementation returns true. + * @return true if this object contains metadata + * @stable ICU 2.2 + */ + virtual UBool hasMetaData() const; + + /** + * Clone this object, an instance of a subclass of Replaceable. + * Clones can be used concurrently in multiple threads. + * If a subclass does not implement clone(), or if an error occurs, + * then NULL is returned. + * The clone functions in all subclasses return a pointer to a Replaceable + * because some compilers do not support covariant (same-as-this) + * return types; cast to the appropriate subclass if necessary. + * The caller must delete the clone. 
+ * + * @return a clone of this object + * + * @see getDynamicClassID + * @stable ICU 2.6 + */ + virtual Replaceable *clone() const; + +protected: + + /** + * Default constructor. + * @stable ICU 2.4 + */ + inline Replaceable(); + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + Replaceable &Replaceable::operator=(const Replaceable &); + */ + + /** + * Virtual version of length(). + * @stable ICU 2.4 + */ + virtual int32_t getLength() const = 0; + + /** + * Virtual version of charAt(). + * @stable ICU 2.4 + */ + virtual char16_t getCharAt(int32_t offset) const = 0; + + /** + * Virtual version of char32At(). + * @stable ICU 2.4 + */ + virtual UChar32 getChar32At(int32_t offset) const = 0; +}; + +inline Replaceable::Replaceable() {} + +inline int32_t +Replaceable::length() const { + return getLength(); +} + +inline char16_t +Replaceable::charAt(int32_t offset) const { + return getCharAt(offset); +} + +inline UChar32 +Replaceable::char32At(int32_t offset) const { + return getChar32At(offset); +} + +// There is no rep.cpp, see unistr.cpp for Replaceable function implementations. + +U_NAMESPACE_END + +#endif diff --git a/vendor/icu/include/unicode/std_string.h b/vendor/icu/include/unicode/std_string.h new file mode 100644 index 0000000000..ba5f007ac6 --- /dev/null +++ b/vendor/icu/include/unicode/std_string.h @@ -0,0 +1,37 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2009-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: std_string.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2009feb19 +* created by: Markus W. Scherer +*/ + +#ifndef __STD_STRING_H__ +#define __STD_STRING_H__ + +/** + * \file + * \brief C++ API: Central ICU header for including the C++ standard <string> + * header and for related definitions. + */ + +#include <unicode/utypes.h> + +// Workaround for a libstdc++ bug before libstdc++4.6 (2011). +// https://bugs.llvm.org/show_bug.cgi?id=13364 +#if defined(__GLIBCXX__) +namespace std { class type_info; } +#endif +#include <string> + +#endif // __STD_STRING_H__ diff --git a/vendor/icu/include/unicode/stringoptions.h b/vendor/icu/include/unicode/stringoptions.h new file mode 100644 index 0000000000..f735f17229 --- /dev/null +++ b/vendor/icu/include/unicode/stringoptions.h @@ -0,0 +1,198 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// stringoptions.h +// created: 2017jun08 Markus W. Scherer + +#ifndef __STRINGOPTIONS_H__ +#define __STRINGOPTIONS_H__ + +#include <unicode/utypes.h> + +/** + * \file + * \brief C API: Bit set option bit constants for various string and character processing functions. + */ + +/** + * Option value for case folding: Use default mappings defined in CaseFolding.txt. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_DEFAULT 0 + +/** + * Option value for case folding: + * + * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I + * and dotless i appropriately for Turkic languages (tr, az). 
+ * + * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that + * are to be included for default mappings and + * excluded for the Turkic-specific mappings. + * + * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that + * are to be excluded for default mappings and + * included for the Turkic-specific mappings. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 + +#ifndef U_HIDE_DRAFT_API + +/** + * Titlecase the string as a whole rather than each word. + * (Titlecase only the character at index 0, possibly adjusted.) + * Option bits value for titlecasing APIs that take an options bit set. + * + * It is an error to specify multiple titlecasing iterator options together, + * including both an options bit and an explicit BreakIterator. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @draft ICU 60 + */ +#define U_TITLECASE_WHOLE_STRING 0x20 + +/** + * Titlecase sentences rather than words. + * (Titlecase only the first character of each sentence, possibly adjusted.) + * Option bits value for titlecasing APIs that take an options bit set. + * + * It is an error to specify multiple titlecasing iterator options together, + * including both an options bit and an explicit BreakIterator. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @draft ICU 60 + */ +#define U_TITLECASE_SENTENCES 0x40 + +#endif // U_HIDE_DRAFT_API + +/** + * Do not lowercase non-initial parts of words when titlecasing. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will titlecase the character at each + * (possibly adjusted) BreakIterator index and + * lowercase all other characters up to the next iterator index. + * With this option, the other characters will not be modified. 
+ * + * @see U_TITLECASE_ADJUST_TO_CASED + * @see UnicodeString::toTitle + * @see CaseMap::toTitle + * @see ucasemap_setOptions + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @stable ICU 3.8 + */ +#define U_TITLECASE_NO_LOWERCASE 0x100 + +/** + * Do not adjust the titlecasing BreakIterator indexes; + * titlecase exactly the characters at breaks from the iterator. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will take each break iterator index, + * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), + * and titlecase that one. + * + * Other characters are lowercased. + * + * It is an error to specify multiple titlecasing adjustment options together. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @see U_TITLECASE_NO_LOWERCASE + * @see UnicodeString::toTitle + * @see CaseMap::toTitle + * @see ucasemap_setOptions + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @stable ICU 3.8 + */ +#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 + +#ifndef U_HIDE_DRAFT_API + +/** + * Adjust each titlecasing BreakIterator index to the next cased character. + * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) + * Option bit for titlecasing APIs that take an options bit set. + * + * This used to be the default index adjustment in ICU. + * Since ICU 60, the default index adjustment is to the next character that is + * a letter, number, symbol, or private use code point. + * (Uncased modifier letters are skipped.) + * The difference in behavior is small for word titlecasing, + * but the new adjustment is much better for whole-string and sentence titlecasing: + * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". + * + * It is an error to specify multiple titlecasing adjustment options together. 
+ * + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT + * @draft ICU 60 + */ +#define U_TITLECASE_ADJUST_TO_CASED 0x400 + +/** + * Option for string transformation functions to not first reset the Edits object. + * Used for example in some case-mapping and normalization functions. + * + * @see CaseMap + * @see Edits + * @see Normalizer2 + * @draft ICU 60 + */ +#define U_EDITS_NO_RESET 0x2000 + +/** + * Omit unchanged text when recording how source substrings + * relate to changed and unchanged result substrings. + * Used for example in some case-mapping and normalization functions. + * + * @see CaseMap + * @see Edits + * @see Normalizer2 + * @draft ICU 60 + */ +#define U_OMIT_UNCHANGED_TEXT 0x4000 + +#endif // U_HIDE_DRAFT_API + +/** + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: + * Compare strings in code point order instead of code unit order. + * @stable ICU 2.2 + */ +#define U_COMPARE_CODE_POINT_ORDER 0x8000 + +/** + * Option bit for unorm_compare: + * Perform case-insensitive comparison. + * @stable ICU 2.2 + */ +#define U_COMPARE_IGNORE_CASE 0x10000 + +/** + * Option bit for unorm_compare: + * Both input strings are assumed to fulfill FCD conditions. + * @stable ICU 2.2 + */ +#define UNORM_INPUT_IS_FCD 0x20000 + +// Related definitions elsewhere. +// Options that are not meaningful in the same functions +// can share the same bits. 
+// +// Public: +// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 +// +// Internal: (may change or be removed) +// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff +// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 +// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 +// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 +// ustr_imp.h #define _STRNCMP_STYLE 0x1000 +// unormcmp.cpp #define _COMPARE_EQUIV 0x80000 + +#endif // __STRINGOPTIONS_H__ diff --git a/vendor/icu/include/unicode/stringpiece.h b/vendor/icu/include/unicode/stringpiece.h new file mode 100644 index 0000000000..fa36fd63f6 --- /dev/null +++ b/vendor/icu/include/unicode/stringpiece.h @@ -0,0 +1,224 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// Copyright (C) 2009-2013, International Business Machines +// Corporation and others. All Rights Reserved. +// +// Copyright 2001 and onwards Google Inc. +// Author: Sanjay Ghemawat + +// This code is a contribution of Google code, and the style used here is +// a compromise between the original Google code and the ICU coding guidelines. +// For example, data types are ICU-ified (size_t,int->int32_t), +// and API comments doxygen-ified, but function names and behavior are +// as in the original, if possible. +// Assertion-style error handling, not available in ICU, was changed to +// parameter "pinning" similar to UnicodeString. +// +// In addition, this is only a partial port of the original Google code, +// limited to what was needed so far. The (nearly) complete original code +// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib +// (see ICU ticket 6765, r25517). + +#ifndef __STRINGPIECE_H__ +#define __STRINGPIECE_H__ + +/** + * \file + * \brief C++ API: StringPiece: Read-only byte string wrapper class. + */ + +#include <unicode/utypes.h> +#include <unicode/uobject.h> +#include <unicode/std_string.h> + +// Arghh! I wish C++ literals were "string". 
+ +U_NAMESPACE_BEGIN + +/** + * A string-like object that points to a sized piece of memory. + * + * We provide non-explicit singleton constructors so users can pass + * in a "const char*" or a "string" wherever a "StringPiece" is + * expected. + * + * Functions or methods may use StringPiece parameters to accept either a + * "const char*" or a "string" value that will be implicitly converted to a + * StringPiece. + * + * Systematic usage of StringPiece is encouraged as it will reduce unnecessary + * conversions from "const char*" to "string" and back again. + * + * @stable ICU 4.2 + */ +class U_COMMON_API StringPiece : public UMemory { + private: + const char* ptr_; + int32_t length_; + + public: + /** + * Default constructor, creates an empty StringPiece. + * @stable ICU 4.2 + */ + StringPiece() : ptr_(NULL), length_(0) { } + /** + * Constructs from a NUL-terminated const char * pointer. + * @param str a NUL-terminated const char * pointer + * @stable ICU 4.2 + */ + StringPiece(const char* str); + /** + * Constructs from a std::string. + * @stable ICU 4.2 + */ + StringPiece(const std::string& str) + : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { } + /** + * Constructs from a const char * pointer and a specified length. + * @param offset a const char * pointer (need not be terminated) + * @param len the length of the string; must be non-negative + * @stable ICU 4.2 + */ + StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { } + /** + * Substring of another StringPiece. + * @param x the other StringPiece + * @param pos start position in x; must be non-negative and <= x.length(). + * @stable ICU 4.2 + */ + StringPiece(const StringPiece& x, int32_t pos); + /** + * Substring of another StringPiece. + * @param x the other StringPiece + * @param pos start position in x; must be non-negative and <= x.length(). + * @param len length of the substring; + * must be non-negative and will be pinned to at most x.length() - pos. 
+ * @stable ICU 4.2 + */ + StringPiece(const StringPiece& x, int32_t pos, int32_t len); + + /** + * Returns the string pointer. May be NULL if it is empty. + * + * data() may return a pointer to a buffer with embedded NULs, and the + * returned buffer may or may not be null terminated. Therefore it is + * typically a mistake to pass data() to a routine that expects a NUL + * terminated string. + * @return the string pointer + * @stable ICU 4.2 + */ + const char* data() const { return ptr_; } + /** + * Returns the string length. Same as length(). + * @return the string length + * @stable ICU 4.2 + */ + int32_t size() const { return length_; } + /** + * Returns the string length. Same as size(). + * @return the string length + * @stable ICU 4.2 + */ + int32_t length() const { return length_; } + /** + * Returns whether the string is empty. + * @return TRUE if the string is empty + * @stable ICU 4.2 + */ + UBool empty() const { return length_ == 0; } + + /** + * Sets to an empty string. + * @stable ICU 4.2 + */ + void clear() { ptr_ = NULL; length_ = 0; } + + /** + * Reset the stringpiece to refer to new data. + * @param xdata pointer to the new string data. Need not be NUL-terminated. + * @param len the length of the new data + * @stable ICU 4.8 + */ + void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; } + + /** + * Reset the stringpiece to refer to new data. + * @param str a pointer to a NUL-terminated string. + * @stable ICU 4.8 + */ + void set(const char* str); + + /** + * Removes the first n string units. + * @param n prefix length, must be non-negative and <=length() + * @stable ICU 4.2 + */ + void remove_prefix(int32_t n) { + if (n >= 0) { + if (n > length_) { + n = length_; + } + ptr_ += n; + length_ -= n; + } + } + + /** + * Removes the last n string units. 
+ * @param n suffix length, must be non-negative and <=length() + * @stable ICU 4.2 + */ + void remove_suffix(int32_t n) { + if (n >= 0) { + if (n <= length_) { + length_ -= n; + } else { + length_ = 0; + } + } + } + + /** + * Maximum integer, used as a default value for substring methods. + * @stable ICU 4.2 + */ + static const int32_t npos; // = 0x7fffffff; + + /** + * Returns a substring of this StringPiece. + * @param pos start position; must be non-negative and <= length(). + * @param len length of the substring; + * must be non-negative and will be pinned to at most length() - pos. + * @return the substring StringPiece + * @stable ICU 4.2 + */ + StringPiece substr(int32_t pos, int32_t len = npos) const { + return StringPiece(*this, pos, len); + } +}; + +/** + * Global operator == for StringPiece + * @param x The first StringPiece to compare. + * @param y The second StringPiece to compare. + * @return TRUE if the string data is equal + * @stable ICU 4.8 + */ +U_EXPORT UBool U_EXPORT2 +operator==(const StringPiece& x, const StringPiece& y); + +/** + * Global operator != for StringPiece + * @param x The first StringPiece to compare. + * @param y The second StringPiece to compare. + * @return TRUE if the string data is not equal + * @stable ICU 4.8 + */ +inline UBool operator!=(const StringPiece& x, const StringPiece& y) { + return !(x == y); +} + +U_NAMESPACE_END + +#endif // __STRINGPIECE_H__ diff --git a/vendor/icu/include/unicode/ubidi.h b/vendor/icu/include/unicode/ubidi.h new file mode 100644 index 0000000000..9b11369380 --- /dev/null +++ b/vendor/icu/include/unicode/ubidi.h @@ -0,0 +1,2201 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2013, International Business Machines +* Corporation and others. All Rights Reserved. 
+* +****************************************************************************** +* file name: ubidi.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999jul27 +* created by: Markus W. Scherer, updated by Matitiahu Allouche +*/ + +#ifndef UBIDI_H +#define UBIDI_H + +#include <unicode/utypes.h> +#include <unicode/uchar.h> +#include <unicode/localpointer.h> + +/** + *\file + * \brief C API: Bidi algorithm + * + * <h2>Bidi algorithm for ICU</h2> + * + * This is an implementation of the Unicode Bidirectional Algorithm. + * The algorithm is defined in the + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p> + * + * Note: Libraries that perform a bidirectional algorithm and + * reorder strings accordingly are sometimes called "Storage Layout Engines". + * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such + * "Storage Layout Engines". + * + * <h3>General remarks about the API:</h3> + * + * In functions with an error code parameter, + * the <code>pErrorCode</code> pointer must be valid + * and the value that it points to must not indicate a failure before + * the function call. Otherwise, the function returns immediately. + * After the function call, the value indicates success or failure.<p> + * + * The "limit" of a sequence of characters is the position just after their + * last character, i.e., one more than that position.<p> + * + * Some of the API functions provide access to "runs". + * Such a "run" is defined as a sequence of characters + * that are at the same embedding level + * after performing the Bidi algorithm.<p> + * + * @author Markus W. Scherer + * @version 1.0 + * + * + * <h4> Sample code for the ICU Bidi API </h4> + * + * <h5>Rendering a paragraph with the ICU Bidi API</h5> + * + * This is (hypothetical) sample code that illustrates + * how the ICU Bidi API could be used to render a paragraph of text. 
+ * Rendering code depends highly on the graphics system, + * therefore this sample code must make a lot of assumptions, + * which may or may not match any existing graphics system's properties. + * + * <p>The basic assumptions are:</p> + * <ul> + * <li>Rendering is done from left to right on a horizontal line.</li> + * <li>A run of single-style, unidirectional text can be rendered at once.</li> + * <li>Such a run of text is passed to the graphics system with + * characters (code units) in logical order.</li> + * <li>The line-breaking algorithm is very complicated + * and Locale-dependent - + * and therefore its implementation omitted from this sample code.</li> + * </ul> + * + * <pre> + * \code + *#include "unicode/ubidi.h" + * + *typedef enum { + * styleNormal=0, styleSelected=1, + * styleBold=2, styleItalics=4, + * styleSuper=8, styleSub=16 + *} Style; + * + *typedef struct { int32_t limit; Style style; } StyleRun; + * + *int getTextWidth(const UChar *text, int32_t start, int32_t limit, + * const StyleRun *styleRuns, int styleRunCount); + * + * // set *pLimit and *pStyleRunLimit for a line + * // from text[start] and from styleRuns[styleRunStart] + * // using ubidi_getLogicalRun(para, ...) + *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit, + * UBiDi *para, + * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit, + * int *pLineWidth); + * + * // render runs on a line sequentially, always from left to right + * + * // prepare rendering a new line + * void startLine(UBiDiDirection textDirection, int lineWidth); + * + * // render a run of text and advance to the right by the run width + * // the text[start..limit-1] is always in logical order + * void renderRun(const UChar *text, int32_t start, int32_t limit, + * UBiDiDirection textDirection, Style style); + * + * // We could compute a cross-product + * // from the style runs with the directional runs + * // and then reorder it. 
+ * // Instead, here we iterate over each run type + * // and render the intersections - + * // with shortcuts in simple (and common) cases. + * // renderParagraph() is the main function. + * + * // render a directional run with + * // (possibly) multiple style runs intersecting with it + * void renderDirectionalRun(const UChar *text, + * int32_t start, int32_t limit, + * UBiDiDirection direction, + * const StyleRun *styleRuns, int styleRunCount) { + * int i; + * + * // iterate over style runs + * if(direction==UBIDI_LTR) { + * int styleLimit; + * + * for(i=0; i<styleRunCount; ++i) { + * styleLimit=styleRuns[i].limit; + * if(start<styleLimit) { + * if(styleLimit>limit) { styleLimit=limit; } + * renderRun(text, start, styleLimit, + * direction, styleRuns[i].style); + * if(styleLimit==limit) { break; } + * start=styleLimit; + * } + * } + * } else { + * int styleStart; + * + * for(i=styleRunCount-1; i>=0; --i) { + * if(i>0) { + * styleStart=styleRuns[i-1].limit; + * } else { + * styleStart=0; + * } + * if(limit>=styleStart) { + * if(styleStart<start) { styleStart=start; } + * renderRun(text, styleStart, limit, + * direction, styleRuns[i].style); + * if(styleStart==start) { break; } + * limit=styleStart; + * } + * } + * } + * } + * + * // the line object represents text[start..limit-1] + * void renderLine(UBiDi *line, const UChar *text, + * int32_t start, int32_t limit, + * const StyleRun *styleRuns, int styleRunCount) { + * UBiDiDirection direction=ubidi_getDirection(line); + * if(direction!=UBIDI_MIXED) { + * // unidirectional + * if(styleRunCount<=1) { + * renderRun(text, start, limit, direction, styleRuns[0].style); + * } else { + * renderDirectionalRun(text, start, limit, + * direction, styleRuns, styleRunCount); + * } + * } else { + * // mixed-directional + * int32_t count, i, length; + * UBiDiLevel level; + * + * count=ubidi_countRuns(line, pErrorCode); + * if(U_SUCCESS(*pErrorCode)) { + * if(styleRunCount<=1) { + * Style style=styleRuns[0].style; + * + * // iterate
over directional runs + * for(i=0; i<count; ++i) { + * direction=ubidi_getVisualRun(line, i, &start, &length); + * renderRun(text, start, start+length, direction, style); + * } + * } else { + * int32_t j; + * + * // iterate over both directional and style runs + * for(i=0; i<count; ++i) { + * direction=ubidi_getVisualRun(line, i, &start, &length); + * renderDirectionalRun(text, start, start+length, + * direction, styleRuns, styleRunCount); + * } + * } + * } + * } + * } + * + *void renderParagraph(const UChar *text, int32_t length, + * UBiDiDirection textDirection, + * const StyleRun *styleRuns, int styleRunCount, + * int lineWidth, + * UErrorCode *pErrorCode) { + * UBiDi *para; + * + * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) { + * return; + * } + * + * para=ubidi_openSized(length, 0, pErrorCode); + * if(para==NULL) { return; } + * + * ubidi_setPara(para, text, length, + * textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR, + * NULL, pErrorCode); + * if(U_SUCCESS(*pErrorCode)) { + * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para); + * StyleRun styleRun={ length, styleNormal }; + * int width; + * + * if(styleRuns==NULL || styleRunCount<=0) { + * styleRunCount=1; + * styleRuns=&styleRun; + * } + * + * // assume styleRuns[styleRunCount-1].limit>=length + * + * width=getTextWidth(text, 0, length, styleRuns, styleRunCount); + * if(width<=lineWidth) { + * // everything fits onto one line + * + * // prepare rendering a new line from either left or right + * startLine(paraLevel, width); + * + * renderLine(para, text, 0, length, + * styleRuns, styleRunCount); + * } else { + * UBiDi *line; + * + * // we need to render several lines + * line=ubidi_openSized(length, 0, pErrorCode); + * if(line!=NULL) { + * int32_t start=0, limit; + * int styleRunStart=0, styleRunLimit; + * + * for(;;) { + * limit=length; + * styleRunLimit=styleRunCount; + * getLineBreak(text, start, &limit, para, + * styleRuns, styleRunStart, &styleRunLimit, + * &width); + *
ubidi_setLine(para, start, limit, line, pErrorCode); + * if(U_SUCCESS(*pErrorCode)) { + * // prepare rendering a new line + * // from either left or right + * startLine(paraLevel, width); + * + * renderLine(line, text, start, limit, + * styleRuns+styleRunStart, + * styleRunLimit-styleRunStart); + * } + * if(limit==length) { break; } + * start=limit; + * styleRunStart=styleRunLimit-1; + * if(start>=styleRuns[styleRunStart].limit) { + * ++styleRunStart; + * } + * } + * + * ubidi_close(line); + * } + * } + * } + * + * ubidi_close(para); + *} + *\endcode + * </pre> + */ + +/*DOCXX_TAG*/ +/*@{*/ + +/** + * UBiDiLevel is the type of the level values in this + * Bidi implementation. + * It holds an embedding level and indicates the visual direction + * by its bit 0 (even/odd value).<p> + * + * It can also hold non-level values for the + * <code>paraLevel</code> and <code>embeddingLevels</code> + * arguments of <code>ubidi_setPara()</code>; there: + * <ul> + * <li>bit 7 of an <code>embeddingLevels[]</code> + * value indicates whether the using application is + * specifying the level of a character to <i>override</i> whatever the + * Bidi implementation would resolve it to.</li> + * <li><code>paraLevel</code> can be set to the + * pseudo-level values <code>UBIDI_DEFAULT_LTR</code> + * and <code>UBIDI_DEFAULT_RTL</code>.</li> + * </ul> + * + * @see ubidi_setPara + * + * <p>The related constants are not real, valid level values. + * <code>UBIDI_DEFAULT_XXX</code> can be used to specify + * a default for the paragraph level for + * when the <code>ubidi_setPara()</code> function + * shall determine it but there is no + * strongly typed character in the input.<p> + * + * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even + * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd, + * just like with normal LTR and RTL level values - + * these special values are designed that way. Also, the implementation + * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. 
+ * + * @see UBIDI_DEFAULT_LTR + * @see UBIDI_DEFAULT_RTL + * @see UBIDI_LEVEL_OVERRIDE + * @see UBIDI_MAX_EXPLICIT_LEVEL + * @stable ICU 2.0 + */ +typedef uint8_t UBiDiLevel; + +/** Paragraph level setting.<p> + * + * Constant indicating that the base direction depends on the first strong + * directional character in the text according to the Unicode Bidirectional + * Algorithm. If no strong directional character is present, + * then set the paragraph level to 0 (left-to-right).<p> + * + * If this value is used in conjunction with reordering modes + * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or + * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder + * is assumed to be visual LTR, and the text after reordering is required + * to be the corresponding logical string with appropriate contextual + * direction. The direction of the result string will be RTL if either + * the rightmost or leftmost strong character of the source text is RTL + * or Arabic Letter; the direction will be LTR otherwise.<p> + * + * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may + * be added at the beginning of the result string to ensure round trip + * (that the result string, when reordered back to visual, will produce + * the original source text). + * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT + * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL + * @stable ICU 2.0 + */ +#define UBIDI_DEFAULT_LTR 0xfe + +/** Paragraph level setting.<p> + * + * Constant indicating that the base direction depends on the first strong + * directional character in the text according to the Unicode Bidirectional + * Algorithm.
If no strong directional character is present, + * then set the paragraph level to 1 (right-to-left).<p> + * + * If this value is used in conjunction with reordering modes + * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or + * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder + * is assumed to be visual LTR, and the text after reordering is required + * to be the corresponding logical string with appropriate contextual + * direction. The direction of the result string will be RTL if either + * the rightmost or leftmost strong character of the source text is RTL + * or Arabic Letter, or if the text contains no strong character; + * the direction will be LTR otherwise.<p> + * + * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may + * be added at the beginning of the result string to ensure round trip + * (that the result string, when reordered back to visual, will produce + * the original source text). + * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT + * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL + * @stable ICU 2.0 + */ +#define UBIDI_DEFAULT_RTL 0xff + +/** + * Maximum explicit embedding level. + * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>). + * @stable ICU 2.0 + */ +#define UBIDI_MAX_EXPLICIT_LEVEL 125 + +/** Bit flag for level input. + * Overrides directional properties. + * @stable ICU 2.0 + */ +#define UBIDI_LEVEL_OVERRIDE 0x80 + +/** + * Special value which can be returned by the mapping functions when a logical + * index has no corresponding visual index or vice-versa. This may happen + * for the logical-to-visual mapping of a Bidi control when option + * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen + * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted + * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * @see ubidi_getVisualIndex + * @see ubidi_getVisualMap + * @see ubidi_getLogicalIndex + * @see ubidi_getLogicalMap + * @stable ICU 3.6 + */ +#define UBIDI_MAP_NOWHERE (-1) + +/** + * <code>UBiDiDirection</code> values indicate the text direction. + * @stable ICU 2.0 + */ +enum UBiDiDirection { + /** Left-to-right text. This is a 0 value. + * <ul> + * <li>As return value for <code>ubidi_getDirection()</code>, it means + * that the source string contains no right-to-left characters, or + * that the source string is empty and the paragraph level is even. + * <li> As return value for <code>ubidi_getBaseDirection()</code>, it + * means that the first strong character of the source string has + * a left-to-right direction. + * </ul> + * @stable ICU 2.0 + */ + UBIDI_LTR, + /** Right-to-left text. This is a 1 value. + * <ul> + * <li>As return value for <code>ubidi_getDirection()</code>, it means + * that the source string contains no left-to-right characters, or + * that the source string is empty and the paragraph level is odd. + * <li> As return value for <code>ubidi_getBaseDirection()</code>, it + * means that the first strong character of the source string has + * a right-to-left direction. + * </ul> + * @stable ICU 2.0 + */ + UBIDI_RTL, + /** Mixed-directional text. + * <p>As return value for <code>ubidi_getDirection()</code>, it means + * that the source string contains both left-to-right and + * right-to-left characters. + * @stable ICU 2.0 + */ + UBIDI_MIXED, + /** No strongly directional text. + * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means + * that the source string is missing or empty, or contains neither left-to-right + * nor right-to-left characters. + * @stable ICU 4.6 + */ + UBIDI_NEUTRAL +}; + +/** @stable ICU 2.0 */ +typedef enum UBiDiDirection UBiDiDirection; + +/** + * Forward declaration of the <code>UBiDi</code> structure for the declaration of + * the API functions. 
Its fields are implementation-specific.<p> + * This structure holds information about a paragraph (or multiple paragraphs) + * of text with Bidi-algorithm-related details, or about one line of + * such a paragraph.<p> + * Reordering can be done on a line, or on one or more paragraphs which are + * then interpreted each as one single line. + * @stable ICU 2.0 + */ +struct UBiDi; + +/** @stable ICU 2.0 */ +typedef struct UBiDi UBiDi; + +/** + * Allocate a <code>UBiDi</code> structure. + * Such an object is initially empty. It is assigned + * the Bidi properties of a piece of text containing one or more paragraphs + * by <code>ubidi_setPara()</code> + * or the Bidi properties of a line within a paragraph by + * <code>ubidi_setLine()</code>.<p> + * This object can be reused for as long as it is not deallocated + * by calling <code>ubidi_close()</code>.<p> + * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate + * additional memory for internal structures as necessary. + * + * @return An empty <code>UBiDi</code> object. + * @stable ICU 2.0 + */ +U_STABLE UBiDi * U_EXPORT2 +ubidi_open(void); + +/** + * Allocate a <code>UBiDi</code> structure with preallocated memory + * for internal structures. + * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code> + * with no arguments, but it also preallocates memory for internal structures + * according to the sizings supplied by the caller.<p> + * Subsequent functions will not allocate any more memory, and are thus + * guaranteed not to fail because of lack of memory.<p> + * The preallocation can be limited to some of the internal memory + * by setting some values to 0 here.
That means that if, e.g., + * <code>maxRunCount</code> cannot be reasonably predetermined and should not + * be set to <code>maxLength</code> (the only failproof value) to avoid + * wasting memory, then <code>maxRunCount</code> could be set to 0 here + * and the internal structures that are associated with it will be allocated + * on demand, just like with <code>ubidi_open()</code>. + * + * @param maxLength is the maximum text or line length that internal memory + * will be preallocated for. An attempt to associate this object with a + * longer text will fail, unless this value is 0, which leaves the allocation + * up to the implementation. + * + * @param maxRunCount is the maximum anticipated number of same-level runs + * that internal memory will be preallocated for. An attempt to access + * visual runs on an object that was not preallocated for as many runs + * as the text was actually resolved to will fail, + * unless this value is 0, which leaves the allocation up to the implementation.<br><br> + * The number of runs depends on the actual text and may be anywhere between + * 1 and <code>maxLength</code>. It is typically small. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return An empty <code>UBiDi</code> object with preallocated memory. + * @stable ICU 2.0 + */ +U_STABLE UBiDi * U_EXPORT2 +ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode); + +/** + * <code>ubidi_close()</code> must be called to free the memory + * associated with a UBiDi object.<p> + * + * <strong>Important: </strong> + * A parent <code>UBiDi</code> object must not be destroyed or reused if + * it still has children.
+ * If a <code>UBiDi</code> object has become the <i>child</i> + * of another one (its <i>parent</i>) by calling + * <code>ubidi_setLine()</code>, then the child object must + * be destroyed (closed) or reused (by calling + * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>) + * before the parent object. + * + * @param pBiDi is a <code>UBiDi</code> object. + * + * @see ubidi_setPara + * @see ubidi_setLine + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_close(UBiDi *pBiDi); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUBiDiPointer + * "Smart pointer" class, closes a UBiDi via ubidi_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close); + +U_NAMESPACE_END + +#endif + +/** + * Modify the operation of the Bidi algorithm such that it + * approximates an "inverse Bidi" algorithm. This function + * must be called before <code>ubidi_setPara()</code>. + * + * <p>The normal operation of the Bidi algorithm as described + * in the Unicode Technical Report is to take text stored in logical + * (keyboard, typing) order and to determine the reordering of it for visual + * rendering. + * Some legacy systems store text in visual order, and for operations + * with standard, Unicode-based algorithms, the text needs to be transformed + * to logical order. This is effectively the inverse algorithm of the + * described Bidi algorithm. Note that there is no standard algorithm for + * this "inverse Bidi" and that the current implementation provides only an + * approximation of "inverse Bidi".</p> + * + * <p>With <code>isInverse</code> set to <code>TRUE</code>, + * this function changes the behavior of some of the subsequent functions + * in a way that they can be used for the inverse Bidi algorithm. 
+ * Specifically, runs of text with numeric characters will be treated in a + * special way and may need to be surrounded with LRM characters when they are + * written in reordered sequence.</p> + * + * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>. + * Since the actual input for "inverse Bidi" is visually ordered text and + * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually + * the runs of the logically ordered output.</p> + * + * <p>Calling this function with argument <code>isInverse</code> set to + * <code>TRUE</code> is equivalent to calling + * <code>ubidi_setReorderingMode</code> with argument + * <code>reorderingMode</code> + * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br> + * Calling this function with argument <code>isInverse</code> set to + * <code>FALSE</code> is equivalent to calling + * <code>ubidi_setReorderingMode</code> with argument + * <code>reorderingMode</code> + * set to <code>#UBIDI_REORDER_DEFAULT</code>. + * + * @param pBiDi is a <code>UBiDi</code> object. + * + * @param isInverse specifies "forward" or "inverse" Bidi operation. + * + * @see ubidi_setPara + * @see ubidi_writeReordered + * @see ubidi_setReorderingMode + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_setInverse(UBiDi *pBiDi, UBool isInverse); + +/** + * Is this Bidi object set to perform the inverse Bidi algorithm? + * <p>Note: calling this function after setting the reordering mode with + * <code>ubidi_setReorderingMode</code> will return <code>TRUE</code> if the + * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, + * <code>FALSE</code> for all other values.</p> + * + * @param pBiDi is a <code>UBiDi</code> object. + * @return TRUE if the Bidi object is set to perform the inverse Bidi algorithm + * by handling numbers as L. 
+ * + * @see ubidi_setInverse + * @see ubidi_setReorderingMode + * @stable ICU 2.0 + */ + +U_STABLE UBool U_EXPORT2 +ubidi_isInverse(UBiDi *pBiDi); + +/** + * Specify whether block separators must be allocated level zero, + * so that successive paragraphs will progress from left to right. + * This function must be called before <code>ubidi_setPara()</code>. + * Paragraph separators (B) may appear in the text. Setting them to level zero + * means that all paragraph separators (including one possibly appearing + * in the last text position) are kept in the reordered text after the text + * that they follow in the source text. + * When this feature is not enabled, a paragraph separator at the last + * position of the text before reordering will go to the first position + * of the reordered text when the paragraph level is odd. + * + * @param pBiDi is a <code>UBiDi</code> object. + * + * @param orderParagraphsLTR specifies whether paragraph separators (B) must + * receive level 0, so that successive paragraphs progress from left to right. + * + * @see ubidi_setPara + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR); + +/** + * Is this Bidi object set to allocate level 0 to block separators so that + * successive paragraphs progress from left to right? + * + * @param pBiDi is a <code>UBiDi</code> object. + * @return TRUE if the Bidi object is set to allocate level 0 to block + * separators. + * + * @see ubidi_orderParagraphsLTR + * @stable ICU 3.4 + */ +U_STABLE UBool U_EXPORT2 +ubidi_isOrderParagraphsLTR(UBiDi *pBiDi); + +/** + * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi + * algorithm to use. + * + * @see ubidi_setReorderingMode + * @stable ICU 3.6 + */ +typedef enum UBiDiReorderingMode { + /** Regular Logical to Visual Bidi algorithm according to Unicode. + * This is a 0 value.
+ * @stable ICU 3.6 */ + UBIDI_REORDER_DEFAULT = 0, + /** Logical to Visual algorithm which handles numbers in a way which + * mimics the behavior of Windows XP. + * @stable ICU 3.6 */ + UBIDI_REORDER_NUMBERS_SPECIAL, + /** Logical to Visual algorithm grouping numbers with adjacent R characters + * (reversible algorithm). + * @stable ICU 3.6 */ + UBIDI_REORDER_GROUP_NUMBERS_WITH_R, + /** Reorder runs only to transform a Logical LTR string to the Logical RTL + * string with the same display, or vice-versa.<br> + * If this mode is set together with option + * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source + * text may be removed and other controls may be added to produce the + * minimum combination which has the required display. + * @stable ICU 3.6 */ + UBIDI_REORDER_RUNS_ONLY, + /** Visual to Logical algorithm which handles numbers like L + * (same algorithm as selected by <code>ubidi_setInverse(TRUE)</code>). + * @see ubidi_setInverse + * @stable ICU 3.6 */ + UBIDI_REORDER_INVERSE_NUMBERS_AS_L, + /** Visual to Logical algorithm equivalent to the regular Logical to Visual + * algorithm. + * @stable ICU 3.6 */ + UBIDI_REORDER_INVERSE_LIKE_DIRECT, + /** Inverse Bidi (Visual to Logical) algorithm for the + * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm. + * @stable ICU 3.6 */ + UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL, +#ifndef U_HIDE_DEPRECATED_API + /** + * Number of values for reordering mode. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UBIDI_REORDER_COUNT +#endif // U_HIDE_DEPRECATED_API +} UBiDiReorderingMode; + +/** + * Modify the operation of the Bidi algorithm such that it implements some + * variant to the basic Bidi algorithm or approximates an "inverse Bidi" + * algorithm, depending on different values of the "reordering mode". + * This function must be called before <code>ubidi_setPara()</code>, and stays + * in effect until called again with a different argument.
+ * + * <p>The normal operation of the Bidi algorithm as described + * in the Unicode Standard Annex #9 is to take text stored in logical + * (keyboard, typing) order and to determine how to reorder it for visual + * rendering.</p> + * + * <p>With the reordering mode set to a value other than + * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of + * some of the subsequent functions in a way such that they implement an + * inverse Bidi algorithm or some other algorithm variants.</p> + * + * <p>Some legacy systems store text in visual order, and for operations + * with standard, Unicode-based algorithms, the text needs to be transformed + * into logical order. This is effectively the inverse algorithm of the + * described Bidi algorithm. Note that there is no standard algorithm for + * this "inverse Bidi", so a number of variants are implemented here.</p> + * + * <p>In other cases, it may be desirable to emulate some variant of the + * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a + * Logical to Logical transformation.</p> + * + * <ul> + * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>, + * the standard Bidi Logical to Visual algorithm is applied.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>, + * the algorithm used to perform Bidi transformations when calling + * <code>ubidi_setPara</code> should approximate the algorithm used in + * Microsoft Windows XP rather than strictly conform to the Unicode Bidi + * algorithm. 
+ * <br> + * The differences between the basic algorithm and the algorithm addressed + * by this option are as follows: + * <ul> + * <li>Within text at an even embedding level, the sequence "123AB" + * (where AB represent R or AL letters) is transformed to "123BA" by the + * Unicode algorithm and to "BA123" by the Windows algorithm.</li> + * <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just + * like regular numbers (EN).</li> + * </ul></li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>, + * numbers located between LTR text and RTL text are associated with the RTL + * text. For instance, an LTR paragraph with content "abc 123 DEF" (where + * upper case letters represent RTL characters) will be transformed to + * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed + * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc". + * This makes the algorithm reversible and makes it useful when round trip + * (from visual to logical and back to visual) must be achieved without + * adding LRM characters. 
However, this is a variation from the standard + * Unicode Bidi algorithm.<br> + * The source text should not contain Bidi control characters other than LRM + * or RLM.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_RUNS_ONLY</code>, + * a "Logical to Logical" transformation must be performed: + * <ul> + * <li>If the default text level of the source text (argument <code>paraLevel</code> + * in <code>ubidi_setPara</code>) is even, the source text will be handled as + * LTR logical text and will be transformed to the RTL logical text which has + * the same LTR visual display.</li> + * <li>If the default level of the source text is odd, the source text + * will be handled as RTL logical text and will be transformed to the + * LTR logical text which has the same LTR visual display.</li> + * </ul> + * This mode may be needed when logical text which is basically Arabic or + * Hebrew, with possible included numbers or phrases in English, has to be + * displayed as if it had an even embedding level (this can happen if the + * displaying application treats all text as if it was basically LTR). + * <br> + * This mode may also be needed in the reverse case, when logical text which is + * basically English, with possible included phrases in Arabic or Hebrew, has to + * be displayed as if it had an odd embedding level. + * <br> + * Both cases could be handled by adding LRE or RLE at the head of the text, + * if the display subsystem supports these formatting controls. If it does not, + * the problem may be handled by transforming the source text in this mode + * before displaying it, so that it will be displayed properly.<br> + * The source text should not contain Bidi control characters other than LRM + * or RLM.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm + * is applied. 
+ * Runs of text with numeric characters will be treated like LTR letters and + * may need to be surrounded with LRM characters when they are written in + * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can + * be used with function <code>ubidi_writeReordered</code> to this end). This + * mode is equivalent to calling <code>ubidi_setInverse()</code> with + * argument <code>isInverse</code> set to <code>TRUE</code>.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual + * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm. + * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> + * but is closer to the regular Bidi algorithm. + * <br> + * For example, an LTR paragraph with the content "FED 123 456 CBA" (where + * upper case represents RTL characters) will be transformed to + * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC" + * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br> + * When used in conjunction with option + * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally + * adds Bidi marks to the output significantly more sparingly than mode + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option + * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to + * <code>ubidi_writeReordered</code>.</li> + * + * <li>When the reordering mode is set to + * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual + * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm. + * <br> + * For example, an LTR paragraph with the content "abc FED123" (where + * upper case represents RTL characters) will be transformed to "abc 123DEF."</li> + * </ul> + * + * <p>In all the reordering modes specifying an "inverse Bidi" algorithm + * (i.e.
those with a name starting with <code>UBIDI_REORDER_INVERSE</code>), + * output runs should be retrieved using + * <code>ubidi_getVisualRun()</code>, and the output text with + * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in + * "inverse Bidi" modes the input is actually visually ordered text and + * reordered output returned by <code>ubidi_getVisualRun()</code> or + * <code>ubidi_writeReordered()</code> are actually runs or character string + * of logically ordered output.<br> + * For all the "inverse Bidi" modes, the source text should not contain + * Bidi control characters other than LRM or RLM.</p> + * + * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of + * <code>ubidi_writeReordered</code> has no useful meaning and should not be + * used in conjunction with any value of the reordering mode specifying + * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>. + * + * @param pBiDi is a <code>UBiDi</code> object. + * @param reorderingMode specifies the required variant of the Bidi algorithm. + * + * @see UBiDiReorderingMode + * @see ubidi_setInverse + * @see ubidi_setPara + * @see ubidi_writeReordered + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode); + +/** + * What is the requested reordering mode for a given Bidi object? + * + * @param pBiDi is a <code>UBiDi</code> object. + * @return the current reordering mode of the Bidi object + * @see ubidi_setReorderingMode + * @stable ICU 3.6 + */ +U_STABLE UBiDiReorderingMode U_EXPORT2 +ubidi_getReorderingMode(UBiDi *pBiDi); + +/** + * <code>UBiDiReorderingOption</code> values indicate which options are + * specified to affect the Bidi algorithm. 
+ * + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ +typedef enum UBiDiReorderingOption { + /** + * option value for <code>ubidi_setReorderingOptions</code>: + * disable all the options which can be set with this function + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ + UBIDI_OPTION_DEFAULT = 0, + + /** + * option bit for <code>ubidi_setReorderingOptions</code>: + * insert Bidi marks (LRM or RLM) when needed to ensure correct result of + * a reordering to a Logical order + * + * <p>This option must be set or reset before calling + * <code>ubidi_setPara</code>.</p> + * + * <p>This option is significant only with reordering modes which generate + * a result with Logical order, specifically:</p> + * <ul> + * <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li> + * <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li> + * <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li> + * <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li> + * </ul> + * + * <p>If this option is set in conjunction with reordering mode + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling + * <code>ubidi_setInverse(TRUE)</code>, it implies + * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> + * in calls to function <code>ubidi_writeReordered()</code>.</p> + * + * <p>For other reordering modes, a minimum number of LRM or RLM characters + * will be added to the source text after reordering it so as to ensure + * round trip, i.e. when applying the inverse reordering mode on the + * resulting logical text with removal of Bidi marks + * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling + * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> + * in <code>ubidi_writeReordered</code>), the result will be identical to the + * source text in the first transformation. + * + * <p>This option will be ignored if specified together with option + * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. 
It inhibits option + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function + * <code>ubidi_writeReordered()</code> and it implies option + * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function + * <code>ubidi_writeReordered()</code> if the reordering mode is + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p> + * + * @see ubidi_setReorderingMode + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ + UBIDI_OPTION_INSERT_MARKS = 1, + + /** + * option bit for <code>ubidi_setReorderingOptions</code>: + * remove Bidi control characters + * + * <p>This option must be set or reset before calling + * <code>ubidi_setPara</code>.</p> + * + * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>. + * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls + * to function <code>ubidi_writeReordered()</code> and it implies option + * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p> + * + * @see ubidi_setReorderingMode + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ + UBIDI_OPTION_REMOVE_CONTROLS = 2, + + /** + * option bit for <code>ubidi_setReorderingOptions</code>: + * process the output as part of a stream to be continued + * + * <p>This option must be set or reset before calling + * <code>ubidi_setPara</code>.</p> + * + * <p>This option specifies that the caller is interested in processing a large + * text object in parts. + * The results of the successive calls are expected to be concatenated by the + * caller. Only the call for the last part will have this option bit off.</p> + * + * <p>When this option bit is on, <code>ubidi_setPara()</code> may process + * less than the full source text in order to truncate the text at a meaningful + * boundary. The caller should call <code>ubidi_getProcessedLength()</code> + * immediately after calling <code>ubidi_setPara()</code> in order to + * determine how much of the source text has been processed. 
+ * Source text beyond that length should be resubmitted in following calls to + * <code>ubidi_setPara</code>. The processed length may be less than + * the length of the source text if a character preceding the last character of + * the source text constitutes a reasonable boundary (like a block separator) + * for text to be continued.<br> + * If the last character of the source text constitutes a reasonable + * boundary, the whole text will be processed at once.<br> + * If nowhere in the source text there exists + * such a reasonable boundary, the processed length will be zero.<br> + * The caller should check for such an occurrence and do one of the following: + * <ul><li>submit a larger amount of text with a better chance to include + * a reasonable boundary.</li> + * <li>resubmit the same text after turning off option + * <code>UBIDI_OPTION_STREAMING</code>.</li></ul> + * In all cases, this option should be turned off before processing the last + * part of the text.</p> + * + * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used, + * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with + * argument <code>orderParagraphsLTR</code> set to <code>TRUE</code> before + * calling <code>ubidi_setPara</code> so that later paragraphs may be + * concatenated to previous paragraphs on the right.</p> + * + * @see ubidi_setReorderingMode + * @see ubidi_setReorderingOptions + * @see ubidi_getProcessedLength + * @see ubidi_orderParagraphsLTR + * @stable ICU 3.6 + */ + UBIDI_OPTION_STREAMING = 4 +} UBiDiReorderingOption; + +/** + * Specify which of the reordering options + * should be applied during Bidi transformations. + * + * @param pBiDi is a <code>UBiDi</code> object. + * @param reorderingOptions is a combination of zero or more of the following + * options: + * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>, + * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>. 
+ * + * @see ubidi_getReorderingOptions + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions); + +/** + * What are the reordering options applied to a given Bidi object? + * + * @param pBiDi is a <code>UBiDi</code> object. + * @return the current reordering options of the Bidi object + * @see ubidi_setReorderingOptions + * @stable ICU 3.6 + */ +U_STABLE uint32_t U_EXPORT2 +ubidi_getReorderingOptions(UBiDi *pBiDi); + +/** + * Set the context before a call to ubidi_setPara().<p> + * + * ubidi_setPara() computes the left-right directionality for a given piece + * of text which is supplied as one of its arguments. Sometimes this piece + * of text (the "main text") should be considered in context, because text + * appearing before ("prologue") and/or after ("epilogue") the main text + * may affect the result of this computation.<p> + * + * This function specifies the prologue and/or the epilogue for the next + * call to ubidi_setPara(). The characters specified as prologue and + * epilogue should not be modified by the calling program until the call + * to ubidi_setPara() has returned. If successive calls to ubidi_setPara() + * all need specification of a context, ubidi_setContext() must be called + * before each call to ubidi_setPara(). In other words, a context is not + * "remembered" after the following successful call to ubidi_setPara().<p> + * + * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to + * ubidi_setContext() which specifies a prologue, the paragraph level will + * be computed taking into consideration the text in the prologue.<p> + * + * When ubidi_setPara() is called without a previous call to + * ubidi_setContext, the main text is handled as if preceded and followed + * by strong directional characters at the current paragraph level. 
+ * Calling ubidi_setContext() with specification of a prologue will change + * this behavior by handling the main text as if preceded by the last + * strong character appearing in the prologue, if any. + * Calling ubidi_setContext() with specification of an epilogue will change + * the behavior of ubidi_setPara() by handling the main text as if followed + * by the first strong character or digit appearing in the epilogue, if any.<p> + * + * Note 1: if <code>ubidi_setContext</code> is called repeatedly without + * calling <code>ubidi_setPara</code>, the earlier calls have no effect, + * only the last call will be remembered for the next call to + * <code>ubidi_setPara</code>.<p> + * + * Note 2: calling <code>ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)</code> + * cancels any previous setting of non-empty prologue or epilogue. + * The next call to <code>ubidi_setPara()</code> will process no + * prologue or epilogue.<p> + * + * Note 3: users must be aware that even after setting the context + * before a call to ubidi_setPara() to perform e.g. a logical to visual + * transformation, the resulting string may not be identical to what it + * would have been if all the text, including prologue and epilogue, had + * been processed together.<br> + * Example (upper case letters represent RTL characters):<br> + * prologue = "<code>abc DE</code>"<br> + * epilogue = none<br> + * main text = "<code>FGH xyz</code>"<br> + * paraLevel = UBIDI_LTR<br> + * display without prologue = "<code>HGF xyz</code>" + * ("HGF" is adjacent to "xyz")<br> + * display with prologue = "<code>abc HGFED xyz</code>" + * ("HGF" is not adjacent to "xyz")<br> + * + * @param pBiDi is a paragraph <code>UBiDi</code> object. + * + * @param prologue is a pointer to the text which precedes the text that + * will be specified in a coming call to ubidi_setPara(). + * If there is no prologue to consider, then <code>proLength</code> + * must be zero and this pointer can be NULL. 
+ * + * @param proLength is the length of the prologue; if <code>proLength==-1</code> + * then the prologue must be zero-terminated. + * Otherwise proLength must be >= 0. If <code>proLength==0</code>, it means + * that there is no prologue to consider. + * + * @param epilogue is a pointer to the text which follows the text that + * will be specified in a coming call to ubidi_setPara(). + * If there is no epilogue to consider, then <code>epiLength</code> + * must be zero and this pointer can be NULL. + * + * @param epiLength is the length of the epilogue; if <code>epiLength==-1</code> + * then the epilogue must be zero-terminated. + * Otherwise epiLength must be >= 0. If <code>epiLength==0</code>, it means + * that there is no epilogue to consider. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_setPara + * @stable ICU 4.8 + */ +U_STABLE void U_EXPORT2 +ubidi_setContext(UBiDi *pBiDi, + const UChar *prologue, int32_t proLength, + const UChar *epilogue, int32_t epiLength, + UErrorCode *pErrorCode); + +/** + * Perform the Unicode Bidi algorithm. It is defined in the + * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>, + * version 13, + * also described in The Unicode Standard, Version 4.0 .<p> + * + * This function takes a piece of plain text containing one or more paragraphs, + * with or without externally specified embedding levels from <i>styled</i> + * text and computes the left-right-directionality of each character.<p> + * + * If the entire text is all of the same directionality, then + * the function may not perform all the steps described by the algorithm, + * i.e., some levels may not be the same as if all steps were performed. + * This is not relevant for unidirectional text.<br> + * For example, in pure LTR text with numbers the numbers would get + * a resolved level of 2 higher than the surrounding text according to + * the algorithm. 
This implementation may set all resolved levels to + * the same value in such a case.<p> + * + * The text can be composed of multiple paragraphs. Occurrence of a block + * separator in the text terminates a paragraph, and whatever comes next starts + * a new paragraph. The exception to this rule is when a Carriage Return (CR) + * is followed by a Line Feed (LF). Both CR and LF are block separators, but + * in that case, the pair of characters is considered as terminating the + * preceding paragraph, and a new paragraph will be started by a character + * coming after the LF. + * + * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code> + * which will be set to contain the reordering information, + * especially the resolved levels for all the characters in <code>text</code>. + * + * @param text is a pointer to the text that the Bidi algorithm will be performed on. + * This pointer is stored in the UBiDi object and can be retrieved + * with <code>ubidi_getText()</code>.<br> + * <strong>Note:</strong> the text must be (at least) <code>length</code> long. + * + * @param length is the length of the text; if <code>length==-1</code> then + * the text must be zero-terminated. + * + * @param paraLevel specifies the default level for the text; + * it is typically 0 (LTR) or 1 (RTL). + * If the function shall determine the paragraph level from the text, + * then <code>paraLevel</code> can be set to + * either <code>#UBIDI_DEFAULT_LTR</code> + * or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple + * paragraphs, the paragraph level shall be determined separately for + * each paragraph; if a paragraph does not include any strongly typed + * character, then the desired default is used (0 for LTR or 1 for RTL). + * Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code> + * is also valid, with odd levels indicating RTL. 
+ * + * @param embeddingLevels (in) may be used to preset the embedding and override levels, + * ignoring characters like LRE and PDF in the text. + * A level overrides the directional property of its corresponding + * (same index) character if the level has the + * <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br> + * Aside from that bit, it must be + * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>, + * except that level 0 is always allowed. + * Level 0 for a paragraph separator prevents reordering of paragraphs; + * this only works reliably if <code>#UBIDI_LEVEL_OVERRIDE</code> + * is also set for paragraph separators. + * Level 0 for other characters is treated as a wildcard + * and is lifted up to the resolved level of the surrounding paragraph.<br><br> + * <strong>Caution: </strong>A copy of this pointer, not of the levels, + * will be stored in the <code>UBiDi</code> object; + * the <code>embeddingLevels</code> array must not be + * deallocated before the <code>UBiDi</code> structure is destroyed or reused, + * and the <code>embeddingLevels</code> + * should not be modified to avoid unexpected results on subsequent Bidi operations. + * However, the <code>ubidi_setPara()</code> and + * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br> + * After the <code>UBiDi</code> object is reused or destroyed, the caller + * must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br> + * <strong>Note:</strong> the <code>embeddingLevels</code> array must be + * at least <code>length</code> long. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pErrorCode must be a valid pointer to an error code value. 
+ * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, + UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, + UErrorCode *pErrorCode); + +/** + * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to + * contain the reordering information, especially the resolved levels, + * for all the characters in a line of text. This line of text is + * specified by referring to a <code>UBiDi</code> object representing + * this information for a piece of text containing one or more paragraphs, + * and by specifying a range of indexes in this text.<p> + * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> + * + * This is used after calling <code>ubidi_setPara()</code> + * for a piece of text, and after line-breaking on that text. + * It is not necessary if each paragraph is treated as a single line.<p> + * + * After line-breaking, rules (L1) and (L2) for the treatment of + * trailing WS and for reordering are performed on + * a <code>UBiDi</code> object that represents a line.<p> + * + * <strong>Important: </strong><code>pLineBiDi</code> shares data with + * <code>pParaBiDi</code>. + * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>. + * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line + * before the object for its parent paragraph.<p> + * + * The text pointer that was stored in <code>pParaBiDi</code> is also copied, + * and <code>start</code> is added to it so that it points to the beginning of the + * line for this object. + * + * @param pParaBiDi is the parent paragraph object. It must have been set + * by a successful call to ubidi_setPara. + * + * @param start is the line's first index into the text. + * + * @param limit is just behind the line's last index into the text + * (its last index +1).<br> + * It must be <code>0<=start<limit<=</code>containing paragraph limit. 
+ * If the specified line crosses a paragraph boundary, the function + * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR. + * + * @param pLineBiDi is the object that will now represent a line of the text. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_setPara + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_setLine(const UBiDi *pParaBiDi, + int32_t start, int32_t limit, + UBiDi *pLineBiDi, + UErrorCode *pErrorCode); + +/** + * Get the directionality of the text. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code> + * or <code>UBIDI_MIXED</code> + * that indicates if the entire text + * represented by this object is unidirectional, + * and which direction, or if it is mixed-directional. + * Note - The value <code>UBIDI_NEUTRAL</code> is never returned from this method. + * + * @see UBiDiDirection + * @stable ICU 2.0 + */ +U_STABLE UBiDiDirection U_EXPORT2 +ubidi_getDirection(const UBiDi *pBiDi); + +/** + * Gets the base direction of the text provided according + * to the Unicode Bidirectional Algorithm. The base direction + * is derived from the first character in the string with bidirectional + * character type L, R, or AL. If the first such character has type L, + * <code>UBIDI_LTR</code> is returned. If the first such character has + * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does + * not contain any character of these types, then + * <code>UBIDI_NEUTRAL</code> is returned. + * + * This is a lightweight function for use when only the base direction + * is needed and no further bidi processing of the text is needed. + * + * @param text is a pointer to the text whose base + * direction is needed. + * Note: the text must be (at least) @c length long. 
+ * + * @param length is the length of the text; + * if <code>length==-1</code> then the text + * must be zero-terminated. + * + * @return <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>, + * <code>UBIDI_NEUTRAL</code> + * + * @see UBiDiDirection + * @stable ICU 4.6 + */ +U_STABLE UBiDiDirection U_EXPORT2 +ubidi_getBaseDirection(const UChar *text, int32_t length ); + +/** + * Get the pointer to the text. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return The pointer to the text that the UBiDi object was created for. + * + * @see ubidi_setPara + * @see ubidi_setLine + * @stable ICU 2.0 + */ +U_STABLE const UChar * U_EXPORT2 +ubidi_getText(const UBiDi *pBiDi); + +/** + * Get the length of the text. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return The length of the text that the UBiDi object was created for. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_getLength(const UBiDi *pBiDi); + +/** + * Get the paragraph level of the text. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return The paragraph level. If there are multiple paragraphs, their + * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or + * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph + * is returned. + * + * @see UBiDiLevel + * @see ubidi_getParagraph + * @see ubidi_getParagraphByIndex + * @stable ICU 2.0 + */ +U_STABLE UBiDiLevel U_EXPORT2 +ubidi_getParaLevel(const UBiDi *pBiDi); + +/** + * Get the number of paragraphs. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @return The number of paragraphs. + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_countParagraphs(UBiDi *pBiDi); + +/** + * Get a paragraph, given a position within the text. 
+ * This function returns information about a paragraph.<br> + * Note: if the paragraph index is known, it is more efficient to + * retrieve the paragraph information using ubidi_getParagraphByIndex().<p> + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param charIndex is the index of a character within the text, in the + * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>. + * + * @param pParaStart will receive the index of the first character of the + * paragraph in the text. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pParaLimit will receive the limit of the paragraph. + * The l-value that you point to here may be the + * same expression (variable) as the one for + * <code>charIndex</code>. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pParaLevel will receive the level of the paragraph. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The index of the paragraph containing the specified position. + * + * @see ubidi_getProcessedLength + * @stable ICU 3.4 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart, + int32_t *pParaLimit, UBiDiLevel *pParaLevel, + UErrorCode *pErrorCode); + +/** + * Get a paragraph, given the index of this paragraph. + * + * This function returns information about a paragraph.<p> + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @param paraIndex is the number of the paragraph, in the + * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>. + * + * @param pParaStart will receive the index of the first character of the + * paragraph in the text. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pParaLimit will receive the limit of the paragraph. 
+ * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pParaLevel will receive the level of the paragraph. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, + int32_t *pParaStart, int32_t *pParaLimit, + UBiDiLevel *pParaLevel, UErrorCode *pErrorCode); + +/** + * Get the level for one character. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param charIndex the index of a character. It must be in the range + * [0..ubidi_getProcessedLength(pBiDi)-1]. + * + * @return The level for the character at charIndex (0 if charIndex is not + * in the valid range). + * + * @see UBiDiLevel + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_STABLE UBiDiLevel U_EXPORT2 +ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex); + +/** + * Get an array of levels for each character.<p> + * + * Note that this function may allocate memory under some + * circumstances, unlike <code>ubidi_getLevelAt()</code>. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose + * text length must be strictly positive. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The levels array for the text, + * or <code>NULL</code> if an error occurs. + * + * @see UBiDiLevel + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_STABLE const UBiDiLevel * U_EXPORT2 +ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode); + +/** + * Get a logical run. + * This function returns information about a run and is used + * to retrieve runs in logical order.<p> + * This is especially useful for line-breaking on a paragraph. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. 
+ * + * @param logicalPosition is a logical position within the source text. + * + * @param pLogicalLimit will receive the limit of the corresponding run. + * The l-value that you point to here may be the + * same expression (variable) as the one for + * <code>logicalPosition</code>. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @param pLevel will receive the level of the corresponding run. + * This pointer can be <code>NULL</code> if this + * value is not necessary. + * + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition, + int32_t *pLogicalLimit, UBiDiLevel *pLevel); + +/** + * Get the number of runs. + * This function may invoke the actual reordering on the + * <code>UBiDi</code> object, after <code>ubidi_setPara()</code> + * may have resolved only the levels of the text. Therefore, + * <code>ubidi_countRuns()</code> may have to allocate memory, + * and may fail doing so. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The number of runs. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); + +/** + * Get one run's logical start, length, and directionality, + * which can be 0 for LTR or 1 for RTL. + * In an RTL run, the character at the logical start is + * visually on the right of the displayed run. + * The length is the number of characters in the run.<p> + * <code>ubidi_countRuns()</code> should be called + * before the runs are retrieved. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param runIndex is the number of the run in visual order, in the + * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>. + * + * @param pLogicalStart is the first logical character index in the text. 
+ * The pointer may be <code>NULL</code> if this index is not needed. + * + * @param pLength is the number of characters (at least one) in the run. + * The pointer may be <code>NULL</code> if this is not needed. + * + * @return the directionality of the run, + * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>, + * never <code>UBIDI_MIXED</code>, + * never <code>UBIDI_NEUTRAL</code>. + * + * @see ubidi_countRuns + * + * Example: + * <pre> + * \code + * int32_t i, count=ubidi_countRuns(pBiDi, &errorCode), + * logicalStart, visualIndex=0, length; + * for(i=0; i<count; ++i) { + * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) { + * do { // LTR + * show_char(text[logicalStart++], visualIndex++); + * } while(--length>0); + * } else { + * logicalStart+=length; // logicalLimit + * do { // RTL + * show_char(text[--logicalStart], visualIndex++); + * } while(--length>0); + * } + * } + *\endcode + * </pre> + * + * Note that in right-to-left runs, code like this places + * second surrogates before first ones (which is generally a bad idea) + * and combining characters before base characters. + * <p> + * Use of <code>ubidi_writeReordered()</code>, optionally with the + * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order + * to avoid these issues. + * @stable ICU 2.0 + */ +U_STABLE UBiDiDirection U_EXPORT2 +ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, + int32_t *pLogicalStart, int32_t *pLength); + +/** + * Get the visual position from a logical text position. + * If such a mapping is used many times on the same + * <code>UBiDi</code> object, then calling + * <code>ubidi_getLogicalMap()</code> is more efficient.<p> + * + * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no + * visual position because the corresponding text character is a Bidi control + * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. 
+ * <p> + * When the visual output is altered by using options of + * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not + * be correct. It is advised to use, when possible, reordering options + * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. + * <p> + * Note that in right-to-left runs, this mapping places + * second surrogates before first ones (which is generally a bad idea) + * and combining characters before base characters. + * Use of <code>ubidi_writeReordered()</code>, optionally with the + * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead + * of using the mapping, in order to avoid these issues. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param logicalIndex is the index of a character in the text. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The visual position of this character. + * + * @see ubidi_getLogicalMap + * @see ubidi_getLogicalIndex + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode); + +/** + * Get the logical text position from a visual position. + * If such a mapping is used many times on the same + * <code>UBiDi</code> object, then calling + * <code>ubidi_getVisualMap()</code> is more efficient.<p> + * + * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no + * logical position because the corresponding text character is a Bidi mark + * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>. + * <p> + * This is the inverse function to <code>ubidi_getVisualIndex()</code>. 
+ * <p> + * When the visual output is altered by using options of + * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not + * be correct. It is advised to use, when possible, reordering options + * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param visualIndex is the visual position of a character. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The index of this character in the text. + * + * @see ubidi_getVisualMap + * @see ubidi_getVisualIndex + * @see ubidi_getResultLength + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode); + +/** + * Get a logical-to-visual index map (array) for the characters in the UBiDi + * (paragraph or line) object. + * <p> + * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the + * corresponding text characters are Bidi controls removed from the visual + * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. + * <p> + * When the visual output is altered by using options of + * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not + * be correct. It is advised to use, when possible, reordering options + * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. 
+ * <p> + * Note that in right-to-left runs, this mapping places + * second surrogates before first ones (which is generally a bad idea) + * and combining characters before base characters. + * Use of <code>ubidi_writeReordered()</code>, optionally with the + * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead + * of using the mapping, in order to avoid these issues. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code> + * indexes which will reflect the reordering of the characters. + * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number + * of elements allocated in <code>indexMap</code> must be no less than + * <code>ubidi_getResultLength()</code>. + * The array does not need to be initialized.<br><br> + * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_getVisualMap + * @see ubidi_getVisualIndex + * @see ubidi_getProcessedLength + * @see ubidi_getResultLength + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); + +/** + * Get a visual-to-logical index map (array) for the characters in the UBiDi + * (paragraph or line) object. + * <p> + * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the + * corresponding text characters are Bidi marks inserted in the visual output + * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>. + * <p> + * When the visual output is altered by using options of + * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>, + * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not + * be correct. 
It is advised to use, when possible, reordering options + * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>. + * + * @param pBiDi is the paragraph or line <code>UBiDi</code> object. + * + * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code> + * indexes which will reflect the reordering of the characters. + * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number + * of elements allocated in <code>indexMap</code> must be no less than + * <code>ubidi_getProcessedLength()</code>. + * The array does not need to be initialized.<br><br> + * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_getLogicalMap + * @see ubidi_getLogicalIndex + * @see ubidi_getProcessedLength + * @see ubidi_getResultLength + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); + +/** + * This is a convenience function that does not use a UBiDi object. + * It is intended to be used for when an application has determined the levels + * of objects (character sequences) and just needs to have them reordered (L2). + * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a + * <code>UBiDi</code> object. + * + * @param levels is an array with <code>length</code> levels that have been determined by + * the application. + * + * @param length is the number of levels in the array, or, semantically, + * the number of objects to be reordered. + * It must be <code>length>0</code>. + * + * @param indexMap is a pointer to an array of <code>length</code> + * indexes which will reflect the reordering of the characters. + * The array does not need to be initialized.<p> + * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. 
+ * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); + +/** + * This is a convenience function that does not use a UBiDi object. + * It is intended to be used for when an application has determined the levels + * of objects (character sequences) and just needs to have them reordered (L2). + * This is equivalent to using <code>ubidi_getVisualMap()</code> on a + * <code>UBiDi</code> object. + * + * @param levels is an array with <code>length</code> levels that have been determined by + * the application. + * + * @param length is the number of levels in the array, or, semantically, + * the number of objects to be reordered. + * It must be <code>length>0</code>. + * + * @param indexMap is a pointer to an array of <code>length</code> + * indexes which will reflect the reordering of the characters. + * The array does not need to be initialized.<p> + * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); + +/** + * Invert an index map. + * The index mapping of the first map is inverted and written to + * the second one. + * + * @param srcMap is an array with <code>length</code> elements + * which defines the original mapping from a source array containing + * <code>length</code> elements to a destination array. + * Some elements of the source array may have no mapping in the + * destination array. In that case, their value will be + * the special value <code>UBIDI_MAP_NOWHERE</code>. + * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>. + * Some elements may have a value >= <code>length</code>, if the + * destination array has more elements than the source array. + * There must be no duplicate indexes (two or more elements with the + * same value except <code>UBIDI_MAP_NOWHERE</code>). 
+ * + * @param destMap is an array with a number of elements equal to 1 + the highest + * value in <code>srcMap</code>. + * <code>destMap</code> will be filled with the inverse mapping. + * If element with index i in <code>srcMap</code> has a value k different + * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of + * the source array maps to element k in the destination array. + * The inverse map will have value i in its k-th element. + * For all elements of the destination array which do not map to + * an element in the source array, the corresponding element in the + * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>. + * + * @param length is the number of elements in <code>srcMap</code>. + * @see UBIDI_MAP_NOWHERE + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length); + +/** option flags for ubidi_writeReordered() */ + +/** + * option bit for ubidi_writeReordered(): + * keep combining characters after their base characters in RTL runs + * + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_KEEP_BASE_COMBINING 1 + +/** + * option bit for ubidi_writeReordered(): + * replace characters with the "mirrored" property in RTL runs + * by their mirror-image mappings + * + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_DO_MIRRORING 2 + +/** + * option bit for ubidi_writeReordered(): + * surround the run with LRMs if necessary; + * this is part of the approximate "inverse Bidi" algorithm + * + * <p>This option does not imply corresponding adjustment of the index + * mappings.</p> + * + * @see ubidi_setInverse + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_INSERT_LRM_FOR_NUMERIC 4 + +/** + * option bit for ubidi_writeReordered(): + * remove Bidi control characters + * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC) + * + * <p>This option does not imply corresponding adjustment of the index + * mappings.</p> + * + * @see 
ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_REMOVE_BIDI_CONTROLS 8 + +/** + * option bit for ubidi_writeReordered(): + * write the output in reverse order + * + * <p>This has the same effect as calling <code>ubidi_writeReordered()</code> + * first without this option, and then calling + * <code>ubidi_writeReverse()</code> without mirroring. + * Doing this in the same step is faster and avoids a temporary buffer. + * An example for using this option is output to a character terminal that + * is designed for RTL scripts and stores text in reverse order.</p> + * + * @see ubidi_writeReordered + * @stable ICU 2.0 + */ +#define UBIDI_OUTPUT_REVERSE 16 + +/** + * Get the length of the source text processed by the last call to + * <code>ubidi_setPara()</code>. This length may be different from the length + * of the source text if option <code>#UBIDI_OPTION_STREAMING</code> + * has been set. + * <br> + * Note that whenever the length of the text affects the execution or the + * result of a function, it is the processed length which must be considered, + * except for <code>ubidi_setPara</code> (which receives unprocessed source + * text) and <code>ubidi_getLength</code> (which returns the original length + * of the source text).<br> + * In particular, the processed length is the one to consider in the following + * cases: + * <ul> + * <li>maximum value of the <code>limit</code> argument of + * <code>ubidi_setLine</code></li> + * <li>maximum value of the <code>charIndex</code> argument of + * <code>ubidi_getParagraph</code></li> + * <li>maximum value of the <code>charIndex</code> argument of + * <code>ubidi_getLevelAt</code></li> + * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li> + * <li>maximum value of the <code>logicalStart</code> argument of + * <code>ubidi_getLogicalRun</code></li> + * <li>maximum value of the <code>logicalIndex</code> argument of + * <code>ubidi_getVisualIndex</code></li> + * <li>number of elements 
filled in the <code>*indexMap</code> argument of + * <code>ubidi_getLogicalMap</code></li> + * <li>length of text processed by <code>ubidi_writeReordered</code></li> + * </ul> + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @return The length of the part of the source text processed by + * the last call to <code>ubidi_setPara</code>. + * @see ubidi_setPara + * @see UBIDI_OPTION_STREAMING + * @stable ICU 3.6 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_getProcessedLength(const UBiDi *pBiDi); + +/** + * Get the length of the reordered text resulting from the last call to + * <code>ubidi_setPara()</code>. This length may be different from the length + * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code> + * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set. + * <br> + * This resulting length is the one to consider in the following cases: + * <ul> + * <li>maximum value of the <code>visualIndex</code> argument of + * <code>ubidi_getLogicalIndex</code></li> + * <li>number of elements of the <code>*indexMap</code> argument of + * <code>ubidi_getVisualMap</code></li> + * </ul> + * Note that this length stays identical to the source text length if + * Bidi marks are inserted or removed using option bits of + * <code>ubidi_writeReordered</code>, or if option + * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set. + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @return The length of the reordered text resulting from + * the last call to <code>ubidi_setPara</code>. + * @see ubidi_setPara + * @see UBIDI_OPTION_INSERT_MARKS + * @see UBIDI_OPTION_REMOVE_CONTROLS + * @stable ICU 3.6 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_getResultLength(const UBiDi *pBiDi); + +U_CDECL_BEGIN + +#ifndef U_HIDE_DEPRECATED_API +/** + * Value returned by <code>UBiDiClassCallback</code> callbacks when + * there is no need to override the standard Bidi class for a given code point. 
+ * + * This constant is deprecated; use u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 instead. + * + * @see UBiDiClassCallback + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ +#define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT +#endif // U_HIDE_DEPRECATED_API + +/** + * Callback type declaration for overriding default Bidi class values with + * custom ones. + * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code> + * object by calling the <code>ubidi_setClassCallback()</code> function; + * then the callback will be invoked by the UBA implementation any time the + * class of a character is to be determined.</p> + * + * @param context is a pointer to the callback private data. + * + * @param c is the code point to get a Bidi class for. + * + * @return The directional property / Bidi class for the given code point + * <code>c</code> if the default class has been overridden, or + * <code>#U_BIDI_CLASS_DEFAULT=u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code> + * if the standard Bidi class value for <code>c</code> is to be used. + * @see ubidi_setClassCallback + * @see ubidi_getClassCallback + * @stable ICU 3.6 + */ +typedef UCharDirection U_CALLCONV +UBiDiClassCallback(const void *context, UChar32 c); + +U_CDECL_END + +/** + * Retrieve the Bidi class for a given code point. + * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a + * value other than <code>#U_BIDI_CLASS_DEFAULT=u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>, + * that value is used; otherwise the default class determination mechanism is invoked.</p> + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @param c is the code point whose Bidi class must be retrieved. + * + * @return The Bidi class for character <code>c</code> based + * on the given <code>pBiDi</code> instance. 
+ * @see UBiDiClassCallback + * @stable ICU 3.6 + */ +U_STABLE UCharDirection U_EXPORT2 +ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c); + +/** + * Set the callback function and callback data used by the UBA + * implementation for Bidi class determination. + * <p>This may be useful for assigning Bidi classes to PUA characters, or + * for special application needs. For instance, an application may want to + * handle all spaces like L or R characters (according to the base direction) + * when creating the visual ordering of logical lines which are part of a report + * organized in columns: there should not be interaction between adjacent + * cells.</p> + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @param newFn is the new callback function pointer. + * + * @param newContext is the new callback context pointer. This can be NULL. + * + * @param oldFn fillin: Returns the old callback function pointer. This can be + * NULL. + * + * @param oldContext fillin: Returns the old callback's context. This can be + * NULL. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @see ubidi_getClassCallback + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn, + const void *newContext, UBiDiClassCallback **oldFn, + const void **oldContext, UErrorCode *pErrorCode); + +/** + * Get the current callback function used for Bidi class determination. + * + * @param pBiDi is the paragraph <code>UBiDi</code> object. + * + * @param fn fillin: Returns the callback function pointer. + * + * @param context fillin: Returns the callback's private context. 
+ * + * @see ubidi_setClassCallback + * @stable ICU 3.6 + */ +U_STABLE void U_EXPORT2 +ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context); + +/** + * Take a <code>UBiDi</code> object containing the reordering + * information for a piece of text (one or more paragraphs) set by + * <code>ubidi_setPara()</code> or for a line of text set by + * <code>ubidi_setLine()</code> and write a reordered string to the + * destination buffer. + * + * This function preserves the integrity of characters with multiple + * code units and (optionally) combining characters. + * Characters in RTL runs can be replaced by mirror-image characters + * in the destination buffer. Note that "real" mirroring has + * to be done in a rendering engine by glyph selection + * and that for many "mirrored" characters there are no + * Unicode characters as mirror-image equivalents. + * There are also options to insert or remove Bidi control + * characters; see the description of the <code>destSize</code> + * and <code>options</code> parameters and of the option bit flags. + * + * @param pBiDi A pointer to a <code>UBiDi</code> object that + * is set by <code>ubidi_setPara()</code> or + * <code>ubidi_setLine()</code> and contains the reordering + * information for the text that it was defined for, + * as well as a pointer to that text.<br><br> + * The text was aliased (only the pointer was stored + * without copying the contents) and must not have been modified + * since the <code>ubidi_setPara()</code> call. + * + * @param dest A pointer to where the reordered text is to be copied. + * The source text and <code>dest[destSize]</code> + * must not overlap. + * + * @param destSize The size of the <code>dest</code> buffer, + * in number of UChars. + * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code> + * option is set, then the destination length could be + * as large as + * <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>. 
+ * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option + * is set, then the destination length may be less than + * <code>ubidi_getLength(pBiDi)</code>. + * If none of these options is set, then the destination length + * will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>. + * + * @param options A bit set of options for the reordering that control + * how the reordered text is written. + * The options include mirroring the characters on a code + * point basis and inserting LRM characters, which is used + * especially for transforming visually stored text + * to logically stored text (although this is still an + * imperfect implementation of an "inverse Bidi" algorithm + * because it uses the "forward Bidi" algorithm at its core). + * The available options are: + * <code>#UBIDI_DO_MIRRORING</code>, + * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>, + * <code>#UBIDI_KEEP_BASE_COMBINING</code>, + * <code>#UBIDI_OUTPUT_REVERSE</code>, + * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The length of the output string. + * + * @see ubidi_getProcessedLength + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_writeReordered(UBiDi *pBiDi, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode); + +/** + * Reverse a Right-To-Left run of Unicode text. + * + * This function preserves the integrity of characters with multiple + * code units and (optionally) combining characters. + * Characters can be replaced by mirror-image characters + * in the destination buffer. Note that "real" mirroring has + * to be done in a rendering engine by glyph selection + * and that for many "mirrored" characters there are no + * Unicode characters as mirror-image equivalents. + * There is also an option to remove Bidi control + * characters. + * + * This function is the implementation for reversing RTL runs as part + * of <code>ubidi_writeReordered()</code>. 
For detailed descriptions + * of the parameters, see there. + * Since no Bidi controls are inserted here, the output string length + * will never exceed <code>srcLength</code>. + * + * @see ubidi_writeReordered + * + * @param src A pointer to the RTL run text. + * + * @param srcLength The length of the RTL run. + * + * @param dest A pointer to where the reordered text is to be copied. + * <code>src[srcLength]</code> and <code>dest[destSize]</code> + * must not overlap. + * + * @param destSize The size of the <code>dest</code> buffer, + * in number of UChars. + * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option + * is set, then the destination length may be less than + * <code>srcLength</code>. + * If this option is not set, then the destination length + * will be exactly <code>srcLength</code>. + * + * @param options A bit set of options for the reordering that control + * how the reordered text is written. + * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>. + * + * @param pErrorCode must be a valid pointer to an error code value. + * + * @return The length of the output string. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +ubidi_writeReverse(const UChar *src, int32_t srcLength, + UChar *dest, int32_t destSize, + uint16_t options, + UErrorCode *pErrorCode); + +/*#define BIDI_SAMPLE_CODE*/ +/*@}*/ + +#endif diff --git a/vendor/icu/include/unicode/uchar.h b/vendor/icu/include/unicode/uchar.h new file mode 100644 index 0000000000..ad5d2f1c98 --- /dev/null +++ b/vendor/icu/include/unicode/uchar.h @@ -0,0 +1,3731 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1997-2016, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* +* File UCHAR.H +* +* Modification History: +* +* Date Name Description +* 04/02/97 aliu Creation. +* 03/29/99 helena Updated for C APIs. +* 4/15/99 Madhu Updated for C Implementation and Javadoc +* 5/20/99 Madhu Added the function u_getVersion() +* 8/19/1999 srl Upgraded scripts to Unicode 3.0 +* 8/27/1999 schererm UCharDirection constants: U_... +* 11/11/1999 weiv added u_isalnum(), cleaned comments +* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion(). +****************************************************************************** +*/ + +#ifndef UCHAR_H +#define UCHAR_H + +#include <unicode/utypes.h> +#include <unicode/stringoptions.h> + +U_CDECL_BEGIN + +/*==========================================================================*/ +/* Unicode version number */ +/*==========================================================================*/ +/** + * Unicode version number, default for the current ICU version. + * The actual Unicode Character Database (UCD) data is stored in uprops.dat + * and may be generated from UCD files from a different Unicode version. + * Call u_getUnicodeVersion to get the actual Unicode version of the data. + * + * @see u_getUnicodeVersion + * @stable ICU 2.0 + */ +#define U_UNICODE_VERSION "10.0" + +/** + * \file + * \brief C API: Unicode Properties + * + * This C API provides low-level access to the Unicode Character Database. + * In addition to raw property values, some convenience functions calculate + * derived properties, for example for Java-style programming. + * + * Unicode assigns each code point (not just assigned character) values for + * many properties. + * Most of them are simple boolean flags, or constants from a small enumerated list. + * For some properties, values are strings or other relatively more complex types. 
+ * + * For more information see + * "About the Unicode Character Database" (http://www.unicode.org/ucd/) + * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html). + * + * Many functions are designed to match java.lang.Character functions. + * See the individual function documentation, + * and see the JDK 1.4 java.lang.Character documentation + * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html + * + * There are also functions that provide easy migration from C/POSIX functions + * like isblank(). Their use is generally discouraged because the C/POSIX + * standards do not define their semantics beyond the ASCII range, which means + * that different implementations exhibit very different behavior. + * Instead, Unicode properties should be used directly. + * + * There are also only a few, broad C/POSIX character classes, and they tend + * to be used for conflicting purposes. For example, the "isalpha()" class + * is sometimes used to determine word boundaries, while a more sophisticated + * approach would at least distinguish initial letters from continuation + * characters (the latter including combining marks). + * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) + * Another example: There is no "istitle()" class for titlecase characters. + * + * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. + * ICU implements them according to the Standard Recommendations in + * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions + * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
+ * + * API access for C/POSIX character classes is as follows: + * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC) + * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE) + * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE) + * - punct: u_ispunct(c) + * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER + * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT) + * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM) + * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE) + * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK) + * - cntrl: u_charType(c)==U_CONTROL_CHAR + * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH) + * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT) + * + * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match, + * the Standard Recommendations in UTS #18. Instead, they match Java + * functions according to their API documentation. + * + * \htmlonly + * The C/POSIX character classes are also available in UnicodeSet patterns, + * using patterns like [:graph:] or \p{graph}. + * \endhtmlonly + * + * Note: There are several ICU whitespace functions. + * Comparison: + * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; + * most of general categories "Z" (separators) + most whitespace ISO controls + * (including no-break spaces, but excluding IS1..IS4) + * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces + * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces) + * - u_isspace: Z + whitespace ISO controls (including no-break spaces) + * - u_isblank: "horizontal spaces" = TAB + Zs + */ + +/** + * Constants. + */ + +/** The lowest Unicode code point value. Code points are non-negative. 
@stable ICU 2.0 */ +#define UCHAR_MIN_VALUE 0 + +/** + * The highest Unicode code point value (scalar value) according to + * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up). + * For a single character, UChar32 is a simple type that can hold any code point value. + * + * @see UChar32 + * @stable ICU 2.0 + */ +#define UCHAR_MAX_VALUE 0x10ffff + +/** + * Get a single-bit bit set (a flag) from a bit number 0..31. + * @stable ICU 2.1 + */ +#define U_MASK(x) ((uint32_t)1<<(x)) + +/** + * Selection constants for Unicode properties. + * These constants are used in functions like u_hasBinaryProperty to select + * one of the Unicode properties. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * + * For details about the properties see + * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/). + * + * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, + * then properties marked with "new in Unicode 3.2" are not or not fully available. + * Check u_getUnicodeVersion to be sure. + * + * @see u_hasBinaryProperty + * @see u_getIntPropertyValue + * @see u_getUnicodeVersion + * @stable ICU 2.1 + */ +typedef enum UProperty { + /* + * Note: UProperty constants are parsed by preparseucd.py. + * It matches lines like + * UCHAR_<Unicode property name>=<integer>, + */ + + /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that + debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, + rather than UCHAR_BINARY_START. Likewise for other *_START + identifiers. */ + + /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. + Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ + UCHAR_ALPHABETIC=0, + /** First constant for binary Unicode properties. @stable ICU 2.1 */ + UCHAR_BINARY_START=UCHAR_ALPHABETIC, + /** Binary property ASCII_Hex_Digit. 
0-9 A-F a-f @stable ICU 2.1 */ + UCHAR_ASCII_HEX_DIGIT=1, + /** Binary property Bidi_Control. + Format controls which have specific functions + in the Bidi Algorithm. @stable ICU 2.1 */ + UCHAR_BIDI_CONTROL=2, + /** Binary property Bidi_Mirrored. + Characters that may change display in RTL text. + Same as u_isMirrored. + See Bidi Algorithm, UTR 9. @stable ICU 2.1 */ + UCHAR_BIDI_MIRRORED=3, + /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */ + UCHAR_DASH=4, + /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). + Ignorable in most processing. + <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ + UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5, + /** Binary property Deprecated (new in Unicode 3.2). + The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ + UCHAR_DEPRECATED=6, + /** Binary property Diacritic. Characters that linguistically modify + the meaning of another character to which they apply. @stable ICU 2.1 */ + UCHAR_DIACRITIC=7, + /** Binary property Extender. + Extend the value or shape of a preceding alphabetic character, + e.g., length and iteration marks. @stable ICU 2.1 */ + UCHAR_EXTENDER=8, + /** Binary property Full_Composition_Exclusion. + CompositionExclusions.txt+Singleton Decompositions+ + Non-Starter Decompositions. @stable ICU 2.1 */ + UCHAR_FULL_COMPOSITION_EXCLUSION=9, + /** Binary property Grapheme_Base (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ + UCHAR_GRAPHEME_BASE=10, + /** Binary property Grapheme_Extend (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ + UCHAR_GRAPHEME_EXTEND=11, + /** Binary property Grapheme_Link (new in Unicode 3.2). 
+ For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ + UCHAR_GRAPHEME_LINK=12, + /** Binary property Hex_Digit. + Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ + UCHAR_HEX_DIGIT=13, + /** Binary property Hyphen. Dashes used to mark connections + between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */ + UCHAR_HYPHEN=14, + /** Binary property ID_Continue. + Characters that can continue an identifier. + DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." + ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ + UCHAR_ID_CONTINUE=15, + /** Binary property ID_Start. + Characters that can start an identifier. + Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ + UCHAR_ID_START=16, + /** Binary property Ideographic. + CJKV ideographs. @stable ICU 2.1 */ + UCHAR_IDEOGRAPHIC=17, + /** Binary property IDS_Binary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_IDS_BINARY_OPERATOR=18, + /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_IDS_TRINARY_OPERATOR=19, + /** Binary property Join_Control. + Format controls for cursive joining and ligation. @stable ICU 2.1 */ + UCHAR_JOIN_CONTROL=20, + /** Binary property Logical_Order_Exception (new in Unicode 3.2). + Characters that do not use logical order and + require special handling in most processing. @stable ICU 2.1 */ + UCHAR_LOGICAL_ORDER_EXCEPTION=21, + /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. + Ll+Other_Lowercase @stable ICU 2.1 */ + UCHAR_LOWERCASE=22, + /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */ + UCHAR_MATH=23, + /** Binary property Noncharacter_Code_Point. + Code points that are explicitly defined as illegal + for the encoding of characters. 
@stable ICU 2.1 */ + UCHAR_NONCHARACTER_CODE_POINT=24, + /** Binary property Quotation_Mark. @stable ICU 2.1 */ + UCHAR_QUOTATION_MARK=25, + /** Binary property Radical (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_RADICAL=26, + /** Binary property Soft_Dotted (new in Unicode 3.2). + Characters with a "soft dot", like i or j. + An accent placed on these characters causes + the dot to disappear. @stable ICU 2.1 */ + UCHAR_SOFT_DOTTED=27, + /** Binary property Terminal_Punctuation. + Punctuation characters that generally mark + the end of textual units. @stable ICU 2.1 */ + UCHAR_TERMINAL_PUNCTUATION=28, + /** Binary property Unified_Ideograph (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCHAR_UNIFIED_IDEOGRAPH=29, + /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. + Lu+Other_Uppercase @stable ICU 2.1 */ + UCHAR_UPPERCASE=30, + /** Binary property White_Space. + Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. + Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ + UCHAR_WHITE_SPACE=31, + /** Binary property XID_Continue. + ID_Continue modified to allow closure under + normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCHAR_XID_CONTINUE=32, + /** Binary property XID_Start. ID_Start modified to allow + closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCHAR_XID_START=33, + /** Binary property Case_Sensitive. Either the source of a case + mapping or _in_ the target of a case mapping. Not the same as + the general category Cased_Letter. @stable ICU 2.6 */ + UCHAR_CASE_SENSITIVE=34, + /** Binary property STerm (new in Unicode 4.0.1). + Sentence Terminal. Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + @stable ICU 3.0 */ + UCHAR_S_TERM=35, + /** Binary property Variation_Selector (new in Unicode 4.0.1). 
+ Indicates all those characters that qualify as Variation Selectors. + For details on the behavior of these characters, + see StandardizedVariants.html and 15.6 Variation Selectors. + @stable ICU 3.0 */ + UCHAR_VARIATION_SELECTOR=36, + /** Binary property NFD_Inert. + ICU-specific property for characters that are inert under NFD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFD_INERT=37, + /** Binary property NFKD_Inert. + ICU-specific property for characters that are inert under NFKD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFKD_INERT=38, + /** Binary property NFC_Inert. + ICU-specific property for characters that are inert under NFC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFC_INERT=39, + /** Binary property NFKC_Inert. + ICU-specific property for characters that are inert under NFKC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCHAR_NFKC_INERT=40, + /** Binary Property Segment_Starter. + ICU-specific property for characters that are starters in terms of + Unicode normalization and combining character sequences. + They have ccc=0 and do not occur in non-initial position of the + canonical decomposition of any character + (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). + ICU uses this property for segmenting a string for generating a set of + canonically equivalent strings, e.g. for canonical closure while + processing collation tailoring rules. 
+ @stable ICU 3.0 */ + UCHAR_SEGMENT_STARTER=41, + /** Binary property Pattern_Syntax (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCHAR_PATTERN_SYNTAX=42, + /** Binary property Pattern_White_Space (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCHAR_PATTERN_WHITE_SPACE=43, + /** Binary property alnum (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_ALNUM=44, + /** Binary property blank (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_BLANK=45, + /** Binary property graph (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_GRAPH=46, + /** Binary property print (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_PRINT=47, + /** Binary property xdigit (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCHAR_POSIX_XDIGIT=48, + /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */ + UCHAR_CASED=49, + /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */ + UCHAR_CASE_IGNORABLE=50, + /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_LOWERCASED=51, + /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_UPPERCASED=52, + /** Binary property Changes_When_Titlecased. 
@stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_TITLECASED=53, + /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_CASEFOLDED=54, + /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_CASEMAPPED=55, + /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ + UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56, + /** + * Binary property Emoji. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCHAR_EMOJI=57, + /** + * Binary property Emoji_Presentation. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCHAR_EMOJI_PRESENTATION=58, + /** + * Binary property Emoji_Modifier. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCHAR_EMOJI_MODIFIER=59, + /** + * Binary property Emoji_Modifier_Base. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCHAR_EMOJI_MODIFIER_BASE=60, + /** + * Binary property Emoji_Component. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 60 + */ + UCHAR_EMOJI_COMPONENT=61, + /** + * Binary property Regional_Indicator. + * @stable ICU 60 + */ + UCHAR_REGIONAL_INDICATOR=62, + /** + * Binary property Prepended_Concatenation_Mark. + * @stable ICU 60 + */ + UCHAR_PREPENDED_CONCATENATION_MARK=63, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for binary Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_BINARY_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + /** Enumerated property Bidi_Class. + Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ + UCHAR_BIDI_CLASS=0x1000, + /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ + UCHAR_INT_START=UCHAR_BIDI_CLASS, + /** Enumerated property Block. + Same as ublock_getCode, returns UBlockCode values. 
@stable ICU 2.2 */ + UCHAR_BLOCK=0x1001, + /** Enumerated property Canonical_Combining_Class. + Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ + UCHAR_CANONICAL_COMBINING_CLASS=0x1002, + /** Enumerated property Decomposition_Type. + Returns UDecompositionType values. @stable ICU 2.2 */ + UCHAR_DECOMPOSITION_TYPE=0x1003, + /** Enumerated property East_Asian_Width. + See http://www.unicode.org/reports/tr11/ + Returns UEastAsianWidth values. @stable ICU 2.2 */ + UCHAR_EAST_ASIAN_WIDTH=0x1004, + /** Enumerated property General_Category. + Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ + UCHAR_GENERAL_CATEGORY=0x1005, + /** Enumerated property Joining_Group. + Returns UJoiningGroup values. @stable ICU 2.2 */ + UCHAR_JOINING_GROUP=0x1006, + /** Enumerated property Joining_Type. + Returns UJoiningType values. @stable ICU 2.2 */ + UCHAR_JOINING_TYPE=0x1007, + /** Enumerated property Line_Break. + Returns ULineBreak values. @stable ICU 2.2 */ + UCHAR_LINE_BREAK=0x1008, + /** Enumerated property Numeric_Type. + Returns UNumericType values. @stable ICU 2.2 */ + UCHAR_NUMERIC_TYPE=0x1009, + /** Enumerated property Script. + Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ + UCHAR_SCRIPT=0x100A, + /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. + Returns UHangulSyllableType values. @stable ICU 2.6 */ + UCHAR_HANGUL_SYLLABLE_TYPE=0x100B, + /** Enumerated property NFD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFD_QUICK_CHECK=0x100C, + /** Enumerated property NFKD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFKD_QUICK_CHECK=0x100D, + /** Enumerated property NFC_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCHAR_NFC_QUICK_CHECK=0x100E, + /** Enumerated property NFKC_Quick_Check. + Returns UNormalizationCheckResult values. 
@stable ICU 3.0 */ + UCHAR_NFKC_QUICK_CHECK=0x100F, + /** Enumerated property Lead_Canonical_Combining_Class. + ICU-specific property for the ccc of the first code point + of the decomposition, or lccc(c)=ccc(NFD(c)[0]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010, + /** Enumerated property Trail_Canonical_Combining_Class. + ICU-specific property for the ccc of the last code point + of the decomposition, or tccc(c)=ccc(NFD(c)[last]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011, + /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UGraphemeClusterBreak values. @stable ICU 3.4 */ + UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012, + /** Enumerated property Sentence_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns USentenceBreak values. @stable ICU 3.4 */ + UCHAR_SENTENCE_BREAK=0x1013, + /** Enumerated property Word_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UWordBreakValues values. @stable ICU 3.4 */ + UCHAR_WORD_BREAK=0x1014, + /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). + Used in UAX #9: Unicode Bidirectional Algorithm + (http://www.unicode.org/reports/tr9/) + Returns UBidiPairedBracketType values. @stable ICU 52 */ + UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for enumerated/integer Unicode properties. 
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_INT_LIMIT=0x1016, +#endif // U_HIDE_DEPRECATED_API + + /** Bitmask property General_Category_Mask. + This is the General_Category property returned as a bit mask. + When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), + returns bit masks for UCharCategory values where exactly one bit is set. + When used with u_getPropertyValueName() and u_getPropertyValueEnum(), + a multi-bit mask is used for sets of categories like "Letters". + Mask values should be cast to uint32_t. + @stable ICU 2.4 */ + UCHAR_GENERAL_CATEGORY_MASK=0x2000, + /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */ + UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for bit-mask Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_MASK_LIMIT=0x2001, +#endif // U_HIDE_DEPRECATED_API + + /** Double property Numeric_Value. + Corresponds to u_getNumericValue. @stable ICU 2.4 */ + UCHAR_NUMERIC_VALUE=0x3000, + /** First constant for double Unicode properties. @stable ICU 2.4 */ + UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for double Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_DOUBLE_LIMIT=0x3001, +#endif // U_HIDE_DEPRECATED_API + + /** String property Age. + Corresponds to u_charAge. @stable ICU 2.4 */ + UCHAR_AGE=0x4000, + /** First constant for string Unicode properties. @stable ICU 2.4 */ + UCHAR_STRING_START=UCHAR_AGE, + /** String property Bidi_Mirroring_Glyph. + Corresponds to u_charMirror. @stable ICU 2.4 */ + UCHAR_BIDI_MIRRORING_GLYPH=0x4001, + /** String property Case_Folding. + Corresponds to u_strFoldCase in ustring.h. 
@stable ICU 2.4 */ + UCHAR_CASE_FOLDING=0x4002, +#ifndef U_HIDE_DEPRECATED_API + /** Deprecated string property ISO_Comment. + Corresponds to u_getISOComment. @deprecated ICU 49 */ + UCHAR_ISO_COMMENT=0x4003, +#endif /* U_HIDE_DEPRECATED_API */ + /** String property Lowercase_Mapping. + Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ + UCHAR_LOWERCASE_MAPPING=0x4004, + /** String property Name. + Corresponds to u_charName. @stable ICU 2.4 */ + UCHAR_NAME=0x4005, + /** String property Simple_Case_Folding. + Corresponds to u_foldCase. @stable ICU 2.4 */ + UCHAR_SIMPLE_CASE_FOLDING=0x4006, + /** String property Simple_Lowercase_Mapping. + Corresponds to u_tolower. @stable ICU 2.4 */ + UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007, + /** String property Simple_Titlecase_Mapping. + Corresponds to u_totitle. @stable ICU 2.4 */ + UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008, + /** String property Simple_Uppercase_Mapping. + Corresponds to u_toupper. @stable ICU 2.4 */ + UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009, + /** String property Titlecase_Mapping. + Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ + UCHAR_TITLECASE_MAPPING=0x400A, +#ifndef U_HIDE_DEPRECATED_API + /** String property Unicode_1_Name. + This property is of little practical value. + Beginning with ICU 49, ICU APIs return an empty string for this property. + Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */ + UCHAR_UNICODE_1_NAME=0x400B, +#endif /* U_HIDE_DEPRECATED_API */ + /** String property Uppercase_Mapping. + Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ + UCHAR_UPPERCASE_MAPPING=0x400C, + /** String property Bidi_Paired_Bracket (new in Unicode 6.3). + Corresponds to u_getBidiPairedBracket. @stable ICU 52 */ + UCHAR_BIDI_PAIRED_BRACKET=0x400D, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for string Unicode properties. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
+ */ + UCHAR_STRING_LIMIT=0x400E, +#endif // U_HIDE_DEPRECATED_API + + /** Miscellaneous property Script_Extensions (new in Unicode 6.0). + Some characters are commonly used in multiple scripts. + For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. + @stable ICU 4.6 */ + UCHAR_SCRIPT_EXTENSIONS=0x7000, + /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ + UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last constant for Unicode properties with unusual value types. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UCHAR_OTHER_PROPERTY_LIMIT=0x7001, +#endif // U_HIDE_DEPRECATED_API + + /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ + UCHAR_INVALID_CODE = -1 +} UProperty; + +/** + * Values of the enumerated Unicode General_Category property, as returned by u_charType(). + * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html . + * @stable ICU 2.0 + */ +typedef enum UCharCategory +{ + /* + * Note: UCharCategory constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** <Unicode 2-letter General_Category value> comment... * / + * U_<[A-Z_]+> = <integer>, + */ + + /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */ + U_UNASSIGNED = 0, + /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) 
@stable ICU 2.0 */ + U_GENERAL_OTHER_TYPES = 0, + /** Lu @stable ICU 2.0 */ + U_UPPERCASE_LETTER = 1, + /** Ll @stable ICU 2.0 */ + U_LOWERCASE_LETTER = 2, + /** Lt @stable ICU 2.0 */ + U_TITLECASE_LETTER = 3, + /** Lm @stable ICU 2.0 */ + U_MODIFIER_LETTER = 4, + /** Lo @stable ICU 2.0 */ + U_OTHER_LETTER = 5, + /** Mn @stable ICU 2.0 */ + U_NON_SPACING_MARK = 6, + /** Me @stable ICU 2.0 */ + U_ENCLOSING_MARK = 7, + /** Mc @stable ICU 2.0 */ + U_COMBINING_SPACING_MARK = 8, + /** Nd @stable ICU 2.0 */ + U_DECIMAL_DIGIT_NUMBER = 9, + /** Nl @stable ICU 2.0 */ + U_LETTER_NUMBER = 10, + /** No @stable ICU 2.0 */ + U_OTHER_NUMBER = 11, + /** Zs @stable ICU 2.0 */ + U_SPACE_SEPARATOR = 12, + /** Zl @stable ICU 2.0 */ + U_LINE_SEPARATOR = 13, + /** Zp @stable ICU 2.0 */ + U_PARAGRAPH_SEPARATOR = 14, + /** Cc @stable ICU 2.0 */ + U_CONTROL_CHAR = 15, + /** Cf @stable ICU 2.0 */ + U_FORMAT_CHAR = 16, + /** Co @stable ICU 2.0 */ + U_PRIVATE_USE_CHAR = 17, + /** Cs @stable ICU 2.0 */ + U_SURROGATE = 18, + /** Pd @stable ICU 2.0 */ + U_DASH_PUNCTUATION = 19, + /** Ps @stable ICU 2.0 */ + U_START_PUNCTUATION = 20, + /** Pe @stable ICU 2.0 */ + U_END_PUNCTUATION = 21, + /** Pc @stable ICU 2.0 */ + U_CONNECTOR_PUNCTUATION = 22, + /** Po @stable ICU 2.0 */ + U_OTHER_PUNCTUATION = 23, + /** Sm @stable ICU 2.0 */ + U_MATH_SYMBOL = 24, + /** Sc @stable ICU 2.0 */ + U_CURRENCY_SYMBOL = 25, + /** Sk @stable ICU 2.0 */ + U_MODIFIER_SYMBOL = 26, + /** So @stable ICU 2.0 */ + U_OTHER_SYMBOL = 27, + /** Pi @stable ICU 2.0 */ + U_INITIAL_PUNCTUATION = 28, + /** Pf @stable ICU 2.0 */ + U_FINAL_PUNCTUATION = 29, + /** + * One higher than the last enum UCharCategory constant (30, since U_FINAL_PUNCTUATION is 29). + * This numeric value is stable (will not change), see + * http://www.unicode.org/policies/stability_policy.html#Property_Value + * + * @stable ICU 2.0 + */ + U_CHAR_CATEGORY_COUNT +} UCharCategory; + +/** + * U_GC_XX_MASK constants are bit flags corresponding to Unicode + * general category values. 
+ * For each category, the nth bit is set if the numeric value of the + * corresponding UCharCategory constant is n. + * + * There are also some U_GC_Y_MASK constants for groups of general categories + * like L for all letter categories. + * + * @see u_charType + * @see U_GET_GC_MASK + * @see UCharCategory + * @stable ICU 2.1 + */ +#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES) + +/** Mask constant for UCharCategory U_UPPERCASE_LETTER (Lu). @stable ICU 2.1 */ +#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER) +/** Mask constant for UCharCategory U_LOWERCASE_LETTER (Ll). @stable ICU 2.1 */ +#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER) +/** Mask constant for UCharCategory U_TITLECASE_LETTER (Lt). @stable ICU 2.1 */ +#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER) +/** Mask constant for UCharCategory U_MODIFIER_LETTER (Lm). @stable ICU 2.1 */ +#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER) +/** Mask constant for UCharCategory U_OTHER_LETTER (Lo). @stable ICU 2.1 */ +#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER) + +/** Mask constant for UCharCategory U_NON_SPACING_MARK (Mn). @stable ICU 2.1 */ +#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK) +/** Mask constant for UCharCategory U_ENCLOSING_MARK (Me). @stable ICU 2.1 */ +#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK) +/** Mask constant for UCharCategory U_COMBINING_SPACING_MARK (Mc). @stable ICU 2.1 */ +#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK) + +/** Mask constant for UCharCategory U_DECIMAL_DIGIT_NUMBER (Nd). @stable ICU 2.1 */ +#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER) +/** Mask constant for UCharCategory U_LETTER_NUMBER (Nl). @stable ICU 2.1 */ +#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER) +/** Mask constant for UCharCategory U_OTHER_NUMBER (No). @stable ICU 2.1 */ +#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER) + +/** Mask constant for UCharCategory U_SPACE_SEPARATOR (Zs). @stable ICU 2.1 */ +#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR) +/** Mask constant for UCharCategory U_LINE_SEPARATOR (Zl). @stable ICU 2.1 */ +#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR) +/** Mask constant for UCharCategory U_PARAGRAPH_SEPARATOR (Zp). @stable ICU 2.1 */ +#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR) + +/** Mask constant for UCharCategory U_CONTROL_CHAR (Cc). 
@stable ICU 2.1 */ +#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR) +/** Mask constant for UCharCategory U_FORMAT_CHAR (Cf). @stable ICU 2.1 */ +#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR) +/** Mask constant for UCharCategory U_PRIVATE_USE_CHAR (Co). @stable ICU 2.1 */ +#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR) +/** Mask constant for UCharCategory U_SURROGATE (Cs). @stable ICU 2.1 */ +#define U_GC_CS_MASK U_MASK(U_SURROGATE) + +/** Mask constant for UCharCategory U_DASH_PUNCTUATION (Pd). @stable ICU 2.1 */ +#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION) +/** Mask constant for UCharCategory U_START_PUNCTUATION (Ps). @stable ICU 2.1 */ +#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION) +/** Mask constant for UCharCategory U_END_PUNCTUATION (Pe). @stable ICU 2.1 */ +#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION) +/** Mask constant for UCharCategory U_CONNECTOR_PUNCTUATION (Pc). @stable ICU 2.1 */ +#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION) +/** Mask constant for UCharCategory U_OTHER_PUNCTUATION (Po). @stable ICU 2.1 */ +#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION) + +/** Mask constant for UCharCategory U_MATH_SYMBOL (Sm). @stable ICU 2.1 */ +#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL) +/** Mask constant for UCharCategory U_CURRENCY_SYMBOL (Sc). @stable ICU 2.1 */ +#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL) +/** Mask constant for UCharCategory U_MODIFIER_SYMBOL (Sk). @stable ICU 2.1 */ +#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL) +/** Mask constant for UCharCategory U_OTHER_SYMBOL (So). @stable ICU 2.1 */ +#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL) + +/** Mask constant for UCharCategory U_INITIAL_PUNCTUATION (Pi). @stable ICU 2.1 */ +#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION) +/** Mask constant for UCharCategory U_FINAL_PUNCTUATION (Pf). @stable ICU 2.1 */ +#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION) + + +/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */ +#define U_GC_L_MASK \ + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK) + +/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */ +#define U_GC_LC_MASK \ + (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK) + +/** Mask constant for multiple UCharCategory bits (M Marks). 
@stable ICU 2.1 */ +#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK) + +/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */ +#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK) + +/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */ +#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK) + +/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */ +#define U_GC_C_MASK \ + (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK) + +/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */ +#define U_GC_P_MASK \ + (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \ + U_GC_PI_MASK|U_GC_PF_MASK) + +/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */ +#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK) + +/** + * Values of the enumerated Unicode Bidi_Class property of a code point, as returned by u_charDirection(). + * @stable ICU 2.0 + */ +typedef enum UCharDirection { + /* + * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** <Unicode 1..3-letter Bidi_Class value> comment... 
* / + * U_<[A-Z_]+> = <integer>, + */ + + /** L @stable ICU 2.0 */ + U_LEFT_TO_RIGHT = 0, + /** R @stable ICU 2.0 */ + U_RIGHT_TO_LEFT = 1, + /** EN @stable ICU 2.0 */ + U_EUROPEAN_NUMBER = 2, + /** ES @stable ICU 2.0 */ + U_EUROPEAN_NUMBER_SEPARATOR = 3, + /** ET @stable ICU 2.0 */ + U_EUROPEAN_NUMBER_TERMINATOR = 4, + /** AN @stable ICU 2.0 */ + U_ARABIC_NUMBER = 5, + /** CS @stable ICU 2.0 */ + U_COMMON_NUMBER_SEPARATOR = 6, + /** B @stable ICU 2.0 */ + U_BLOCK_SEPARATOR = 7, + /** S @stable ICU 2.0 */ + U_SEGMENT_SEPARATOR = 8, + /** WS @stable ICU 2.0 */ + U_WHITE_SPACE_NEUTRAL = 9, + /** ON @stable ICU 2.0 */ + U_OTHER_NEUTRAL = 10, + /** LRE @stable ICU 2.0 */ + U_LEFT_TO_RIGHT_EMBEDDING = 11, + /** LRO @stable ICU 2.0 */ + U_LEFT_TO_RIGHT_OVERRIDE = 12, + /** AL @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_ARABIC = 13, + /** RLE @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_EMBEDDING = 14, + /** RLO @stable ICU 2.0 */ + U_RIGHT_TO_LEFT_OVERRIDE = 15, + /** PDF @stable ICU 2.0 */ + U_POP_DIRECTIONAL_FORMAT = 16, + /** NSM @stable ICU 2.0 */ + U_DIR_NON_SPACING_MARK = 17, + /** BN @stable ICU 2.0 */ + U_BOUNDARY_NEUTRAL = 18, + /** FSI @stable ICU 52 */ + U_FIRST_STRONG_ISOLATE = 19, + /** LRI @stable ICU 52 */ + U_LEFT_TO_RIGHT_ISOLATE = 20, + /** RLI @stable ICU 52 */ + U_RIGHT_TO_LEFT_ISOLATE = 21, + /** PDI @stable ICU 52 */ + U_POP_DIRECTIONAL_ISOLATE = 22, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest UCharDirection value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_CHAR_DIRECTION_COUNT +#endif // U_HIDE_DEPRECATED_API +} UCharDirection; + +/** + * Values of the enumerated Unicode Bidi_Paired_Bracket_Type property. + * + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @stable ICU 52 + */ +typedef enum UBidiPairedBracketType { + /* + * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. 
+ * It matches lines like + * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name> + */ + + /** Not a paired bracket (implicit value 0). @stable ICU 52 */ + U_BPT_NONE, + /** Open paired bracket (implicit value 1). @stable ICU 52 */ + U_BPT_OPEN, + /** Close paired bracket (implicit value 2). @stable ICU 52 */ + U_BPT_CLOSE, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UBidiPairedBracketType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_BPT_COUNT /* 3 */ +#endif // U_HIDE_DEPRECATED_API +} UBidiPairedBracketType; + +/** + * Constants for Unicode blocks, see the Unicode Data file Blocks.txt + * @stable ICU 2.0 + */ +enum UBlockCode { + /* + * Note: UBlockCode constants are parsed by preparseucd.py. + * It matches lines like + * UBLOCK_<Unicode Block value name> = <integer>, + */ + + /** New No_Block value in Unicode 4. @stable ICU 2.6 */ + UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */ + + /** @stable ICU 2.0 */ + UBLOCK_BASIC_LATIN = 1, /*[0000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/ + + /** @stable ICU 2.0 */ + UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/ + + /** + * Unicode 3.2 renames this block to "Greek and Coptic". 
+ * @stable ICU 2.0 + */ + UBLOCK_GREEK =8, /*[0370]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CYRILLIC =9, /*[0400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARMENIAN =10, /*[0530]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HEBREW =11, /*[0590]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC =12, /*[0600]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SYRIAC =13, /*[0700]*/ + + /** @stable ICU 2.0 */ + UBLOCK_THAANA =14, /*[0780]*/ + + /** @stable ICU 2.0 */ + UBLOCK_DEVANAGARI =15, /*[0900]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BENGALI =16, /*[0980]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GURMUKHI =17, /*[0A00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GUJARATI =18, /*[0A80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ORIYA =19, /*[0B00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TAMIL =20, /*[0B80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TELUGU =21, /*[0C00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANNADA =22, /*[0C80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MALAYALAM =23, /*[0D00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SINHALA =24, /*[0D80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_THAI =25, /*[0E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LAO =26, /*[0E80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_TIBETAN =27, /*[0F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MYANMAR =28, /*[1000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GEORGIAN =29, /*[10A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_JAMO =30, /*[1100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ETHIOPIC =31, /*[1200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CHEROKEE =32, /*[13A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_OGHAM =34, /*[1680]*/ + + /** @stable ICU 2.0 */ + UBLOCK_RUNIC =35, /*[16A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KHMER =36, /*[1780]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MONGOLIAN =37, /*[1800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/ + + /** @stable ICU 2.0 */ + 
UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/ + + /** + * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols". + * @stable ICU 2.0 + */ + UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_NUMBER_FORMS =45, /*[2150]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARROWS =46, /*[2190]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CONTROL_PICTURES =49, /*[2400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOX_DRAWING =52, /*[2500]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/ + + /** @stable ICU 2.0 */ + UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/ + + /** @stable ICU 2.0 */ + UBLOCK_DINGBATS =56, /*[2700]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIRAGANA =62, /*[3040]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KATAKANA =63, /*[30A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_BOPOMOFO =64, /*[3100]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/ + + /** @stable ICU 2.0 */ + UBLOCK_KANBUN =66, /*[3190]*/ + + /** @stable ICU 2.0 */ + 
UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_YI_SYLLABLES =72, /*[A000]*/ + + /** @stable ICU 2.0 */ + UBLOCK_YI_RADICALS =73, /*[A490]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIGH_SURROGATES =75, /*[D800]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/ + + /** @stable ICU 2.0 */ + UBLOCK_LOW_SURROGATES =77, /*[DC00]*/ + + /** + * Same as UBLOCK_PRIVATE_USE. + * Until Unicode 3.1.1, the corresponding block name was "Private Use", + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and + * adds separate blocks for the supplementary PUAs. + * + * @stable ICU 2.0 + */ + UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/ + /** + * Same as UBLOCK_PRIVATE_USE_AREA. + * Until Unicode 3.1.1, the corresponding block name was "Private Use", + * and multiple code point ranges had this block. + * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and + * adds separate blocks for the supplementary PUAs. 
+ * + * @stable ICU 2.0 + */ + UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA, + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/ + + /** @stable ICU 2.0 */ + UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/ + + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/ + + /** @stable ICU 2.0 */ + UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/ + + /** @stable ICU 2.0 */ + UBLOCK_SPECIALS =86, /*[FFF0]*/ + + /** @stable ICU 2.0 */ + UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/ + + /* New blocks in Unicode 3.1 */ + + /** @stable ICU 2.0 */ + UBLOCK_OLD_ITALIC = 88, /*[10300]*/ + /** @stable ICU 2.0 */ + UBLOCK_GOTHIC = 89, /*[10330]*/ + /** @stable ICU 2.0 */ + UBLOCK_DESERET = 90, /*[10400]*/ + /** @stable ICU 2.0 */ + UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/ + /** @stable ICU 2.0 */ + UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/ + /** @stable ICU 2.0 */ + UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/ + /** @stable ICU 2.0 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/ + /** @stable ICU 2.0 */ + UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/ + /** @stable ICU 2.0 */ + UBLOCK_TAGS = 96, /*[E0000]*/ + + /* New blocks in Unicode 3.2 */ + + /** @stable ICU 3.0 */ + UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/ + /** + * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
+ * @stable ICU 2.2 + */ + UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT, + /** @stable ICU 2.2 */ + UBLOCK_TAGALOG = 98, /*[1700]*/ + /** @stable ICU 2.2 */ + UBLOCK_HANUNOO = 99, /*[1720]*/ + /** @stable ICU 2.2 */ + UBLOCK_BUHID = 100, /*[1740]*/ + /** @stable ICU 2.2 */ + UBLOCK_TAGBANWA = 101, /*[1760]*/ + /** @stable ICU 2.2 */ + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/ + /** @stable ICU 2.2 */ + UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/ + /** @stable ICU 2.2 */ + UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/ + /** @stable ICU 2.2 */ + UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/ + /** @stable ICU 2.2 */ + UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/ + + /* New blocks in Unicode 4 */ + + /** @stable ICU 2.6 */ + UBLOCK_LIMBU = 111, /*[1900]*/ + /** @stable ICU 2.6 */ + UBLOCK_TAI_LE = 112, /*[1950]*/ + /** @stable ICU 2.6 */ + UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/ + /** @stable ICU 2.6 */ + UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/ + /** @stable ICU 2.6 */ + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/ + /** @stable ICU 2.6 */ + UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/ + /** @stable ICU 2.6 */ + UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/ + /** @stable ICU 2.6 */ + UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/ + /** @stable ICU 2.6 */ + UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/ + /** @stable ICU 2.6 */ + UBLOCK_UGARITIC = 120, /*[10380]*/ + /** @stable ICU 2.6 */ + UBLOCK_SHAVIAN = 121, /*[10450]*/ + /** @stable ICU 2.6 */ + UBLOCK_OSMANYA = 122, /*[10480]*/ + /** @stable ICU 2.6 */ + UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/ + /** @stable 
ICU 2.6 */ + UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/ + /** @stable ICU 2.6 */ + UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/ + + /* New blocks in Unicode 4.1 */ + + /** @stable ICU 3.4 */ + UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/ + /** @stable ICU 3.4 */ + UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/ + /** @stable ICU 3.4 */ + UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/ + /** @stable ICU 3.4 */ + UBLOCK_BUGINESE = 129, /*[1A00]*/ + /** @stable ICU 3.4 */ + UBLOCK_CJK_STROKES = 130, /*[31C0]*/ + /** @stable ICU 3.4 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/ + /** @stable ICU 3.4 */ + UBLOCK_COPTIC = 132, /*[2C80]*/ + /** @stable ICU 3.4 */ + UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/ + /** @stable ICU 3.4 */ + UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/ + /** @stable ICU 3.4 */ + UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/ + /** @stable ICU 3.4 */ + UBLOCK_GLAGOLITIC = 136, /*[2C00]*/ + /** @stable ICU 3.4 */ + UBLOCK_KHAROSHTHI = 137, /*[10A00]*/ + /** @stable ICU 3.4 */ + UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/ + /** @stable ICU 3.4 */ + UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/ + /** @stable ICU 3.4 */ + UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/ + /** @stable ICU 3.4 */ + UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/ + /** @stable ICU 3.4 */ + UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/ + /** @stable ICU 3.4 */ + UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/ + /** @stable ICU 3.4 */ + UBLOCK_TIFINAGH = 144, /*[2D30]*/ + /** @stable ICU 3.4 */ + UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/ + + /* New blocks in Unicode 5.0 */ + + /** @stable ICU 3.6 */ + UBLOCK_NKO = 146, /*[07C0]*/ + /** @stable ICU 3.6 */ + UBLOCK_BALINESE = 147, /*[1B00]*/ + /** @stable ICU 3.6 */ + UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/ + /** @stable ICU 3.6 */ + UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/ + /** @stable ICU 3.6 */ + UBLOCK_PHAGS_PA = 150, /*[A840]*/ + /** @stable ICU 3.6 */ + UBLOCK_PHOENICIAN = 151, 
/*[10900]*/ + /** @stable ICU 3.6 */ + UBLOCK_CUNEIFORM = 152, /*[12000]*/ + /** @stable ICU 3.6 */ + UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/ + /** @stable ICU 3.6 */ + UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/ + + /* New blocks in Unicode 5.1 */ + + /** @stable ICU 4.0 */ + UBLOCK_SUNDANESE = 155, /*[1B80]*/ + /** @stable ICU 4.0 */ + UBLOCK_LEPCHA = 156, /*[1C00]*/ + /** @stable ICU 4.0 */ + UBLOCK_OL_CHIKI = 157, /*[1C50]*/ + /** @stable ICU 4.0 */ + UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/ + /** @stable ICU 4.0 */ + UBLOCK_VAI = 159, /*[A500]*/ + /** @stable ICU 4.0 */ + UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/ + /** @stable ICU 4.0 */ + UBLOCK_SAURASHTRA = 161, /*[A880]*/ + /** @stable ICU 4.0 */ + UBLOCK_KAYAH_LI = 162, /*[A900]*/ + /** @stable ICU 4.0 */ + UBLOCK_REJANG = 163, /*[A930]*/ + /** @stable ICU 4.0 */ + UBLOCK_CHAM = 164, /*[AA00]*/ + /** @stable ICU 4.0 */ + UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/ + /** @stable ICU 4.0 */ + UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/ + /** @stable ICU 4.0 */ + UBLOCK_LYCIAN = 167, /*[10280]*/ + /** @stable ICU 4.0 */ + UBLOCK_CARIAN = 168, /*[102A0]*/ + /** @stable ICU 4.0 */ + UBLOCK_LYDIAN = 169, /*[10920]*/ + /** @stable ICU 4.0 */ + UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/ + /** @stable ICU 4.0 */ + UBLOCK_DOMINO_TILES = 171, /*[1F030]*/ + + /* New blocks in Unicode 5.2 */ + + /** @stable ICU 4.4 */ + UBLOCK_SAMARITAN = 172, /*[0800]*/ + /** @stable ICU 4.4 */ + UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/ + /** @stable ICU 4.4 */ + UBLOCK_TAI_THAM = 174, /*[1A20]*/ + /** @stable ICU 4.4 */ + UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/ + /** @stable ICU 4.4 */ + UBLOCK_LISU = 176, /*[A4D0]*/ + /** @stable ICU 4.4 */ + UBLOCK_BAMUM = 177, /*[A6A0]*/ + /** @stable ICU 4.4 */ + UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/ + /** @stable ICU 4.4 */ + UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/ + /** @stable ICU 4.4 */ + UBLOCK_HANGUL_JAMO_EXTENDED_A = 
180, /*[A960]*/ + /** @stable ICU 4.4 */ + UBLOCK_JAVANESE = 181, /*[A980]*/ + /** @stable ICU 4.4 */ + UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/ + /** @stable ICU 4.4 */ + UBLOCK_TAI_VIET = 183, /*[AA80]*/ + /** @stable ICU 4.4 */ + UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/ + /** @stable ICU 4.4 */ + UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/ + /** @stable ICU 4.4 */ + UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/ + /** @stable ICU 4.4 */ + UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/ + /** @stable ICU 4.4 */ + UBLOCK_AVESTAN = 188, /*[10B00]*/ + /** @stable ICU 4.4 */ + UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/ + /** @stable ICU 4.4 */ + UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/ + /** @stable ICU 4.4 */ + UBLOCK_OLD_TURKIC = 191, /*[10C00]*/ + /** @stable ICU 4.4 */ + UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/ + /** @stable ICU 4.4 */ + UBLOCK_KAITHI = 193, /*[11080]*/ + /** @stable ICU 4.4 */ + UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/ + /** @stable ICU 4.4 */ + UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/ + /** @stable ICU 4.4 */ + UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/ + /** @stable ICU 4.4 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/ + + /* New blocks in Unicode 6.0 */ + + /** @stable ICU 4.6 */ + UBLOCK_MANDAIC = 198, /*[0840]*/ + /** @stable ICU 4.6 */ + UBLOCK_BATAK = 199, /*[1BC0]*/ + /** @stable ICU 4.6 */ + UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/ + /** @stable ICU 4.6 */ + UBLOCK_BRAHMI = 201, /*[11000]*/ + /** @stable ICU 4.6 */ + UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/ + /** @stable ICU 4.6 */ + UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/ + /** @stable ICU 4.6 */ + UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/ + /** @stable ICU 4.6 */ + UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/ + /** @stable ICU 4.6 */ + UBLOCK_EMOTICONS = 206, /*[1F600]*/ + /** @stable ICU 4.6 */ + UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/ + /** @stable ICU 4.6 */ + 
UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/ + /** @stable ICU 4.6 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/ + + /* New blocks in Unicode 6.1 */ + + /** @stable ICU 49 */ + UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/ + /** @stable ICU 49 */ + UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/ + /** @stable ICU 49 */ + UBLOCK_CHAKMA = 212, /*[11100]*/ + /** @stable ICU 49 */ + UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/ + /** @stable ICU 49 */ + UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/ + /** @stable ICU 49 */ + UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/ + /** @stable ICU 49 */ + UBLOCK_MIAO = 216, /*[16F00]*/ + /** @stable ICU 49 */ + UBLOCK_SHARADA = 217, /*[11180]*/ + /** @stable ICU 49 */ + UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/ + /** @stable ICU 49 */ + UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/ + /** @stable ICU 49 */ + UBLOCK_TAKRI = 220, /*[11680]*/ + + /* New blocks in Unicode 7.0 */ + + /** @stable ICU 54 */ + UBLOCK_BASSA_VAH = 221, /*[16AD0]*/ + /** @stable ICU 54 */ + UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/ + /** @stable ICU 54 */ + UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/ + /** @stable ICU 54 */ + UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/ + /** @stable ICU 54 */ + UBLOCK_DUPLOYAN = 225, /*[1BC00]*/ + /** @stable ICU 54 */ + UBLOCK_ELBASAN = 226, /*[10500]*/ + /** @stable ICU 54 */ + UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/ + /** @stable ICU 54 */ + UBLOCK_GRANTHA = 228, /*[11300]*/ + /** @stable ICU 54 */ + UBLOCK_KHOJKI = 229, /*[11200]*/ + /** @stable ICU 54 */ + UBLOCK_KHUDAWADI = 230, /*[112B0]*/ + /** @stable ICU 54 */ + UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/ + /** @stable ICU 54 */ + UBLOCK_LINEAR_A = 232, /*[10600]*/ + /** @stable ICU 54 */ + UBLOCK_MAHAJANI = 233, /*[11150]*/ + /** @stable ICU 54 */ + UBLOCK_MANICHAEAN = 234, /*[10AC0]*/ + /** @stable ICU 54 */ + UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/ + /** @stable ICU 54 */ + UBLOCK_MODI = 236, 
/*[11600]*/ + /** @stable ICU 54 */ + UBLOCK_MRO = 237, /*[16A40]*/ + /** @stable ICU 54 */ + UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/ + /** @stable ICU 54 */ + UBLOCK_NABATAEAN = 239, /*[10880]*/ + /** @stable ICU 54 */ + UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/ + /** @stable ICU 54 */ + UBLOCK_OLD_PERMIC = 241, /*[10350]*/ + /** @stable ICU 54 */ + UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/ + /** @stable ICU 54 */ + UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/ + /** @stable ICU 54 */ + UBLOCK_PALMYRENE = 244, /*[10860]*/ + /** @stable ICU 54 */ + UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/ + /** @stable ICU 54 */ + UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/ + /** @stable ICU 54 */ + UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/ + /** @stable ICU 54 */ + UBLOCK_SIDDHAM = 248, /*[11580]*/ + /** @stable ICU 54 */ + UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/ + /** @stable ICU 54 */ + UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/ + /** @stable ICU 54 */ + UBLOCK_TIRHUTA = 251, /*[11480]*/ + /** @stable ICU 54 */ + UBLOCK_WARANG_CITI = 252, /*[118A0]*/ + + /* New blocks in Unicode 8.0 */ + + /** @stable ICU 56 */ + UBLOCK_AHOM = 253, /*[11700]*/ + /** @stable ICU 56 */ + UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/ + /** @stable ICU 56 */ + UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/ + /** @stable ICU 56 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/ + /** @stable ICU 56 */ + UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/ + /** @stable ICU 56 */ + UBLOCK_HATRAN = 258, /*[108E0]*/ + /** @stable ICU 56 */ + UBLOCK_MULTANI = 259, /*[11280]*/ + /** @stable ICU 56 */ + UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/ + /** @stable ICU 56 */ + UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/ + /** @stable ICU 56 */ + UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/ + + /* New blocks in Unicode 9.0 */ + + /** @stable ICU 58 */ + UBLOCK_ADLAM = 263, /*[1E900]*/ + /** @stable ICU 58 */ + UBLOCK_BHAIKSUKI = 264, /*[11C00]*/ + /** @stable 
ICU 58 */ + UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/ + /** @stable ICU 58 */ + UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/ + /** @stable ICU 58 */ + UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, /*[16FE0]*/ + /** @stable ICU 58 */ + UBLOCK_MARCHEN = 268, /*[11C70]*/ + /** @stable ICU 58 */ + UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/ + /** @stable ICU 58 */ + UBLOCK_NEWA = 270, /*[11400]*/ + /** @stable ICU 58 */ + UBLOCK_OSAGE = 271, /*[104B0]*/ + /** @stable ICU 58 */ + UBLOCK_TANGUT = 272, /*[17000]*/ + /** @stable ICU 58 */ + UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/ + + // New blocks in Unicode 10.0 + + /** @stable ICU 60 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/ + /** @stable ICU 60 */ + UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/ + /** @stable ICU 60 */ + UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/ + /** @stable ICU 60 */ + UBLOCK_NUSHU = 277, /*[1B170]*/ + /** @stable ICU 60 */ + UBLOCK_SOYOMBO = 278, /*[11A50]*/ + /** @stable ICU 60 */ + UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/ + /** @stable ICU 60 */ + UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UBlockCode value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UBLOCK_COUNT = 281, +#endif // U_HIDE_DEPRECATED_API + + /** @stable ICU 2.0 */ + UBLOCK_INVALID_CODE=-1 +}; + +/** @stable ICU 2.0 */ +typedef enum UBlockCode UBlockCode; + +/** + * East Asian Width constants. + * + * @see UCHAR_EAST_ASIAN_WIDTH + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +typedef enum UEastAsianWidth { + /* + * Note: UEastAsianWidth constants are parsed by preparseucd.py. 
+ * It matches lines like + * U_EA_<Unicode East_Asian_Width value name> + */ + + U_EA_NEUTRAL, /*[N]*/ + U_EA_AMBIGUOUS, /*[A]*/ + U_EA_HALFWIDTH, /*[H]*/ + U_EA_FULLWIDTH, /*[F]*/ + U_EA_NARROW, /*[Na]*/ + U_EA_WIDE, /*[W]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UEastAsianWidth value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_EA_COUNT +#endif // U_HIDE_DEPRECATED_API +} UEastAsianWidth; + +/** + * Selector constants for u_charName(). + * u_charName() returns the "modern" name of a + * Unicode character; or the name that was defined in + * Unicode version 1.0, before the Unicode standard merged + * with ISO-10646; or an "extended" name that gives each + * Unicode code point a unique name. + * + * @see u_charName + * @stable ICU 2.0 + */ +typedef enum UCharNameChoice { + /** Unicode character name (Name property). @stable ICU 2.0 */ + U_UNICODE_CHAR_NAME, +#ifndef U_HIDE_DEPRECATED_API + /** + * The Unicode_1_Name property value which is of little practical value. + * Beginning with ICU 49, ICU APIs return an empty string for this name choice. + * @deprecated ICU 49 + */ + U_UNICODE_10_CHAR_NAME, +#endif /* U_HIDE_DEPRECATED_API */ + /** Standard or synthetic character name. @stable ICU 2.0 */ + U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2, + /** Corrected name from NameAliases.txt. @stable ICU 4.4 */ + U_CHAR_NAME_ALIAS, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UCharNameChoice value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_CHAR_NAME_CHOICE_COUNT +#endif // U_HIDE_DEPRECATED_API +} UCharNameChoice; + +/** + * Selector constants for u_getPropertyName() and + * u_getPropertyValueName(). These selectors are used to choose which + * name is returned for a given property or value. 
All properties and + * values have a long name. Most have a short name, but some do not. + * Unicode allows for additional names, beyond the long and short + * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where + * i=1, 2,... + * + * @see u_getPropertyName() + * @see u_getPropertyValueName() + * @stable ICU 2.4 + */ +typedef enum UPropertyNameChoice { + U_SHORT_PROPERTY_NAME, + U_LONG_PROPERTY_NAME, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UPropertyNameChoice value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_PROPERTY_NAME_CHOICE_COUNT +#endif // U_HIDE_DEPRECATED_API +} UPropertyNameChoice; + +/** + * Decomposition Type constants. + * + * @see UCHAR_DECOMPOSITION_TYPE + * @stable ICU 2.2 + */ +typedef enum UDecompositionType { + /* + * Note: UDecompositionType constants are parsed by preparseucd.py. + * It matches lines like + * U_DT_<Unicode Decomposition_Type value name> + */ + + U_DT_NONE, /*[none]*/ + U_DT_CANONICAL, /*[can]*/ + U_DT_COMPAT, /*[com]*/ + U_DT_CIRCLE, /*[enc]*/ + U_DT_FINAL, /*[fin]*/ + U_DT_FONT, /*[font]*/ + U_DT_FRACTION, /*[fra]*/ + U_DT_INITIAL, /*[init]*/ + U_DT_ISOLATED, /*[iso]*/ + U_DT_MEDIAL, /*[med]*/ + U_DT_NARROW, /*[nar]*/ + U_DT_NOBREAK, /*[nb]*/ + U_DT_SMALL, /*[sml]*/ + U_DT_SQUARE, /*[sqr]*/ + U_DT_SUB, /*[sub]*/ + U_DT_SUPER, /*[sup]*/ + U_DT_VERTICAL, /*[vert]*/ + U_DT_WIDE, /*[wide]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UDecompositionType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_DT_COUNT /* 18 */ +#endif // U_HIDE_DEPRECATED_API +} UDecompositionType; + +/** + * Joining Type constants. + * + * @see UCHAR_JOINING_TYPE + * @stable ICU 2.2 + */ +typedef enum UJoiningType { + /* + * Note: UJoiningType constants are parsed by preparseucd.py. 
+ * It matches lines like + * U_JT_<Unicode Joining_Type value name> + */ + + U_JT_NON_JOINING, /*[U]*/ + U_JT_JOIN_CAUSING, /*[C]*/ + U_JT_DUAL_JOINING, /*[D]*/ + U_JT_LEFT_JOINING, /*[L]*/ + U_JT_RIGHT_JOINING, /*[R]*/ + U_JT_TRANSPARENT, /*[T]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UJoiningType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_JT_COUNT /* 6 */ +#endif // U_HIDE_DEPRECATED_API +} UJoiningType; + +/** + * Joining Group constants. + * + * @see UCHAR_JOINING_GROUP + * @stable ICU 2.2 + */ +typedef enum UJoiningGroup { + /* + * Note: UJoiningGroup constants are parsed by preparseucd.py. + * It matches lines like + * U_JG_<Unicode Joining_Group value name> + */ + + U_JG_NO_JOINING_GROUP, + U_JG_AIN, + U_JG_ALAPH, + U_JG_ALEF, + U_JG_BEH, + U_JG_BETH, + U_JG_DAL, + U_JG_DALATH_RISH, + U_JG_E, + U_JG_FEH, + U_JG_FINAL_SEMKATH, + U_JG_GAF, + U_JG_GAMAL, + U_JG_HAH, + U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */ + U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL, + U_JG_HE, + U_JG_HEH, + U_JG_HEH_GOAL, + U_JG_HETH, + U_JG_KAF, + U_JG_KAPH, + U_JG_KNOTTED_HEH, + U_JG_LAM, + U_JG_LAMADH, + U_JG_MEEM, + U_JG_MIM, + U_JG_NOON, + U_JG_NUN, + U_JG_PE, + U_JG_QAF, + U_JG_QAPH, + U_JG_REH, + U_JG_REVERSED_PE, + U_JG_SAD, + U_JG_SADHE, + U_JG_SEEN, + U_JG_SEMKATH, + U_JG_SHIN, + U_JG_SWASH_KAF, + U_JG_SYRIAC_WAW, + U_JG_TAH, + U_JG_TAW, + U_JG_TEH_MARBUTA, + U_JG_TETH, + U_JG_WAW, + U_JG_YEH, + U_JG_YEH_BARREE, + U_JG_YEH_WITH_TAIL, + U_JG_YUDH, + U_JG_YUDH_HE, + U_JG_ZAIN, + U_JG_FE, /**< @stable ICU 2.6 */ + U_JG_KHAPH, /**< @stable ICU 2.6 */ + U_JG_ZHAIN, /**< @stable ICU 2.6 */ + U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */ + U_JG_FARSI_YEH, /**< @stable ICU 4.4 */ + U_JG_NYA, /**< @stable ICU 4.4 */ + U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */ + U_JG_MANICHAEAN_ALEPH, /**< 
@stable ICU 54 */ + U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */ + U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */ + U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */ + U_JG_AFRICAN_FEH, /**< @stable ICU 58 */ + U_JG_AFRICAN_NOON, /**< @stable ICU 58 */ + U_JG_AFRICAN_QAF, /**< @stable ICU 58 */ + + U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_JA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_RA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal 
UJoiningGroup value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_JG_COUNT +#endif // U_HIDE_DEPRECATED_API +} UJoiningGroup; + +/** + * Grapheme Cluster Break constants. + * + * @see UCHAR_GRAPHEME_CLUSTER_BREAK + * @stable ICU 3.4 + */ +typedef enum UGraphemeClusterBreak { + /* + * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py. + * It matches lines like + * U_GCB_<Unicode Grapheme_Cluster_Break value name> + */ + + U_GCB_OTHER = 0, /*[XX]*/ + U_GCB_CONTROL = 1, /*[CN]*/ + U_GCB_CR = 2, /*[CR]*/ + U_GCB_EXTEND = 3, /*[EX]*/ + U_GCB_L = 4, /*[L]*/ + U_GCB_LF = 5, /*[LF]*/ + U_GCB_LV = 6, /*[LV]*/ + U_GCB_LVT = 7, /*[LVT]*/ + U_GCB_T = 8, /*[T]*/ + U_GCB_V = 9, /*[V]*/ + /** @stable ICU 4.0 */ + U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + /** @stable ICU 4.0 */ + U_GCB_PREPEND = 11, /*[PP]*/ + /** @stable ICU 50 */ + U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + /** @stable ICU 58 */ + U_GCB_E_BASE = 13, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ + /** @stable ICU 58 */ + U_GCB_E_BASE_GAZ = 14, /*[EBG]*/ + /** @stable ICU 58 */ + U_GCB_E_MODIFIER = 15, /*[EM]*/ + /** @stable ICU 58 */ + U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/ + /** @stable ICU 58 */ + U_GCB_ZWJ = 17, /*[ZWJ]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UGraphemeClusterBreak value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_GCB_COUNT = 18 +#endif // U_HIDE_DEPRECATED_API +} UGraphemeClusterBreak; + +/** + * Word Break constants. + * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.) 
+ * + * @see UCHAR_WORD_BREAK + * @stable ICU 3.4 + */ +typedef enum UWordBreakValues { + /* + * Note: UWordBreakValues constants are parsed by preparseucd.py. + * It matches lines like + * U_WB_<Unicode Word_Break value name> + */ + + U_WB_OTHER = 0, /*[XX]*/ + U_WB_ALETTER = 1, /*[LE]*/ + U_WB_FORMAT = 2, /*[FO]*/ + U_WB_KATAKANA = 3, /*[KA]*/ + U_WB_MIDLETTER = 4, /*[ML]*/ + U_WB_MIDNUM = 5, /*[MN]*/ + U_WB_NUMERIC = 6, /*[NU]*/ + U_WB_EXTENDNUMLET = 7, /*[EX]*/ + /** @stable ICU 4.0 */ + U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + /** @stable ICU 4.0 */ + U_WB_EXTEND = 9, /*[Extend]*/ + /** @stable ICU 4.0 */ + U_WB_LF = 10, /*[LF]*/ + /** @stable ICU 4.0 */ + U_WB_MIDNUMLET =11, /*[MB]*/ + /** @stable ICU 4.0 */ + U_WB_NEWLINE =12, /*[NL]*/ + /** @stable ICU 50 */ + U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + /** @stable ICU 52 */ + U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ + /** @stable ICU 52 */ + U_WB_SINGLE_QUOTE = 15, /*[SQ]*/ + /** @stable ICU 52 */ + U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/ + /** @stable ICU 58 */ + U_WB_E_BASE = 17, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ + /** @stable ICU 58 */ + U_WB_E_BASE_GAZ = 18, /*[EBG]*/ + /** @stable ICU 58 */ + U_WB_E_MODIFIER = 19, /*[EM]*/ + /** @stable ICU 58 */ + U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/ + /** @stable ICU 58 */ + U_WB_ZWJ = 21, /*[ZWJ]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UWordBreakValues value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_WB_COUNT = 22 +#endif // U_HIDE_DEPRECATED_API +} UWordBreakValues; + +/** + * Sentence Break constants. + * + * @see UCHAR_SENTENCE_BREAK + * @stable ICU 3.4 + */ +typedef enum USentenceBreak { + /* + * Note: USentenceBreak constants are parsed by preparseucd.py. 
+ * It matches lines like + * U_SB_<Unicode Sentence_Break value name> + */ + + U_SB_OTHER = 0, /*[XX]*/ + U_SB_ATERM = 1, /*[AT]*/ + U_SB_CLOSE = 2, /*[CL]*/ + U_SB_FORMAT = 3, /*[FO]*/ + U_SB_LOWER = 4, /*[LO]*/ + U_SB_NUMERIC = 5, /*[NU]*/ + U_SB_OLETTER = 6, /*[LE]*/ + U_SB_SEP = 7, /*[SE]*/ + U_SB_SP = 8, /*[SP]*/ + U_SB_STERM = 9, /*[ST]*/ + U_SB_UPPER = 10, /*[UP]*/ + U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */ + U_SB_EXTEND = 12, /*[EX]*/ + U_SB_LF = 13, /*[LF]*/ + U_SB_SCONTINUE = 14, /*[SC]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal USentenceBreak value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_SB_COUNT = 15 +#endif // U_HIDE_DEPRECATED_API +} USentenceBreak; + +/** + * Line Break constants. + * + * @see UCHAR_LINE_BREAK + * @stable ICU 2.2 + */ +typedef enum ULineBreak { + /* + * Note: ULineBreak constants are parsed by preparseucd.py. 
+ * It matches lines like + * U_LB_<Unicode Line_Break value name> + */ + + U_LB_UNKNOWN = 0, /*[XX]*/ + U_LB_AMBIGUOUS = 1, /*[AI]*/ + U_LB_ALPHABETIC = 2, /*[AL]*/ + U_LB_BREAK_BOTH = 3, /*[B2]*/ + U_LB_BREAK_AFTER = 4, /*[BA]*/ + U_LB_BREAK_BEFORE = 5, /*[BB]*/ + U_LB_MANDATORY_BREAK = 6, /*[BK]*/ + U_LB_CONTINGENT_BREAK = 7, /*[CB]*/ + U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/ + U_LB_COMBINING_MARK = 9, /*[CM]*/ + U_LB_CARRIAGE_RETURN = 10, /*[CR]*/ + U_LB_EXCLAMATION = 11, /*[EX]*/ + U_LB_GLUE = 12, /*[GL]*/ + U_LB_HYPHEN = 13, /*[HY]*/ + U_LB_IDEOGRAPHIC = 14, /*[ID]*/ + /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */ + U_LB_INSEPARABLE = 15, /*[IN]*/ + U_LB_INSEPERABLE = U_LB_INSEPARABLE, + U_LB_INFIX_NUMERIC = 16, /*[IS]*/ + U_LB_LINE_FEED = 17, /*[LF]*/ + U_LB_NONSTARTER = 18, /*[NS]*/ + U_LB_NUMERIC = 19, /*[NU]*/ + U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/ + U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/ + U_LB_PREFIX_NUMERIC = 22, /*[PR]*/ + U_LB_QUOTATION = 23, /*[QU]*/ + U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/ + U_LB_SURROGATE = 25, /*[SG]*/ + U_LB_SPACE = 26, /*[SP]*/ + U_LB_BREAK_SYMBOLS = 27, /*[SY]*/ + U_LB_ZWSPACE = 28, /*[ZW]*/ + /** @stable ICU 2.6 */ + U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ + /** @stable ICU 2.6 */ + U_LB_WORD_JOINER = 30, /*[WJ]*/ + /** @stable ICU 3.4 */ + U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */ + /** @stable ICU 3.4 */ + U_LB_H3 = 32, /*[H3]*/ + /** @stable ICU 3.4 */ + U_LB_JL = 33, /*[JL]*/ + /** @stable ICU 3.4 */ + U_LB_JT = 34, /*[JT]*/ + /** @stable ICU 3.4 */ + U_LB_JV = 35, /*[JV]*/ + /** @stable ICU 4.4 */ + U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ + /** @stable ICU 49 */ + U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ + /** @stable ICU 49 */ + U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ + /** @stable ICU 50 */ + U_LB_REGIONAL_INDICATOR = 
39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */ + /** @stable ICU 58 */ + U_LB_E_BASE = 40, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ + /** @stable ICU 58 */ + U_LB_E_MODIFIER = 41, /*[EM]*/ + /** @stable ICU 58 */ + U_LB_ZWJ = 42, /*[ZWJ]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal ULineBreak value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_LB_COUNT = 43 +#endif // U_HIDE_DEPRECATED_API +} ULineBreak; + +/** + * Numeric Type constants. + * + * @see UCHAR_NUMERIC_TYPE + * @stable ICU 2.2 + */ +typedef enum UNumericType { + /* + * Note: UNumericType constants are parsed by preparseucd.py. + * It matches lines like + * U_NT_<Unicode Numeric_Type value name> + */ + + U_NT_NONE, /*[None]*/ + U_NT_DECIMAL, /*[de]*/ + U_NT_DIGIT, /*[di]*/ + U_NT_NUMERIC, /*[nu]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UNumericType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_NT_COUNT +#endif // U_HIDE_DEPRECATED_API +} UNumericType; + +/** + * Hangul Syllable Type constants. + * + * @see UCHAR_HANGUL_SYLLABLE_TYPE + * @stable ICU 2.6 + */ +typedef enum UHangulSyllableType { + /* + * Note: UHangulSyllableType constants are parsed by preparseucd.py. + * It matches lines like + * U_HST_<Unicode Hangul_Syllable_Type value name> + */ + + U_HST_NOT_APPLICABLE, /*[NA]*/ + U_HST_LEADING_JAMO, /*[L]*/ + U_HST_VOWEL_JAMO, /*[V]*/ + U_HST_TRAILING_JAMO, /*[T]*/ + U_HST_LV_SYLLABLE, /*[LV]*/ + U_HST_LVT_SYLLABLE, /*[LVT]*/ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UHangulSyllableType value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE). 
+ * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_HST_COUNT +#endif // U_HIDE_DEPRECATED_API +} UHangulSyllableType; + +/** + * Check a binary Unicode property for a code point. + * + * Unicode, especially in version 3.2, defines many more properties than the + * original set in UnicodeData.txt. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ucd/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * Important: If ICU is built with UCD files from Unicode versions below 3.2, + * then properties marked with "new in Unicode 3.2" are not or not fully available. + * + * @param c Code point to test. + * @param which UProperty selector constant, identifies which binary property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT. + * @return TRUE or FALSE according to the binary Unicode property value for c. + * Also FALSE if 'which' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code point. + * + * @see UProperty + * @see u_getIntPropertyValue + * @see u_getUnicodeVersion + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_hasBinaryProperty(UChar32 c, UProperty which); + +/** + * Check if a code point has the Alphabetic Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). + * This is different from u_isalpha! + * @param c Code point to test + * @return true if the code point has the Alphabetic Unicode property, false otherwise + * + * @see UCHAR_ALPHABETIC + * @see u_isalpha + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUAlphabetic(UChar32 c); + +/** + * Check if a code point has the Lowercase Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE). 
+ * This is different from u_islower! + * @param c Code point to test + * @return true if the code point has the Lowercase Unicode property, false otherwise + * + * @see UCHAR_LOWERCASE + * @see u_islower + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isULowercase(UChar32 c); + +/** + * Check if a code point has the Uppercase Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE). + * This is different from u_isupper! + * @param c Code point to test + * @return true if the code point has the Uppercase Unicode property, false otherwise + * + * @see UCHAR_UPPERCASE + * @see u_isupper + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUUppercase(UChar32 c); + +/** + * Check if a code point has the White_Space Unicode property. + * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE). + * This is different from both u_isspace and u_isWhitespace! + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c Code point to test + * @return true if the code point has the White_Space Unicode property, false otherwise. + * + * @see UCHAR_WHITE_SPACE + * @see u_isWhitespace + * @see u_isspace + * @see u_isJavaSpaceChar + * @see u_hasBinaryProperty + * @stable ICU 2.1 + */ +U_STABLE UBool U_EXPORT2 +u_isUWhiteSpace(UChar32 c); + +/** + * Get the property value for an enumerated or integer Unicode property for a code point. + * Also returns binary and mask property values. + * + * Unicode, especially in version 3.2, defines many more properties than the + * original set in UnicodeData.txt. + * + * The properties APIs are intended to reflect Unicode properties as defined + * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). + * For details about the properties see http://www.unicode.org/ . + * For names of Unicode properties see the UCD file PropertyAliases.txt. 
+ * + * Sample usage: + * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH); + * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC); + * + * @param c Code point to test. + * @param which UProperty selector constant, identifies which property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * @return Numeric value that is directly the property value or, + * for enumerated properties, corresponds to the numeric value of the enumerated + * constant of the respective property value enumeration type + * (cast to enum type if necessary). + * Returns 0 or 1 (for FALSE/TRUE) for binary Unicode properties. + * Returns a bit-mask for mask properties. + * Returns 0 if 'which' is out of bounds or if the Unicode version + * does not have data for the property at all, or not for this code point. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getIntPropertyMinValue + * @see u_getIntPropertyMaxValue + * @see u_getUnicodeVersion + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyValue(UChar32 c, UProperty which); + +/** + * Get the minimum value for an enumerated/integer/binary Unicode property. + * Can be used together with u_getIntPropertyMaxValue + * to allocate arrays of UnicodeSet or similar. + * + * @param which UProperty selector constant, identifies which property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. + * @return Minimum value returned by u_getIntPropertyValue for a Unicode property. + * 0 if the property selector is out of range. 
+ * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getUnicodeVersion + * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyMinValue(UProperty which); + +/** + * Get the maximum value for an enumerated/integer/binary Unicode property. + * Can be used together with u_getIntPropertyMinValue + * to allocate arrays of UnicodeSet or similar. + * + * Examples for min/max values (for Unicode 3.2): + * + * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL) + * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA) + * - UCHAR_IDEOGRAPHIC: 0/1 (FALSE/TRUE) + * + * For undefined UProperty constant values, min/max values will be 0/-1. + * + * @param which UProperty selector constant, identifies which property to check. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT. + * @return Maximum value returned by u_getIntPropertyValue for a Unicode property. + * <=0 if the property selector is out of range. + * + * @see UProperty + * @see u_hasBinaryProperty + * @see u_getUnicodeVersion + * @see u_getIntPropertyMinValue + * @see u_getIntPropertyValue + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getIntPropertyMaxValue(UProperty which); + +/** + * Get the numeric value for a Unicode code point as defined in the + * Unicode Character Database. + * + * A "double" return type is necessary because + * some numeric values are fractions, negative, or too large for int32_t. + * + * For characters without any numeric values in the Unicode Character Database, + * this function will return U_NO_NUMERIC_VALUE. + * Note: This is different from the Unicode Standard which specifies NaN as the default value. + * (NaN is not available on all platforms.) 
+ * + * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue() + * also supports negative values, large values, and fractions, + * while Java's getNumericValue() returns values 10..35 for ASCII letters. + * + * @param c Code point to get the numeric value for. + * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined. + * + * @see U_NO_NUMERIC_VALUE + * @stable ICU 2.2 + */ +U_STABLE double U_EXPORT2 +u_getNumericValue(UChar32 c); + +/** + * Special value that is returned by u_getNumericValue when + * no numeric value is defined for a code point. + * + * @see u_getNumericValue + * @stable ICU 2.2 + */ +#define U_NO_NUMERIC_VALUE ((double)-123456789.) + +/** + * Determines whether the specified code point has the general category "Ll" + * (lowercase letter). + * + * Same as java.lang.Character.isLowerCase(). + * + * This misses some characters that are also lowercase but + * have a different general category value. + * In order to include those, use UCHAR_LOWERCASE. + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an Ll lowercase letter + * + * @see UCHAR_LOWERCASE + * @see u_isupper + * @see u_istitle + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_islower(UChar32 c); + +/** + * Determines whether the specified code point has the general category "Lu" + * (uppercase letter). + * + * Same as java.lang.Character.isUpperCase(). + * + * This misses some characters that are also uppercase but + * have a different general category value. + * In order to include those, use UCHAR_UPPERCASE. + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. 
+ * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an Lu uppercase letter + * + * @see UCHAR_UPPERCASE + * @see u_islower + * @see u_istitle + * @see u_tolower + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isupper(UChar32 c); + +/** + * Determines whether the specified code point is a titlecase letter. + * True for general category "Lt" (titlecase letter). + * + * Same as java.lang.Character.isTitleCase(). + * + * @param c the code point to be tested + * @return TRUE if the code point is an Lt titlecase letter + * + * @see u_isupper + * @see u_islower + * @see u_totitle + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_istitle(UChar32 c); + +/** + * Determines whether the specified code point is a digit character according to Java. + * True for characters with general category "Nd" (decimal digit numbers). + * Beginning with Unicode 4, this is the same as + * testing for the Numeric_Type of Decimal. + * + * Same as java.lang.Character.isDigit(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a digit character according to Character.isDigit() + * + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isdigit(UChar32 c); + +/** + * Determines whether the specified code point is a letter character. + * True for general categories "L" (letters). + * + * Same as java.lang.Character.isLetter(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. 
+ * + * @param c the code point to be tested + * @return TRUE if the code point is a letter character + * + * @see u_isdigit + * @see u_isalnum + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isalpha(UChar32 c); + +/** + * Determines whether the specified code point is an alphanumeric character + * (letter or digit) according to Java. + * True for characters with general categories + * "L" (letters) and "Nd" (decimal digit numbers). + * + * Same as java.lang.Character.isLetterOrDigit(). + * + * In addition to being equivalent to a Java function, this also serves + * as a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is an alphanumeric character according to Character.isLetterOrDigit() + * + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isalnum(UChar32 c); + +/** + * Determines whether the specified code point is a hexadecimal digit. + * This is equivalent to u_digit(c, 16)>=0. + * True for characters with general category "Nd" (decimal digit numbers) + * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII. + * (That is, for letters with code points + * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.) + * + * In order to narrow the definition of hexadecimal digits to only ASCII + * characters, use (c<=0x7f && u_isxdigit(c)). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a hexadecimal digit + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isxdigit(UChar32 c); + +/** + * Determines whether the specified code point is a punctuation character. + * True for characters with general categories "P" (punctuation). 
+ * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a punctuation character + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_ispunct(UChar32 c); + +/** + * Determines whether the specified code point is a "graphic" character + * (printable, excluding spaces). + * TRUE for all characters except those with general categories + * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates), + * "Cn" (unassigned), and "Z" (separators). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a "graphic" character + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isgraph(UChar32 c); + +/** + * Determines whether the specified code point is a "blank" or "horizontal space", + * a character that visibly separates words on a line. + * The following are equivalent definitions: + * + * TRUE for Unicode White_Space characters except for "vertical space controls" + * where "vertical space controls" are the following characters: + * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS) + * + * same as + * + * TRUE for U+0009 (TAB) and characters with general category "Zs" (space separators). + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. 
+ * + * @param c the code point to be tested + * @return TRUE if the code point is a "blank" + * + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isblank(UChar32 c); + +/** + * Determines whether the specified code point is "defined", + * which usually means that it is assigned a character. + * True for general categories other than "Cn" (other, not assigned), + * i.e., true for all code points mentioned in UnicodeData.txt. + * + * Note that non-character code points (e.g., U+FDD0) are not "defined" + * (they are Cn), but surrogate code points are "defined" (Cs). + * + * Same as java.lang.Character.isDefined(). + * + * @param c the code point to be tested + * @return TRUE if the code point is assigned a character + * + * @see u_isdigit + * @see u_isalpha + * @see u_isalnum + * @see u_isupper + * @see u_islower + * @see u_istitle + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isdefined(UChar32 c); + +/** + * Determines if the specified character is a space character or not. + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the character to be tested + * @return true if the character is a space character; false otherwise. + * + * @see u_isJavaSpaceChar + * @see u_isWhitespace + * @see u_isUWhiteSpace + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isspace(UChar32 c); + +/** + * Determine if the specified code point is a space character according to Java. + * True for characters with general categories "Z" (separators), + * which does not include control codes (e.g., TAB or Line Feed). + * + * Same as java.lang.Character.isSpaceChar(). + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. 
+ * + * @param c the code point to be tested + * @return TRUE if the code point is a space character according to Character.isSpaceChar() + * + * @see u_isspace + * @see u_isWhitespace + * @see u_isUWhiteSpace + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaSpaceChar(UChar32 c); + +/** + * Determines if the specified code point is a whitespace character according to Java/ICU. + * A character is considered to be a Java whitespace character if and only + * if it satisfies one of the following criteria: + * + * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not + * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP). + * - It is U+0009 HORIZONTAL TABULATION. + * - It is U+000A LINE FEED. + * - It is U+000B VERTICAL TABULATION. + * - It is U+000C FORM FEED. + * - It is U+000D CARRIAGE RETURN. + * - It is U+001C FILE SEPARATOR. + * - It is U+001D GROUP SEPARATOR. + * - It is U+001E RECORD SEPARATOR. + * - It is U+001F UNIT SEPARATOR. + * + * This API tries to sync with the semantics of Java's + * java.lang.Character.isWhitespace(), but it may not return + * the exact same results because of the Unicode version + * difference. + * + * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) + * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. + * See http://www.unicode.org/versions/Unicode4.0.1/ + * + * Note: There are several ICU whitespace functions; please see the uchar.h + * file documentation for a detailed comparison. + * + * @param c the code point to be tested + * @return TRUE if the code point is a whitespace character according to Java/ICU + * + * @see u_isspace + * @see u_isJavaSpaceChar + * @see u_isUWhiteSpace + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isWhitespace(UChar32 c); + +/** + * Determines whether the specified code point is a control character + * (as defined by this function). 
+ * A control character is one of the following: + * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f) + * - U_CONTROL_CHAR (Cc) + * - U_FORMAT_CHAR (Cf) + * - U_LINE_SEPARATOR (Zl) + * - U_PARAGRAPH_SEPARATOR (Zp) + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a control character + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_isprint + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_iscntrl(UChar32 c); + +/** + * Determines whether the specified code point is an ISO control code. + * True for U+0000..U+001f and U+007f..U+009f (general category "Cc"). + * + * Same as java.lang.Character.isISOControl(). + * + * @param c the code point to be tested + * @return TRUE if the code point is an ISO control code + * + * @see u_iscntrl + * @stable ICU 2.6 + */ +U_STABLE UBool U_EXPORT2 +u_isISOControl(UChar32 c); + +/** + * Determines whether the specified code point is a printable character. + * True for general categories <em>other</em> than "C" (controls). + * + * This is a C/POSIX migration function. + * See the comments about C/POSIX character classification functions in the + * documentation at the top of this header file. + * + * @param c the code point to be tested + * @return TRUE if the code point is a printable character + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_iscntrl + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isprint(UChar32 c); + +/** + * Determines whether the specified code point is a base character. + * True for general categories "L" (letters), "N" (numbers), + * "Mc" (spacing combining marks), and "Me" (enclosing marks). 
+ * + * Note that this is different from the Unicode definition in + * chapter 3.5, conformance clause D13, + * which defines base characters to be all characters (not Cn) + * that do not graphically combine with preceding characters (M) + * and that are neither control (Cc) nor format (Cf) characters. + * + * @param c the code point to be tested + * @return TRUE if the code point is a base character according to this function + * + * @see u_isalpha + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isbase(UChar32 c); + +/** + * Returns the bidirectional category value for the code point, + * which is used in the Unicode bidirectional algorithm + * (UAX #9 http://www.unicode.org/reports/tr9/). + * Note that some <em>unassigned</em> code points have bidi values + * of R or AL because they are in blocks that are reserved + * for Right-To-Left scripts. + * + * Same as java.lang.Character.getDirectionality(). + * + * @param c the code point to be tested + * @return the bidirectional category (UCharDirection) value + * + * @see UCharDirection + * @stable ICU 2.0 + */ +U_STABLE UCharDirection U_EXPORT2 +u_charDirection(UChar32 c); + +/** + * Determines whether the code point has the Bidi_Mirrored property. + * This property is set for characters that are commonly used in + * Right-To-Left contexts and need to be displayed with a "mirrored" + * glyph. + * + * Same as java.lang.Character.isMirrored(). + * Same as the UCHAR_BIDI_MIRRORED binary property. + * + * @param c the code point to be tested + * @return TRUE if the character has the Bidi_Mirrored property + * + * @see UCHAR_BIDI_MIRRORED + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isMirrored(UChar32 c); + +/** + * Maps the specified character to a "mirror-image" character. 
+ * For characters with the Bidi_Mirrored property, implementations + * sometimes need a "poor man's" mapping to another Unicode + * character (code point) such that the default glyph may serve + * as the mirror-image of the default glyph of the specified + * character. This is useful for text conversion to and from + * codepages with visual order, and for displays without glyph + * selection capabilities. + * + * @param c the code point to be mapped + * @return another Unicode code point that may serve as a mirror-image + * substitute, or c itself if there is no such mapping or c + * does not have the Bidi_Mirrored property + * + * @see UCHAR_BIDI_MIRRORED + * @see u_isMirrored + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_charMirror(UChar32 c); + +/** + * Maps the specified character to its paired bracket character. + * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror(). + * Otherwise c itself is returned. + * See http://www.unicode.org/reports/tr9/ + * + * @param c the code point to be mapped + * @return the paired bracket code point, + * or c itself if there is no such mapping + * (Bidi_Paired_Bracket_Type=None) + * + * @see UCHAR_BIDI_PAIRED_BRACKET + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @see u_charMirror + * @stable ICU 52 + */ +U_STABLE UChar32 U_EXPORT2 +u_getBidiPairedBracket(UChar32 c); + +/** + * Returns the general category value for the code point. + * + * Same as java.lang.Character.getType(). + * + * @param c the code point to be tested + * @return the general category (UCharCategory) value + * + * @see UCharCategory + * @stable ICU 2.0 + */ +U_STABLE int8_t U_EXPORT2 +u_charType(UChar32 c); + +/** + * Get a single-bit bit set for the general category of a character. + * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc. + * Same as U_MASK(u_charType(c)). 
+ * + * @param c the code point to be tested + * @return a single-bit mask corresponding to the general category (UCharCategory) value + * + * @see u_charType + * @see UCharCategory + * @see U_GC_CN_MASK + * @stable ICU 2.1 + */ +#define U_GET_GC_MASK(c) U_MASK(u_charType(c)) + +/** + * Callback from u_enumCharTypes(); it is called for each contiguous range + * of code points c (where start<=c<limit) + * with the same Unicode general category ("character type"). + * + * The callback function can stop the enumeration by returning FALSE. + * + * @param context an opaque pointer, as passed into u_enumCharTypes() + * @param start the first code point in a contiguous range with the same general category + * @param limit one past the last code point in a contiguous range with the same general category + * @param type the general category for all code points c with start<=c<limit + * @return FALSE to stop the enumeration + * + * @stable ICU 2.1 + * @see UCharCategory + * @see u_enumCharTypes + */ +typedef UBool U_CALLCONV +UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type); + +/** + * Enumerate efficiently all code points with their Unicode general categories. + * + * This is useful for building data structures (e.g., UnicodeSets), + * for enumerating all assigned code points (type!=U_UNASSIGNED), etc. + * + * For each contiguous range of code points with a given general category ("character type"), + * the UCharEnumTypeRange function is called. + * Adjacent ranges have different types. + * The Unicode Standard guarantees that the numeric value of the type is 0..31. 
+ * + * @param enumRange a pointer to a function that is called for each contiguous range + * of code points with the same general category + * @param context an opaque pointer that is passed on to the callback function + * + * @stable ICU 2.1 + * @see UCharCategory + * @see UCharEnumTypeRange + */ +U_STABLE void U_EXPORT2 +u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context); + +#if !UCONFIG_NO_NORMALIZATION + +/** + * Returns the combining class of the code point as specified in UnicodeData.txt. + * + * @param c the code point of the character + * @return the combining class of the character + * @stable ICU 2.0 + */ +U_STABLE uint8_t U_EXPORT2 +u_getCombiningClass(UChar32 c); + +#endif + +/** + * Returns the decimal digit value of a decimal digit character. + * Such characters have the general category "Nd" (decimal digit numbers) + * and a Numeric_Type of Decimal. + * + * Unlike ICU releases before 2.6, no digit values are returned for any + * Han characters because Han number characters are often used with a special + * Chinese-style number format (with characters for powers of 10 in between) + * instead of in decimal-positional notation. + * Unicode 4 explicitly assigns Han number characters the Numeric_Type + * Numeric instead of Decimal. + * See Jitterbug 1483 for more details. + * + * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue() + * for complete numeric Unicode properties. + * + * @param c the code point for which to get the decimal digit value + * @return the decimal digit value of c, + * or -1 if c is not a decimal digit character + * + * @see u_getNumericValue + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_charDigitValue(UChar32 c); + +/** + * Returns the Unicode allocation block that contains the character. 
+ * + * @param c the code point to be tested + * @return the block value (UBlockCode) for c + * + * @see UBlockCode + * @stable ICU 2.0 + */ +U_STABLE UBlockCode U_EXPORT2 +ublock_getCode(UChar32 c); + +/** + * Retrieve the name of a Unicode character. + * Depending on <code>nameChoice</code>, the character name written + * into the buffer is the "modern" name or the name that was defined + * in Unicode version 1.0. + * The name contains only "invariant" characters + * like A-Z, 0-9, space, and '-'. + * Unicode 1.0 names are only retrieved if they are different from the modern + * names and if the data file contains the data for them. gennames may or may + * not be called with a command line option to include 1.0 names in unames.dat. + * + * @param code The character (code point) for which to get the name. + * It must be <code>0<=code<=0x10ffff</code>. + * @param nameChoice Selector for which name to get. + * @param buffer Destination address for copying the name. + * The name will always be zero-terminated. + * If there is no name, then the buffer will be set to the empty string. + * @param bufferLength <code>==sizeof(buffer)</code> + * @param pErrorCode Pointer to a UErrorCode variable; + * check for <code>U_SUCCESS()</code> after <code>u_charName()</code> + * returns. + * @return The length of the name, or 0 if there is no name for this character. + * If the bufferLength is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @see UCharNameChoice + * @see u_charFromName + * @see u_enumCharNames + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_charName(UChar32 code, UCharNameChoice nameChoice, + char *buffer, int32_t bufferLength, + UErrorCode *pErrorCode); + +#ifndef U_HIDE_DEPRECATED_API +/** + * Returns an empty string. + * Used to return the ISO 10646 comment for a character. 
+ * The Unicode ISO_Comment property is deprecated and has no values. + * + * @param c The character (code point) for which to get the ISO comment. + * It must be <code>0<=c<=0x10ffff</code>. + * @param dest Destination address for copying the comment. + * The comment will be zero-terminated if possible. + * If there is no comment, then the buffer will be set to the empty string. + * @param destCapacity <code>==sizeof(dest)</code> + * @param pErrorCode Pointer to a UErrorCode variable; + * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code> + * returns. + * @return 0 + * + * @deprecated ICU 49 + */ +U_DEPRECATED int32_t U_EXPORT2 +u_getISOComment(UChar32 c, + char *dest, int32_t destCapacity, + UErrorCode *pErrorCode); +#endif /* U_HIDE_DEPRECATED_API */ + +/** + * Find a Unicode character by its name and return its code point value. + * The name is matched exactly and completely. + * If the name does not correspond to a code point, <i>pErrorCode</i> + * is set to <code>U_INVALID_CHAR_FOUND</code>. + * A Unicode 1.0 name is matched only if it differs from the modern name. + * Unicode names are all uppercase. Extended names are lowercase followed + * by an uppercase hexadecimal number, and within angle brackets. + * + * @param nameChoice Selector for which name to match. + * @param name The name to match. + * @param pErrorCode Pointer to a UErrorCode variable + * @return The Unicode value of the code point with the given name, + * or an undefined value if there is no such code point. + * + * @see UCharNameChoice + * @see u_charName + * @see u_enumCharNames + * @stable ICU 1.7 + */ +U_STABLE UChar32 U_EXPORT2 +u_charFromName(UCharNameChoice nameChoice, + const char *name, + UErrorCode *pErrorCode); + +/** + * Type of a callback function for u_enumCharNames() that gets called + * for each Unicode character with the code point value and + * the character name. + * If such a function returns FALSE, then the enumeration is stopped. 
+ * + * @param context The context pointer that was passed to u_enumCharNames(). + * @param code The Unicode code point for the character with this name. + * @param nameChoice Selector for which kind of names is enumerated. + * @param name The character's name, zero-terminated. + * @param length The length of the name. + * @return TRUE if the enumeration should continue, FALSE to stop it. + * + * @see UCharNameChoice + * @see u_enumCharNames + * @stable ICU 1.7 + */ +typedef UBool U_CALLCONV UEnumCharNamesFn(void *context, + UChar32 code, + UCharNameChoice nameChoice, + const char *name, + int32_t length); + +/** + * Enumerate all assigned Unicode characters between the start and limit + * code points (start inclusive, limit exclusive) and call a function + * for each, passing the code point value and the character name. + * For Unicode 1.0 names, only those are enumerated that differ from the + * modern names. + * + * @param start The first code point in the enumeration range. + * @param limit One more than the last code point in the enumeration range + * (the first one after the range). + * @param fn The function that is to be called for each character name. + * @param context An arbitrary pointer that is passed to the function. + * @param nameChoice Selector for which kind of names to enumerate. + * @param pErrorCode Pointer to a UErrorCode variable + * + * @see UCharNameChoice + * @see UEnumCharNamesFn + * @see u_charName + * @see u_charFromName + * @stable ICU 1.7 + */ +U_STABLE void U_EXPORT2 +u_enumCharNames(UChar32 start, UChar32 limit, + UEnumCharNamesFn *fn, + void *context, + UCharNameChoice nameChoice, + UErrorCode *pErrorCode); + +/** + * Return the Unicode name for a given property, as given in the + * Unicode database file PropertyAliases.txt. + * + * In addition, this function maps the property + * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" / + * "General_Category_Mask". These names are not in + * PropertyAliases.txt. 
+ * + * @param property UProperty selector other than UCHAR_INVALID_CODE. + * If out of range, NULL is returned. + * + * @param nameChoice selector for which name to get. If out of range, + * NULL is returned. All properties have a long name. Most + * have a short name, but some do not. Unicode allows for + * additional names; if present these will be returned by + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... + * + * @return a pointer to the name, or NULL if either the + * property or the nameChoice is out of range. If a given + * nameChoice returns NULL, then all larger values of + * nameChoice will return NULL, with one exception: if NULL is + * returned for U_SHORT_PROPERTY_NAME, then + * U_LONG_PROPERTY_NAME (and higher) may still return a + * non-NULL value. The returned pointer is valid until + * u_cleanup() is called. + * + * @see UProperty + * @see UPropertyNameChoice + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +u_getPropertyName(UProperty property, + UPropertyNameChoice nameChoice); + +/** + * Return the UProperty enum for a given property name, as specified + * in the Unicode database file PropertyAliases.txt. Short, long, and + * any other variants are recognized. + * + * In addition, this function maps the synthetic names "gcm" / + * "General_Category_Mask" to the property + * UCHAR_GENERAL_CATEGORY_MASK. These names are not in + * PropertyAliases.txt. + * + * @param alias the property name to be matched. The name is compared + * using "loose matching" as described in PropertyAliases.txt. + * + * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name + * does not match any property. + * + * @see UProperty + * @stable ICU 2.4 + */ +U_STABLE UProperty U_EXPORT2 +u_getPropertyEnum(const char* alias); + +/** + * Return the Unicode name for a given property value, as given in the + * Unicode database file PropertyValueAliases.txt. 
+ * + * Note: Some of the names in PropertyValueAliases.txt can only be + * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * If out of range, NULL is returned. + * + * @param value selector for a value for the given property. If out + * of range, NULL is returned. In general, valid values range + * from 0 up to some maximum. There are a few exceptions: + * (1.) UCHAR_BLOCK values begin at the non-zero value + * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS + * values are not contiguous and range from 0..240. (3.) + * UCHAR_GENERAL_CATEGORY_MASK values are not values of + * UCharCategory, but rather mask values produced by + * U_GET_GC_MASK(). This allows grouped categories such as + * [:L:] to be represented. Mask values range + * non-contiguously from 1..U_GC_P_MASK. + * + * @param nameChoice selector for which name to get. If out of range, + * NULL is returned. All values have a long name. Most have + * a short name, but some do not. Unicode allows for + * additional names; if present these will be returned by + * U_LONG_PROPERTY_NAME + i, where i=1, 2,... + + * @return a pointer to the name, or NULL if either the + * property or the nameChoice is out of range. If a given + * nameChoice returns NULL, then all larger values of + * nameChoice will return NULL, with one exception: if NULL is + * returned for U_SHORT_PROPERTY_NAME, then + * U_LONG_PROPERTY_NAME (and higher) may still return a + * non-NULL value. The returned pointer is valid until + * u_cleanup() is called. 
+ * + * @see UProperty + * @see UPropertyNameChoice + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +u_getPropertyValueName(UProperty property, + int32_t value, + UPropertyNameChoice nameChoice); + +/** + * Return the property value integer for a given value name, as + * specified in the Unicode database file PropertyValueAliases.txt. + * Short, long, and any other variants are recognized. + * + * Note: Some of the names in PropertyValueAliases.txt will only be + * recognized with UCHAR_GENERAL_CATEGORY_MASK, not + * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" / + * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" + * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". + * + * @param property UProperty selector constant. + * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT + * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT + * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT. + * If out of range, UCHAR_INVALID_CODE is returned. + * + * @param alias the value name to be matched. The name is compared + * using "loose matching" as described in + * PropertyValueAliases.txt. + * + * @return a value integer or UCHAR_INVALID_CODE if the given name + * does not match any value of the given property, or if the + * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values + * are not values of UCharCategory, but rather mask values + * produced by U_GET_GC_MASK(). This allows grouped + * categories such as [:L:] to be represented. + * + * @see UProperty + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +u_getPropertyValueEnum(UProperty property, + const char* alias); + +/** + * Determines if the specified character is permissible as the + * first character in an identifier according to Unicode + * (The Unicode Standard, Version 3.0, chapter 5.16 Identifiers). + * True for characters with general categories "L" (letters) and "Nl" (letter numbers). + * + * Same as java.lang.Character.isUnicodeIdentifierStart(). 
+ * Same as UCHAR_ID_START + * + * @param c the code point to be tested + * @return TRUE if the code point may start an identifier + * + * @see UCHAR_ID_START + * @see u_isalpha + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDStart(UChar32 c); + +/** + * Determines if the specified character is permissible + * in an identifier according to Java. + * True for characters with general categories "L" (letters), + * "Nl" (letter numbers), "Nd" (decimal digits), + * "Mc" and "Mn" (combining marks), "Pc" (connecting punctuation), and + * u_isIDIgnorable(c). + * + * Same as java.lang.Character.isUnicodeIdentifierPart(). + * Almost the same as Unicode's ID_Continue (UCHAR_ID_CONTINUE) + * except that Unicode recommends to ignore Cf which is less than + * u_isIDIgnorable(c). + * + * @param c the code point to be tested + * @return TRUE if the code point may occur in an identifier according to Java + * + * @see UCHAR_ID_CONTINUE + * @see u_isIDStart + * @see u_isIDIgnorable + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDPart(UChar32 c); + +/** + * Determines if the specified character should be regarded + * as an ignorable character in an identifier, + * according to Java. + * True for characters with general category "Cf" (format controls) as well as + * non-whitespace ISO controls + * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F). + * + * Same as java.lang.Character.isIdentifierIgnorable(). + * + * Note that Unicode just recommends to ignore Cf (format controls). + * + * @param c the code point to be tested + * @return TRUE if the code point is ignorable in identifiers according to Java + * + * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT + * @see u_isIDStart + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isIDIgnorable(UChar32 c); + +/** + * Determines if the specified character is permissible as the + * first character in a Java identifier. 
+ * In addition to u_isIDStart(c), true for characters with + * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation). + * + * Same as java.lang.Character.isJavaIdentifierStart(). + * + * @param c the code point to be tested + * @return TRUE if the code point may start a Java identifier + * + * @see u_isJavaIDPart + * @see u_isalpha + * @see u_isIDStart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaIDStart(UChar32 c); + +/** + * Determines if the specified character is permissible + * in a Java identifier. + * In addition to u_isIDPart(c), true for characters with + * general category "Sc" (currency symbols). + * + * Same as java.lang.Character.isJavaIdentifierPart(). + * + * @param c the code point to be tested + * @return TRUE if the code point may occur in a Java identifier + * + * @see u_isIDIgnorable + * @see u_isJavaIDStart + * @see u_isalpha + * @see u_isdigit + * @see u_isIDPart + * @stable ICU 2.0 + */ +U_STABLE UBool U_EXPORT2 +u_isJavaIDPart(UChar32 c); + +/** + * The given character is mapped to its lowercase equivalent according to + * UnicodeData.txt; if the character has no lowercase equivalent, the character + * itself is returned. + * + * Same as java.lang.Character.toLowerCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Lowercase_Mapping of the code point, if any; + * otherwise the code point itself. 
+ * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_tolower(UChar32 c); + +/** + * The given character is mapped to its uppercase equivalent according to UnicodeData.txt; + * if the character has no uppercase equivalent, the character itself is + * returned. + * + * Same as java.lang.Character.toUpperCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Uppercase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_toupper(UChar32 c); + +/** + * The given character is mapped to its titlecase equivalent + * according to UnicodeData.txt; + * if none is defined, the character itself is returned. + * + * Same as java.lang.Character.toTitleCase(). + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. 
+ * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @return the Simple_Titlecase_Mapping of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_totitle(UChar32 c); + +/** + * The given character is mapped to its case folding equivalent according to + * UnicodeData.txt and CaseFolding.txt; + * if the character has no case folding equivalent, the character + * itself is returned. + * + * This function only returns the simple, single-code point case mapping. + * Full case mappings should be used whenever possible because they produce + * better results by working on whole strings. + * They take into account the string context and the language and can map + * to a result string with a different length as appropriate. + * Full case mappings are applied by the string case mapping functions, + * see ustring.h and the UnicodeString class. + * See also the User Guide chapter on C/POSIX migration: + * http://icu-project.org/userguide/posix.html#case_mappings + * + * @param c the code point to be mapped + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @return the Simple_Case_Folding of the code point, if any; + * otherwise the code point itself. + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_foldCase(UChar32 c, uint32_t options); + +/** + * Returns the decimal digit value of the code point in the + * specified radix. + * + * If the radix is not in the range <code>2<=radix<=36</code> or if the + * value of <code>c</code> is not a valid digit in the specified + * radix, <code>-1</code> is returned. A character is a valid digit + * if at least one of the following is true: + * <ul> + * <li>The character has a decimal digit value. + * Such characters have the general category "Nd" (decimal digit numbers) + * and a Numeric_Type of Decimal. 
+ * In this case the value is the character's decimal digit value.</li> + * <li>The character is one of the uppercase Latin letters + * <code>'A'</code> through <code>'Z'</code>. + * In this case the value is <code>ch-'A'+10</code>.</li> + * <li>The character is one of the lowercase Latin letters + * <code>'a'</code> through <code>'z'</code>. + * In this case the value is <code>ch-'a'+10</code>.</li> + * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A) + * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A) + * are recognized.</li> + * </ul> + * + * Same as java.lang.Character.digit(). + * + * @param ch the code point to be tested. + * @param radix the radix. + * @return the numeric value represented by the character in the + * specified radix, + * or -1 if there is no value or if the value exceeds the radix. + * + * @see UCHAR_NUMERIC_TYPE + * @see u_forDigit + * @see u_charDigitValue + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_digit(UChar32 ch, int8_t radix); + +/** + * Determines the character representation for a specific digit in + * the specified radix. If the value of <code>radix</code> is not a + * valid radix, or the value of <code>digit</code> is not a valid + * digit in the specified radix, the null character + * (<code>U+0000</code>) is returned. + * <p> + * The <code>radix</code> argument is valid if it is greater than or + * equal to 2 and less than or equal to 36. + * The <code>digit</code> argument is valid if + * <code>0 <= digit < radix</code>. + * <p> + * If the digit is less than 10, then + * <code>'0' + digit</code> is returned. Otherwise, the value + * <code>'a' + digit - 10</code> is returned. + * + * Same as java.lang.Character.forDigit(). + * + * @param digit the number to convert to a character. + * @param radix the radix. + * @return the <code>char</code> representation of the specified digit + * in the specified radix. 
+ * + * @see u_digit + * @see u_charDigitValue + * @see u_isdigit + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_forDigit(int32_t digit, int8_t radix); + +/** + * Get the "age" of the code point. + * The "age" is the Unicode version when the code point was first + * designated (as a non-character or for Private Use) + * or assigned a character. + * This can be useful to avoid emitting code points to receiving + * processes that do not accept newer characters. + * The data is from the UCD file DerivedAge.txt. + * + * @param c The code point. + * @param versionArray The Unicode version number array, to be filled in. + * + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +u_charAge(UChar32 c, UVersionInfo versionArray); + +/** + * Gets the Unicode version information. + * The version array is filled in with the version information + * for the Unicode standard that is currently used by ICU. + * For example, Unicode version 3.1.1 is represented as an array with + * the values { 3, 1, 1, 0 }. + * + * @param versionArray an output array that will be filled in with + * the Unicode version number + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_getUnicodeVersion(UVersionInfo versionArray); + +#if !UCONFIG_NO_NORMALIZATION +/** + * Get the FC_NFKC_Closure property string for a character. + * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure" + * or for "FNC": http://www.unicode.org/reports/tr15/ + * + * @param c The character (code point) for which to get the FC_NFKC_Closure string. + * It must be <code>0<=c<=0x10ffff</code>. + * @param dest Destination address for copying the string. + * The string will be zero-terminated if possible. + * If there is no FC_NFKC_Closure string, + * then the buffer will be set to the empty string. + * @param destCapacity <code>==sizeof(dest)</code> + * @param pErrorCode Pointer to a UErrorCode variable. + * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character. 
+ * If the destCapacity is less than or equal to the length, then the buffer + * contains the truncated name and the returned length indicates the full + * length of the name. + * The length does not include the zero-termination. + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode); + +#endif + + +U_CDECL_END + +#endif /*_UCHAR*/ +/*eof*/ diff --git a/vendor/icu/include/unicode/uclean.h b/vendor/icu/include/unicode/uclean.h new file mode 100644 index 0000000000..f685b6581a --- /dev/null +++ b/vendor/icu/include/unicode/uclean.h @@ -0,0 +1,262 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* Copyright (C) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +****************************************************************************** +* file name: uclean.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001July05 +* created by: George Rhoten +*/ + +#ifndef __UCLEAN_H__ +#define __UCLEAN_H__ + +#include <unicode/utypes.h> +/** + * \file + * \brief C API: Initialize and clean up ICU + */ + +/** + * Initialize ICU. + * + * Use of this function is optional. It is OK to simply use ICU + * services and functions without first having initialized + * ICU by calling u_init(). + * + * u_init() will attempt to load some part of ICU's data, and is + * useful as a test for configuration or installation problems that + * leave the ICU data inaccessible. A successful invocation of u_init() + * does not, however, guarantee that all ICU data is accessible. + * + * Multiple calls to u_init() cause no harm, aside from the small amount + * of time required. 
+ * + * In old versions of ICU, u_init() was required in multi-threaded applications + * to ensure the thread safety of ICU. u_init() is no longer needed for this purpose. + * + * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>. + * An Error will be returned if some required part of ICU data can not + * be loaded or initialized. + * The function returns immediately if the input error code indicates a + * failure, as usual. + * + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +u_init(UErrorCode *status); + +#ifndef U_HIDE_SYSTEM_API +/** + * Clean up the system resources, such as allocated memory or open files, + * used in all ICU libraries. This will free/delete all memory owned by the + * ICU libraries, and return them to their original load state. All open ICU + * items (collators, resource bundles, converters, etc.) must be closed before + * calling this function, otherwise ICU may not free its allocated memory + * (e.g. close your converters and resource bundles before calling this + * function). Generally, this function should be called once just before + * an application exits. For applications that dynamically load and unload + * the ICU libraries (relatively uncommon), u_cleanup() should be called + * just before the library unload. + * <p> + * u_cleanup() also clears any ICU heap functions, mutex functions or + * trace functions that may have been set for the process. + * This has the effect of restoring ICU to its initial condition, before + * any of these override functions were installed. Refer to + * u_setMemoryFunctions(), u_setMutexFunctions and + * utrace_setFunctions(). If ICU is to be reinitialized after + * calling u_cleanup(), these runtime override functions will need to + * be set up again if they are still required. + * <p> + * u_cleanup() is not thread safe. All other threads should stop using ICU + * before calling this function. 
+ * <p> + * Any open ICU items will be left in an undefined state by u_cleanup(), + * and any subsequent attempt to use such an item will give unpredictable + * results. + * <p> + * After calling u_cleanup(), an application may continue to use ICU by + * calling u_init(). An application must invoke u_init() first from one single + * thread before allowing other threads to call u_init(). All threads existing + * at the time of the first thread's call to u_init() must also call + * u_init() themselves before continuing with other ICU operations. + * <p> + * The use of u_cleanup() just before an application terminates is optional, + * but it should be called only once for performance reasons. The primary + * benefit is to eliminate reports of memory or resource leaks originating + * in ICU code from the results generated by heap analysis tools. + * <p> + * <strong>Use this function with great care!</strong> + * </p> + * + * @stable ICU 2.0 + * @system + */ +U_STABLE void U_EXPORT2 +u_cleanup(void); + +U_CDECL_BEGIN +/** + * Pointer type for a user supplied memory allocation function. + * @param context user supplied value, obtained from u_setMemoryFunctions(). + * @param size The number of bytes to be allocated + * @return Pointer to the newly allocated memory, or NULL if the allocation failed. + * @stable ICU 2.8 + * @system + */ +typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size); +/** + * Pointer type for a user supplied memory re-allocation function. + * @param context user supplied value, obtained from u_setMemoryFunctions(). + * @param mem Pointer to the memory block to be resized + * @param size The new size for the block + * @return Pointer to the resized memory block, or NULL if the resizing failed. + * @stable ICU 2.8 + * @system + */ +typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size); +/** + * Pointer type for a user supplied memory free function. Behavior should be + * similar to the standard C library free(). 
+ * @param context user supplied value, obtained from u_setMemoryFunctions(). + * @param mem Pointer to the memory block to be freed + * @stable ICU 2.8 + * @system + */ +typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem); + +/** + * Set the functions that ICU will use for memory allocation. + * Use of this function is optional; by default (without this function), ICU will + * use the standard C library malloc() and free() functions. + * This function can only be used when ICU is in an initial, unused state, before + * u_init() has been called. + * @param context This pointer value will be saved, and then (later) passed as + * a parameter to the memory functions each time they + * are called. + * @param a Pointer to a user-supplied malloc function. + * @param r Pointer to a user-supplied realloc function. + * @param f Pointer to a user-supplied free function. + * @param status Receives error values. + * @stable ICU 2.8 + * @system + */ +U_STABLE void U_EXPORT2 +u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f, + UErrorCode *status); + +U_CDECL_END + +#ifndef U_HIDE_DEPRECATED_API +/********************************************************************************* + * + * Deprecated Functions + * + * The following functions for user supplied mutexes are no longer supported. + * Any attempt to use them will return a U_UNSUPPORTED_ERROR. + * + **********************************************************************************/ + +/** + * An opaque pointer type that represents an ICU mutex. + * For user-implemented mutexes, the value will typically point to a + * struct or object that implements the mutex. + * @deprecated ICU 52. This type is no longer supported. 
+ * @system + */ +typedef void *UMTX; + +U_CDECL_BEGIN +/** + * Function Pointer type for a user supplied mutex initialization function. + * The user-supplied function will be called by ICU whenever ICU needs to create a + * new mutex. The function implementation should create a mutex, and store a pointer + * to something that uniquely identifies the mutex into the UMTX that is supplied + * as a parameter. + * @param context user supplied value, obtained from u_setMutexFunctions(). + * @param mutex Receives a pointer that identifies the new mutex. + * The mutex init function must set the UMTX to a non-null value. + * Subsequent calls by ICU to lock, unlock, or destroy a mutex will + * identify the mutex by the UMTX value. + * @param status Error status. Report errors back to ICU by setting this variable + * with an error code. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCode* status); + + +/** + * Function Pointer type for a user supplied mutex functions. + * One of the user-supplied functions with this signature will be called by ICU + * whenever ICU needs to lock, unlock, or destroy a mutex. + * @param context user supplied value, obtained from u_setMutexFunctions(). + * @param mutex specify the mutex on which to operate. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex); +U_CDECL_END + +/** + * Set the functions that ICU will use for mutex operations + * Use of this function is optional; by default (without this function), ICU will + * directly access system functions for mutex operations + * This function can only be used when ICU is in an initial, unused state, before + * u_init() has been called. 
+ * @param context This pointer value will be saved, and then (later) passed as + * a parameter to the user-supplied mutex functions each time they + * are called. + * @param init Pointer to a mutex initialization function. Must be non-null. + * @param destroy Pointer to the mutex destroy function. Must be non-null. + * @param lock Pointer to the mutex lock function. Must be non-null. + * @param unlock Pointer to the mutex unlock function. Must be non-null. + * @param status Receives error values. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +U_DEPRECATED void U_EXPORT2 +u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock, + UErrorCode *status); + + +/** + * Pointer type for a user supplied atomic increment or decrement function. + * @param context user supplied value, obtained from u_setAtomicIncDecFunctions(). + * @param p Pointer to a 32 bit int to be incremented or decremented + * @return The value of the variable after the inc or dec operation. + * @deprecated ICU 52. This function is no longer supported. + * @system + */ +typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p); + +/** + * Set the functions that ICU will use for atomic increment and decrement of int32_t values. + * Use of this function is optional; by default (without this function), ICU will + * use its own internal implementation of atomic increment/decrement. + * This function can only be used when ICU is in an initial, unused state, before + * u_init() has been called. + * @param context This pointer value will be saved, and then (later) passed as + * a parameter to the increment and decrement functions each time they + * are called. + * @param inc Pointer to a function to do an atomic increment operation. Must be non-null. + * @param dec Pointer to a function to do an atomic decrement operation. Must be non-null. + * @param status Receives error values. 
+ * @deprecated ICU 52. This function is no longer supported. + * @system + */ +U_DEPRECATED void U_EXPORT2 +u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec, + UErrorCode *status); + +#endif /* U_HIDE_DEPRECATED_API */ +#endif /* U_HIDE_SYSTEM_API */ + +#endif diff --git a/vendor/icu/include/unicode/uconfig.h b/vendor/icu/include/unicode/uconfig.h new file mode 100644 index 0000000000..7ddf4e6adf --- /dev/null +++ b/vendor/icu/include/unicode/uconfig.h @@ -0,0 +1,455 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: uconfig.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002sep19 +* created by: Markus W. Scherer +*/ + +#ifndef __UCONFIG_H__ +#define __UCONFIG_H__ + + +/*! + * \file + * \brief User-configurable settings + * + * Miscellaneous switches: + * + * A number of macros affect a variety of minor aspects of ICU. + * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h) + * and moved here to make them easier to find. + * + * Switches for excluding parts of ICU library code modules: + * + * Changing these macros allows building partial, smaller libraries for special purposes. + * By default, all modules are built. + * The switches are fairly coarse, controlling large modules. + * Basic services cannot be turned off. + * + * Building with any of these options does not guarantee that the + * ICU build process will completely work. It is recommended that + * the ICU libraries and data be built using the normal build. + * At that time you should remove the data used by those services. 
+ * After building the ICU data library, you should rebuild the ICU + * libraries with these switches customized to your needs. + * + * @stable ICU 2.4 + */ + +/** + * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h" + * prior to determining default settings for uconfig variables. + * + * @internal ICU 4.0 + */ +#if defined(UCONFIG_USE_LOCAL) +#include "uconfig_local.h" +#endif + +/** + * \def U_DEBUG + * Determines whether to include debugging code. + * Automatically set on Windows, but most compilers do not have + * related predefined macros. + * @internal + */ +#ifdef U_DEBUG + /* Use the predefined value. */ +#elif defined(_DEBUG) + /* + * _DEBUG is defined by Visual Studio debug compilation. + * Do *not* test for its NDEBUG macro: It is an orthogonal macro + * which disables assert(). + */ +# define U_DEBUG 1 +# else +# define U_DEBUG 0 +#endif + +/** + * Determines whether to enable auto cleanup of libraries. + * @internal + */ +#ifndef UCLN_NO_AUTO_CLEANUP +#define UCLN_NO_AUTO_CLEANUP 1 +#endif + +/** + * \def U_DISABLE_RENAMING + * Determines whether to disable renaming or not. + * @internal + */ +#ifndef U_DISABLE_RENAMING +#define U_DISABLE_RENAMING 0 +#endif + +/** + * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS + * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h. + * utypes.h includes those headers if this macro is defined to 0. + * Otherwise, each of those headers must be included explicitly when using one of their macros. + * Defaults to 0 for backward compatibility, except inside ICU. + * @stable ICU 49 + */ +#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS + /* Use the predefined value. 
*/ +#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \ + defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \ + defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1 +#else +# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0 +#endif + +/** + * \def U_OVERRIDE_CXX_ALLOCATION + * Determines whether to override new and delete. + * ICU is normally built such that all of its C++ classes, via their UMemory base, + * override operators new and delete to use its internal, customizable, + * non-exception-throwing memory allocation functions. (Default value 1 for this macro.) + * + * This is especially important when the application and its libraries use multiple heaps. + * For example, on Windows, this allows the ICU DLL to be used by + * applications that statically link the C Runtime library. + * + * @stable ICU 2.2 + */ +#ifndef U_OVERRIDE_CXX_ALLOCATION +#define U_OVERRIDE_CXX_ALLOCATION 1 +#endif + +/** + * \def U_ENABLE_TRACING + * Determines whether to enable tracing. + * @internal + */ +#ifndef U_ENABLE_TRACING +#define U_ENABLE_TRACING 0 +#endif + +/** + * \def UCONFIG_ENABLE_PLUGINS + * Determines whether to enable ICU plugins. + * @internal + */ +#ifndef UCONFIG_ENABLE_PLUGINS +#define UCONFIG_ENABLE_PLUGINS 0 +#endif + +/** + * \def U_ENABLE_DYLOAD + * Whether to enable Dynamic loading in ICU. + * @internal + */ +#ifndef U_ENABLE_DYLOAD +#define U_ENABLE_DYLOAD 1 +#endif + +/** + * \def U_CHECK_DYLOAD + * Whether to test Dynamic loading as an OS capability. + * @internal + */ +#ifndef U_CHECK_DYLOAD +#define U_CHECK_DYLOAD 1 +#endif + +/** + * \def U_DEFAULT_SHOW_DRAFT + * Do we allow ICU users to use the draft APIs by default? 
+ * @internal + */ +#ifndef U_DEFAULT_SHOW_DRAFT +#define U_DEFAULT_SHOW_DRAFT 1 +#endif + +/*===========================================================================*/ +/* Custom icu entry point renaming */ +/*===========================================================================*/ + +/** + * \def U_HAVE_LIB_SUFFIX + * 1 if a custom library suffix is set. + * @internal + */ +#ifdef U_HAVE_LIB_SUFFIX + /* Use the predefined value. */ +#elif defined(U_LIB_SUFFIX_C_NAME) +# define U_HAVE_LIB_SUFFIX 1 +#endif + +/** + * \def U_LIB_SUFFIX_C_NAME_STRING + * Defines the library suffix as a string with C syntax. + * @internal + */ +#ifdef U_LIB_SUFFIX_C_NAME_STRING + /* Use the predefined value. */ +#elif defined(U_LIB_SUFFIX_C_NAME) +# define CONVERT_TO_STRING(s) #s +# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME) +#else +# define U_LIB_SUFFIX_C_NAME_STRING "" +#endif + +/* common/i18n library switches --------------------------------------------- */ + +/** + * \def UCONFIG_ONLY_COLLATION + * This switch turns off modules that are not needed for collation. + * + * It does not turn off legacy conversion because that is necessary + * for ICU to work on EBCDIC platforms (for the default converter). + * If you want "only collation" and do not build for EBCDIC, + * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_ONLY_COLLATION +# define UCONFIG_ONLY_COLLATION 0 +#endif + +#if UCONFIG_ONLY_COLLATION + /* common library */ +# define UCONFIG_NO_BREAK_ITERATION 1 +# define UCONFIG_NO_IDNA 1 + + /* i18n library */ +# if UCONFIG_NO_COLLATION +# error Contradictory collation switches in uconfig.h. 
+# endif +# define UCONFIG_NO_FORMATTING 1 +# define UCONFIG_NO_TRANSLITERATION 1 +# define UCONFIG_NO_REGULAR_EXPRESSIONS 1 +#endif + +/* common library switches -------------------------------------------------- */ + +/** + * \def UCONFIG_NO_FILE_IO + * This switch turns off all file access in the common library + * where file access is only used for data loading. + * ICU data must then be provided in the form of a data DLL (or with an + * equivalent way to link to the data residing in an executable, + * as in building a combined library with both the common library's code and + * the data), or via udata_setCommonData(). + * Application data must be provided via udata_setAppData() or by using + * "open" functions that take pointers to data, for example ucol_openBinary(). + * + * File access is not used at all in the i18n library. + * + * File access cannot be turned off for the icuio library or for the ICU + * test suites and ICU tools. + * + * @stable ICU 3.6 + */ +#ifndef UCONFIG_NO_FILE_IO +# define UCONFIG_NO_FILE_IO 0 +#endif + +#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR) +# error Contradictory file io switches in uconfig.h. +#endif + +/** + * \def UCONFIG_NO_CONVERSION + * ICU will not completely build (compiling the tools fails) with this + * switch turned on. + * This switch turns off all converters. + * + * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 + * in utypes.h if char* strings in your environment are always in UTF-8. 
+ * + * @stable ICU 3.2 + * @see U_CHARSET_IS_UTF8 + */ +#ifndef UCONFIG_NO_CONVERSION +# define UCONFIG_NO_CONVERSION 0 +#endif + +#if UCONFIG_NO_CONVERSION +# define UCONFIG_NO_LEGACY_CONVERSION 1 +#endif + +/** + * \def UCONFIG_ONLY_HTML_CONVERSION + * This switch turns off all of the converters NOT listed in + * the HTML encoding standard: + * http://www.w3.org/TR/encoding/#names-and-labels + * + * This is not possible on EBCDIC platforms + * because they need ibm-37 or ibm-1047 default converters. + * + * @stable ICU 55 + */ +#ifndef UCONFIG_ONLY_HTML_CONVERSION +# define UCONFIG_ONLY_HTML_CONVERSION 0 +#endif + +/** + * \def UCONFIG_NO_LEGACY_CONVERSION + * This switch turns off all converters except for + * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) + * - US-ASCII + * - ISO-8859-1 + * + * Turning off legacy conversion is not possible on EBCDIC platforms + * because they need ibm-37 or ibm-1047 default converters. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_LEGACY_CONVERSION +# define UCONFIG_NO_LEGACY_CONVERSION 0 +#endif + +/** + * \def UCONFIG_NO_NORMALIZATION + * This switch turns off normalization. + * It implies turning off several other services as well, for example + * collation and IDNA. + * + * @stable ICU 2.6 + */ +#ifndef UCONFIG_NO_NORMALIZATION +# define UCONFIG_NO_NORMALIZATION 0 +#endif + +#if UCONFIG_NO_NORMALIZATION + /* common library */ + /* ICU 50 CJK dictionary BreakIterator uses normalization */ +# define UCONFIG_NO_BREAK_ITERATION 1 + /* IDNA (UTS #46) is implemented via normalization */ +# define UCONFIG_NO_IDNA 1 + + /* i18n library */ +# if UCONFIG_ONLY_COLLATION +# error Contradictory collation switches in uconfig.h. +# endif +# define UCONFIG_NO_COLLATION 1 +# define UCONFIG_NO_TRANSLITERATION 1 +#endif + +/** + * \def UCONFIG_NO_BREAK_ITERATION + * This switch turns off break iteration. 
+ * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_BREAK_ITERATION +# define UCONFIG_NO_BREAK_ITERATION 0 +#endif + +/** + * \def UCONFIG_NO_IDNA + * This switch turns off IDNA. + * + * @stable ICU 2.6 + */ +#ifndef UCONFIG_NO_IDNA +# define UCONFIG_NO_IDNA 0 +#endif + +/** + * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE + * Determines the default UMessagePatternApostropheMode. + * See the documentation for that enum. + * + * @stable ICU 4.8 + */ +#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE +# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL +#endif + +/* i18n library switches ---------------------------------------------------- */ + +/** + * \def UCONFIG_NO_COLLATION + * This switch turns off collation and collation-based string search. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_COLLATION +# define UCONFIG_NO_COLLATION 0 +#endif + +/** + * \def UCONFIG_NO_FORMATTING + * This switch turns off formatting and calendar/timezone services. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_FORMATTING +# define UCONFIG_NO_FORMATTING 0 +#endif + +/** + * \def UCONFIG_NO_TRANSLITERATION + * This switch turns off transliteration. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_TRANSLITERATION +# define UCONFIG_NO_TRANSLITERATION 0 +#endif + +/** + * \def UCONFIG_NO_REGULAR_EXPRESSIONS + * This switch turns off regular expressions. + * + * @stable ICU 2.4 + */ +#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS +# define UCONFIG_NO_REGULAR_EXPRESSIONS 0 +#endif + +/** + * \def UCONFIG_NO_SERVICE + * This switch turns off service registration. + * + * @stable ICU 3.2 + */ +#ifndef UCONFIG_NO_SERVICE +# define UCONFIG_NO_SERVICE 0 +#endif + +/** + * \def UCONFIG_HAVE_PARSEALLINPUT + * This switch turns on the "parse all input" attribute. Binary incompatible. 
+ * + * @internal + */ +#ifndef UCONFIG_HAVE_PARSEALLINPUT +# define UCONFIG_HAVE_PARSEALLINPUT 1 +#endif + + +/** + * \def UCONFIG_FORMAT_FASTPATHS_49 + * This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols + * + * @internal + */ +#ifndef UCONFIG_FORMAT_FASTPATHS_49 +# define UCONFIG_FORMAT_FASTPATHS_49 1 +#endif + +/** + * \def UCONFIG_NO_FILTERED_BREAK_ITERATION + * This switch turns off filtered break iteration code. + * + * @internal + */ +#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION +# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0 +#endif + +#endif diff --git a/vendor/icu/include/unicode/udata.h b/vendor/icu/include/unicode/udata.h new file mode 100644 index 0000000000..0106be0c93 --- /dev/null +++ b/vendor/icu/include/unicode/udata.h @@ -0,0 +1,437 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: udata.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999oct25 +* created by: Markus W. Scherer +*/ + +#ifndef __UDATA_H__ +#define __UDATA_H__ + +#include <unicode/utypes.h> +#include <unicode/localpointer.h> + +U_CDECL_BEGIN + +/** + * \file + * \brief C API: Data loading interface + * + * <h2>Information about data loading interface</h2> + * + * This API is used to find and efficiently load data for ICU and applications + * using ICU. It provides an abstract interface that specifies a data type and + * name to find and load the data. Normally this API is used by other ICU APIs + * to load required data out of the ICU data library, but it can be used to + * load data out of other places. 
+ * + * See the User Guide Data Management chapter. + */ + +#ifndef U_HIDE_INTERNAL_API +/** + * Character used to separate package names from tree names + * @internal ICU 3.0 + */ +#define U_TREE_SEPARATOR '-' + +/** + * String used to separate package names from tree names + * @internal ICU 3.0 + */ +#define U_TREE_SEPARATOR_STRING "-" + +/** + * Character used to separate parts of entry names + * @internal ICU 3.0 + */ +#define U_TREE_ENTRY_SEP_CHAR '/' + +/** + * String used to separate parts of entry names + * @internal ICU 3.0 + */ +#define U_TREE_ENTRY_SEP_STRING "/" + +/** + * Alias for standard ICU data + * @internal ICU 3.0 + */ +#define U_ICUDATA_ALIAS "ICUDATA" + +#endif /* U_HIDE_INTERNAL_API */ + +/** + * UDataInfo contains the properties about the requested data. + * This is meta data. + * + * <p>This structure may grow in the future, indicated by the + * <code>size</code> field.</p> + * + * <p>ICU data must be at least 8-aligned, and should be 16-aligned. + * The UDataInfo struct begins 4 bytes after the start of the data item, + * so it is 4-aligned. + * + * <p>The platform data property fields help determine if a data + * file can be efficiently used on a given machine. + * The particular fields are of importance only if the data + * is affected by the properties - if there is integer data + * with word sizes > 1 byte, char* text, or UChar* text.</p> + * + * <p>The implementation for the <code>udata_open[Choice]()</code> + * functions may reject data based on the value in <code>isBigEndian</code>. + * No other field is used by the <code>udata</code> API implementation.</p> + * + * <p>The <code>dataFormat</code> may be used to identify + * the kind of data, e.g. a converter table.</p> + * + * <p>The <code>formatVersion</code> field should be used to + * make sure that the format can be interpreted. 
+ * It may be a good idea to check only for the one or two highest + * of the version elements to allow the data memory to + * get more or somewhat rearranged contents, for as long + * as the using code can still interpret the older contents.</p> + * + * <p>The <code>dataVersion</code> field is intended to be a + * common place to store the source version of the data; + * for data from the Unicode character database, this could + * reflect the Unicode version.</p> + * + * @stable ICU 2.0 + */ +typedef struct { + /** sizeof(UDataInfo) + * @stable ICU 2.0 */ + uint16_t size; + + /** unused, set to 0 + * @stable ICU 2.0*/ + uint16_t reservedWord; + + /* platform data properties */ + /** 0 for little-endian machine, 1 for big-endian + * @stable ICU 2.0 */ + uint8_t isBigEndian; + + /** see U_CHARSET_FAMILY values in utypes.h + * @stable ICU 2.0*/ + uint8_t charsetFamily; + + /** sizeof(UChar), one of { 1, 2, 4 } + * @stable ICU 2.0*/ + uint8_t sizeofUChar; + + /** unused, set to 0 + * @stable ICU 2.0*/ + uint8_t reservedByte; + + /** data format identifier + * @stable ICU 2.0*/ + uint8_t dataFormat[4]; + + /** versions: [0] major [1] minor [2] milli [3] micro + * @stable ICU 2.0*/ + uint8_t formatVersion[4]; + + /** versions: [0] major [1] minor [2] milli [3] micro + * @stable ICU 2.0*/ + uint8_t dataVersion[4]; +} UDataInfo; + +/* API for reading data -----------------------------------------------------*/ + +/** + * Forward declaration of the data memory type. + * @stable ICU 2.0 + */ +typedef struct UDataMemory UDataMemory; + +/** + * Callback function for udata_openChoice(). + * @param context parameter passed into <code>udata_openChoice()</code>. + * @param type The type of the data as passed into <code>udata_openChoice()</code>. + * It may be <code>NULL</code>. + * @param name The name of the data as passed into <code>udata_openChoice()</code>. 
+ * @param pInfo A pointer to the <code>UDataInfo</code> structure + * of data that has been loaded and will be returned + * by <code>udata_openChoice()</code> if this function + * returns <code>TRUE</code>. + * @return TRUE if the current data memory is acceptable + * @stable ICU 2.0 + */ +typedef UBool U_CALLCONV +UDataMemoryIsAcceptable(void *context, + const char *type, const char *name, + const UDataInfo *pInfo); + + +/** + * Convenience function. + * This function works the same as <code>udata_openChoice</code> + * except that any data that matches the type and name + * is assumed to be acceptable. + * @param path Specifies an absolute path and/or a basename for the + * finding of the data in the file system. + * <code>NULL</code> for ICU data. + * @param type A string that specifies the type of data to be loaded. + * For example, resource bundles are loaded with type "res", + * conversion tables with type "cnv". + * This may be <code>NULL</code> or empty. + * @param name A string that specifies the name of the data. + * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>. + * @return A pointer (handle) to a data memory object, or <code>NULL</code> + * if an error occurs. Call <code>udata_getMemory()</code> + * to get a pointer to the actual data. + * + * @see udata_openChoice + * @stable ICU 2.0 + */ +U_STABLE UDataMemory * U_EXPORT2 +udata_open(const char *path, const char *type, const char *name, + UErrorCode *pErrorCode); + +/** + * Data loading function. + * This function is used to efficiently find and load data for + * ICU and applications using ICU. + * It provides an abstract interface that allows specifying a data + * type and name to find and load the data. + * + * <p>The implementation depends on platform properties and user preferences + * and may involve loading shared libraries (DLLs), mapping + * files into memory, or fopen()/fread() files. + * It may also involve using static memory or database queries etc. 
+ * Several or all data items may be combined into one entity + * (DLL, memory-mappable file).</p> + * + * <p>The data is always preceded by a header that includes + * a <code>UDataInfo</code> structure. + * The caller's <code>isAcceptable()</code> function is called to make + * sure that the data is useful. It may be called several times if it + * rejects the data and there is more than one location with data + * matching the type and name.</p> + * + * <p>If <code>path==NULL</code>, then ICU data is loaded. + * Otherwise, it is separated into a basename and a basename-less directory string. + * The basename is used as the data package name, and the directory is + * logically prepended to the ICU data directory string.</p> + * + * <p>For details about ICU data loading see the User Guide + * Data Management chapter. (http://icu-project.org/userguide/icudata.html)</p> + * + * @param path Specifies an absolute path and/or a basename for the + * finding of the data in the file system. + * <code>NULL</code> for ICU data. + * @param type A string that specifies the type of data to be loaded. + * For example, resource bundles are loaded with type "res", + * conversion tables with type "cnv". + * This may be <code>NULL</code> or empty. + * @param name A string that specifies the name of the data. + * @param isAcceptable This function is called to verify that loaded data + * is useful for the client code. If it returns FALSE + * for all data items, then <code>udata_openChoice()</code> + * will return with an error. + * @param context Arbitrary parameter to be passed into isAcceptable. + * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>. + * @return A pointer (handle) to a data memory object, or <code>NULL</code> + * if an error occurs. Call <code>udata_getMemory()</code> + * to get a pointer to the actual data. 
+ * @stable ICU 2.0 + */ +U_STABLE UDataMemory * U_EXPORT2 +udata_openChoice(const char *path, const char *type, const char *name, + UDataMemoryIsAcceptable *isAcceptable, void *context, + UErrorCode *pErrorCode); + +/** + * Close the data memory. + * This function must be called to allow the system to + * release resources associated with this data memory. + * @param pData The pointer to the data memory object + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +udata_close(UDataMemory *pData); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUDataMemoryPointer + * "Smart pointer" class, closes a UDataMemory via udata_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close); + +U_NAMESPACE_END + +#endif + +/** + * Get the pointer to the actual data inside the data memory. + * The data is read-only. + * + * ICU data must be at least 8-aligned, and should be 16-aligned. + * + * @param pData The pointer to the data memory object + * @return A read-only pointer to the actual data inside the data memory + * @stable ICU 2.0 + */ +U_STABLE const void * U_EXPORT2 +udata_getMemory(UDataMemory *pData); + +/** + * Get the information from the data memory header. + * This gives access to the header containing + * platform data properties etc. which is not part of + * the data itself and can therefore not be accessed + * via the pointer that <code>udata_getMemory()</code> returns. + * + * @param pData pointer to the data memory object + * @param pInfo pointer to a UDataInfo object; + * its <code>size</code> field must be set correctly, + * typically to <code>sizeof(UDataInfo)</code>. + * + * <code>*pInfo</code> will be filled with the UDataInfo structure + * in the data memory object. If this structure is smaller than + * <code>pInfo->size</code>, then the <code>size</code> will be + * adjusted and only part of the structure will be filled. 
+ * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +udata_getInfo(UDataMemory *pData, UDataInfo *pInfo); + +/** + * This function bypasses the normal ICU data loading process and + * allows you to force ICU's system data to come out of a user-specified + * area in memory. + * + * ICU data must be at least 8-aligned, and should be 16-aligned. + * See http://userguide.icu-project.org/icudata + * + * The format of this data is that of the icu common data file, as is + * generated by the pkgdata tool with mode=common or mode=dll. + * You can read in a whole common mode file and pass the address to the start of the + * data, or (with the appropriate link options) pass in the pointer to + * the data that has been loaded from a dll by the operating system, + * as shown in this code: + * + * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT []; + * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool + * UErrorCode status = U_ZERO_ERROR; + * + * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status); + * + * It is important that the declaration be as above. The entry point + * must not be declared as an extern void*. + * + * Starting with ICU 4.4, it is possible to set several data packages, + * one per call to this function. + * udata_open() will look for data in the multiple data packages in the order + * in which they were set. + * The position of the linked-in or default-name ICU .data package in the + * search list depends on when the first data item is loaded that is not contained + * in the already explicitly set packages. + * If data was loaded implicitly before the first call to this function + * (for example, via opening a converter, constructing a UnicodeString + * from default-codepage data, using formatting or collation APIs, etc.), + * then the default data will be first in the list. + * + * This function has no effect on application (non ICU) data. See udata_setAppData() + * for similar functionality for application data. 
+ * + * @param data pointer to ICU common data + * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code> + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +udata_setCommonData(const void *data, UErrorCode *err); + + +/** + * This function bypasses the normal ICU data loading process for application-specific + * data and allows you to force it to come out of a user-specified + * pointer. + * + * ICU data must be at least 8-aligned, and should be 16-aligned. + * See http://userguide.icu-project.org/icudata + * + * The format of this data is that of the icu common data file, like 'icudt26l.dat' + * or the corresponding shared library (DLL) file. + * The application must read in or otherwise construct an image of the data and then + * pass the address of it to this function. + * + * + * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if + * data with the specified path has already been opened, or + * if setAppData with the same path has already been called. + * Any such calls to setAppData will have no effect. + * + * + * @param packageName the package name by which the application will refer + * to (open) this data + * @param data pointer to the data + * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code> + * @see udata_setCommonData + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +udata_setAppData(const char *packageName, const void *data, UErrorCode *err); + +/** + * Possible settings for udata_setFileAccess() + * @see udata_setFileAccess + * @stable ICU 3.4 + */ +typedef enum UDataFileAccess { + /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */ + UDATA_FILES_FIRST, + /** An alias for the default access mode. @stable ICU 3.4 */ + UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST, + /** ICU only loads data from packages, not from single files. 
@stable ICU 3.4 */ + UDATA_ONLY_PACKAGES, + /** ICU loads data from packages first, and only from single files + if the data cannot be found in a package. @stable ICU 3.4 */ + UDATA_PACKAGES_FIRST, + /** ICU does not access the file system for data loading. @stable ICU 3.4 */ + UDATA_NO_FILES, +#ifndef U_HIDE_DEPRECATED_API + /** + * Number of real UDataFileAccess values. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + UDATA_FILE_ACCESS_COUNT +#endif // U_HIDE_DEPRECATED_API +} UDataFileAccess; + +/** + * This function may be called to control how ICU loads data. It must be called + * before any ICU data is loaded, including application data loaded with + * ures/ResourceBundle or udata APIs. This function is not multithread safe. + * The results of calling it while other threads are loading data are undefined. + * @param access The type of file access to be used + * @param status Error code. + * @see UDataFileAccess + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +udata_setFileAccess(UDataFileAccess access, UErrorCode *status); + +U_CDECL_END + +#endif diff --git a/vendor/icu/include/unicode/uiter.h b/vendor/icu/include/unicode/uiter.h new file mode 100644 index 0000000000..c15fc50d23 --- /dev/null +++ b/vendor/icu/include/unicode/uiter.h @@ -0,0 +1,709 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2011 International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uiter.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jan18 +* created by: Markus W. 
Scherer +*/ + +#ifndef __UITER_H__ +#define __UITER_H__ + +/** + * \file + * \brief C API: Unicode Character Iteration + * + * @see UCharIterator + */ + +#include <unicode/utypes.h> + +#if U_SHOW_CPLUSPLUS_API + U_NAMESPACE_BEGIN + + class CharacterIterator; + class Replaceable; + + U_NAMESPACE_END +#endif + +U_CDECL_BEGIN + +struct UCharIterator; +typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */ + +/** + * Origin constants for UCharIterator.getIndex() and UCharIterator.move(). + * @see UCharIteratorMove + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef enum UCharIteratorOrigin { + UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH +} UCharIteratorOrigin; + +/** Constants for UCharIterator. @stable ICU 2.6 */ +enum { + /** + * Constant value that may be returned by UCharIteratorMove + * indicating that the final UTF-16 index is not known, but that the move succeeded. + * This can occur when moving relative to limit or length, or + * when moving relative to the current index after a setState() + * when the current UTF-16 index is not known. + * + * It would be very inefficient to have to count from the beginning of the text + * just to get the current/limit/length index after moving relative to it. + * The actual index can be determined with getIndex(UITER_CURRENT) + * which will count the UChars if necessary. + * + * @stable ICU 2.6 + */ + UITER_UNKNOWN_INDEX=-2 +}; + + +/** + * Constant for UCharIterator getState() indicating an error or + * an unknown state. + * Returned by uiter_getState()/UCharIteratorGetState + * when an error occurs. + * Also, some UCharIterator implementations may not be able to return + * a valid state for each position. This will be clearly documented + * for each such iterator (none of the public ones here). + * + * @stable ICU 2.6 + */ +#define UITER_NO_STATE ((uint32_t)0xffffffff) + +/** + * Function type declaration for UCharIterator.getIndex(). 
+ * + * Gets the current position, or the start or limit of the + * iteration range. + * + * This function may perform slowly for UITER_CURRENT after setState() was called, + * or for UITER_LENGTH, because an iterator implementation may have to count + * UChars if the underlying storage is not UTF-16. + * + * @param iter the UCharIterator structure ("this pointer") + * @param origin get the 0, start, limit, length, or current index + * @return the requested index, or U_SENTINEL in an error condition + * + * @see UCharIteratorOrigin + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin); + +/** + * Function type declaration for UCharIterator.move(). + * + * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index). + * + * Moves the current position relative to the start or limit of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * Out of bounds movement will be pinned to the start or limit. + * + * This function may perform slowly for moving relative to UITER_LENGTH + * because an iterator implementation may have to count the rest of the + * UChars if the native storage is not UTF-16. + * + * When moving relative to the limit or length, or + * relative to the current position after setState() was called, + * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient + * determination of the actual UTF-16 index. + * The actual index can be determined with getIndex(UITER_CURRENT) + * which will count the UChars if necessary. + * See UITER_UNKNOWN_INDEX for details. 
+ * + * @param iter the UCharIterator structure ("this pointer") + * @param delta can be positive, zero, or negative + * @param origin move relative to the 0, start, limit, length, or current index + * @return the new index, or U_SENTINEL on an error condition, + * or UITER_UNKNOWN_INDEX when the index is not known. + * + * @see UCharIteratorOrigin + * @see UCharIterator + * @see UITER_UNKNOWN_INDEX + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin); + +/** + * Function type declaration for UCharIterator.hasNext(). + * + * Check if current() and next() can still + * return another code unit. + * + * @param iter the UCharIterator structure ("this pointer") + * @return boolean value for whether current() and next() can still return another code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UBool U_CALLCONV +UCharIteratorHasNext(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.hasPrevious(). + * + * Check if previous() can still return another code unit. + * + * @param iter the UCharIterator structure ("this pointer") + * @return boolean value for whether previous() can still return another code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UBool U_CALLCONV +UCharIteratorHasPrevious(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.current(). + * + * Return the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code unit + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorCurrent(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.next(). 
+ * + * Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code unit (and post-increment the current index) + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorNext(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.previous(). + * + * Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code unit (after pre-decrementing the current index) + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef UChar32 U_CALLCONV +UCharIteratorPrevious(UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.reservedFn(). + * Reserved for future use. + * + * @param iter the UCharIterator structure ("this pointer") + * @param something some integer argument + * @return some integer + * + * @see UCharIterator + * @stable ICU 2.1 + */ +typedef int32_t U_CALLCONV +UCharIteratorReserved(UCharIterator *iter, int32_t something); + +/** + * Function type declaration for UCharIterator.getState(). + * + * Get the "state" of the iterator in the form of a single 32-bit word. + * It is recommended that the state value be calculated to be as small as + * is feasible. For strings with limited lengths, fewer than 32 bits may + * be sufficient. + * + * This is used together with setState()/UCharIteratorSetState + * to save and restore the iterator position more efficiently than with + * getIndex()/move(). + * + * The iterator state is defined as a uint32_t value because it is designed + * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state + * of the character iterator. 
+ * + * With some UCharIterator implementations (e.g., UTF-8), + * getting and setting the UTF-16 index with existing functions + * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but + * relatively slow because the iterator has to "walk" from a known index + * to the requested one. + * This takes more time the farther it needs to go. + * + * An opaque state value allows an iterator implementation to provide + * an internal index (UTF-8: the source byte array index) for + * fast, constant-time restoration. + * + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because + * the UTF-16 index may not be restored as well, but the iterator can deliver + * the correct text contents and move relative to the current position + * without performance degradation. + * + * Some UCharIterator implementations may not be able to return + * a valid state for each position, in which case they return UITER_NO_STATE instead. + * This will be clearly documented for each such iterator (none of the public ones here). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the state word + * + * @see UCharIterator + * @see UCharIteratorSetState + * @see UITER_NO_STATE + * @stable ICU 2.6 + */ +typedef uint32_t U_CALLCONV +UCharIteratorGetState(const UCharIterator *iter); + +/** + * Function type declaration for UCharIterator.setState(). + * + * Restore the "state" of the iterator using a state word from a getState() call. + * The iterator object need not be the same one as for which getState() was called, + * but it must be of the same type (set up using the same uiter_setXYZ function) + * and it must iterate over the same string + * (binary identical regardless of memory address). + * For more about the state word see UCharIteratorGetState. 
+ * + * After calling setState(), a getIndex(UITER_CURRENT) may be slow because + * the UTF-16 index may not be restored as well, but the iterator can deliver + * the correct text contents and move relative to the current position + * without performance degradation. + * + * @param iter the UCharIterator structure ("this pointer") + * @param state the state word from a getState() call + * on a same-type, same-string iterator + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see UCharIterator + * @see UCharIteratorGetState + * @stable ICU 2.6 + */ +typedef void U_CALLCONV +UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); + + +/** + * C API for code unit iteration. + * This can be used as a C wrapper around + * CharacterIterator, Replaceable, or implemented using simple strings, etc. + * + * There are two roles for using UCharIterator: + * + * A "provider" sets the necessary function pointers and controls the "protected" + * fields of the UCharIterator structure. A "provider" passes a UCharIterator + * into C APIs that need a UCharIterator as an abstract, flexible string interface. + * + * Implementations of such C APIs are "callers" of UCharIterator functions; + * they only use the "public" function pointers and never access the "protected" + * fields directly. + * + * The current() and next() functions only check the current index against the + * limit, and previous() only checks the current index against the start, + * to see if the iterator already reached the end of the iteration range. + * + * The assumption - in all iterators - is that the index is moved via the API, + * which means it won't go out of bounds, or the index is modified by + * user code that knows enough about the iterator implementation to set valid + * index values. 
+ * + * UCharIterator functions return code unit values 0..0xffff, + * or U_SENTINEL if the iteration bounds are reached. + * + * @stable ICU 2.1 + */ +struct UCharIterator { + /** + * (protected) Pointer to string or wrapped object or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + const void *context; + + /** + * (protected) Length of string or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t length; + + /** + * (protected) Start index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t start; + + /** + * (protected) Current index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t index; + + /** + * (protected) Limit index or similar. + * Not used by caller. + * @stable ICU 2.1 + */ + int32_t limit; + + /** + * (protected) Used by UTF-8 iterators and possibly others. + * @stable ICU 2.1 + */ + int32_t reservedField; + + /** + * (public) Returns the current position or the + * start or limit index of the iteration range. + * + * @see UCharIteratorGetIndex + * @stable ICU 2.1 + */ + UCharIteratorGetIndex *getIndex; + + /** + * (public) Moves the current position relative to the start or limit of the + * iteration range, or relative to the current position itself. + * The movement is expressed in numbers of code units forward + * or backward by specifying a positive or negative delta. + * + * @see UCharIteratorMove + * @stable ICU 2.1 + */ + UCharIteratorMove *move; + + /** + * (public) Check if current() and next() can still + * return another code unit. + * + * @see UCharIteratorHasNext + * @stable ICU 2.1 + */ + UCharIteratorHasNext *hasNext; + + /** + * (public) Check if previous() can still return another code unit. + * + * @see UCharIteratorHasPrevious + * @stable ICU 2.1 + */ + UCharIteratorHasPrevious *hasPrevious; + + /** + * (public) Return the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). 
+ * + * @see UCharIteratorCurrent + * @stable ICU 2.1 + */ + UCharIteratorCurrent *current; + + /** + * (public) Return the code unit at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @see UCharIteratorNext + * @stable ICU 2.1 + */ + UCharIteratorNext *next; + + /** + * (public) Decrement the index and return the code unit from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @see UCharIteratorPrevious + * @stable ICU 2.1 + */ + UCharIteratorPrevious *previous; + + /** + * (public) Reserved for future use. Currently NULL. + * + * @see UCharIteratorReserved + * @stable ICU 2.1 + */ + UCharIteratorReserved *reservedFn; + + /** + * (public) Return the state of the iterator, to be restored later with setState(). + * This function pointer is NULL if the iterator does not implement it. + * + * @see UCharIteratorGetState + * @stable ICU 2.6 + */ + UCharIteratorGetState *getState; + + /** + * (public) Restore the iterator state from the state word from a call + * to getState(). + * This function pointer is NULL if the iterator does not implement it. + * + * @see UCharIteratorSetState + * @stable ICU 2.6 + */ + UCharIteratorSetState *setState; +}; + +/** + * Helper function for UCharIterator to get the code point + * at the current index. + * + * Return the code point that includes the code unit at the current position, + * or U_SENTINEL if there is none (index is at the limit). + * If the current code unit is a lead or trail surrogate, + * then the following or preceding surrogate is used to form + * the code point value. 
+ * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point + * + * @see UCharIterator + * @see U16_GET + * @see UnicodeString::char32At() + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_current32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the next code point. + * + * Return the code point at the current index and increment + * the index (post-increment, like s[i++]), + * or return U_SENTINEL if there is none (index is at the limit). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the current code point (and post-increment the current index) + * + * @see UCharIterator + * @see U16_NEXT + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_next32(UCharIterator *iter); + +/** + * Helper function for UCharIterator to get the previous code point. + * + * Decrement the index and return the code point from there + * (pre-decrement, like s[--i]), + * or return U_SENTINEL if there is none (index is at the start). + * + * @param iter the UCharIterator structure ("this pointer") + * @return the previous code point (after pre-decrementing the current index) + * + * @see UCharIterator + * @see U16_PREV + * @stable ICU 2.1 + */ +U_STABLE UChar32 U_EXPORT2 +uiter_previous32(UCharIterator *iter); + +/** + * Get the "state" of the iterator in the form of a single 32-bit word. + * This is a convenience function that calls iter->getState(iter) + * if iter->getState is not NULL; + * if it is NULL or any other error occurs, then UITER_NO_STATE is returned. + * + * Some UCharIterator implementations may not be able to return + * a valid state for each position, in which case they return UITER_NO_STATE instead. + * This will be clearly documented for each such iterator (none of the public ones here). 
+ * + * @param iter the UCharIterator structure ("this pointer") + * @return the state word + * + * @see UCharIterator + * @see UCharIteratorGetState + * @see UITER_NO_STATE + * @stable ICU 2.6 + */ +U_STABLE uint32_t U_EXPORT2 +uiter_getState(const UCharIterator *iter); + +/** + * Restore the "state" of the iterator using a state word from a getState() call. + * This is a convenience function that calls iter->setState(iter, state, pErrorCode) + * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set. + * + * @param iter the UCharIterator structure ("this pointer") + * @param state the state word from a getState() call + * on a same-type, same-string iterator + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @see UCharIterator + * @see UCharIteratorSetState + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode); + +/** + * Set up a UCharIterator to iterate over a string. + * + * Sets the UCharIterator function pointers for iteration over the string s + * with iteration boundaries start=index=0 and length=limit=string length. + * The "provider" may set the start, index, and limit values at any time + * within the range 0..length. + * The length field will be ignored. + * + * The string pointer s is set into UCharIterator.context without copying + * or reallocating the string contents. + * + * getState() simply returns the current index. + * move() will always return the final index. 
+ * + * @param iter UCharIterator structure to be set for iteration + * @param s String to iterate over + * @param length Length of s, or -1 if NUL-terminated + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setString(UCharIterator *iter, const UChar *s, int32_t length); + +/** + * Set up a UCharIterator to iterate over a UTF-16BE string + * (byte vector with a big-endian pair of bytes per UChar). + * + * Everything works just like with a normal UChar iterator (uiter_setString), + * except that UChars are assembled from byte pairs, + * and that the length argument here indicates an even number of bytes. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param s UTF-16BE string to iterate over + * @param length Length of s as an even number of bytes, or -1 if NUL-terminated + * (NUL means pair of 0 bytes at even index from s) + * + * @see UCharIterator + * @see uiter_setString + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length); + +/** + * Set up a UCharIterator to iterate over a UTF-8 string. + * + * Sets the UCharIterator function pointers for iteration over the UTF-8 string s + * with UTF-8 iteration boundaries 0 and length. + * The implementation counts the UTF-16 index on the fly and + * lazily evaluates the UTF-16 length of the text. + * + * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length. + * When the reservedField is not 0, then it contains a supplementary code point + * and the UTF-16 index is between the two corresponding surrogates. + * At that point, the UTF-8 index is behind that code point. + * + * The UTF-8 string pointer s is set into UCharIterator.context without copying + * or reallocating the string contents. 
+ * + * getState() returns a state value consisting of + * - the current UTF-8 source byte index (bits 31..1) + * - a flag (bit 0) that indicates whether the UChar position is in the middle + * of a surrogate pair + * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point) + * + * getState() cannot also encode the UTF-16 index in the state value. + * move(relative to limit or length), or + * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX. + * + * @param iter UCharIterator structure to be set for iteration + * @param s UTF-8 string to iterate over + * @param length Length of s in bytes, or -1 if NUL-terminated + * + * @see UCharIterator + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length); + +#if U_SHOW_CPLUSPLUS_API + +/** + * Set up a UCharIterator to wrap around a C++ CharacterIterator. + * + * Sets the UCharIterator function pointers for iteration using the + * CharacterIterator charIter. + * + * The CharacterIterator pointer charIter is set into UCharIterator.context + * without copying or cloning the CharacterIterator object. + * The other "protected" UCharIterator fields are set to 0 and will be ignored. + * The iteration index and boundaries are controlled by the CharacterIterator. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param charIter CharacterIterator to wrap + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter); + +/** + * Set up a UCharIterator to iterate over a C++ Replaceable. + * + * Sets the UCharIterator function pointers for iteration over the + * Replaceable rep with iteration boundaries start=index=0 and + * length=limit=rep->length(). 
+ * The "provider" may set the start, index, and limit values at any time + * within the range 0..length=rep->length(). + * The length field will be ignored. + * + * The Replaceable pointer rep is set into UCharIterator.context without copying + * or cloning/reallocating the Replaceable object. + * + * getState() simply returns the current index. + * move() will always return the final index. + * + * @param iter UCharIterator structure to be set for iteration + * @param rep Replaceable to iterate over + * + * @see UCharIterator + * @stable ICU 2.1 + */ +U_STABLE void U_EXPORT2 +uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep); + +#endif + +U_CDECL_END + +#endif diff --git a/vendor/icu/include/unicode/umachine.h b/vendor/icu/include/unicode/umachine.h new file mode 100644 index 0000000000..909c0a5d7d --- /dev/null +++ b/vendor/icu/include/unicode/umachine.h @@ -0,0 +1,424 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: umachine.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +* +* This file defines basic types and constants for ICU to be +* platform-independent. umachine.h and utf.h are included into +* utypes.h to provide all the general definitions for ICU. +* All of these definitions used to be in utypes.h before +* the UTF-handling macros made this unmaintainable. 
+*/ + +#ifndef __UMACHINE_H__ +#define __UMACHINE_H__ + + +/** + * \file + * \brief Basic types and constants for UTF + * + * <h2> Basic types and constants for UTF </h2> + * This file defines basic types and constants for utf.h to be + * platform-independent. umachine.h and utf.h are included into + * utypes.h to provide all the general definitions for ICU. + * All of these definitions used to be in utypes.h before + * the UTF-handling macros made this unmaintainable. + * + */ +/*==========================================================================*/ +/* Include platform-dependent definitions */ +/* which are contained in the platform-specific file platform.h */ +/*==========================================================================*/ + +#include <unicode/ptypes.h> /* platform.h is included in ptypes.h */ + +/* + * ANSI C headers: + * stddef.h defines wchar_t + */ +#include <stddef.h> + +/*==========================================================================*/ +/* For C wrappers, we use the symbol U_STABLE. */ +/* This works properly if the includer is C or C++. */ +/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */ +/*==========================================================================*/ + +/** + * \def U_CFUNC + * This is used in a declaration of a library private ICU C function. + * @stable ICU 2.4 + */ + +/** + * \def U_CDECL_BEGIN + * This is used to begin a declaration of a library private ICU C API. 
+ * @stable ICU 2.4 + */ + +/** + * \def U_CDECL_END + * This is used to end a declaration of a library private ICU C API. + * @stable ICU 2.4 + */ + +#ifdef __cplusplus +# define U_CFUNC extern "C" +# define U_CDECL_BEGIN extern "C" { +# define U_CDECL_END } +#else +# define U_CFUNC extern +# define U_CDECL_BEGIN +# define U_CDECL_END +#endif + +#ifndef U_ATTRIBUTE_DEPRECATED +/** + * \def U_ATTRIBUTE_DEPRECATED + * This is used for GCC specific attributes + * @internal + */ +#if U_GCC_MAJOR_MINOR >= 302 +# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated)) +/** + * \def U_ATTRIBUTE_DEPRECATED + * This is used for Visual C++ specific attributes + * @internal + */ +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated) +#else +# define U_ATTRIBUTE_DEPRECATED +#endif +#endif + +/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/ +#define U_CAPI U_CFUNC U_EXPORT +/** This is used to declare a function as a stable public ICU C API*/ +#define U_STABLE U_CAPI +/** This is used to declare a function as a draft public ICU C API */ +#define U_DRAFT U_CAPI +/** This is used to declare a function as a deprecated public ICU C API */ +#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED +/** This is used to declare a function as an obsolete public ICU C API */ +#define U_OBSOLETE U_CAPI +/** This is used to declare a function as an internal ICU C API */ +#define U_INTERNAL U_CAPI + +/** + * \def U_OVERRIDE + * Defined to the C++11 "override" keyword if available. + * Denotes a class or member which is an override of the base class. + * May result in an error if it is applied to something not an override. + * @internal + */ + +/** + * \def U_FINAL + * Defined to the C++11 "final" keyword if available. + * Denotes a class or member which may not be overridden in subclasses. + * May result in an error if subclasses attempt to override. 
+ * @internal + */ + +#if U_CPLUSPLUS_VERSION >= 11 +/* C++11 */ +#ifndef U_OVERRIDE +#define U_OVERRIDE override +#endif +#ifndef U_FINAL +#define U_FINAL final +#endif +#else +/* not C++11 - define to nothing */ +#ifndef U_OVERRIDE +#define U_OVERRIDE +#endif +#ifndef U_FINAL +#define U_FINAL +#endif +#endif + +/*==========================================================================*/ +/* limits for int32_t etc., like in POSIX inttypes.h */ +/*==========================================================================*/ + +#ifndef INT8_MIN +/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */ +# define INT8_MIN ((int8_t)(-128)) +#endif +#ifndef INT16_MIN +/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */ +# define INT16_MIN ((int16_t)(-32767-1)) +#endif +#ifndef INT32_MIN +/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */ +# define INT32_MIN ((int32_t)(-2147483647-1)) +#endif + +#ifndef INT8_MAX +/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */ +# define INT8_MAX ((int8_t)(127)) +#endif +#ifndef INT16_MAX +/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */ +# define INT16_MAX ((int16_t)(32767)) +#endif +#ifndef INT32_MAX +/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */ +# define INT32_MAX ((int32_t)(2147483647)) +#endif + +#ifndef UINT8_MAX +/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT8_MAX ((uint8_t)(255U)) +#endif +#ifndef UINT16_MAX +/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT16_MAX ((uint16_t)(65535U)) +#endif +#ifndef UINT32_MAX +/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */ +# define UINT32_MAX ((uint32_t)(4294967295U)) +#endif + +#if defined(U_INT64_T_UNAVAILABLE) +# error int64_t is required for decimal format and rule-based number format. 
+#else +# ifndef INT64_C +/** + * Provides a platform independent way to specify a signed 64-bit integer constant. + * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C + * @stable ICU 2.8 + */ +# define INT64_C(c) c ## LL +# endif +# ifndef UINT64_C +/** + * Provides a platform independent way to specify an unsigned 64-bit integer constant. + * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C + * @stable ICU 2.8 + */ +# define UINT64_C(c) c ## ULL +# endif +# ifndef U_INT64_MIN +/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */ +# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1)) +# endif +# ifndef U_INT64_MAX +/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */ +# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807))) +# endif +# ifndef U_UINT64_MAX +/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */ +# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615))) +# endif +#endif + +/*==========================================================================*/ +/* Boolean data type */ +/*==========================================================================*/ + +/** The ICU boolean type @stable ICU 2.0 */ +typedef int8_t UBool; + +#ifndef TRUE +/** The TRUE value of a UBool @stable ICU 2.0 */ +# define TRUE 1 +#endif +#ifndef FALSE +/** The FALSE value of a UBool @stable ICU 2.0 */ +# define FALSE 0 +#endif + + +/*==========================================================================*/ +/* Unicode data types */ +/*==========================================================================*/ + +/* wchar_t-related definitions -------------------------------------------- */ + +/* + * \def U_WCHAR_IS_UTF16 + * Defined if wchar_t uses UTF-16. + * + * @stable ICU 2.0 + */ +/* + * \def U_WCHAR_IS_UTF32 + * Defined if wchar_t uses UTF-32. 
+ * + * @stable ICU 2.0 + */ +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) +# ifdef __STDC_ISO_10646__ +# if (U_SIZEOF_WCHAR_T==2) +# define U_WCHAR_IS_UTF16 +# elif (U_SIZEOF_WCHAR_T==4) +# define U_WCHAR_IS_UTF32 +# endif +# elif defined __UCS2__ +# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2) +# define U_WCHAR_IS_UTF16 +# endif +# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__)) +# if (U_SIZEOF_WCHAR_T==4) +# define U_WCHAR_IS_UTF32 +# endif +# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED) +# define U_WCHAR_IS_UTF32 +# elif U_PLATFORM_HAS_WIN32_API +# define U_WCHAR_IS_UTF16 +# endif +#endif + +/* UChar and UChar32 definitions -------------------------------------------- */ + +/** Number of bytes in a UChar. @stable ICU 2.0 */ +#define U_SIZEOF_UCHAR 2 + +/** + * \def U_CHAR16_IS_TYPEDEF + * If 1, then char16_t is a typedef and not a real type (yet) + * @internal + */ +#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11) +// for AIX, uchar.h needs to be included +# include <uchar.h> +# define U_CHAR16_IS_TYPEDEF 1 +#elif defined(_MSC_VER) && (_MSC_VER < 1900) +// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type, +// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx +# define U_CHAR16_IS_TYPEDEF 1 +#else +# define U_CHAR16_IS_TYPEDEF 0 +#endif + + +/** + * \var UChar + * + * The base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar. + * + * UChar is configurable by defining the macro UCHAR_TYPE + * on the preprocessor or compiler command line: + * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. + * (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) 
+ * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. + * + * The default is UChar=char16_t. + * + * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type. + * + * In C, char16_t is a simple typedef of uint_least16_t. + * ICU requires uint_least16_t=uint16_t for data memory mapping. + * On macOS, char16_t is not available because the uchar.h standard header is missing. + * + * @stable ICU 4.4 + */ + +#if 1 + // #if 1 is normal. UChar defaults to char16_t in C++. + // For configuration testing of UChar=uint16_t temporarily change this to #if 0. + // The intltest Makefile #defines UCHAR_TYPE=char16_t, + // so we only #define it to uint16_t if it is undefined so far. +#elif !defined(UCHAR_TYPE) +# define UCHAR_TYPE uint16_t +#endif + +#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Inside the ICU library code, never configurable. + typedef char16_t UChar; +#elif defined(UCHAR_TYPE) + typedef UCHAR_TYPE UChar; +#elif defined(__cplusplus) + typedef char16_t UChar; +#else + typedef uint16_t UChar; +#endif + +/** + * \var OldUChar + * Default ICU 58 definition of UChar. + * A base type for UTF-16 code units and pointers. + * Unsigned 16-bit integer. + * + * Define OldUChar to be wchar_t if that is 16 bits wide. + * If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ (if defined) or else uint16_t. + * + * This makes the definition of OldUChar platform-dependent + * but allows direct string type compatibility with platforms with + * 16-bit wchar_t types. + * + * This is how UChar was defined in ICU 58, for transition convenience. + * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined. + * The current UChar responds to UCHAR_TYPE but OldUChar does not. 
+ * + * @stable ICU 59 + */ +#if U_SIZEOF_WCHAR_T==2 + typedef wchar_t OldUChar; +#elif defined(__CHAR16_TYPE__) + typedef __CHAR16_TYPE__ OldUChar; +#else + typedef uint16_t OldUChar; +#endif + +/** + * Define UChar32 as a type for single Unicode code points. + * UChar32 is a signed 32-bit integer (same as int32_t). + * + * The Unicode code point range is 0..0x10ffff. + * All other values (negative or >=0x110000) are illegal as Unicode code points. + * They may be used as sentinel values to indicate "done", "error" + * or similar non-code point conditions. + * + * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined + * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) + * or else to be uint32_t. + * That is, the definition of UChar32 was platform-dependent. + * + * @see U_SENTINEL + * @stable ICU 2.4 + */ +typedef int32_t UChar32; + +/** + * This value is intended for sentinel values for APIs that + * (take or) return single code points (UChar32). + * It is outside of the Unicode code point range 0..0x10ffff. + * + * For example, a "done" or "error" value in a new API + * could be indicated with U_SENTINEL. + * + * ICU APIs designed before ICU 2.4 usually define service-specific "done" + * values, mostly 0xffff. + * Those may need to be distinguished from + * actual U+ffff text contents by calling functions like + * CharacterIterator::hasNext() or UnicodeString::length(). + * + * @return -1 + * @see UChar32 + * @stable ICU 2.4 + */ +#define U_SENTINEL (-1) + +#include <unicode/urename.h> + +#endif diff --git a/vendor/icu/include/unicode/unistr.h b/vendor/icu/include/unicode/unistr.h new file mode 100644 index 0000000000..c160301a9a --- /dev/null +++ b/vendor/icu/include/unicode/unistr.h @@ -0,0 +1,4771 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File unistr.h +* +* Modification History: +* +* Date Name Description +* 09/25/98 stephen Creation. +* 11/11/98 stephen Changed per 11/9 code review. +* 04/20/99 stephen Overhauled per 4/16 code review. +* 11/18/99 aliu Made to inherit from Replaceable. Added method +* handleReplaceBetween(); other methods unchanged. +* 06/25/01 grhoten Remove dependency on iostream. +****************************************************************************** +*/ + +#ifndef UNISTR_H +#define UNISTR_H + +/** + * \file + * \brief C++ API: Unicode String + */ + +#include <cstddef> +#include <unicode/utypes.h> +#include <unicode/char16ptr.h> +#include <unicode/rep.h> +#include <unicode/std_string.h> +#include <unicode/stringpiece.h> +#include <unicode/bytestream.h> + +struct UConverter; // unicode/ucnv.h + +#ifndef USTRING_H +/** + * \ingroup ustring_ustrlen + */ +U_STABLE int32_t U_EXPORT2 +u_strlen(const UChar *s); +#endif + +U_NAMESPACE_BEGIN + +#if !UCONFIG_NO_BREAK_ITERATION +class BreakIterator; // unicode/brkiter.h +#endif +class Edits; + +U_NAMESPACE_END + +// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. +/** + * Internal string case mapping function type. + * All error checking must be done. + * src and dest must not overlap. 
+ * @internal + */ +typedef int32_t U_CALLCONV +UStringCaseMapper(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, +#endif + char16_t *dest, int32_t destCapacity, + const char16_t *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); + +U_NAMESPACE_BEGIN + +class Locale; // unicode/locid.h +class StringCharacterIterator; +class UnicodeStringAppendable; // unicode/appendable.h + +/* The <iostream> include has been moved to unicode/ustream.h */ + +/** + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor + * which constructs a Unicode string from an invariant-character char * string. + * About invariant characters see utypes.h. + * This constructor has no runtime dependency on conversion code and is + * therefore recommended over ones taking a charset name string + * (where the empty string "" indicates invariant-character conversion). + * + * @stable ICU 3.2 + */ +#define US_INV icu::UnicodeString::kInvariant + +/** + * Unicode String literals in C++. + * + * Note: these macros are not recommended for new code. + * Prior to the availability of C++11 and u"unicode string literals", + * these macros were provided for portability and efficiency when + * initializing UnicodeStrings from literals. + * + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * The string parameter must be a C string literal. + * The length of the string, not including the terminating + * <code>NUL</code>, must be specified as a constant. + * @stable ICU 2.0 + */ +#if !U_CHAR16_IS_TYPEDEF +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length) +#else +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length) +#endif + +/** + * Unicode String literals in C++. 
+ * Dependent on the platform properties, different UnicodeString + * constructors should be used to create a UnicodeString object from + * a string literal. + * The macros are defined for improved performance. + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * The string parameter must be a C string literal. + * @stable ICU 2.0 + */ +#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) + +/** + * \def UNISTR_FROM_CHAR_EXPLICIT + * This can be defined to be empty or "explicit". + * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) + * constructors are marked as explicit, preventing their inadvertent use. + * @stable ICU 49 + */ +#ifndef UNISTR_FROM_CHAR_EXPLICIT +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Auto-"explicit" in ICU library code. +# define UNISTR_FROM_CHAR_EXPLICIT explicit +# else + // Empty by default for source code compatibility. +# define UNISTR_FROM_CHAR_EXPLICIT +# endif +#endif + +/** + * \def UNISTR_FROM_STRING_EXPLICIT + * This can be defined to be empty or "explicit". + * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) + * constructors are marked as explicit, preventing their inadvertent use. + * + * In particular, this helps prevent accidentally depending on ICU conversion code + * by passing a string literal into an API with a const UnicodeString & parameter. + * @stable ICU 49 + */ +#ifndef UNISTR_FROM_STRING_EXPLICIT +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) + // Auto-"explicit" in ICU library code. +# define UNISTR_FROM_STRING_EXPLICIT explicit +# else + // Empty by default for source code compatibility. 
+# define UNISTR_FROM_STRING_EXPLICIT +# endif +#endif + +/** + * \def UNISTR_OBJECT_SIZE + * Desired sizeof(UnicodeString) in bytes. + * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. + * The object size may want to be a multiple of 16 bytes, + * which is a common granularity for heap allocation. + * + * Any space inside the object beyond sizeof(vtable pointer) + 2 + * is available for storing short strings inside the object. + * The bigger the object, the longer a string that can be stored inside the object, + * without additional heap allocation. + * + * Depending on a platform's pointer size, pointer alignment requirements, + * and struct padding, the compiler will usually round up sizeof(UnicodeString) + * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), + * to hold the fields for heap-allocated strings. + * Such a minimum size also ensures that the object is easily large enough + * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). + * + * sizeof(UnicodeString) >= 48 should work for all known platforms. + * + * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, + * sizeof(UnicodeString) = 64 would leave space for + * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 + * char16_ts stored inside the object. + * + * The minimum object size on a 64-bit machine would be + * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, + * and the internal buffer would hold up to 11 char16_ts in that case. + * + * @see U16_MAX_LENGTH + * @stable ICU 56 + */ +#ifndef UNISTR_OBJECT_SIZE +# define UNISTR_OBJECT_SIZE 64 +#endif + +/** + * UnicodeString is a string class that stores Unicode characters directly and provides + * similar functionality as the Java String and StringBuffer/StringBuilder classes. + * It is a concrete implementation of the abstract class Replaceable (for transliteration). 
+ * + * A UnicodeString may also "alias" an external array of characters + * (that is, point to it, rather than own the array) + * whose lifetime must then at least match the lifetime of the aliasing object. + * This aliasing may be preserved when returning a UnicodeString by value, + * depending on the compiler and the function implementation, + * via Return Value Optimization (RVO) or the move assignment operator. + * (However, the copy assignment operator does not preserve aliasing.) + * For details see the description of storage models at the end of the class API docs + * and in the User Guide chapter linked from there. + * + * The UnicodeString class is not suitable for subclassing. + * + * <p>For an overview of Unicode strings in C and C++ see the + * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p> + * + * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>. + * A Unicode character may be stored with either one code unit + * (the most common case) or with a matched pair of special code units + * ("surrogates"). The data type for code units is char16_t. + * For single-character handling, a Unicode character code <em>point</em> is a value + * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p> + * + * <p>Indexes and offsets into and lengths of strings always count code units, not code points. + * This is the same as with multi-byte char* strings in traditional string handling. + * Operations on partial strings typically do not test for code point boundaries. + * If necessary, the user needs to take care of such boundaries by testing for the code unit + * values or by using functions like + * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() + * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p> + * + * UnicodeString methods are more lenient with regard to input parameter values + * than other ICU APIs. 
In particular: + * - If indexes are out of bounds for a UnicodeString object + * (<0 or >length()) then they are "pinned" to the nearest boundary. + * - If primitive string pointer values (e.g., const char16_t * or char *) + * for input strings are NULL, then those input string parameters are treated + * as if they pointed to an empty string. + * However, this is <em>not</em> the case for char * parameters for charset names + * or other IDs. + * - Most UnicodeString methods do not take a UErrorCode parameter because + * there are usually very few opportunities for failure other than a shortage + * of memory, error codes in low-level C++ string methods would be inconvenient, + * and the error code as the last parameter (ICU convention) would prevent + * the use of default parameter values. + * Instead, such methods set the UnicodeString into a "bogus" state + * (see isBogus()) if an error occurs. + * + * In string comparisons, two UnicodeString objects that are both "bogus" + * compare equal (to be transitive and prevent endless loops in sorting), + * and a "bogus" string compares less than any non-"bogus" one. + * + * Const UnicodeString methods are thread-safe. Multiple threads can use + * const methods on the same UnicodeString object simultaneously, + * but non-const methods must not be called concurrently (in multiple threads) + * with any other (const or non-const) methods. + * + * Similarly, const UnicodeString & parameters are thread-safe. + * One object may be passed in as such a parameter concurrently in multiple threads. + * This includes the const UnicodeString & parameters for + * copy construction, assignment, and cloning. + * + * <p>UnicodeString uses several storage methods. + * String contents can be stored inside the UnicodeString object itself, + * in an allocated and shared buffer, or in an outside buffer that is "aliased". 
+ * Most of this is done transparently, but careful aliasing in particular provides + * significant performance improvements. + * Also, the internal buffer is accessible via special functions. + * For details see the + * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p> + * + * @see utf.h + * @see CharacterIterator + * @stable ICU 2.0 + */ +class U_COMMON_API UnicodeString : public Replaceable +{ +public: + + /** + * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor + * which constructs a Unicode string from an invariant-character char * string. + * Use the macro US_INV instead of the full qualification for this value. + * + * @see US_INV + * @stable ICU 3.2 + */ + enum EInvariant { + /** + * @see EInvariant + * @stable ICU 3.2 + */ + kInvariant + }; + + //======================================== + // Read-only operations + //======================================== + + /* Comparison - bitwise only - for international comparison use collation */ + + /** + * Equality operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if <TT>text</TT> contains the same characters as this one, + * FALSE otherwise. + * @stable ICU 2.0 + */ + inline UBool operator== (const UnicodeString& text) const; + + /** + * Inequality operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return FALSE if <TT>text</TT> contains the same characters as this one, + * TRUE otherwise. + * @stable ICU 2.0 + */ + inline UBool operator!= (const UnicodeString& text) const; + + /** + * Greater than operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. 
+ * @return TRUE if the characters in this are bitwise + * greater than the characters in <code>text</code>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool operator> (const UnicodeString& text) const; + + /** + * Less than operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if the characters in this are bitwise + * less than the characters in <code>text</code>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool operator< (const UnicodeString& text) const; + + /** + * Greater than or equal operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if the characters in this are bitwise + * greater than or equal to the characters in <code>text</code>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool operator>= (const UnicodeString& text) const; + + /** + * Less than or equal operator. Performs only bitwise comparison. + * @param text The UnicodeString to compare to this one. + * @return TRUE if the characters in this are bitwise + * less than or equal to the characters in <code>text</code>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool operator<= (const UnicodeString& text) const; + + /** + * Compare the characters bitwise in this UnicodeString to + * the characters in <code>text</code>. + * @param text The UnicodeString to compare to this one. + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>text</code>, -1 if the characters in + * this are bitwise less than the characters in <code>text</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>text</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(const UnicodeString& text) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters + * in the <b>entire string</b> <TT>text</TT>. 
+ * (The parameters "start" and "length" are not applied to the other text "text".) + * @param start the offset at which the compare operation begins + * @param length the number of characters of text to compare. + * @param text the other text to be compared against this string. + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>text</code>, -1 if the characters in + * this are bitwise less than the characters in <code>text</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>text</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const UnicodeString& text) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param start the offset at which the compare operation begins + * @param length the number of characters in this to compare. + * @param srcText the text to be compared + * @param srcStart the offset into <TT>srcText</TT> to start comparison + * @param srcLength the number of characters in <TT>srcText</TT> to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcText</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcText</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcText</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in this UnicodeString with the first + * <TT>srcLength</TT> characters in <TT>srcChars</TT>. + * @param srcChars The characters to compare to this UnicodeString. 
+ * @param srcLength the number of characters in <TT>srcChars</TT> to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcChars</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcChars</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(ConstChar16Ptr srcChars, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the first + * <TT>length</TT> characters in <TT>srcChars</TT> + * @param start the offset at which the compare operation begins + * @param length the number of characters to compare. + * @param srcChars the characters to be compared + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcChars</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcChars</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const char16_t *srcChars) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters + * in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). 
+ * @param start the offset at which the compare operation begins + * @param length the number of characters in this to compare + * @param srcChars the characters to be compared + * @param srcStart the offset into <TT>srcChars</TT> to start comparison + * @param srcLength the number of characters in <TT>srcChars</TT> to compare + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcChars</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcChars</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcChars</code>. + * @stable ICU 2.0 + */ + inline int8_t compare(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare the characters bitwise in the range + * [<TT>start</TT>, <TT>limit</TT>) with the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcLimit</TT>). + * @param start the offset at which the compare operation begins + * @param limit the offset immediately following the compare operation + * @param srcText the text to be compared + * @param srcStart the offset into <TT>srcText</TT> to start comparison + * @param srcLimit the offset into <TT>srcText</TT> to limit comparison + * @return The result of bitwise character comparison: 0 if this + * contains the same characters as <code>srcText</code>, -1 if the characters in + * this are bitwise less than the characters in <code>srcText</code>, +1 if the + * characters in this are bitwise greater than the characters + * in <code>srcText</code>. + * @stable ICU 2.0 + */ + inline int8_t compareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. 
+ * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param text Another string to compare this one to. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(const UnicodeString& text) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. 
+ * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. 
These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param srcChars A pointer to another string to compare this one to. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const char16_t *srcChars) const; + + /** + * Compare two Unicode strings in code point order. 
+ * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrder(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Compare two Unicode strings in code point order. + * The result may be different from the results of compare(), operator<, etc. + * if supplementary characters are present: + * + * In UTF-16, supplementary characters (with code points U+10000 and above) are + * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, + * which means that they compare as less than some other BMP characters like U+feff. + * This function compares Unicode strings in code point order. + * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined. 
+ * + * @param start The start offset in this string at which the compare operation begins. + * @param limit The offset after the last code unit from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLimit The offset after the last code unit from that string to compare. + * @return a negative/zero/positive integer corresponding to whether + * this string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ + inline int8_t compareCodePointOrderBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(text.foldCase(options)). + * + * @param text Another string to compare this one to. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. 
+ * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param srcChars A pointer to another string to compare this one to. 
+ * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(ConstChar16Ptr srcChars, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const char16_t *srcChars, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param length The number of code units from this string to compare. + * @param srcChars A pointer to another string to compare this one to. 
+ * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLength The number of code units from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ + inline int8_t caseCompare(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + /** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)). + * + * @param start The start offset in this string at which the compare operation begins. + * @param limit The offset after the last code unit from this string to compare. + * @param srcText Another string to compare this one to. + * @param srcStart The start offset in that string at which the compare operation begins. + * @param srcLimit The offset after the last code unit from that string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. 
+ * @stable ICU 2.0 + */ + inline int8_t caseCompareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit, + uint32_t options) const; + + /** + * Determine if this starts with the characters in <TT>text</TT> + * @param text The text to match. + * @return TRUE if this starts with the characters in <TT>text</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const UnicodeString& text) const; + + /** + * Determine if this starts with the characters in <TT>srcText</TT> + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param srcText The text to match. + * @param srcStart the offset into <TT>srcText</TT> to start matching + * @param srcLength the number of characters in <TT>srcText</TT> to match + * @return TRUE if this starts with the characters in <TT>srcText</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this starts with the characters in <TT>srcChars</TT> + * @param srcChars The characters to match. + * @param srcLength the number of characters in <TT>srcChars</TT> + * @return TRUE if this starts with the characters in <TT>srcChars</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(ConstChar16Ptr srcChars, + int32_t srcLength) const; + + /** + * Determine if this starts with the characters in <TT>srcChars</TT> + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param srcChars The characters to match. 
+ * @param srcStart the offset into <TT>srcChars</TT> to start matching + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool startsWith(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in <TT>text</TT> + * @param text The text to match. + * @return TRUE if this ends with the characters in <TT>text</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const UnicodeString& text) const; + + /** + * Determine if this ends with the characters in <TT>srcText</TT> + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param srcText The text to match. + * @param srcStart the offset into <TT>srcText</TT> to start matching + * @param srcLength the number of characters in <TT>srcText</TT> to match + * @return TRUE if this ends with the characters in <TT>srcText</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in <TT>srcChars</TT> + * @param srcChars The characters to match. + * @param srcLength the number of characters in <TT>srcChars</TT> + * @return TRUE if this ends with the characters in <TT>srcChars</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(ConstChar16Ptr srcChars, + int32_t srcLength) const; + + /** + * Determine if this ends with the characters in <TT>srcChars</TT> + * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * @param srcChars The characters to match. 
+ * @param srcStart the offset into <TT>srcText</TT> to start matching + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @return TRUE if this ends with the characters in <TT>srcChars</TT>, + * FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool endsWith(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + + /* Searching - bitwise only */ + + /** + * Locate in this the first occurrence of the characters in <TT>text</TT>, + * using bitwise comparison. + * @param text The text to search for. + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text) const; + + /** + * Locate in this the first occurrence of the characters in <TT>text</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text, + int32_t start) const; + + /** + * Locate in this the first occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>text</TT>, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& text, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), + * using bitwise comparison. + * @param srcText The text to search for. 
+ * @param srcStart the offset into <TT>srcText</TT> at which + * to start matching + * @param srcLength the number of characters in <TT>srcText</TT> to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the characters in + * <TT>srcChars</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @param start the offset into this at which to start matching + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(const char16_t *srcChars, + int32_t srcLength, + int32_t start) const; + + /** + * Locate in this the first occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcChars</TT>, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in <TT>srcChars</TT> + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of <TT>srcChars</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(ConstChar16Ptr srcChars, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), + * using bitwise comparison. 
+ * @param srcChars The text to search for. + * @param srcStart the offset into <TT>srcChars</TT> at which + * to start matching + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + int32_t indexOf(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the BMP code point <code>c</code>, + * using bitwise comparison. + * @param c The code unit to search for. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(char16_t c) const; + + /** + * Locate in this the first occurrence of the code point <TT>c</TT>, + * using bitwise comparison. + * + * @param c The code point to search for. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c) const; + + /** + * Locate in this the first occurrence of the BMP code point <code>c</code>, + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param c The code unit to search for. + * @param start The offset at which searching will start. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(char16_t c, + int32_t start) const; + + /** + * Locate in this the first occurrence of the code point <TT>c</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * + * @param c The code point to search for. + * @param start The offset at which searching will start. + * @return The offset into this of <TT>c</TT>, or -1 if not found. 
+ * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c, + int32_t start) const; + + /** + * Locate in this the first occurrence of the BMP code point <code>c</code> + * in the range [<TT>start</TT>, <TT>start + length</TT>), + * using bitwise comparison. + * @param c The code unit to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(char16_t c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the first occurrence of the code point <TT>c</TT> + * in the range [<TT>start</TT>, <TT>start + length</TT>), + * using bitwise comparison. + * + * @param c The code point to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t indexOf(UChar32 c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the characters in <TT>text</TT>, + * using bitwise comparison. + * @param text The text to search for. + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text) const; + + /** + * Locate in this the last occurrence of the characters in <TT>text</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. 
+ * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text, + int32_t start) const; + + /** + * Locate in this the last occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>text</TT>, using bitwise comparison. + * @param text The text to search for. + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& text, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), + * using bitwise comparison. + * @param srcText The text to search for. + * @param srcStart the offset into <TT>srcText</TT> at which + * to start matching + * @param srcLength the number of characters in <TT>srcText</TT> to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the characters in <TT>srcChars</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @param start the offset into this at which to start matching + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. 
+ * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(const char16_t *srcChars, + int32_t srcLength, + int32_t start) const; + + /** + * Locate in this the last occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcChars</TT>, using bitwise comparison. + * @param srcChars The text to search for. + * @param srcLength the number of characters in <TT>srcChars</TT> + * @param start The offset at which searching will start. + * @param length The number of characters to search + * @return The offset into this of the start of <TT>srcChars</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(ConstChar16Ptr srcChars, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence in the range + * [<TT>start</TT>, <TT>start + length</TT>) of the characters + * in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>), + * using bitwise comparison. + * @param srcChars The text to search for. + * @param srcStart the offset into <TT>srcChars</TT> at which + * to start matching + * @param srcLength the number of characters in <TT>srcChars</TT> to match + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of the start of <TT>text</TT>, + * or -1 if not found. + * @stable ICU 2.0 + */ + int32_t lastIndexOf(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the BMP code point <code>c</code>, + * using bitwise comparison. + * @param c The code unit to search for. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(char16_t c) const; + + /** + * Locate in this the last occurrence of the code point <TT>c</TT>, + * using bitwise comparison. 
+ * + * @param c The code point to search for. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c) const; + + /** + * Locate in this the last occurrence of the BMP code point <code>c</code> + * starting at offset <TT>start</TT>, using bitwise comparison. + * @param c The code unit to search for. + * @param start The offset at which searching will start. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(char16_t c, + int32_t start) const; + + /** + * Locate in this the last occurrence of the code point <TT>c</TT> + * starting at offset <TT>start</TT>, using bitwise comparison. + * + * @param c The code point to search for. + * @param start The offset at which searching will start. + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c, + int32_t start) const; + + /** + * Locate in this the last occurrence of the BMP code point <code>c</code> + * in the range [<TT>start</TT>, <TT>start + length</TT>), + * using bitwise comparison. + * @param c The code unit to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of <TT>c</TT>, or -1 if not found. + * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(char16_t c, + int32_t start, + int32_t length) const; + + /** + * Locate in this the last occurrence of the code point <TT>c</TT> + * in the range [<TT>start</TT>, <TT>start + length</TT>), + * using bitwise comparison. + * + * @param c The code point to search for. + * @param start the offset into this at which to start matching + * @param length the number of characters in this to search + * @return The offset into this of <TT>c</TT>, or -1 if not found. 
+ * @stable ICU 2.0 + */ + inline int32_t lastIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + + /* Character access */ + + /** + * Return the code unit at offset <tt>offset</tt>. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * @return the code unit at offset <tt>offset</tt> + * or 0xffff if the offset is not valid for this string + * @stable ICU 2.0 + */ + inline char16_t charAt(int32_t offset) const; + + /** + * Return the code unit at offset <tt>offset</tt>. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * @return the code unit at offset <tt>offset</tt> + * @stable ICU 2.0 + */ + inline char16_t operator[] (int32_t offset) const; + + /** + * Return the code point that contains the code unit + * at offset <tt>offset</tt>. + * If the offset is not valid (0..length()-1) then U+ffff is returned. + * @param offset a valid offset into the text + * that indicates the text offset of any of the code units + * that will be assembled into a code point (21-bit value) and returned + * @return the code point of text at <tt>offset</tt> + * or 0xffff if the offset is not valid for this string + * @stable ICU 2.0 + */ + UChar32 char32At(int32_t offset) const; + + /** + * Adjust a random-access offset so that + * it points to the beginning of a Unicode character. + * The offset that is passed in points to + * any code unit of a code point, + * while the returned offset will point to the first code unit + * of the same code point. + * In UTF-16, if the input offset points to a second surrogate + * of a surrogate pair, then the returned offset will point + * to the first surrogate. 
+ * @param offset a valid offset into one code point of the text + * @return offset of the first code unit of the same code point + * @see U16_SET_CP_START + * @stable ICU 2.0 + */ + int32_t getChar32Start(int32_t offset) const; + + /** + * Adjust a random-access offset so that + * it points behind a Unicode character. + * The offset that is passed in points behind + * any code unit of a code point, + * while the returned offset will point behind the last code unit + * of the same code point. + * In UTF-16, if the input offset points behind the first surrogate + * (i.e., to the second surrogate) + * of a surrogate pair, then the returned offset will point + * behind the second surrogate (i.e., to the first surrogate). + * @param offset a valid offset after any code unit of a code point of the text + * @return offset of the first code unit after the same code point + * @see U16_SET_CP_LIMIT + * @stable ICU 2.0 + */ + int32_t getChar32Limit(int32_t offset) const; + + /** + * Move the code unit index along the string by delta code points. + * Interpret the input index as a code unit-based offset into the string, + * move the index forward or backward by delta code points, and + * return the resulting index. + * The input index should point to the first code unit of a code point, + * if there is more than one. + * + * Both input and output indexes are code unit-based as for all + * string indexes/offsets in ICU (and other libraries, like MBCS char*). + * If delta<0 then the index is moved backward (toward the start of the string). + * If delta>0 then the index is moved forward (toward the end of the string). + * + * This behaves like CharacterIterator::move32(delta, kCurrent). + * + * Behavior for out-of-bounds indexes: + * <code>moveIndex32</code> pins the input index to 0..length(), i.e., + * if the input index<0 then it is pinned to 0; + * if it is index>length() then it is pinned to length(). 
+ * Afterwards, the index is moved by <code>delta</code> code points + * forward or backward, + * but no further backward than to 0 and no further forward than to length(). + * The resulting index return value will be in between 0 and length(), inclusively. + * + * Examples: + * <pre> + * // s has code points 'a' U+10000 'b' U+10ffff U+2029 + * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape(); + * + * // initial index: position of U+10000 + * int32_t index=1; + * + * // the following examples will all result in index==4, position of U+10ffff + * + * // skip 2 code points from some position in the string + * index=s.moveIndex32(index, 2); // skips U+10000 and 'b' + * + * // go to the 3rd code point from the start of s (0-based) + * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b' + * + * // go to the next-to-last code point of s + * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff + * </pre> + * + * @param index input code unit index + * @param delta (signed) code point count to move the index forward or backward + * in the string + * @return the resulting code unit index + * @stable ICU 2.0 + */ + int32_t moveIndex32(int32_t index, int32_t delta) const; + + /* Substring extraction */ + + /** + * Copy the characters in the range + * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>, + * beginning at <tt>dstStart</tt>. + * If the string aliases to <code>dst</code> itself as an external buffer, + * then extract() will not copy the contents. + * + * @param start offset of first character which will be copied into the array + * @param length the number of characters to extract + * @param dst array in which to copy characters. The length of <tt>dst</tt> + * must be at least (<tt>dstStart + length</tt>). 
+ * @param dstStart the offset in <TT>dst</TT> where the first character + * will be extracted + * @stable ICU 2.0 + */ + inline void extract(int32_t start, + int32_t length, + Char16Ptr dst, + int32_t dstStart = 0) const; + + /** + * Copy the contents of the string into dest. + * This is a convenience function that + * checks if there is enough space in dest, + * extracts the entire string if possible, + * and NUL-terminates dest if possible. + * + * If the string fits into dest but cannot be NUL-terminated + * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. + * If the string itself does not fit into dest + * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. + * + * If the string aliases to <code>dest</code> itself as an external buffer, + * then extract() will not copy the contents. + * + * @param dest Destination string buffer. + * @param destCapacity Number of char16_ts available at dest. + * @param errorCode ICU error code. + * @return length() + * @stable ICU 2.0 + */ + int32_t + extract(Char16Ptr dest, int32_t destCapacity, + UErrorCode &errorCode) const; + + /** + * Copy the characters in the range + * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString + * <tt>target</tt>. + * @param start offset of first character which will be copied + * @param length the number of characters to extract + * @param target UnicodeString into which to copy characters. + * @note This function returns no value; the result is delivered in <TT>target</TT>. + * @stable ICU 2.0 + */ + inline void extract(int32_t start, + int32_t length, + UnicodeString& target) const; + + /** + * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) + * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>. + * @param start offset of first character which will be copied into the array + * @param limit offset immediately following the last character to be copied + * @param dst array in which to copy characters. 
The length of <tt>dst</tt> + * must be at least (<tt>dstStart + (limit - start)</tt>). + * @param dstStart the offset in <TT>dst</TT> where the first character + * will be extracted + * @stable ICU 2.0 + */ + inline void extractBetween(int32_t start, + int32_t limit, + char16_t *dst, + int32_t dstStart = 0) const; + + /** + * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>) + * into the UnicodeString <tt>target</tt>. Replaceable API. + * @param start offset of first character which will be copied + * @param limit offset immediately following the last character to be copied + * @param target UnicodeString into which to copy characters. + * @note This function returns no value; the result is delivered in <TT>target</TT>. + * @stable ICU 2.0 + */ + virtual void extractBetween(int32_t start, + int32_t limit, + UnicodeString& target) const; + + /** + * Copy the characters in the range + * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters. + * All characters must be invariant (see utypes.h). + * Use US_INV as the last, signature-distinguishing parameter. + * + * This function does not write any more than <code>targetCapacity</code> + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction, can be NULL + * if targetCapacity is 0 + * @param targetCapacity the length of the target buffer + * @param inv Signature-distinguishing parameter, use US_INV. 
+ * @return the output string length, not including the terminating NUL + * @stable ICU 3.2 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + int32_t targetCapacity, + enum EInvariant inv) const; + +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION + + /** + * Copy the characters in the range + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters + * in the platform's default codepage. + * This function does not write any more than <code>targetLength</code> + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param targetLength the length of the target buffer + * If <TT>target</TT> is NULL, then the number of bytes required for + * <TT>target</TT> is returned. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + uint32_t targetLength) const; + +#endif + +#if !UCONFIG_NO_CONVERSION + + /** + * Copy the characters in the range + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters + * in a specified codepage. + * The output string is NUL-terminated. + * + * Recommendation: For invariant-character strings use + * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param codepage the desired codepage for the characters. 
0 has + * the special meaning of the default codepage + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * If <TT>target</TT> is NULL, then the number of bytes required for + * <TT>target</TT> is returned. It is assumed that the target is big enough + * to fit all of the characters. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + inline int32_t extract(int32_t start, + int32_t startLength, + char *target, + const char *codepage = 0) const; + + /** + * Copy the characters in the range + * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters + * in a specified codepage. + * This function does not write any more than <code>targetLength</code> + * characters but returns the length of the entire output string + * so that one can allocate a larger buffer and call the function again + * if necessary. + * The output string is NUL-terminated if possible. + * + * Recommendation: For invariant-character strings use + * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @param start offset of first character which will be copied + * @param startLength the number of characters to extract + * @param target the target buffer for extraction + * @param targetLength the length of the target buffer + * @param codepage the desired codepage for the characters. 0 has + * the special meaning of the default codepage + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. 
+ * If <TT>target</TT> is NULL, then the number of bytes required for + * <TT>target</TT> is returned. + * @return the output string length, not including the terminating NUL + * @stable ICU 2.0 + */ + int32_t extract(int32_t start, + int32_t startLength, + char *target, + uint32_t targetLength, + const char *codepage) const; + + /** + * Convert the UnicodeString into a codepage string using an existing UConverter. + * The output string is NUL-terminated if possible. + * + * This function avoids the overhead of opening and closing a converter if + * multiple strings are extracted. + * + * @param dest destination string buffer, can be NULL if destCapacity==0 + * @param destCapacity the number of chars available at dest + * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called), + * or NULL for the default converter + * @param errorCode normal ICU error code + * @return the length of the output string, not counting the terminating NUL; + * if the length is greater than destCapacity, then the string will not fit + * and a buffer of the indicated length would need to be passed in + * @stable ICU 2.0 + */ + int32_t extract(char *dest, int32_t destCapacity, + UConverter *cnv, + UErrorCode &errorCode) const; + +#endif + + /** + * Create a temporary substring for the specified range. + * Unlike the substring constructor and setTo() functions, + * the object returned here will be a read-only alias (using getBuffer()) + * rather than copying the text. + * As a result, this substring operation is much faster but requires + * that the original string not be modified or deleted during the lifetime + * of the returned substring object. 
+ * @param start offset of the first character visible in the substring + * @param length length of the substring + * @return a read-only alias UnicodeString object for the substring + * @stable ICU 4.4 + */ + UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; + + /** + * Create a temporary substring for the specified range. + * Same as tempSubString(start, length) except that the substring range + * is specified as a (start, limit) pair (with an exclusive limit index) + * rather than a (start, length) pair. + * @param start offset of the first character visible in the substring + * @param limit offset immediately following the last character visible in the substring + * @return a read-only alias UnicodeString object for the substring + * @stable ICU 4.4 + */ + inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; + + /** + * Convert the UnicodeString to UTF-8 and write the result + * to a ByteSink. This is called by toUTF8String(). + * Unpaired surrogates are replaced with U+FFFD. + * Calls u_strToUTF8WithSub(). + * + * @param sink A ByteSink to which the UTF-8 version of the string is written. + * sink.Flush() is called at the end. + * @stable ICU 4.2 + * @see toUTF8String + */ + void toUTF8(ByteSink &sink) const; + + /** + * Convert the UnicodeString to UTF-8 and append the result + * to a standard string. + * Unpaired surrogates are replaced with U+FFFD. + * Calls toUTF8(). + * + * @param result A standard string (or a compatible object) + * to which the UTF-8 version of the string is appended. + * @return The string object. + * @stable ICU 4.2 + * @see toUTF8 + */ + template<typename StringClass> + StringClass &toUTF8String(StringClass &result) const { + StringByteSink<StringClass> sbs(&result, length()); + toUTF8(sbs); + return result; + } + + /** + * Convert the UnicodeString to UTF-32. + * Unpaired surrogates are replaced with U+FFFD. + * Calls u_strToUTF32WithSub(). 
+ * + * @param utf32 destination string buffer, can be NULL if capacity==0 + * @param capacity the number of UChar32s available at utf32 + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The length of the UTF-32 string. + * @see fromUTF32 + * @stable ICU 4.2 + */ + int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; + + /* Length operations */ + + /** + * Return the length of the UnicodeString object. + * The length is the number of char16_t code units in the UnicodeString. + * If you want the number of code points, please use countChar32(). + * @return the length of the UnicodeString object + * @see countChar32 + * @stable ICU 2.0 + */ + inline int32_t length(void) const; + + /** + * Count Unicode code points in the length char16_t code units of the string. + * A code point may occupy either one or two char16_t code units. + * Counting code points involves reading all code units. + * + * This function is basically the inverse of moveIndex32(). + * + * @param start the index of the first code unit to check + * @param length the number of char16_t code units to check + * @return the number of code points in the specified code units + * @see length + * @stable ICU 2.0 + */ + int32_t + countChar32(int32_t start=0, int32_t length=INT32_MAX) const; + + /** + * Check if the length char16_t code units of the string + * contain more Unicode code points than a certain number. + * This is more efficient than counting all code points in this part of the string + * and comparing that number with a threshold. + * This function may not need to scan the string at all if the length + * falls within a certain range, and + * never needs to count more than 'number+1' code points. + * Logically equivalent to (countChar32(start, length)>number).
+ * A Unicode code point may occupy either one or two char16_t code units. + * + * @param start the index of the first code unit to check (0 for the entire string) + * @param length the number of char16_t code units to check + * (use INT32_MAX for the entire string; remember that start/length + * values are pinned) + * @param number The number of code points in the (sub)string is compared against + * the 'number' parameter. + * @return Boolean value for whether the string contains more Unicode code points + * than 'number'. Same as (u_countChar32(s, length)>number). + * @see countChar32 + * @see u_strHasMoreChar32Than + * @stable ICU 2.4 + */ + UBool + hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; + + /** + * Determine if this string is empty. + * @return TRUE if this string contains 0 characters, FALSE otherwise. + * @stable ICU 2.0 + */ + inline UBool isEmpty(void) const; + + /** + * Return the capacity of the internal buffer of the UnicodeString object. + * This is useful together with the getBuffer functions. + * See there for details. + * + * @return the number of char16_ts available in the internal buffer + * @see getBuffer + * @stable ICU 2.0 + */ + inline int32_t getCapacity(void) const; + + /* Other operations */ + + /** + * Generate a hash code for this object. + * @return The hash code of this UnicodeString. + * @stable ICU 2.0 + */ + inline int32_t hashCode(void) const; + + /** + * Determine if this object contains a valid string. + * A bogus string has no value. It is different from an empty string, + * although in both cases isEmpty() returns TRUE and length() returns 0. + * setToBogus() and isBogus() can be used to indicate that no string value is available. + * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and + * length() returns 0. 
+ * + * @return TRUE if the string is bogus/invalid, FALSE otherwise + * @see setToBogus() + * @stable ICU 2.0 + */ + inline UBool isBogus(void) const; + + + //======================================== + // Write operations + //======================================== + + /* Assignment operations */ + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the characters from <TT>srcText</TT>. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * By contrast, the fastCopyFrom() function implements the old, + * more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. + * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. + * + * @param srcText The text containing the characters to replace + * @return a reference to this + * @stable ICU 2.0 + * @see fastCopyFrom + */ + UnicodeString &operator=(const UnicodeString &srcText); + + /** + * Almost the same as the assignment operator. + * Replace the characters in this UnicodeString + * with the characters from <code>srcText</code>. + * + * This function works the same as the assignment operator + * for all strings except for ones that are readonly aliases. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * This function implements the old, more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. + * + * The fastCopyFrom function must be used only if it is known that the lifetime of + * this UnicodeString does not exceed the lifetime of the aliased buffer + * including its contents, for example for strings from resource bundles + * or aliases to string constants. 
+ * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. + * + * @param src The text containing the characters to replace. + * @return a reference to this + * @stable ICU 2.4 + */ + UnicodeString &fastCopyFrom(const UnicodeString &src); + + /** + * Move assignment operator, might leave src in bogus state. + * This string will have the same contents and state that the source string had. + * The behavior is undefined if *this and src are the same object. + * @param src source string + * @return *this + * @stable ICU 56 + */ + UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT { + return moveFrom(src); + } + + // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API + /** + * Move assignment, might leave src in bogus state. + * This string will have the same contents and state that the source string had. + * The behavior is undefined if *this and src are the same object. + * + * Can be called explicitly, does not need C++11 support. + * @param src source string + * @return *this + * @draft ICU 56 + */ + UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT; + + /** + * Swap strings. + * @param other other string + * @stable ICU 56 + */ + void swap(UnicodeString &other) U_NOEXCEPT; + + /** + * Non-member UnicodeString swap function. + * @param s1 will get s2's contents and state + * @param s2 will get s1's contents and state + * @stable ICU 56 + */ + friend U_COMMON_API inline void U_EXPORT2 + swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT { + s1.swap(s2); + } + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the code unit <TT>ch</TT>. + * @param ch the code unit to replace + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator= (char16_t ch); + + /** + * Assignment operator. Replace the characters in this UnicodeString + * with the code point <TT>ch</TT>. 
+ * @param ch the code point to replace + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator= (UChar32 ch); + + /** + * Set the text in the UnicodeString object to the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcText.length()</TT>). + * <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @return a reference to this + * @stable ICU 2.2 + */ + inline UnicodeString& setTo(const UnicodeString& srcText, + int32_t srcStart); + + /** + * Set the text in the UnicodeString object to the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcText</TT> in the + * replace string. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Set the text in the UnicodeString object to the characters in + * <TT>srcText</TT>. + * <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const UnicodeString& srcText); + + /** + * Set the characters in the UnicodeString object to the characters + * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars. 
+ * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& setTo(const char16_t *srcChars, + int32_t srcLength); + + /** + * Set the characters in the UnicodeString object to the code unit + * <TT>srcChar</TT>. + * @param srcChar the code unit which becomes the UnicodeString's character + * content + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& setTo(char16_t srcChar); + + /** + * Set the characters in the UnicodeString object to the code point + * <TT>srcChar</TT>. + * @param srcChar the code point which becomes the UnicodeString's character + * content + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& setTo(UChar32 srcChar); + + /** + * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has copy-on-write semantics: + * When the string is modified, then the buffer is first copied into + * newly allocated memory. + * The aliased buffer is never modified. + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, + * so that both strings then alias the same readonly-text. + * + * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. + * This must be true if <code>textLength==-1</code>. + * @param text The characters to alias for the UnicodeString. + * @param textLength The number of Unicode characters in <code>text</code> to alias. + * If -1, then this function will determine the length + * by calling <code>u_strlen()</code>.
+ * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString &setTo(UBool isTerminated, + ConstChar16Ptr text, + int32_t textLength); + + /** + * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has write-through semantics: + * For as long as the capacity of the buffer is sufficient, write operations + * will directly affect the buffer. When more capacity is necessary, then + * a new buffer will be allocated and the contents copied as with regularly + * constructed strings. + * In an assignment to another UnicodeString, the buffer will be copied. + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same + * as the string buffer itself and will in this case not copy the contents. + * + * @param buffer The characters to alias for the UnicodeString. + * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. + * @param buffCapacity The size of <code>buffer</code> in char16_ts. + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString &setTo(char16_t *buffer, + int32_t buffLength, + int32_t buffCapacity); + + /** + * Make this UnicodeString object invalid. + * The string will test TRUE with isBogus(). + * + * A bogus string has no value. It is different from an empty string. + * It can be used to indicate that no string value is available. + * getBuffer() and getTerminatedBuffer() return NULL, and + * length() returns 0. + * + * This utility function is used throughout the UnicodeString + * implementation to indicate that a UnicodeString operation failed, + * and may be used in other functions, + * especially but not exclusively when such functions do not + * take a UErrorCode for simplicity. 
+ * + * The following methods, and no others, will clear a string object's bogus flag: + * - remove() + * - remove(0, INT32_MAX) + * - truncate(0) + * - operator=() (assignment operator) + * - setTo(...) + * + * The simplest way to turn a bogus string into an empty one + * is to use the remove() function. + * Examples for other functions that are equivalent to "set to empty string": + * \code + * if(s.isBogus()) { + * s.remove(); // set to an empty string (remove all), or + * s.remove(0, INT32_MAX); // set to an empty string (remove all), or + * s.truncate(0); // set to an empty string (complete truncation), or + * s=UnicodeString(); // assign an empty string, or + * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or + * static const char16_t nul=0; + * s.setTo(&nul, 0); // set to an empty C Unicode string + * } + * \endcode + * + * @see isBogus() + * @stable ICU 2.0 + */ + void setToBogus(); + + /** + * Set the character at the specified offset to the specified character. + * @param offset A valid offset into the text of the character to set + * @param ch The new character + * @return A reference to this + * @stable ICU 2.0 + */ + UnicodeString& setCharAt(int32_t offset, + char16_t ch); + + + /* Append operations */ + + /** + * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString + * object. + * @param ch the code unit to be appended + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (char16_t ch); + + /** + * Append operator. Append the code point <TT>ch</TT> to the UnicodeString + * object. + * @param ch the code point to be appended + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (UChar32 ch); + + /** + * Append operator. Append the characters in <TT>srcText</TT> to the + * UnicodeString object. <TT>srcText</TT> is not modified.
+ * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& operator+= (const UnicodeString& srcText); + + /** + * Append the characters + * in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the + * UnicodeString object at offset <TT>start</TT>. <TT>srcText</TT> + * is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcText</TT> in + * the append string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Append the characters in <TT>srcText</TT> to the UnicodeString object. + * <TT>srcText</TT> is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const UnicodeString& srcText); + + /** + * Append the characters in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString + * object at offset + * <TT>start</TT>. <TT>srcChars</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcStart the offset into <TT>srcChars</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcChars</TT> in + * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Append the characters in <TT>srcChars</TT> to the UnicodeString object + * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. 
+ * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in <TT>srcChars</TT>; + * can be -1 if <TT>srcChars</TT> is NUL-terminated + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(ConstChar16Ptr srcChars, + int32_t srcLength); + + /** + * Append the code unit <TT>srcChar</TT> to the UnicodeString object. + * @param srcChar the code unit to append + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& append(char16_t srcChar); + + /** + * Append the code point <TT>srcChar</TT> to the UnicodeString object. + * @param srcChar the code point to append + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& append(UChar32 srcChar); + + + /* Insert operations */ + + /** + * Insert the characters in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString + * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified. + * @param start the offset where the insertion begins + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcText</TT> in + * the insert string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Insert the characters in <TT>srcText</TT> into the UnicodeString object + * at offset <TT>start</TT>. <TT>srcText</TT> is not modified. 
+ * @param start the offset where the insertion begins + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const UnicodeString& srcText); + + /** + * Insert the characters in <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString + * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. + * @param start the offset at which the insertion begins + * @param srcChars the source for the new characters + * @param srcStart the offset into <TT>srcChars</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcChars</TT> + * in the insert string + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Insert the characters in <TT>srcChars</TT> into the UnicodeString object + * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified. + * @param start the offset where the insertion begins + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + ConstChar16Ptr srcChars, + int32_t srcLength); + + /** + * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at + * offset <TT>start</TT>. + * @param start the offset at which the insertion occurs + * @param srcChar the code unit to insert + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + char16_t srcChar); + + /** + * Insert the code point <TT>srcChar</TT> into the UnicodeString object at + * offset <TT>start</TT>. 
+ * @param start the offset at which the insertion occurs + * @param srcChar the code point to insert + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& insert(int32_t start, + UChar32 srcChar); + + + /* Replace operations */ + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in + * <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). + * <TT>srcText</TT> is not modified. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcText</TT> in + * the replace string + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) + * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is + * not modified. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, + int32_t length, + const UnicodeString& srcText); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in + * <TT>srcChars</TT> in the range + * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT> + * is not modified. 
+ * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcStart the offset into <TT>srcChars</TT> where new characters + * will be obtained + * @param srcLength the number of characters in <TT>srcChars</TT> + * in the replace string + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the characters in + * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified. + * @param start the offset at which the replace operation begins + * @param length number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcChars the source for the new characters + * @param srcLength the number of Unicode characters in srcChars + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + ConstChar16Ptr srcChars, + int32_t srcLength); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the code unit + * <TT>srcChar</TT>. + * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcChar the new code unit + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replace(int32_t start, + int32_t length, + char16_t srcChar); + + /** + * Replace the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) with the code point + * <TT>srcChar</TT>. 
+ * @param start the offset at which the replace operation begins + * @param length the number of characters to replace. The character at + * <TT>start + length</TT> is not modified. + * @param srcChar the new code point + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); + + /** + * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) + * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified. + * @param start the offset at which the replace operation begins + * @param limit the offset immediately following the replace range + * @param srcText the source for the new characters + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText); + + /** + * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>) + * with the characters in <TT>srcText</TT> in the range + * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified. + * @param start the offset at which the replace operation begins + * @param limit the offset immediately following the replace range + * @param srcText the source for the new characters + * @param srcStart the offset into <TT>srcText</TT> where new characters + * will be obtained + * @param srcLimit the offset immediately following the range to copy + * in <TT>srcText</TT> + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit); + + /** + * Replace a substring of this object with the given text. + * @param start the beginning index, inclusive; <code>0 <= start + * <= limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit + * <= length()</code>.
+ * @param text the text to replace characters <code>start</code> + * to <code>limit - 1</code> + * @stable ICU 2.0 + */ + virtual void handleReplaceBetween(int32_t start, + int32_t limit, + const UnicodeString& text); + + /** + * Replaceable API + * @return TRUE if it has MetaData + * @stable ICU 2.4 + */ + virtual UBool hasMetaData() const; + + /** + * Copy a substring of this object, retaining attribute (out-of-band) + * information. This method is used to duplicate or reorder substrings. + * The destination index must not overlap the source range. + * + * @param start the beginning index, inclusive; <code>0 <= start <= + * limit</code>. + * @param limit the ending index, exclusive; <code>start <= limit <= + * length()</code>. + * @param dest the destination index. The characters from + * <code>start..limit-1</code> will be copied to <code>dest</code>. + * Implementations of this method may assume that <code>dest <= start || + * dest >= limit</code>. + * @stable ICU 2.0 + */ + virtual void copy(int32_t start, int32_t limit, int32_t dest); + + /* Search and replace operations */ + + /** + * Replace all occurrences of characters in oldText with the characters + * in newText + * @param oldText the text containing the search text + * @param newText the text containing the replacement text + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& findAndReplace(const UnicodeString& oldText, + const UnicodeString& newText); + + /** + * Replace all occurrences of characters in oldText with characters + * in newText + * in the range [<TT>start</TT>, <TT>start + length</TT>). 
+ * @param start the start of the range in which replace will be performed + * @param length the length of the range in which replace will be performed + * @param oldText the text containing the search text + * @param newText the text containing the replacement text + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& findAndReplace(int32_t start, + int32_t length, + const UnicodeString& oldText, + const UnicodeString& newText); + + /** + * Replace all occurrences of characters in oldText in the range + * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters + * in newText in the range + * [<TT>newStart</TT>, <TT>newStart + newLength</TT>) + * in the range [<TT>start</TT>, <TT>start + length</TT>). + * @param start the start of the range in which replace will be performed + * @param length the length of the range in which replace will be performed + * @param oldText the text containing the search text + * @param oldStart the start of the search range in <TT>oldText</TT> + * @param oldLength the length of the search range in <TT>oldText</TT> + * @param newText the text containing the replacement text + * @param newStart the start of the replacement range in <TT>newText</TT> + * @param newLength the length of the replacement range in <TT>newText</TT> + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& findAndReplace(int32_t start, + int32_t length, + const UnicodeString& oldText, + int32_t oldStart, + int32_t oldLength, + const UnicodeString& newText, + int32_t newStart, + int32_t newLength); + + + /* Remove operations */ + + /** + * Remove all characters from the UnicodeString object. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& remove(void); + + /** + * Remove the characters in the range + * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
+ * @param start the offset of the first character to remove + * @param length the number of characters to remove + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& remove(int32_t start, + int32_t length = (int32_t)INT32_MAX); + + /** + * Remove the characters in the range + * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object. + * @param start the offset of the first character to remove + * @param limit the offset immediately following the range to remove + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& removeBetween(int32_t start, + int32_t limit = (int32_t)INT32_MAX); + + /** + * Retain only the characters in the range + * [<code>start</code>, <code>limit</code>) from the UnicodeString object. + * Removes characters before <code>start</code> and at and after <code>limit</code>. + * @param start the offset of the first character to retain + * @param limit the offset immediately following the range to retain + * @return a reference to this + * @stable ICU 4.4 + */ + inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); + + /* Length operations */ + + /** + * Pad the start of this UnicodeString with the character <TT>padChar</TT>. + * If the length of this UnicodeString is less than targetLength, + * targetLength - length() copies of padChar will be added to the + * beginning of this UnicodeString. + * @param targetLength the desired length of the string + * @param padChar the character to use for padding. Defaults to + * space (U+0020) + * @return TRUE if the text was padded, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool padLeading(int32_t targetLength, + char16_t padChar = 0x0020); + + /** + * Pad the end of this UnicodeString with the character <TT>padChar</TT>. + * If the length of this UnicodeString is less than targetLength, + * targetLength - length() copies of padChar will be added to the + * end of this UnicodeString.
+ * @param targetLength the desired length of the string + * @param padChar the character to use for padding. Defaults to + * space (U+0020) + * @return TRUE if the text was padded, FALSE otherwise. + * @stable ICU 2.0 + */ + UBool padTrailing(int32_t targetLength, + char16_t padChar = 0x0020); + + /** + * Truncate this UnicodeString to the <TT>targetLength</TT>. + * @param targetLength the desired length of this UnicodeString. + * @return TRUE if the text was truncated, FALSE otherwise + * @stable ICU 2.0 + */ + inline UBool truncate(int32_t targetLength); + + /** + * Trims leading and trailing whitespace from this UnicodeString. + * @return a reference to this + * @stable ICU 2.0 + */ + UnicodeString& trim(void); + + + /* Miscellaneous operations */ + + /** + * Reverse this UnicodeString in place. + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& reverse(void); + + /** + * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in + * this UnicodeString. + * @param start the start of the range to reverse + * @param length the number of characters to reverse + * @return a reference to this + * @stable ICU 2.0 + */ + inline UnicodeString& reverse(int32_t start, + int32_t length); + + /** + * Convert the characters in this to UPPER CASE following the conventions of + * the default locale. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toUpper(void); + + /** + * Convert the characters in this to UPPER CASE following the conventions of + * a specific locale. + * @param locale The locale containing the conventions to use. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toUpper(const Locale& locale); + + /** + * Convert the characters in this to lower case following the conventions of + * the default locale. + * @return A reference to this.
+ * @stable ICU 2.0 + */ + UnicodeString& toLower(void); + + /** + * Convert the characters in this to lower case following the conventions of + * a specific locale. + * @param locale The locale containing the conventions to use. + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString& toLower(const Locale& locale); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecase this string, convenience function using the default locale. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @return A reference to this. + * @stable ICU 2.1 + */ + UnicodeString &toTitle(BreakIterator *titleIter); + + /** + * Titlecase this string. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. 
+ * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @param locale The locale to consider. + * @return A reference to this. + * @stable ICU 2.1 + */ + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); + + /** + * Titlecase this string, with options. + * + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options.) + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (0), then a standard titlecase + * break iterator is opened. + * Otherwise the provided iterator is set to the string's text. + * @param locale The locale to consider. + * @param options Options bit set, usually 0.
See U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * See also ucasemap_open(). + * @return A reference to this. + * @stable ICU 3.8 + */ + UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); + +#endif + + /** + * Case-folds the characters in this string. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @return A reference to this. + * @stable ICU 2.0 + */ + UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); + + //======================================== + // Access to the internal buffer + //======================================== + + /** + * Get a read/write pointer to the internal buffer. + * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, + * writable, and is still owned by the UnicodeString object. + * Calls to getBuffer(minCapacity) must not be nested, and + * must be matched with calls to releaseBuffer(newLength). + * If the string buffer was read-only or shared, + * then it will be reallocated and copied. + * + * An attempted nested call will return 0, and will not further modify the + * state of the UnicodeString object. + * It also returns 0 if the string is bogus. + * + * The actual capacity of the string buffer may be larger than minCapacity. + * getCapacity() returns the actual capacity. + * For many operations, the full capacity should be used to avoid reallocations. + * + * While the buffer is "open" between getBuffer(minCapacity) + * and releaseBuffer(newLength), the following applies: + * - The string length is set to 0.
+ * - Any read API call on the UnicodeString object will behave like on a 0-length string. + * - Any write API call on the UnicodeString object is disallowed and will have no effect. + * - You can read from and write to the returned buffer. + * - The previous string contents will still be in the buffer; + * if you want to use it, then you need to call length() before getBuffer(minCapacity). + * If the length() was greater than minCapacity, then any contents after minCapacity + * may be lost. + * The buffer contents is not NUL-terminated by getBuffer(). + * If length()<getCapacity() then you can terminate it by writing a NUL + * at index length(). + * - You must call releaseBuffer(newLength) in order to + * return to normal UnicodeString operation. + * + * @param minCapacity the minimum number of char16_ts that are to be available + * in the buffer, starting at the returned pointer; + * default to the current string capacity if minCapacity==-1 + * @return a writable pointer to the internal string buffer, + * or nullptr if an error occurs (nested calls, bogus string, out of memory) + * + * @see releaseBuffer + * @see getTerminatedBuffer() + * @stable ICU 2.0 + */ + char16_t *getBuffer(int32_t minCapacity); + + /** + * Release a read/write buffer on a UnicodeString object with an + * "open" getBuffer(minCapacity). + * This function must be called in a matched pair with getBuffer(minCapacity). + * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open". + * + * It will set the string length to newLength, at most to the current capacity. + * If newLength==-1 then it will set the length according to the + * first NUL in the buffer, or to the capacity if there is no NUL. + * + * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
+ * + * @param newLength the new length of the UnicodeString object; + * defaults to the current capacity if newLength is greater than that; + * if newLength==-1, it defaults to u_strlen(buffer) but not more than + * the current capacity of the string + * + * @see getBuffer(int32_t minCapacity) + * @stable ICU 2.0 + */ + void releaseBuffer(int32_t newLength=-1); + + /** + * Get a read-only pointer to the internal buffer. + * This can be called at any time on a valid UnicodeString. + * + * It returns 0 if the string is bogus, or + * during an "open" getBuffer(minCapacity). + * + * It can be called as many times as desired. + * The pointer that it returns will remain valid until the UnicodeString object is modified, + * at which time the pointer is semantically invalidated and must not be used any more. + * + * The capacity of the buffer can be determined with getCapacity(). + * The part after length() may or may not be initialized and valid, + * depending on the history of the UnicodeString object. + * + * The buffer contents is (probably) not NUL-terminated. + * You can check if it is with + * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>. + * (See getTerminatedBuffer().) + * + * The buffer may reside in read-only memory. Its contents must not + * be modified. + * + * @return a read-only pointer to the internal string buffer, + * or nullptr if the string is empty or bogus + * + * @see getBuffer(int32_t minCapacity) + * @see getTerminatedBuffer() + * @stable ICU 2.0 + */ + inline const char16_t *getBuffer() const; + + /** + * Get a read-only pointer to the internal buffer, + * making sure that it is NUL-terminated. + * This can be called at any time on a valid UnicodeString. + * + * It returns 0 if the string is bogus, or + * during an "open" getBuffer(minCapacity), or if the buffer cannot + * be NUL-terminated (because memory allocation failed). + * + * It can be called as many times as desired. 
+ * The pointer that it returns will remain valid until the UnicodeString object is modified, + * at which time the pointer is semantically invalidated and must not be used any more. + * + * The capacity of the buffer can be determined with getCapacity(). + * The part after length()+1 may or may not be initialized and valid, + * depending on the history of the UnicodeString object. + * + * The buffer contents is guaranteed to be NUL-terminated. + * getTerminatedBuffer() may reallocate the buffer if a terminating NUL + * is written. + * For this reason, this function is not const, unlike getBuffer(). + * Note that a UnicodeString may also contain NUL characters as part of its contents. + * + * The buffer may reside in read-only memory. Its contents must not + * be modified. + * + * @return a read-only pointer to the internal string buffer, + * or 0 if the string is empty or bogus + * + * @see getBuffer(int32_t minCapacity) + * @see getBuffer() + * @stable ICU 2.2 + */ + const char16_t *getTerminatedBuffer(); + + //======================================== + // Constructors + //======================================== + + /** Construct an empty UnicodeString. + * @stable ICU 2.0 + */ + inline UnicodeString(); + + /** + * Construct a UnicodeString with capacity to hold <TT>capacity</TT> char16_ts + * @param capacity the number of char16_ts this UnicodeString should hold + * before a resize is necessary; if count is greater than 0 and count + * code points c take up more space than capacity, then capacity is adjusted + * accordingly. + * @param c is used to initially fill the string + * @param count specifies how many code points c are to be written in the + * string + * @stable ICU 2.0 + */ + UnicodeString(int32_t capacity, UChar32 c, int32_t count); + + /** + * Single char16_t (code unit) constructor. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> + * on the compiler command line or similar. 
+ * @param ch the character to place in the UnicodeString + * @stable ICU 2.0 + */ + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); + + /** + * Single UChar32 (code point) constructor. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param ch the character to place in the UnicodeString + * @stable ICU 2.0 + */ + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); + + /** + * char16_t* constructor. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param text The characters to place in the UnicodeString. <TT>text</TT> + * must be NULL (U+0000) terminated. + * @stable ICU 2.0 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); + +#if !U_CHAR16_IS_TYPEDEF + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + + /** + * nullptr_t constructor. 
+ * Effectively the same as the default constructor, makes an empty string object. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param text nullptr + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); + + /** + * char16_t* constructor. + * @param text The characters to place in the UnicodeString. + * @param textLength The number of Unicode characters in <TT>text</TT> + * to copy. + * @stable ICU 2.0 + */ + UnicodeString(const char16_t *text, + int32_t textLength); + +#if !U_CHAR16_IS_TYPEDEF + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text UTF-16 string + * @param length string length + * @stable ICU 59 + */ + UnicodeString(const uint16_t *text, int32_t length) : + UnicodeString(ConstChar16Ptr(text), length) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text NUL-terminated UTF-16 string + * @param length string length + * @stable ICU 59 + */ + UnicodeString(const wchar_t *text, int32_t length) : + UnicodeString(ConstChar16Ptr(text), length) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param text nullptr + * @param length ignored + * @stable ICU 59 + */ + inline UnicodeString(const std::nullptr_t text, int32_t length); + + /** + * Readonly-aliasing char16_t* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has copy-on-write semantics: + * When the string is modified, then the buffer is first copied into + * newly allocated memory. + * The aliased buffer is never modified. 
+ * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, + * so that both strings then alias the same readonly-text. + * + * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated. + * This must be true if <code>textLength==-1</code>. + * @param text The characters to alias for the UnicodeString. + * @param textLength The number of Unicode characters in <code>text</code> to alias. + * If -1, then this constructor will determine the length + * by calling <code>u_strlen()</code>. + * @stable ICU 2.0 + */ + UnicodeString(UBool isTerminated, + ConstChar16Ptr text, + int32_t textLength); + + /** + * Writable-aliasing char16_t* constructor. + * The text will be used for the UnicodeString object, but + * it will not be released when the UnicodeString is destroyed. + * This has write-through semantics: + * For as long as the capacity of the buffer is sufficient, write operations + * will directly affect the buffer. When more capacity is necessary, then + * a new buffer will be allocated and the contents copied as with regularly + * constructed strings. + * In an assignment to another UnicodeString, the buffer will be copied. + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same + * as the string buffer itself and will in this case not copy the contents. + * + * @param buffer The characters to alias for the UnicodeString. + * @param buffLength The number of Unicode characters in <code>buffer</code> to alias. + * @param buffCapacity The size of <code>buffer</code> in char16_ts. + * @stable ICU 2.0 + */ + UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); + +#if !U_CHAR16_IS_TYPEDEF + /** + * Writable-aliasing uint16_t * constructor. + * Delegates to UnicodeString(char16_t *, int32_t, int32_t).
+ * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @stable ICU 59 + */ + UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Writable-aliasing wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @stable ICU 59 + */ + UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + + /** + * Writable-aliasing nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param buffer nullptr + * @param buffLength ignored + * @param buffCapacity ignored + * @stable ICU 59 + */ + inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); + +#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION + + /** + * char* constructor. + * Uses the default converter (and thus depends on the ICU conversion code) + * unless U_CHARSET_IS_UTF8 is set to 1. + * + * For ASCII (really "invariant character") strings it is more efficient to use + * the constructor that takes a US_INV (for its enum EInvariant). + * For ASCII (invariant-character) string literals, see UNICODE_STRING and + * UNICODE_STRING_SIMPLE. + * + * It is recommended to mark this constructor "explicit" by + * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code> + * on the compiler command line or similar. + * @param codepageData an array of bytes, null-terminated, + * in the platform's default codepage.
+ * @stable ICU 2.0 + * @see UNICODE_STRING + * @see UNICODE_STRING_SIMPLE + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData); + + /** + * char* constructor. + * Uses the default converter (and thus depends on the ICU conversion code) + * unless U_CHARSET_IS_UTF8 is set to 1. + * @param codepageData an array of bytes in the platform's default codepage. + * @param dataLength The number of bytes in <TT>codepageData</TT>. + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, int32_t dataLength); + +#endif + +#if !UCONFIG_NO_CONVERSION + + /** + * char* constructor. + * @param codepageData an array of bytes, null-terminated + * @param codepage the encoding of <TT>codepageData</TT>. The special + * value 0 for <TT>codepage</TT> indicates that the text is in the + * platform's default codepage. + * + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + * Recommendation: For invariant-character strings use the constructor + * UnicodeString(const char *src, int32_t length, enum EInvariant inv) + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, const char *codepage); + + /** + * char* constructor. + * @param codepageData an array of bytes. + * @param dataLength The number of bytes in <TT>codepageData</TT>. + * @param codepage the encoding of <TT>codepageData</TT>. The special + * value 0 for <TT>codepage</TT> indicates that the text is in the + * platform's default codepage. + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. 
+ * Recommendation: For invariant-character strings use the constructor + * UnicodeString(const char *src, int32_t length, enum EInvariant inv) + * because it avoids object code dependencies of UnicodeString on + * the conversion code. + * + * @stable ICU 2.0 + */ + UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage); + + /** + * char * / UConverter constructor. + * This constructor uses an existing UConverter object to + * convert the codepage string to Unicode and construct a UnicodeString + * from that. + * + * The converter is reset at first. + * If the error code indicates a failure before this constructor is called, + * or if an error occurs during conversion or construction, + * then the string will be bogus. + * + * This function avoids the overhead of opening and closing a converter if + * multiple strings are constructed. + * + * @param src input codepage string + * @param srcLength length of the input string, can be -1 for NUL-terminated strings + * @param cnv converter object (ucnv_resetToUnicode() will be called), + * can be NULL for the default converter + * @param errorCode normal ICU error code + * @stable ICU 2.0 + */ + UnicodeString( + const char *src, int32_t srcLength, + UConverter *cnv, + UErrorCode &errorCode); + +#endif + + /** + * Constructs a Unicode string from an invariant-character char * string. + * About invariant characters see utypes.h. + * This constructor has no runtime dependency on conversion code and is + * therefore recommended over ones taking a charset name string + * (where the empty string "" indicates invariant-character conversion). + * + * Use the macro US_INV as the third, signature-distinguishing parameter. + * + * For example: + * \code + * void fn(const char *s) { + * UnicodeString ustr(s, -1, US_INV); + * // use ustr ... + * } + * \endcode + * + * @param src String using only invariant characters. + * @param length Length of src, or -1 if NUL-terminated.
+ * @param inv Signature-distinguishing parameter, use US_INV. + * + * @see US_INV + * @stable ICU 3.2 + */ + UnicodeString(const char *src, int32_t length, enum EInvariant inv); + + + /** + * Copy constructor. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * By contrast, the fastCopyFrom() function implements the old, + * more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. + * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. + * + * @param that The UnicodeString object to copy. + * @stable ICU 2.0 + * @see fastCopyFrom + */ + UnicodeString(const UnicodeString& that); + + /** + * Move constructor, might leave src in bogus state. + * This string will have the same contents and state that the source string had. + * @param src source string + * @stable ICU 56 + */ + UnicodeString(UnicodeString &&src) U_NOEXCEPT; + + /** + * 'Substring' constructor from tail of source string. + * @param src The UnicodeString object to copy. + * @param srcStart The offset into <tt>src</tt> at which to start copying. + * @stable ICU 2.2 + */ + UnicodeString(const UnicodeString& src, int32_t srcStart); + + /** + * 'Substring' constructor from subrange of source string. + * @param src The UnicodeString object to copy. + * @param srcStart The offset into <tt>src</tt> at which to start copying. + * @param srcLength The number of characters from <tt>src</tt> to copy. + * @stable ICU 2.2 + */ + UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); + + /** + * Clone this object, an instance of a subclass of Replaceable. + * Clones can be used concurrently in multiple threads. + * If a subclass does not implement clone(), or if an error occurs, + * then NULL is returned.
+ * The clone functions in all subclasses return a pointer to a Replaceable + * because some compilers do not support covariant (same-as-this) + * return types; cast to the appropriate subclass if necessary. + * The caller must delete the clone. + * + * @return a clone of this object + * + * @see Replaceable::clone + * @see getDynamicClassID + * @stable ICU 2.6 + */ + virtual Replaceable *clone() const; + + /** Destructor. + * @stable ICU 2.0 + */ + virtual ~UnicodeString(); + + /** + * Create a UnicodeString from a UTF-8 string. + * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. + * Calls u_strFromUTF8WithSub(). + * + * @param utf8 UTF-8 input string. + * Note that a StringPiece can be implicitly constructed + * from a std::string or a NUL-terminated const char * string. + * @return A UnicodeString with equivalent UTF-16 contents. + * @see toUTF8 + * @see toUTF8String + * @stable ICU 4.2 + */ + static UnicodeString fromUTF8(StringPiece utf8); + + /** + * Create a UnicodeString from a UTF-32 string. + * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. + * Calls u_strFromUTF32WithSub(). + * + * @param utf32 UTF-32 input string. Must not be NULL. + * @param length Length of the input string, or -1 if NUL-terminated. + * @return A UnicodeString with equivalent UTF-16 contents. + * @see toUTF32 + * @stable ICU 4.2 + */ + static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); + + /* Miscellaneous operations */ + + /** + * Unescape a string of characters and return a string containing + * the result. 
The following escape sequences are recognized: + * + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] + * \\Uhhhhhhhh 8 hex digits + * \\xhh 1-2 hex digits + * \\ooo 1-3 octal digits; o in [0-7] + * \\cX control-X; X is masked with 0x1F + * + * as well as the standard ANSI C escapes: + * + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + * + * Anything else following a backslash is generically escaped. For + * example, "[a\\-z]" returns "[a-z]". + * + * If an escape sequence is ill-formed, this method returns an empty + * string. An example of an ill-formed sequence is "\\u" followed by + * fewer than 4 hex digits. + * + * This function is similar to u_unescape() but not identical to it. + * The latter takes a source char*, so it does escape recognition + * and also invariant conversion. + * + * @return a string with backslash escapes interpreted, or an + * empty string on error. + * @see UnicodeString#unescapeAt() + * @see u_unescape() + * @see u_unescapeAt() + * @stable ICU 2.0 + */ + UnicodeString unescape() const; + + /** + * Unescape a single escape sequence and return the represented + * character. See unescape() for a listing of the recognized escape + * sequences. The character at offset-1 is assumed (without + * checking) to be a backslash. If the escape sequence is + * ill-formed, or the offset is out of range, U_SENTINEL=-1 is + * returned. + * + * @param offset an input output parameter. On input, it is the + * offset into this string where the escape sequence is located, + * after the initial backslash. On output, it is advanced after the + * last character parsed. On error, it is not advanced at all. + * @return the character represented by the escape sequence at + * offset, or U_SENTINEL=-1 on error. 
+ * @see UnicodeString#unescape() + * @see u_unescape() + * @see u_unescapeAt() + * @stable ICU 2.0 + */ + UChar32 unescapeAt(int32_t &offset) const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @stable ICU 2.2 + */ + static UClassID U_EXPORT2 getStaticClassID(); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + + //======================================== + // Implementation methods + //======================================== + +protected: + /** + * Implement Replaceable::getLength() (see jitterbug 1027). + * @stable ICU 2.4 + */ + virtual int32_t getLength() const; + + /** + * The change in Replaceable to use virtual getCharAt() allows + * UnicodeString::charAt() to be inline again (see jitterbug 709). + * @stable ICU 2.4 + */ + virtual char16_t getCharAt(int32_t offset) const; + + /** + * The change in Replaceable to use virtual getChar32At() allows + * UnicodeString::char32At() to be inline again (see jitterbug 709). + * @stable ICU 2.4 + */ + virtual UChar32 getChar32At(int32_t offset) const; + +private: + // For char* constructors. Could be made public. + UnicodeString &setToUTF8(StringPiece utf8); + // For extract(char*). + // We could make a toUTF8(target, capacity, errorCode) public but not + // this version: New API will be cleaner if we make callers create substrings + // rather than having start+length on every method, + // and it should take a UErrorCode&. + int32_t + toUTF8(int32_t start, int32_t len, + char *target, int32_t capacity) const; + + /** + * Internal string contents comparison, called by operator==. + * Requires: this & text not bogus and have same lengths. 
+ */ + UBool doEquals(const UnicodeString &text, int32_t len) const; + + inline int8_t + doCompare(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + int8_t doCompare(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + inline int8_t + doCompareCodePointOrder(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const; + + int8_t doCompareCodePointOrder(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const; + + inline int8_t + doCaseCompare(int32_t start, + int32_t length, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + int8_t + doCaseCompare(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const; + + int32_t doIndexOf(char16_t c, + int32_t start, + int32_t length) const; + + int32_t doIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + int32_t doLastIndexOf(char16_t c, + int32_t start, + int32_t length) const; + + int32_t doLastIndexOf(UChar32 c, + int32_t start, + int32_t length) const; + + void doExtract(int32_t start, + int32_t length, + char16_t *dst, + int32_t dstStart) const; + + inline void doExtract(int32_t start, + int32_t length, + UnicodeString& target) const; + + inline char16_t doCharAt(int32_t offset) const; + + UnicodeString& doReplace(int32_t start, + int32_t length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength); + + UnicodeString& doReplace(int32_t start, + int32_t length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength); + + UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); + UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); + + UnicodeString& 
doReverse(int32_t start, + int32_t length); + + // calculate hash code + int32_t doHashCode(void) const; + + // get pointer to start of array + // these do not check for kOpenGetBuffer, unlike the public getBuffer() function + inline char16_t* getArrayStart(void); + inline const char16_t* getArrayStart(void) const; + + inline UBool hasShortLength() const; + inline int32_t getShortLength() const; + + // A UnicodeString object (not necessarily its current buffer) + // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). + inline UBool isWritable() const; + + // Is the current buffer writable? + inline UBool isBufferWritable() const; + + // None of the following does releaseArray(). + inline void setZeroLength(); + inline void setShortLength(int32_t len); + inline void setLength(int32_t len); + inline void setToEmpty(); + inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags + + // allocate the array; result may be the stack buffer + // sets refCount to 1 if appropriate + // sets fArray, fCapacity, and flags + // sets length to 0 + // returns boolean for success or failure + UBool allocate(int32_t capacity); + + // release the array if owned + void releaseArray(void); + + // turn a bogus string into an empty one + void unBogus(); + + // implements assignment operator, copy constructor, and fastCopyFrom() + UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); + + // Copies just the fields without memory management. + void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT; + + // Pin start and limit to acceptable values. + inline void pinIndex(int32_t& start) const; + inline void pinIndices(int32_t& start, + int32_t& length) const; + +#if !UCONFIG_NO_CONVERSION + + /* Internal extract() using UConverter. 
*/ + int32_t doExtract(int32_t start, int32_t length, + char *dest, int32_t destCapacity, + UConverter *cnv, + UErrorCode &errorCode) const; + + /* + * Real constructor for converting from codepage data. + * It assumes that it is called with !fRefCounted. + * + * If <code>codepage==0</code>, then the default converter + * is used for the platform encoding. + * If <code>codepage</code> is an empty string (<code>""</code>), + * then a simple conversion is performed on the codepage-invariant + * subset ("invariant characters") of the platform encoding. See utypes.h. + */ + void doCodepageCreate(const char *codepageData, + int32_t dataLength, + const char *codepage); + + /* + * Worker function for creating a UnicodeString from + * a codepage string using a UConverter. + */ + void + doCodepageCreate(const char *codepageData, + int32_t dataLength, + UConverter *converter, + UErrorCode &status); + +#endif + + /* + * This function is called when write access to the array + * is necessary. + * + * We need to make a copy of the array if + * the buffer is read-only, or + * the buffer is refCounted (shared), and refCount>1, or + * the buffer is too small. + * + * Return FALSE if memory could not be allocated. + */ + UBool cloneArrayIfNeeded(int32_t newCapacity = -1, + int32_t growCapacity = -1, + UBool doCopyArray = TRUE, + int32_t **pBufferToDelete = 0, + UBool forceClone = FALSE); + + /** + * Common function for UnicodeString case mappings. + * The stringCaseMapper has the same type UStringCaseMapper + * as in ustr_imp.h for ustrcase_map(). + */ + UnicodeString & + caseMap(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + BreakIterator *iter, +#endif + UStringCaseMapper *stringCaseMapper); + + // ref counting + void addRef(void); + int32_t removeRef(void); + int32_t refCount(void) const; + + // constants + enum { + /** + * Size of stack buffer for short strings. + * Must be at least U16_MAX_LENGTH for the single-code point constructor to work. 
+ * @see UNISTR_OBJECT_SIZE + */ + US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR, + kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) + kInvalidHashCode=0, // invalid hash code + kEmptyHashCode=1, // hash code for empty string + + // bit flag values for fLengthAndFlags + kIsBogus=1, // this string is bogus, i.e., not valid or NULL + kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields + kRefCounted=4, // there is a refCount field before the characters in fArray + kBufferIsReadonly=8,// do not write to this buffer + kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), + // and releaseBuffer(newLength) must be called + kAllStorageFlags=0x1f, + + kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long + kLength1=1<<kLengthShift, + kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0) + kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength + + // combined values for convenience + kShortString=kUsingStackBuffer, + kLongString=kRefCounted, + kReadonlyAlias=kBufferIsReadonly, + kWritableAlias=0 + }; + + friend class UnicodeStringAppendable; + + union StackBufferOrFields; // forward declaration necessary before friend declaration + friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion + + /* + * The following are all the class fields that are stored + * in each UnicodeString object. + * Note that UnicodeString has virtual functions, + * therefore there is an implicit vtable pointer + * as the first real field. + * The fields should be aligned such that no padding is necessary. + * On 32-bit machines, the size should be 32 bytes, + * on 64-bit machines (8-byte pointers), it should be 40 bytes. + * + * We use a hack to achieve this. 
+ * + * With at least some compilers, each of the following is forced to + * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], + * rounded up with additional padding if the fields do not already fit that requirement: + * - sizeof(class UnicodeString) + * - offsetof(UnicodeString, fUnion) + * - sizeof(fUnion) + * - sizeof(fStackFields) + * + * We optimize for the longest possible internal buffer for short strings. + * fUnion.fStackFields begins with 2 bytes for storage flags + * and the length of relatively short strings, + * followed by the buffer for short string contents. + * There is no padding inside fStackFields. + * + * Heap-allocated and aliased strings use fUnion.fFields. + * Both fStackFields and fFields must begin with the same fields for flags and short length, + * that is, those must have the same memory offsets inside the object, + * because the flags must be inspected in order to decide which half of fUnion is being used. + * We assume that the compiler does not reorder the fields. + * + * (Padding at the end of fFields is ok: + * As long as it is no larger than fStackFields, it is not wasted space.) + * + * For some of the history of the UnicodeString class fields layout, see + * - ICU ticket #11551 "longer UnicodeString contents in stack buffer" + * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays" + * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?" + */ + // (implicit) *vtable; + union StackBufferOrFields { + // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used. + // Each struct of the union must begin with fLengthAndFlags. 
+ struct { + int16_t fLengthAndFlags; // bit fields: see constants above + char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings + } fStackFields; + struct { + int16_t fLengthAndFlags; // bit fields: see constants above + int32_t fLength; // number of characters in fArray if > kMaxShortLength; else undefined + int32_t fCapacity; // capacity of fArray (in char16_ts) + // array pointer last to minimize padding for machines with P128 data model + // or pointer sizes that are not a power of 2 + char16_t *fArray; // the Unicode data + } fFields; + } fUnion; +}; + +/** + * Create a new UnicodeString with the concatenation of two others. + * + * @param s1 The first string to be copied to the new one. + * @param s2 The second string to be copied to the new one, after s1. + * @return UnicodeString(s1).append(s2) + * @stable ICU 2.8 + */ +U_COMMON_API UnicodeString U_EXPORT2 +operator+ (const UnicodeString &s1, const UnicodeString &s2); + +//======================================== +// Inline members +//======================================== + +//======================================== +// Privates +//======================================== + +/* Clamps start in place so that 0 <= start <= length(). */ +inline void +UnicodeString::pinIndex(int32_t& start) const +{ + // clamp start into [0, length()] + if(start < 0) { + start = 0; + } else if(start > length()) { + start = length(); + } +} + +/* Clamps a start/length pair in place so that the range lies inside this string. */ +inline void +UnicodeString::pinIndices(int32_t& start, + int32_t& _length) const +{ + // clamp start into [0, len], then _length into [0, len - start] + int32_t len = length(); + if(start < 0) { + start = 0; + } else if(start > len) { + start = len; + } + if(_length < 0) { + _length = 0; + } else if(_length > (len - start)) { + _length = (len - start); + } +} + +/* Returns the stack buffer if kUsingStackBuffer is set, otherwise fFields.fArray. */ +inline char16_t* +UnicodeString::getArrayStart() { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? + fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; +} + +/* const overload of getArrayStart(); same selection between stack buffer and fArray. */ +inline const char16_t* +UnicodeString::getArrayStart() const { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
+ fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; +} + +//======================================== +// Default constructor and nullptr constructors: +// each one only sets fLengthAndFlags=kShortString, +// i.e. an empty string that uses the stack buffer +//======================================== + +inline +UnicodeString::UnicodeString() { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +//======================================== +// Read-only implementation methods +//======================================== +/* True when fLengthAndFlags is non-negative, i.e. the length is stored in its upper bits. */ +inline UBool +UnicodeString::hasShortLength() const { + return fUnion.fFields.fLengthAndFlags>=0; +} + +/* Extracts the short length by shifting out the kLengthShift storage-flag bits. */ +inline int32_t +UnicodeString::getShortLength() const { + // fLengthAndFlags must be non-negative -> short length >= 0 + // and arithmetic or logical shift does not matter. + return fUnion.fFields.fLengthAndFlags>>kLengthShift; +} + +/* Returns the short length when one is stored, otherwise fFields.fLength. */ +inline int32_t +UnicodeString::length() const { + return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; +} + +/* Capacity is US_STACKBUF_SIZE for the stack buffer, otherwise fFields.fCapacity. */ +inline int32_t +UnicodeString::getCapacity() const { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? 
+ US_STACKBUF_SIZE : fUnion.fFields.fCapacity; +} + +/* Forwards to doHashCode(). */ +inline int32_t +UnicodeString::hashCode() const +{ return doHashCode(); } + +/* Nonzero iff the kIsBogus flag is set. */ +inline UBool +UnicodeString::isBogus() const +{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } + +/* True unless the string is bogus or kOpenGetBuffer is set. */ +inline UBool +UnicodeString::isWritable() const +{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } + +/* True when none of kOpenGetBuffer, kIsBogus, kBufferIsReadonly is set and the buffer is either not ref-counted or has refCount()==1. */ +inline UBool +UnicodeString::isBufferWritable() const +{ + return (UBool)( + !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && + (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); +} + +/* Returns nullptr for a bogus string or while kOpenGetBuffer is set; otherwise the stack buffer or fArray. */ +inline const char16_t * +UnicodeString::getBuffer() const { + if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { + return nullptr; + } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { + return fUnion.fStackFields.fBuffer; + } else { + return fUnion.fFields.fArray; + } +} + +//======================================== +// Read-only alias methods +//======================================== +/* Handles a bogus srcText, otherwise pins the source range and forwards to the char16_t* overload of doCompare(). */ +inline int8_t +UnicodeString::doCompare(int32_t start, + int32_t thisLength, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + srcText.pinIndices(srcStart, srcLength); + return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); + } +} + +/* Two bogus strings are equal; a bogus and a non-bogus string are not; otherwise the lengths must match before doEquals() compares the contents. */ +inline UBool +UnicodeString::operator== (const UnicodeString& text) const +{ + if(isBogus()) { + return text.isBogus(); + } else { + int32_t len = length(), textLength = text.length(); + return !text.isBogus() && len == textLength && doEquals(text, len); + } +} + +/* Negation of operator==. */ +inline UBool +UnicodeString::operator!= (const UnicodeString& text) const +{ return (! 
operator==(text)); } + +/* Relational operators: each compares the full strings through doCompare(). */ +/* NOTE(review): the == 1 and == -1 tests assume doCompare() returns exactly -1, 0 or +1; confirm against its definition. */ +inline UBool +UnicodeString::operator> (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) == 1; } + +inline UBool +UnicodeString::operator< (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) == -1; } + +inline UBool +UnicodeString::operator>= (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) != -1; } + +inline UBool +UnicodeString::operator<= (const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()) != 1; } + +/* compare() overloads: thin forwarders that fill in the missing start/length arguments for doCompare(). */ +inline int8_t +UnicodeString::compare(const UnicodeString& text) const +{ return doCompare(0, length(), text, 0, text.length()); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const UnicodeString& srcText) const +{ return doCompare(start, _length, srcText, 0, srcText.length()); } + +inline int8_t +UnicodeString::compare(ConstChar16Ptr srcChars, + int32_t srcLength) const +{ return doCompare(0, length(), srcChars, 0, srcLength); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(start, _length, srcText, srcStart, srcLength); } + +/* Here _length is passed both as this string's range length and as the srcChars length. */ +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const char16_t *srcChars) const +{ return doCompare(start, _length, srcChars, 0, _length); } + +inline int8_t +UnicodeString::compare(int32_t start, + int32_t _length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(start, _length, srcChars, srcStart, srcLength); } + +/* Converts the [start, limit) and [srcStart, srcLimit) ranges into start/length form. */ +inline int8_t +UnicodeString::compareBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const +{ return doCompare(start, limit - start, + srcText, srcStart, srcLimit - srcStart); } + +inline int8_t +UnicodeString::doCompareCodePointOrder(int32_t start, + int32_t 
thisLength, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + /* pin the source range, then forward to the char16_t* overload */ + srcText.pinIndices(srcStart, srcLength); + return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); + } +} + +/* compareCodePointOrder() overloads: thin forwarders that fill in the missing start/length arguments for doCompareCodePointOrder(). */ +inline int8_t +UnicodeString::compareCodePointOrder(const UnicodeString& text) const +{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const UnicodeString& srcText) const +{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } + +inline int8_t +UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, + int32_t srcLength) const +{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } + +/* Here _length is passed both as this string's range length and as the srcChars length. */ +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const char16_t *srcChars) const +{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } + +inline int8_t +UnicodeString::compareCodePointOrder(int32_t start, + int32_t _length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const +{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } + +/* Converts the [start, limit) and [srcStart, srcLimit) ranges into start/length form. */ +inline int8_t +UnicodeString::compareCodePointOrderBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) const +{ return doCompareCodePointOrder(start, limit - start, + srcText, srcStart, srcLimit - srcStart); } + +inline int8_t +UnicodeString::doCaseCompare(int32_t start, + int32_t 
srcStart, + int32_t srcLength, + uint32_t options) const +{ + if(srcText.isBogus()) { + return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise + } else { + /* pin the source range, then forward to the char16_t* overload */ + srcText.pinIndices(srcStart, srcLength); + return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); + } +} + +/* caseCompare() overloads: thin forwarders that fill in the missing start/length arguments for doCaseCompare() and pass options through unchanged. */ +inline int8_t +UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { + return doCaseCompare(0, length(), text, 0, text.length(), options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const UnicodeString &srcText, + uint32_t options) const { + return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); +} + +inline int8_t +UnicodeString::caseCompare(ConstChar16Ptr srcChars, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(0, length(), srcChars, 0, srcLength, options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); +} + +/* Here _length is passed both as this string's range length and as the srcChars length. */ +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const char16_t *srcChars, + uint32_t options) const { + return doCaseCompare(start, _length, srcChars, 0, _length, options); +} + +inline int8_t +UnicodeString::caseCompare(int32_t start, + int32_t _length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength, + uint32_t options) const { + return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); +} + +/* Converts the [start, limit) and [srcStart, srcLimit) ranges into start/length form. */ +inline int8_t +UnicodeString::caseCompareBetween(int32_t start, + int32_t limit, + const UnicodeString &srcText, + int32_t srcStart, + int32_t srcLimit, + uint32_t options) const { + return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); +} + +inline int32_t +UnicodeString::indexOf(const UnicodeString& srcText, + int32_t srcStart, + 
int32_t srcLength, + int32_t start, + int32_t _length) const +{ + /* returns -1 when srcText is bogus or its pinned range is empty */ + if(!srcText.isBogus()) { + srcText.pinIndices(srcStart, srcLength); + if(srcLength > 0) { + return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); + } + } + return -1; +} + +/* indexOf() overloads: forwarders that fill in the missing ranges. Overloads taking only a start pin it and search up to length(). */ +inline int32_t +UnicodeString::indexOf(const UnicodeString& text) const +{ return indexOf(text, 0, text.length(), 0, length()); } + +inline int32_t +UnicodeString::indexOf(const UnicodeString& text, + int32_t start) const { + pinIndex(start); + return indexOf(text, 0, text.length(), start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(const UnicodeString& text, + int32_t start, + int32_t _length) const +{ return indexOf(text, 0, text.length(), start, _length); } + +inline int32_t +UnicodeString::indexOf(const char16_t *srcChars, + int32_t srcLength, + int32_t start) const { + pinIndex(start); + return indexOf(srcChars, 0, srcLength, start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(ConstChar16Ptr srcChars, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ return indexOf(srcChars, 0, srcLength, start, _length); } + +/* Single code unit / code point searches forward to the matching doIndexOf() overload. */ +inline int32_t +UnicodeString::indexOf(char16_t c, + int32_t start, + int32_t _length) const +{ return doIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::indexOf(UChar32 c, + int32_t start, + int32_t _length) const +{ return doIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::indexOf(char16_t c) const +{ return doIndexOf(c, 0, length()); } + +inline int32_t +UnicodeString::indexOf(UChar32 c) const +{ return indexOf(c, 0, length()); } + +inline int32_t +UnicodeString::indexOf(char16_t c, + int32_t start) const { + pinIndex(start); + return doIndexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::indexOf(UChar32 c, + int32_t start) const { + pinIndex(start); + return indexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, + int32_t srcLength, 
+ int32_t start, + int32_t _length) const +{ return lastIndexOf(srcChars, 0, srcLength, start, _length); } + +/* lastIndexOf() overloads: forwarders that fill in the missing ranges. Overloads taking only a start pin it and search up to length(). */ +inline int32_t +UnicodeString::lastIndexOf(const char16_t *srcChars, + int32_t srcLength, + int32_t start) const { + pinIndex(start); + return lastIndexOf(srcChars, 0, srcLength, start, length() - start); +} + +/* Returns -1 when srcText is bogus or its pinned range is empty; otherwise forwards to the char16_t* overload. */ +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength, + int32_t start, + int32_t _length) const +{ + if(!srcText.isBogus()) { + srcText.pinIndices(srcStart, srcLength); + if(srcLength > 0) { + return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); + } + } + return -1; +} + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text, + int32_t start, + int32_t _length) const +{ return lastIndexOf(text, 0, text.length(), start, _length); } + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text, + int32_t start) const { + pinIndex(start); + return lastIndexOf(text, 0, text.length(), start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(const UnicodeString& text) const +{ return lastIndexOf(text, 0, text.length(), 0, length()); } + +/* Single code unit / code point searches forward to the matching doLastIndexOf() overload. */ +inline int32_t +UnicodeString::lastIndexOf(char16_t c, + int32_t start, + int32_t _length) const +{ return doLastIndexOf(c, start, _length); } + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c, + int32_t start, + int32_t _length) const { + return doLastIndexOf(c, start, _length); +} + +inline int32_t +UnicodeString::lastIndexOf(char16_t c) const +{ return doLastIndexOf(c, 0, length()); } + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c) const { + return lastIndexOf(c, 0, length()); +} + +inline int32_t +UnicodeString::lastIndexOf(char16_t c, + int32_t start) const { + pinIndex(start); + return doLastIndexOf(c, start, length() - start); +} + +inline int32_t +UnicodeString::lastIndexOf(UChar32 c, + int32_t start) const { + pinIndex(start); + return lastIndexOf(c, start, length() - 
start); +} + +/* True if this string begins with text: compares the first text.length() units of this string with all of text. */ +inline UBool +UnicodeString::startsWith(const UnicodeString& text) const +{ return compare(0, text.length(), text, 0, text.length()) == 0; } + +/* True if this string begins with the given range of srcText. */ +inline UBool +UnicodeString::startsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const +{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } + +/* A negative srcLength means srcChars is NUL-terminated; its length is measured with u_strlen(). */ +inline UBool +UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(toUCharPtr(srcChars)); + } + return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; +} + +/* A negative srcLength means the text is NUL-terminated. The length must be measured from srcChars + srcStart, the first unit that is compared (same as the matching endsWith() overload); measuring from srcChars would make the comparison run srcStart units past the terminator. */ +inline UBool +UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); + } + return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; +} + +/* True if this string ends with text: compares the last text.length() units of this string with all of text. */ +inline UBool +UnicodeString::endsWith(const UnicodeString& text) const +{ return doCompare(length() - text.length(), text.length(), + text, 0, text.length()) == 0; } + +/* The source range is pinned first so that length() - srcLength uses the effective length. */ +inline UBool +UnicodeString::endsWith(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) const { + srcText.pinIndices(srcStart, srcLength); + return doCompare(length() - srcLength, srcLength, + srcText, srcStart, srcLength) == 0; +} + +/* A negative srcLength means srcChars is NUL-terminated; its length is measured with u_strlen(). */ +inline UBool +UnicodeString::endsWith(ConstChar16Ptr srcChars, + int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(toUCharPtr(srcChars)); + } + return doCompare(length() - srcLength, srcLength, + srcChars, 0, srcLength) == 0; +} + +/* A negative srcLength means the text starting at srcChars + srcStart is NUL-terminated. */ +inline UBool +UnicodeString::endsWith(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) const { + if(srcLength < 0) { + srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); + } + return doCompare(length() - srcLength, srcLength, + srcChars, srcStart, srcLength) == 0; +} + +//======================================== +// replace +//======================================== +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + 
const UnicodeString& srcText) +{ return doReplace(start, _length, srcText, 0, srcText.length()); } + +/* replace() overloads: thin forwarders that fill in the missing source start/length for doReplace(). */ +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, _length, srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + ConstChar16Ptr srcChars, + int32_t srcLength) +{ return doReplace(start, _length, srcChars, 0, srcLength); } + +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, _length, srcChars, srcStart, srcLength); } + +/* Replaces the range with the single code unit srcChar (passed as a one-element array). */ +inline UnicodeString& +UnicodeString::replace(int32_t start, + int32_t _length, + char16_t srcChar) +{ return doReplace(start, _length, &srcChar, 0, 1); } + +/* replaceBetween(): converts the [start, limit) and [srcStart, srcLimit) ranges into start/length form. */ +inline UnicodeString& +UnicodeString::replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText) +{ return doReplace(start, limit - start, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::replaceBetween(int32_t start, + int32_t limit, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLimit) +{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } + +/* findAndReplace() convenience overloads: use all of oldText and newText, and either the whole string or the given range. */ +inline UnicodeString& +UnicodeString::findAndReplace(const UnicodeString& oldText, + const UnicodeString& newText) +{ return findAndReplace(0, length(), oldText, 0, oldText.length(), + newText, 0, newText.length()); } + +inline UnicodeString& +UnicodeString::findAndReplace(int32_t start, + int32_t _length, + const UnicodeString& oldText, + const UnicodeString& newText) +{ return findAndReplace(start, _length, oldText, 0, oldText.length(), + newText, 0, newText.length()); } + +// ============================ +// extract +// ============================ +inline void +UnicodeString::doExtract(int32_t start, + int32_t _length, + UnicodeString& target) 
const +{ /* replaces all of target with this string's range */ target.replace(0, target.length(), *this, start, _length); } + +/* extract() overloads: forward to the matching doExtract() overload. */ +inline void +UnicodeString::extract(int32_t start, + int32_t _length, + Char16Ptr target, + int32_t targetStart) const +{ doExtract(start, _length, target, targetStart); } + +inline void +UnicodeString::extract(int32_t start, + int32_t _length, + UnicodeString& target) const +{ doExtract(start, _length, target); } + +#if !UCONFIG_NO_CONVERSION + +/* Forwards to the extract() overload that takes a destination size: 0xffffffff for a non-null dst, 0 for a null dst. */ +inline int32_t +UnicodeString::extract(int32_t start, + int32_t _length, + char *dst, + const char *codepage) const + +{ + // This dstSize value will be checked explicitly + return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage); +} + +#endif + +/* Pins both start and limit, then extracts the range [start, limit). */ +inline void +UnicodeString::extractBetween(int32_t start, + int32_t limit, + char16_t *dst, + int32_t dstStart) const { + pinIndex(start); + pinIndex(limit); + doExtract(start, limit - start, dst, dstStart); +} + +/* Converts [start, limit) into start/length form for tempSubString(). */ +inline UnicodeString +UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { + return tempSubString(start, limit - start); +} + +/* The unsigned comparison rejects negative offsets as well as offsets >= length(); kInvalidUChar is returned for those. */ +inline char16_t +UnicodeString::doCharAt(int32_t offset) const +{ + if((uint32_t)offset < (uint32_t)length()) { + return getArrayStart()[offset]; + } else { + return kInvalidUChar; + } +} + +/* charAt() and operator[] both forward to doCharAt(). */ +inline char16_t +UnicodeString::charAt(int32_t offset) const +{ return doCharAt(offset); } + +inline char16_t +UnicodeString::operator[] (int32_t offset) const +{ return doCharAt(offset); } + +inline UBool +UnicodeString::isEmpty() const { + // Arithmetic or logical right shift does not matter: only testing for 0. 
+ return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; +} + +//======================================== +// Write implementation methods +//======================================== +/* Keeps only the storage flag bits, which clears all length bits. */ +inline void +UnicodeString::setZeroLength() { + fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; +} + +/* Stores len in the bits above the storage flags, leaving the storage flags unchanged. */ +inline void +UnicodeString::setShortLength(int32_t len) { + // requires 0 <= len <= kMaxShortLength + fUnion.fFields.fLengthAndFlags = + (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); +} + +/* Lengths up to kMaxShortLength are stored in fLengthAndFlags; larger ones set all length bits (kLengthIsLarge) and store the value in fFields.fLength. */ +inline void +UnicodeString::setLength(int32_t len) { + if(len <= kMaxShortLength) { + setShortLength(len); + } else { + fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; + fUnion.fFields.fLength = len; + } +} + +/* Resets flags and length to kShortString: zero length, stack buffer, no other flags. */ +inline void +UnicodeString::setToEmpty() { + fUnion.fFields.fLengthAndFlags = kShortString; +} + +/* Sets length, array pointer and capacity; the storage flags are not set here. */ +inline void +UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { + setLength(len); + fUnion.fFields.fArray = array; + fUnion.fFields.fCapacity = capacity; +} + +/* Assignment from a single code unit / code point: replaces the entire current contents. */ +inline UnicodeString& +UnicodeString::operator= (char16_t ch) +{ return doReplace(0, length(), &ch, 0, 1); } + +inline UnicodeString& +UnicodeString::operator= (UChar32 ch) +{ return replace(0, length(), ch); } + +/* setTo() overloads: call unBogus() first, then replace the entire current contents. */ +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ + unBogus(); + return doReplace(0, length(), srcText, srcStart, srcLength); +} + +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText, + int32_t srcStart) +{ + unBogus(); + srcText.pinIndex(srcStart); + return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); +} + +/* Whole-string setTo() forwards to copyFrom(). */ +inline UnicodeString& +UnicodeString::setTo(const UnicodeString& srcText) +{ + return copyFrom(srcText); +} + +inline UnicodeString& +UnicodeString::setTo(const char16_t *srcChars, + int32_t srcLength) +{ + unBogus(); + return doReplace(0, length(), srcChars, 0, srcLength); +} + +inline UnicodeString& +UnicodeString::setTo(char16_t 
srcChar) +{ + unBogus(); + return doReplace(0, length(), &srcChar, 0, 1); +} + +/* Code point variant: calls unBogus(), then replaces the entire contents through replace(). */ +inline UnicodeString& +UnicodeString::setTo(UChar32 srcChar) +{ + unBogus(); + return replace(0, length(), srcChar); +} + +/* append() and operator+= overloads: thin forwarders to doAppend(); a single code unit is passed as a one-element array. */ +inline UnicodeString& +UnicodeString::append(const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doAppend(srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::append(const UnicodeString& srcText) +{ return doAppend(srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::append(const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doAppend(srcChars, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::append(ConstChar16Ptr srcChars, + int32_t srcLength) +{ return doAppend(srcChars, 0, srcLength); } + +inline UnicodeString& +UnicodeString::append(char16_t srcChar) +{ return doAppend(&srcChar, 0, 1); } + +inline UnicodeString& +UnicodeString::operator+= (char16_t ch) +{ return doAppend(&ch, 0, 1); } + +inline UnicodeString& +UnicodeString::operator+= (UChar32 ch) { + return append(ch); +} + +inline UnicodeString& +UnicodeString::operator+= (const UnicodeString& srcText) +{ return doAppend(srcText, 0, srcText.length()); } + +/* insert() overloads: implemented as doReplace() of a zero-length range at start. */ +inline UnicodeString& +UnicodeString::insert(int32_t start, + const UnicodeString& srcText, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, 0, srcText, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const UnicodeString& srcText) +{ return doReplace(start, 0, srcText, 0, srcText.length()); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + const char16_t *srcChars, + int32_t srcStart, + int32_t srcLength) +{ return doReplace(start, 0, srcChars, srcStart, srcLength); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + ConstChar16Ptr srcChars, + int32_t srcLength) +{ return doReplace(start, 0, srcChars, 0, srcLength); } + +inline UnicodeString& 
+UnicodeString::insert(int32_t start, + char16_t srcChar) +{ return doReplace(start, 0, &srcChar, 0, 1); } + +inline UnicodeString& +UnicodeString::insert(int32_t start, + UChar32 srcChar) +{ return replace(start, 0, srcChar); } + + +/* Empties the string without releasing its array. */ +inline UnicodeString& +UnicodeString::remove() +{ + // remove() of a bogus string makes the string empty and non-bogus + if(isBogus()) { + setToEmpty(); + } else { + setZeroLength(); + } + return *this; +} + +/* Removes a range by replacing it with nothing; nullptr selects the const char16_t* overload of doReplace(). */ +inline UnicodeString& +UnicodeString::remove(int32_t start, + int32_t _length) +{ + if(start <= 0 && _length == INT32_MAX) { + // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus + return remove(); + } + return doReplace(start, _length, nullptr, 0, 0); +} + +/* Removes the range [start, limit). */ +inline UnicodeString& +UnicodeString::removeBetween(int32_t start, + int32_t limit) +{ return doReplace(start, limit - start, nullptr, 0, 0); } + +/* Keeps only [start, limit): truncates at limit first, then removes the first start units. */ +inline UnicodeString & +UnicodeString::retainBetween(int32_t start, int32_t limit) { + truncate(limit); + return doReplace(0, start, nullptr, 0, 0); +} + +/* Returns TRUE only when the string was actually shortened. The unsigned comparison also rejects a negative targetLength. */ +inline UBool +UnicodeString::truncate(int32_t targetLength) +{ + if(isBogus() && targetLength == 0) { + // truncate(0) of a bogus string makes the string empty and non-bogus + unBogus(); + return FALSE; + } else if((uint32_t)targetLength < (uint32_t)length()) { + setLength(targetLength); + return TRUE; + } else { + return FALSE; + } +} + +/* reverse() overloads forward to doReverse(); the no-argument form covers the whole string. */ +inline UnicodeString& +UnicodeString::reverse() +{ return doReverse(0, length()); } + +inline UnicodeString& +UnicodeString::reverse(int32_t start, + int32_t _length) +{ return doReverse(start, _length); } + +U_NAMESPACE_END + +#endif diff --git a/vendor/icu/include/unicode/uobject.h b/vendor/icu/include/unicode/uobject.h new file mode 100644 index 0000000000..9026b3b061 --- /dev/null +++ b/vendor/icu/include/unicode/uobject.h @@ -0,0 +1,322 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: uobject.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002jun26 +* created by: Markus W. Scherer +*/ + +#ifndef __UOBJECT_H__ +#define __UOBJECT_H__ + +#include <unicode/utypes.h> + +/** + * \file + * \brief C++ API: Common ICU base class UObject. + */ + +/** + * @{ + * \def U_NO_THROW + * Define this to define the throw() specification so + * certain functions do not throw any exceptions + * + * UMemory operator new methods should have the throw() specification + * appended to them, so that the compiler adds the additional NULL check + * before calling constructors. Without, if <code>operator new</code> returns NULL the + * constructor is still called, and if the constructor references member + * data, (which it typically does), the result is a segmentation violation. + * + * @stable ICU 4.2 + */ +#ifndef U_NO_THROW +#define U_NO_THROW throw() +#endif + +/** @} */ + +/*===========================================================================*/ +/* UClassID-based RTTI */ +/*===========================================================================*/ + +/** + * UClassID is used to identify classes without using the compiler's RTTI. + * This was used before C++ compilers consistently supported RTTI. + * ICU 4.6 requires compiler RTTI to be turned on. + * + * Each class hierarchy which needs + * to implement polymorphic clone() or operator==() defines two methods, + * described in detail below. UClassID values can be compared using + * operator==(). Nothing else should be done with them. 
+ * + * \par + * In class hierarchies that implement "poor man's RTTI", + * each concrete subclass implements getDynamicClassID() in the same way: + * + * \code + * class Derived { + * public: + * virtual UClassID getDynamicClassID() const + * { return Derived::getStaticClassID(); } + * } + * \endcode + * + * Each concrete class implements getStaticClassID() as well, which allows + * clients to test for a specific type. + * + * \code + * class Derived { + * public: + * static UClassID U_EXPORT2 getStaticClassID(); + * private: + * static char fgClassID; + * } + * + * // In Derived.cpp: + * UClassID Derived::getStaticClassID() + * { return (UClassID)&Derived::fgClassID; } + * char Derived::fgClassID = 0; // Value is irrelevant + * \endcode + * @stable ICU 2.0 + */ +typedef void* UClassID; + +U_NAMESPACE_BEGIN + +/** + * UMemory is the common ICU base class. + * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4). + * + * This is primarily to make it possible and simple to override the + * C++ memory management by adding new/delete operators to this base class. + * + * To override ALL ICU memory management, including that from plain C code, + * replace the allocation functions declared in cmemory.h + * + * UMemory does not contain any virtual functions. + * Common "boilerplate" functions are defined in UObject. + * + * @stable ICU 2.4 + */ +class U_COMMON_API UMemory { +public: + +/* test versions for debugging shaper heap memory problems */ +#ifdef SHAPER_MEMORY_DEBUG + static void * NewArray(int size, int count); + static void * GrowArray(void * array, int newSize ); + static void FreeArray(void * array ); +#endif + +#if U_OVERRIDE_CXX_ALLOCATION + /** + * Override for ICU4C C++ memory management. 
+ * simple, non-class types are allocated using the macros in common/cmemory.h + * (uprv_malloc(), uprv_free(), uprv_realloc()); + * they or something else could be used here to implement C++ new/delete + * for ICU4C C++ classes + * @stable ICU 2.4 + */ + static void * U_EXPORT2 operator new(size_t size) U_NO_THROW; + + /** + * Override for ICU4C C++ memory management. + * See new(). + * @stable ICU 2.4 + */ + static void * U_EXPORT2 operator new[](size_t size) U_NO_THROW; + + /** + * Override for ICU4C C++ memory management. + * simple, non-class types are allocated using the macros in common/cmemory.h + * (uprv_malloc(), uprv_free(), uprv_realloc()); + * they or something else could be used here to implement C++ new/delete + * for ICU4C C++ classes + * @stable ICU 2.4 + */ + static void U_EXPORT2 operator delete(void *p) U_NO_THROW; + + /** + * Override for ICU4C C++ memory management. + * See delete(). + * @stable ICU 2.4 + */ + static void U_EXPORT2 operator delete[](void *p) U_NO_THROW; + +#if U_HAVE_PLACEMENT_NEW + /** + * Override for ICU4C C++ memory management for STL. + * See new(). + * @stable ICU 2.6 + */ + static inline void * U_EXPORT2 operator new(size_t, void *ptr) U_NO_THROW { return ptr; } + + /** + * Override for ICU4C C++ memory management for STL. + * See delete(). 
+ * @stable ICU 2.6 + */ + static inline void U_EXPORT2 operator delete(void *, void *) U_NO_THROW {} +#endif /* U_HAVE_PLACEMENT_NEW */ +#if U_HAVE_DEBUG_LOCATION_NEW + /** + * This method overrides the MFC debug version of the operator new + * + * @param size The requested memory size + * @param file The file where the allocation was requested + * @param line The line where the allocation was requested + */ + static void * U_EXPORT2 operator new(size_t size, const char* file, int line) U_NO_THROW; + /** + * This method provides a matching delete for the MFC debug new + * + * @param p The pointer to the allocated memory + * @param file The file where the allocation was requested + * @param line The line where the allocation was requested + */ + static void U_EXPORT2 operator delete(void* p, const char* file, int line) U_NO_THROW; +#endif /* U_HAVE_DEBUG_LOCATION_NEW */ +#endif /* U_OVERRIDE_CXX_ALLOCATION */ + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + UMemory &UMemory::operator=(const UMemory &); + */ +}; + +/** + * UObject is the common ICU "boilerplate" class. + * UObject inherits UMemory (starting with ICU 2.4), + * and all other public ICU C++ classes + * are derived from UObject (starting with ICU 2.2). + * + * UObject contains common virtual functions, in particular a virtual destructor. + * + * The clone() function is not available in UObject because it is not + * implemented by all ICU classes. 
+ * Many ICU services provide a clone() function for their class trees, + * defined on the service's C++ base class, and all subclasses within that + * service class tree return a pointer to the service base class + * (which itself is a subclass of UObject). + * This is because some compilers do not support covariant (same-as-this) + * return types; cast to the appropriate subclass if necessary. + * + * @stable ICU 2.2 + */ +class U_COMMON_API UObject : public UMemory { +public: + /** + * Destructor. + * + * @stable ICU 2.2 + */ + virtual ~UObject(); + + /** + * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class. + * The base class implementation returns a dummy value. + * + * Use compiler RTTI rather than ICU's "poor man's RTTI". + * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI". + * + * @stable ICU 2.2 + */ + virtual UClassID getDynamicClassID() const; + +protected: + // the following functions are protected to prevent instantiation and + // direct use of UObject itself + + // default constructor + // inline UObject() {} + + // copy constructor + // inline UObject(const UObject &other) {} + +#if 0 + // TODO Sometime in the future. Implement operator==(). 
+ // (This comment inserted in 2.2) + // some or all of the following "boilerplate" functions may be made public + // in a future ICU4C release when all subclasses implement them + + // assignment operator + // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74) + // commented out because the implementation is the same as a compiler's default + // UObject &operator=(const UObject &other) { return *this; } + + // comparison operators + virtual inline UBool operator==(const UObject &other) const { return this==&other; } + inline UBool operator!=(const UObject &other) const { return !operator==(other); } + + // clone() commented out from the base class: + // some compilers do not support co-variant return types + // (i.e., subclasses would have to return UObject * as well, instead of SubClass *) + // see also UObject class documentation. + // virtual UObject *clone() const; +#endif + + /* + * Assignment operator not declared. The compiler will provide one + * which does nothing since this class does not contain any data members. + * API/code coverage may show the assignment operator as present and + * untested - ignore. + * Subclasses need this assignment operator if they use compiler-provided + * assignment operators of their own. An alternative to not declaring one + * here would be to declare and empty-implement a protected or public one. + UObject &UObject::operator=(const UObject &); + */ +}; + +#ifndef U_HIDE_INTERNAL_API +/** + * This is a simple macro to add ICU RTTI to an ICU object implementation. + * This does not go into the header. This should only be used in *.cpp files. + * + * @param myClass The name of the class that needs RTTI defined. 
+ * @internal + */ +#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \ + UClassID U_EXPORT2 myClass::getStaticClassID() { \ + static char classID = 0; \ + return (UClassID)&classID; \ + } \ + UClassID myClass::getDynamicClassID() const \ + { return myClass::getStaticClassID(); } + + +/** + * This macro adds ICU RTTI to an ICU abstract class implementation. + * This macro should be invoked in *.cpp files. The corresponding + * header should declare getStaticClassID. + * + * @param myClass The name of the class that needs RTTI defined. + * @internal + */ +#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \ + UClassID U_EXPORT2 myClass::getStaticClassID() { \ + static char classID = 0; \ + return (UClassID)&classID; \ + } + +#endif /* U_HIDE_INTERNAL_API */ + +U_NAMESPACE_END + +#endif diff --git a/vendor/icu/include/unicode/urename.h b/vendor/icu/include/unicode/urename.h new file mode 100644 index 0000000000..5d6e4b077b --- /dev/null +++ b/vendor/icu/include/unicode/urename.h @@ -0,0 +1,1819 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2002-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: urename.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Perl script tools/genren.pl written by Vladimir Weinstein +* +* Contains data for renaming ICU exports. +* Gets included by umachine.h +* +* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT +* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN! +*/ + +#ifndef URENAME_H +#define URENAME_H + +/* U_DISABLE_RENAMING can be defined in the following ways: + * - when running configure, e.g. 
+ * runConfigureICU Linux --disable-renaming + * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h + */ + +#include <unicode/uconfig.h> + +#if !U_DISABLE_RENAMING + +/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give + the platform a chance to define it first. + Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined. + */ + +#ifndef U_ICU_ENTRY_POINT_RENAME +#include <unicode/umachine.h> +#endif + +/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */ +#ifndef U_ICU_ENTRY_POINT_RENAME +#include <unicode/uvernum.h> +#endif + +/* Error out before the following defines cause very strange and unexpected code breakage */ +#ifndef U_ICU_ENTRY_POINT_RENAME +#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used. +#endif + + +/* C exports renaming data */ + +#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString) +#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString) +#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger) +#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase) +#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase) +#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE) +#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP) +#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP) +#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE) +#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE) +#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP) +#define UCNV_TO_U_CALLBACK_STOP 
U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP) +#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE) +#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance) +#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init) +#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded) +#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer) +#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) +#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) +#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) +#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) +#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) +#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) +#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) +#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) +#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) +#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) +#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) +#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) +#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16) +#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17) +#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18) +#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19) +#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2) +#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3) +#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4) +#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5) +#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6) +#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) +#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) +#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) +#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) +#define 
_UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) +#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) +#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) +#define _UTF16v2Data U_ICU_ENTRY_POINT_RENAME(_UTF16v2Data) +#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) +#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) +#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) +#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) +#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) +#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) +#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) +#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) +#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) +#define gTimeZoneFilesInitOnce U_ICU_ENTRY_POINT_RENAME(gTimeZoneFilesInitOnce) +#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone) +#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close) +#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals) +#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings) +#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID) +#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart) +#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart) +#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName) +#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart) +#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart) +#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset) +#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID) +#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo) +#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open) +#define locale_getKeywords 
U_ICU_ENTRY_POINT_RENAME(locale_getKeywords) +#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart) +#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default) +#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default) +#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun) +#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun) +#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun) +#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close) +#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns) +#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine) +#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns) +#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns) +#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns) +#define pl_create U_ICU_ENTRY_POINT_RENAME(pl_create) +#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent) +#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent) +#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount) +#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont) +#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit) +#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit) +#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading) +#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent) +#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent) +#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading) +#define pl_getLineVisualRun U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun) +#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth) +#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount) +#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit) +#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit) 
+#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale) +#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel) +#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection) +#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount) +#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit) +#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit) +#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue) +#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent) +#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent) +#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection) +#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont) +#define pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount) +#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap) +#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs) +#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading) +#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions) +#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex) +#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine) +#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns) +#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns) +#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns) +#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns) +#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns) +#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns) +#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow) +#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns) +#define 
pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns) +#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns) +#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems) +#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource) +#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias) +#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem) +#define res_getBinary U_ICU_ENTRY_POINT_RENAME(res_getBinary) +#define res_getIntVector U_ICU_ENTRY_POINT_RENAME(res_getIntVector) +#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType) +#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource) +#define res_getString U_ICU_ENTRY_POINT_RENAME(res_getString) +#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex) +#define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey) +#define res_load U_ICU_ENTRY_POINT_RENAME(res_load) +#define res_read U_ICU_ENTRY_POINT_RENAME(res_read) +#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload) +#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars) +#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy) +#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy) +#define u_caseInsensitivePrefixMatch U_ICU_ENTRY_POINT_RENAME(u_caseInsensitivePrefixMatch) +#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose) +#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets) +#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen) +#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge) +#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue) +#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection) +#define u_charFromName U_ICU_ENTRY_POINT_RENAME(u_charFromName) +#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror) +#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName) +#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType) +#define u_charsToUChars 
U_ICU_ENTRY_POINT_RENAME(u_charsToUChars) +#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup) +#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32) +#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit) +#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames) +#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes) +#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName) +#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt) +#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose) +#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof) +#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush) +#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter) +#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat) +#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc) +#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage) +#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx) +#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile) +#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale) +#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets) +#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read) +#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write) +#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush) +#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit) +#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter) +#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase) +#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen) +#define u_fopen_u U_ICU_ENTRY_POINT_RENAME(u_fopen_u) +#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit) +#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage) +#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError) +#define u_fprintf U_ICU_ENTRY_POINT_RENAME(u_fprintf) +#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u) +#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc) +#define u_fputs 
U_ICU_ENTRY_POINT_RENAME(u_fputs) +#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind) +#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf) +#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u) +#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage) +#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale) +#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator) +#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen) +#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc) +#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket) +#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass) +#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory) +#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion) +#define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter) +#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure) +#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment) +#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue) +#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue) +#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue) +#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties) +#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue) +#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum) +#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName) +#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum) +#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName) +#define u_getTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_getTimeZoneFilesDirectory) +#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties) +#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion) +#define 
u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion) +#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout) +#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty) +#define u_init U_ICU_ENTRY_POINT_RENAME(u_init) +#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable) +#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart) +#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart) +#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl) +#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart) +#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart) +#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar) +#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored) +#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic) +#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase) +#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase) +#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace) +#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace) +#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum) +#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX) +#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha) +#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase) +#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank) +#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl) +#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined) +#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit) +#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph) +#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX) +#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower) +#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint) +#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX) +#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct) +#define u_isspace U_ICU_ENTRY_POINT_RENAME(u_isspace) +#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle) +#define 
u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper) +#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit) +#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close) +#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat) +#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init) +#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp) +#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr) +#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32) +#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp) +#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder) +#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy) +#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove) +#define u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr) +#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32) +#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset) +#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage) +#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError) +#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf) +#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse) +#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u) +#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter) +#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse) +#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions) +#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory) +#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions) +#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions) +#define u_setTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_setTimeZoneFilesDirectory) +#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic) +#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf) +#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u) +#define u_sprintf 
U_ICU_ENTRY_POINT_RENAME(u_sprintf) +#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u) +#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf) +#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u) +#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare) +#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare) +#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter) +#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst) +#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast) +#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase) +#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub) +#define u_strFromPunycode U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode) +#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32) +#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub) +#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8) +#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient) +#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub) +#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS) +#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than) +#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8) +#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower) +#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode) +#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle) +#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32) +#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub) +#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8) +#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub) +#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper) +#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS) +#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp) +#define 
u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat) +#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr) +#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32) +#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp) +#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder) +#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold) +#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy) +#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn) +#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen) +#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp) +#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat) +#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp) +#define u_strncmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder) +#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy) +#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk) +#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr) +#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32) +#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr) +#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn) +#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr) +#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r) +#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars) +#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s) +#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars) +#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars) +#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower) +#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle) +#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper) +#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy) +#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy) +#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape) +#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt) +#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString) +#define u_versionFromUString 
U_ICU_ENTRY_POINT_RENAME(u_versionFromUString) +#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString) +#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage) +#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError) +#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf) +#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u) +#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf) +#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u) +#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage) +#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError) +#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf) +#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u) +#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf) +#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u) +#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf) +#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u) +#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun) +#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts) +#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close) +#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs) +#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns) +#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection) +#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass) +#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback) +#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass) +#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection) +#define ubidi_getJoiningGroup U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup) +#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType) +#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength) 
+#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt) +#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels) +#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex) +#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap) +#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun) +#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue) +#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory) +#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror) +#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket) +#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType) +#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel) +#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex) +#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph) +#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex) +#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength) +#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode) +#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions) +#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength) +#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns) +#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText) +#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex) +#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap) +#define ubidi_getVisualRun U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun) +#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap) +#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl) +#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse) +#define ubidi_isJoinControl 
U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl) +#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored) +#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR) +#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open) +#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized) +#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR) +#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical) +#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual) +#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback) +#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext) +#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse) +#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine) +#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara) +#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode) +#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions) +#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered) +#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse) +#define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close) +#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open) +#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform) +#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode) +#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close) +#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable) +#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current) +#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first) +#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following) +#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable) +#define ubrk_getBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_getBinaryRules) 
+#define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType) +#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus) +#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec) +#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary) +#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last) +#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next) +#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open) +#define ubrk_openBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_openBinaryRules) +#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules) +#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding) +#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous) +#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText) +#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone) +#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText) +#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText) +#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap) +#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys) +#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey) +#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys) +#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add) +#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear) +#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField) +#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone) +#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close) +#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable) +#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo) +#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get) +#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute) +#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable) +#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID) +#define 
ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings) +#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType) +#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone) +#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference) +#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange) +#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale) +#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit) +#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType) +#define ucal_getMillis U_ICU_ENTRY_POINT_RENAME(ucal_getMillis) +#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow) +#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion) +#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName) +#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID) +#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID) +#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate) +#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType) +#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition) +#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID) +#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime) +#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet) +#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend) +#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open) +#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones) +#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration) +#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones) +#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll) +#define 
ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set) +#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute) +#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate) +#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime) +#define ucal_setDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone) +#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange) +#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis) +#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone) +#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure) +#define ucase_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts) +#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure) +#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold) +#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale) +#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie) +#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType) +#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable) +#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty) +#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive) +#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted) +#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding) +#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower) +#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle) +#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper) +#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower) +#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle) +#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper) +#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close) +#define ucasemap_getBreakIterator 
U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator) +#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale) +#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions) +#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle) +#define ucasemap_mapUTF8 U_ICU_ENTRY_POINT_RENAME(ucasemap_mapUTF8) +#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open) +#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator) +#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale) +#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions) +#define ucasemap_toTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle) +#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase) +#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower) +#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle) +#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper) +#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts) +#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames) +#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne) +#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup) +#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup) +#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup) +#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup) +#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup) +#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32) +#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets) +#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode) +#define ucnv_MBCSGetType 
U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType) +#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode) +#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte) +#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar) +#define ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets) +#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters) +#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter) +#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter) +#define ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes) +#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub) +#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars) +#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub) +#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars) +#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close) +#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames) +#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert) +#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx) +#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases) +#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable) +#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards) +#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter) +#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter) +#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage) +#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData) +#define ucnv_detectUnicodeSignature 
U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature) +#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU) +#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU) +#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet) +#define ucnv_extInitialMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU) +#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU) +#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU) +#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU) +#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator) +#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache) +#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic) +#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars) +#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending) +#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes) +#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode) +#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8) +#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC) +#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias) +#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases) +#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName) +#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID) +#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName) +#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet) +#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName) +#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName) +#define ucnv_getFromUCallBack 
U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack) +#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars) +#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars) +#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize) +#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize) +#define ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName) +#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar) +#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet) +#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform) +#define ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard) +#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName) +#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters) +#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars) +#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack) +#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType) +#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet) +#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount) +#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters) +#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName) +#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare) +#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare) +#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous) +#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth) +#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load) +#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData) +#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open) +#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames) 
+#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID) +#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage) +#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames) +#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU) +#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset) +#define ucnv_resetFromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode) +#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode) +#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone) +#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName) +#define ucnv_setFallback U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback) +#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack) +#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars) +#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString) +#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack) +#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap) +#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases) +#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic) +#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars) +#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending) +#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint) +#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars) +#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode) +#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload) +#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady) +#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback) +#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close) +#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open) +#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized) +#define 
ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString) +#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8) +#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize) +#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary) +#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close) +#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements) +#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable) +#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal) +#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals) +#define ucol_getAttribute U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute) +#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable) +#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound) +#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions) +#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions) +#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName) +#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes) +#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent) +#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues) +#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale) +#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords) +#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale) +#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType) +#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion) +#define ucol_getMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_getMaxVariable) +#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset) +#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes) +#define ucol_getRules 
U_ICU_ENTRY_POINT_RENAME(ucol_getRules) +#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx) +#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString) +#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey) +#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength) +#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet) +#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion) +#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet) +#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop) +#define ucol_getVersion U_ICU_ENTRY_POINT_RENAME(ucol_getVersion) +#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater) +#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual) +#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode) +#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary) +#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys) +#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next) +#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart) +#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString) +#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open) +#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales) +#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary) +#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements) +#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString) +#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules) +#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen) +#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous) +#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder) +#define ucol_reset 
U_ICU_ENTRY_POINT_RENAME(ucol_reset) +#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop) +#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone) +#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder) +#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute) +#define ucol_setMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_setMaxVariable) +#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset) +#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes) +#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength) +#define ucol_setText U_ICU_ENTRY_POINT_RENAME(ucol_setText) +#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop) +#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll) +#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter) +#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8) +#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap) +#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA) +#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder) +#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close) +#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect) +#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll) +#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter) +#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets) +#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence) +#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets) +#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage) +#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName) +#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars) +#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled) 
+#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open) +#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding) +#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset) +#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText) +#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies) +#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale) +#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate) +#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits) +#define ucurr_getDefaultFractionDigitsForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigitsForUsage) +#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale) +#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName) +#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode) +#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName) +#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement) +#define ucurr_getRoundingIncrementForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrementForUsage) +#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable) +#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies) +#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register) +#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister) +#define udat_adoptNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormat) +#define udat_adoptNumberFormatForFields U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormatForFields) +#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern) +#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative) +#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone) +#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close) +#define 
udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable) +#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols) +#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format) +#define udat_formatCalendar U_ICU_ENTRY_POINT_RENAME(udat_formatCalendar) +#define udat_formatCalendarForFields U_ICU_ENTRY_POINT_RENAME(udat_formatCalendarForFields) +#define udat_formatForFields U_ICU_ENTRY_POINT_RENAME(udat_formatForFields) +#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart) +#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable) +#define udat_getBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_getBooleanAttribute) +#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar) +#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext) +#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType) +#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat) +#define udat_getNumberFormatForField U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormatForField) +#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols) +#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient) +#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open) +#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse) +#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar) +#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener) +#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart) +#define udat_setBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_setBooleanAttribute) +#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar) +#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext) +#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient) +#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat) +#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols) +#define 
udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField) +#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern) +#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate) +#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime) +#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener) +#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData) +#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close) +#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper) +#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize) +#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo) +#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize) +#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength) +#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory) +#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory) +#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open) +#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice) +#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper) +#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData) +#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError) +#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16) +#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32) +#define udata_setAppData U_ICU_ENTRY_POINT_RENAME(udata_setAppData) +#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData) +#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess) +#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader) +#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock) +#define udatpg_addPattern U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern) +#define udatpg_clone 
U_ICU_ENTRY_POINT_RENAME(udatpg_clone) +#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close) +#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat) +#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName) +#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton) +#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern) +#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions) +#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat) +#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal) +#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName) +#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton) +#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton) +#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open) +#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons) +#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty) +#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons) +#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes) +#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions) +#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat) +#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName) +#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat) +#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal) +#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap) +#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close) +#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format) +#define udtitvfmt_open 
U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open) +#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close) +#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count) +#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next) +#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault) +#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration) +#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration) +#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration) +#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset) +#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext) +#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault) +#define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close) +#define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next) +#define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open) +#define ufile_close_translit U_ICU_ENTRY_POINT_RENAME(ufile_close_translit) +#define ufile_fill_uchar_buffer U_ICU_ENTRY_POINT_RENAME(ufile_fill_uchar_buffer) +#define ufile_flush_io U_ICU_ENTRY_POINT_RENAME(ufile_flush_io) +#define ufile_flush_translit U_ICU_ENTRY_POINT_RENAME(ufile_flush_translit) +#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch) +#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32) +#define ufmt_64tou U_ICU_ENTRY_POINT_RENAME(ufmt_64tou) +#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close) +#define ufmt_defaultCPToUnicode U_ICU_ENTRY_POINT_RENAME(ufmt_defaultCPToUnicode) +#define ufmt_digitvalue U_ICU_ENTRY_POINT_RENAME(ufmt_digitvalue) +#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex) +#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength) +#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate) +#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars) +#define 
ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble) +#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64) +#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong) +#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject) +#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType) +#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars) +#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric) +#define ufmt_isdigit U_ICU_ENTRY_POINT_RENAME(ufmt_isdigit) +#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open) +#define ufmt_ptou U_ICU_ENTRY_POINT_RENAME(ufmt_ptou) +#define ufmt_uto64 U_ICU_ENTRY_POINT_RENAME(ufmt_uto64) +#define ufmt_utop U_ICU_ENTRY_POINT_RENAME(ufmt_utop) +#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance) +#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender) +#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close) +#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString) +#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars) +#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars) +#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong) +#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet) +#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars) +#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString) +#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count) +#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable) +#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet) +#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals) +#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet) +#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find) +#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get) +#define uhash_geti 
U_ICU_ENTRY_POINT_RENAME(uhash_geti) +#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString) +#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars) +#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars) +#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong) +#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet) +#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars) +#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString) +#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget) +#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti) +#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init) +#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize) +#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput) +#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi) +#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove) +#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei) +#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement) +#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open) +#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize) +#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put) +#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti) +#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove) +#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll) +#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement) +#define uhash_removei U_ICU_ENTRY_POINT_RENAME(uhash_removei) +#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator) +#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter) +#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher) +#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy) +#define uhash_setValueComparator 
U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator) +#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter) +#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII) +#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode) +#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close) +#define uidna_compare U_ICU_ENTRY_POINT_RENAME(uidna_compare) +#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII) +#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8) +#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode) +#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8) +#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII) +#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8) +#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode) +#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8) +#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46) +#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII) +#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode) +#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32) +#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState) +#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32) +#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32) +#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator) +#define uiter_setReplaceable U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable) +#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState) +#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString) +#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE) +#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8) +#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close) +#define uldn_getContext 
U_ICU_ENTRY_POINT_RENAME(uldn_getContext) +#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling) +#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale) +#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName) +#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName) +#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName) +#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName) +#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open) +#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext) +#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName) +#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName) +#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName) +#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName) +#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList) +#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList) +#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator) +#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString) +#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values) +#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList) +#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList) +#define ulist_getListFromEnum U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum) +#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize) +#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext) +#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value) +#define ulist_removeString U_ICU_ENTRY_POINT_RENAME(ulist_removeString) +#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList) 
+#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator) +#define ulistfmt_close U_ICU_ENTRY_POINT_RENAME(ulistfmt_close) +#define ulistfmt_format U_ICU_ENTRY_POINT_RENAME(ulistfmt_format) +#define ulistfmt_open U_ICU_ENTRY_POINT_RENAME(ulistfmt_open) +#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage) +#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP) +#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags) +#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize) +#define uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable) +#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag) +#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable) +#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName) +#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation) +#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry) +#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID) +#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID) +#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault) +#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry) +#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword) +#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue) +#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage) +#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName) +#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript) +#define uloc_getDisplayScriptInContext U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScriptInContext) +#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant) +#define 
uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country) +#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language) +#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries) +#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages) +#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue) +#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID) +#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage) +#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation) +#define uloc_getLocaleForLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID) +#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName) +#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent) +#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript) +#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback) +#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant) +#define uloc_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uloc_isRightToLeft) +#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags) +#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList) +#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords) +#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault) +#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue) +#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag) +#define uloc_toLegacyKey U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyKey) +#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType) +#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey) +#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType) +#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close) +#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion) 
+#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter) +#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet) +#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern) +#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator) +#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem) +#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute) +#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize) +#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open) +#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute) +#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry) +#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage) +#define ulocimp_getRegionForSupplementalData U_ICU_ENTRY_POINT_RENAME(ulocimp_getRegionForSupplementalData) +#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript) +#define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey) +#define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType) +#define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey) +#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType) +#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey) +#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType) +#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern) +#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe) +#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone) +#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close) +#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format) +#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale) +#define umsg_open 
U_ICU_ENTRY_POINT_RENAME(umsg_open) +#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse) +#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale) +#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern) +#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat) +#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse) +#define umtx_condBroadcast U_ICU_ENTRY_POINT_RENAME(umtx_condBroadcast) +#define umtx_condSignal U_ICU_ENTRY_POINT_RENAME(umtx_condSignal) +#define umtx_condWait U_ICU_ENTRY_POINT_RENAME(umtx_condWait) +#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock) +#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock) +#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance) +#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append) +#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close) +#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair) +#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass) +#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition) +#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance) +#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance) +#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance) +#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance) +#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance) +#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance) +#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition) +#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter) +#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore) +#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert) +#define unorm2_isNormalized 
U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized) +#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize) +#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend) +#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered) +#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck) +#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes) +#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap) +#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare) +#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate) +#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16) +#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck) +#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized) +#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions) +#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next) +#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize) +#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous) +#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck) +#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions) +#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern) +#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone) +#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close) +#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable) +#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format) +#define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal) +#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble) +#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency) +#define unum_formatDoubleForFields U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleForFields) +#define unum_formatInt64 
U_ICU_ENTRY_POINT_RENAME(unum_formatInt64) +#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable) +#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute) +#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable) +#define unum_getContext U_ICU_ENTRY_POINT_RENAME(unum_getContext) +#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute) +#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType) +#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol) +#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute) +#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open) +#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse) +#define unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal) +#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble) +#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency) +#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64) +#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable) +#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute) +#define unum_setContext U_ICU_ENTRY_POINT_RENAME(unum_setContext) +#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute) +#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol) +#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute) +#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern) +#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close) +#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription) +#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName) +#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix) +#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic) +#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open) +#define 
unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames) +#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName) +#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close) +#define uplrules_getKeywords U_ICU_ENTRY_POINT_RENAME(uplrules_getKeywords) +#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open) +#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType) +#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select) +#define uplrules_selectWithFormat U_ICU_ENTRY_POINT_RENAME(uplrules_selectWithFormat) +#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary) +#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary) +#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration) +#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext) +#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel) +#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary) +#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName) +#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal) +#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel) +#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus) +#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName) +#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile) +#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName) +#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init) +#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint) +#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary) +#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug) +#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary) +#define uplug_removePlug 
U_ICU_ENTRY_POINT_RENAME(uplug_removePlug) +#define uplug_setContext U_ICU_ENTRY_POINT_RENAME(uplug_setContext) +#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel) +#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName) +#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload) +#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource) +#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts) +#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy) +#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic) +#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower) +#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc) +#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil) +#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames) +#define uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames) +#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii) +#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic) +#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii) +#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID) +#define uprv_convertToLCIDPlatform U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCIDPlatform) +#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix) +#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii) +#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic) +#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus) +#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault) +#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding) +#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus) +#define 
uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus) +#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus) +#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding) +#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus) +#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString) +#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet) +#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet) +#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString) +#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus) +#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus) +#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus) +#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs) +#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd) +#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd) +#define uprv_decNumberClass U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClass) +#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString) +#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare) +#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal) +#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal) +#define uprv_decNumberCompareTotalMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag) +#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy) +#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs) +#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate) 
+#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign) +#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide) +#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger) +#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp) +#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA) +#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32) +#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString) +#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32) +#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD) +#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert) +#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal) +#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal) +#define uprv_decNumberLn U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn) +#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10) +#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB) +#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax) +#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag) +#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin) +#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag) +#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus) +#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply) +#define uprv_decNumberNextMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus) +#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus) +#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward) +#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize) +#define uprv_decNumberOr 
U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr) +#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus) +#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower) +#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize) +#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce) +#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder) +#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear) +#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale) +#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate) +#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum) +#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB) +#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD) +#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift) +#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot) +#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract) +#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString) +#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32) +#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact) +#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue) +#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString) +#define uprv_decNumberToUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32) +#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim) +#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion) +#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor) +#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero) +#define 
uprv_deleteConditionalCE32 U_ICU_ENTRY_POINT_RENAME(uprv_deleteConditionalCE32) +#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject) +#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close) +#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open) +#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func) +#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy) +#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii) +#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii) +#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower) +#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs) +#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor) +#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax) +#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin) +#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod) +#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free) +#define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters) +#define uprv_getDefaultCodepage U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultCodepage) +#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID) +#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity) +#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength) +#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues) +#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN) +#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime) +#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName) +#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime) +#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator) +#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter) +#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite) +#define 
uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString) +#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString) +#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN) +#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity) +#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity) +#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou) +#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log) +#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc) +#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile) +#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max) +#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa) +#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr) +#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min) +#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf) +#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency) +#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute) +#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow) +#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10) +#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc) +#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round) +#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray) +#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch) +#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare) +#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup) +#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp) +#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup) +#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp) +#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError) +#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone) +#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper) +#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc) +#define 
uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname) +#define uprv_tzname_clear_cache U_ICU_ENTRY_POINT_RENAME(uprv_tzname_clear_cache) +#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset) +#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator) +#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator) +#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile) +#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray) +#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close) +#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact) +#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler) +#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes) +#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray) +#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow) +#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue) +#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open) +#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue) +#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement) +#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText) +#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail) +#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText) +#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone) +#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close) +#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end) +#define uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64) +#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find) +#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64) +#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext) +#define uregex_flags U_ICU_ENTRY_POINT_RENAME(uregex_flags) +#define uregex_getFindProgressCallback 
U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback) +#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback) +#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit) +#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText) +#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit) +#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText) +#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group) +#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount) +#define uregex_groupNumberFromCName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromCName) +#define uregex_groupNumberFromName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromName) +#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText) +#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds) +#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds) +#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd) +#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt) +#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64) +#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches) +#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64) +#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open) +#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC) +#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText) +#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern) +#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText) +#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText) +#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd) +#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64) +#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart) +#define uregex_regionStart64 
U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64) +#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll) +#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText) +#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst) +#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText) +#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd) +#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset) +#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64) +#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback) +#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback) +#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion) +#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64) +#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart) +#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit) +#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText) +#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit) +#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText) +#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split) +#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText) +#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start) +#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64) +#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt) +#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds) +#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds) +#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt) +#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual) +#define uregion_contains 
U_ICU_ENTRY_POINT_RENAME(uregion_contains) +#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable) +#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions) +#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType) +#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion) +#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType) +#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode) +#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues) +#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode) +#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode) +#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode) +#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType) +#define ureldatefmt_close U_ICU_ENTRY_POINT_RENAME(ureldatefmt_close) +#define ureldatefmt_combineDateAndTime U_ICU_ENTRY_POINT_RENAME(ureldatefmt_combineDateAndTime) +#define ureldatefmt_format U_ICU_ENTRY_POINT_RENAME(ureldatefmt_format) +#define ureldatefmt_formatNumeric U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatNumeric) +#define ureldatefmt_open U_ICU_ENTRY_POINT_RENAME(ureldatefmt_open) +#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close) +#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb) +#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems) +#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource) +#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource) +#define ures_getAllItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllItemsWithFallback) +#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary) +#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex) +#define 
ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey) +#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback) +#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent) +#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt) +#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector) +#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey) +#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues) +#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale) +#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType) +#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal) +#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName) +#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource) +#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString) +#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize) +#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString) +#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex) +#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey) +#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback) +#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType) +#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt) +#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String) +#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex) +#define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey) +#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion) +#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey) +#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber) +#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal) 
+#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext) +#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject) +#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open) +#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales) +#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect) +#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn) +#define ures_openNoDefault U_ICU_ENTRY_POINT_RENAME(ures_openNoDefault) +#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU) +#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator) +#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap) +#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters) +#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun) +#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode) +#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName) +#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString) +#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString) +#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript) +#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions) +#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName) +#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage) +#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript) +#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased) +#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft) +#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun) +#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun) +#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun) +#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText) +#define usearch_close 
U_ICU_ENTRY_POINT_RENAME(usearch_close) +#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first) +#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following) +#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute) +#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator) +#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator) +#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength) +#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart) +#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText) +#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset) +#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern) +#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText) +#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical) +#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact) +#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical) +#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact) +#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last) +#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next) +#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open) +#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator) +#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding) +#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous) +#define usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset) +#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search) +#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards) +#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute) +#define usearch_setBreakIterator 
U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator) +#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator) +#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset) +#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern) +#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText) +#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add) +#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll) +#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints) +#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange) +#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString) +#define uset_applyIntPropertyValue U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue) +#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern) +#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias) +#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt) +#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear) +#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone) +#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed) +#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close) +#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver) +#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact) +#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement) +#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll) +#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains) +#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll) +#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints) +#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone) +#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange) +#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome) +#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString) +#define uset_equals 
U_ICU_ENTRY_POINT_RENAME(uset_equals) +#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze) +#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem) +#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount) +#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange) +#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount) +#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet) +#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf) +#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty) +#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen) +#define uset_open U_ICU_ENTRY_POINT_RENAME(uset_open) +#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty) +#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern) +#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions) +#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove) +#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll) +#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings) +#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange) +#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString) +#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern) +#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain) +#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll) +#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize) +#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains) +#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set) +#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne) +#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size) +#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span) +#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack) +#define uset_spanBackUTF8 
U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8) +#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8) +#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern) +#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable) +#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8) +#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString) +#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check) +#define uspoof_check2 U_ICU_ENTRY_POINT_RENAME(uspoof_check2) +#define uspoof_check2UTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_check2UTF8) +#define uspoof_check2UnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_check2UnicodeString) +#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8) +#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString) +#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone) +#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close) +#define uspoof_closeCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_closeCheckResult) +#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars) +#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales) +#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet) +#define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks) +#define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics) +#define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel) +#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks) +#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet) +#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet) +#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet) +#define 
uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet) +#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel) +#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton) +#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8) +#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString) +#define uspoof_internalInitStatics U_ICU_ENTRY_POINT_RENAME(uspoof_internalInitStatics) +#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open) +#define uspoof_openCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_openCheckResult) +#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized) +#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource) +#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize) +#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars) +#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales) +#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet) +#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks) +#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel) +#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap) +#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close) +#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open) +#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType) +#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare) +#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap) +#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN) +#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN) +#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN) +#define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale) +#define 
ustrcase_getTitleBreakIterator U_ICU_ENTRY_POINT_RENAME(ustrcase_getTitleBreakIterator) +#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold) +#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower) +#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle) +#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper) +#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map) +#define ustrcase_mapWithOverlap U_ICU_ENTRY_POINT_RENAME(ustrcase_mapWithOverlap) +#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At) +#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone) +#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close) +#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy) +#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32) +#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals) +#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract) +#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze) +#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex) +#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex) +#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData) +#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive) +#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable) +#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32) +#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength) +#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32) +#define utext_next32From U_ICU_ENTRY_POINT_RENAME(utext_next32From) +#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator) +#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString) +#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable) +#define 
utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars) +#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8) +#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString) +#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32) +#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From) +#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace) +#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex) +#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup) +#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody) +#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody) +#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes) +#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody) +#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody) +#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64) +#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue) +#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64) +#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup) +#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data) +#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry) +#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit) +#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format) +#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName) +#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions) +#define utrace_getLevel U_ICU_ENTRY_POINT_RENAME(utrace_getLevel) +#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions) +#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel) +#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat) +#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone) +#define utrans_close 
U_ICU_ENTRY_POINT_RENAME(utrans_close) +#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs) +#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID) +#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID) +#define utrans_getSourceSet U_ICU_ENTRY_POINT_RENAME(utrans_getSourceSet) +#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID) +#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open) +#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs) +#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse) +#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU) +#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register) +#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator) +#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter) +#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules) +#define utrans_toRules U_ICU_ENTRY_POINT_RENAME(utrans_toRules) +#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans) +#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental) +#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars) +#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars) +#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup) +#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister) +#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID) +#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone) +#define utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed) +#define utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close) +#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum) +#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate) +#define utrie2_freeze 
U_ICU_ENTRY_POINT_RENAME(utrie2_freeze) +#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie) +#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32) +#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit) +#define utrie2_getVersion U_ICU_ENTRY_POINT_RENAME(utrie2_getVersion) +#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex) +#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex) +#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen) +#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open) +#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy) +#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized) +#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize) +#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32) +#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit) +#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32) +#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap) +#define utrie2_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie2_swapAnyVersion) +#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone) +#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close) +#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset) +#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum) +#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32) +#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData) +#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open) +#define utrie_serialize U_ICU_ENTRY_POINT_RENAME(utrie_serialize) +#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32) +#define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32) +#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap) +#define utrie_unserialize 
U_ICU_ENTRY_POINT_RENAME(utrie_unserialize) +#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy) +#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone) +#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close) +#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules) +#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals) +#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID) +#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified) +#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition) +#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset) +#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2) +#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3) +#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition) +#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset) +#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID) +#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL) +#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules) +#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime) +#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData) +#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID) +#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified) +#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset) +#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL) +#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime) +#define vzone_write U_ICU_ENTRY_POINT_RENAME(vzone_write) +#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart) +#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple) +#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close) +#define 
zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals) +#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings) +#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName) +#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset) +#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo) +#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom) +#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo) +#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone) +#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close) +#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals) +#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID) +#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom) +#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID) +#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime) +#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo) +#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open) +#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty) +#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom) +#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime) +#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo) + +#endif + +#endif diff --git a/vendor/icu/include/unicode/uscript.h b/vendor/icu/include/unicode/uscript.h new file mode 100644 index 0000000000..3859d719a9 --- /dev/null +++ b/vendor/icu/include/unicode/uscript.h @@ -0,0 +1,675 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ********************************************************************** + * Copyright (C) 1997-2016, International Business Machines + * Corporation and others. All Rights Reserved. 
+ ********************************************************************** + * + * File USCRIPT.H + * + * Modification History: + * + * Date Name Description + * 07/06/2001 Ram Creation. + ****************************************************************************** + */ + +#ifndef USCRIPT_H +#define USCRIPT_H +#include <unicode/utypes.h> + +/** + * \file + * \brief C API: Unicode Script Information + */ + +/** + * Constants for ISO 15924 script codes. + * + * The current set of script code constants supports at least all scripts + * that are encoded in the version of Unicode which ICU currently supports. + * The names of the constants are usually derived from the + * Unicode script property value aliases. + * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) + * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . + * + * In addition, constants for many ISO 15924 script codes + * are included, for use with language tags, CLDR data, and similar. + * Some of those codes are not used in the Unicode Character Database (UCD). + * For example, there are no characters that have a UCD script property value of + * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. + * + * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR. + * + * Starting with ICU 55, script codes are only added when their scripts + * have been or will certainly be encoded in Unicode, + * and have been assigned Unicode script property value aliases, + * to ensure that their script names are stable and match the names of the constants. + * Script codes like Latf and Aran that are not subject to separate encoding + * may be added at any time. + * + * @stable ICU 2.2 + */ +typedef enum UScriptCode { + /* + * Note: UScriptCode constants and their ISO script code comments + * are parsed by preparseucd.py. 
+ * It matches lines like + * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * / + */ + + /** @stable ICU 2.2 */ + USCRIPT_INVALID_CODE = -1, + /** @stable ICU 2.2 */ + USCRIPT_COMMON = 0, /* Zyyy */ + /** @stable ICU 2.2 */ + USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ + /** @stable ICU 2.2 */ + USCRIPT_ARABIC = 2, /* Arab */ + /** @stable ICU 2.2 */ + USCRIPT_ARMENIAN = 3, /* Armn */ + /** @stable ICU 2.2 */ + USCRIPT_BENGALI = 4, /* Beng */ + /** @stable ICU 2.2 */ + USCRIPT_BOPOMOFO = 5, /* Bopo */ + /** @stable ICU 2.2 */ + USCRIPT_CHEROKEE = 6, /* Cher */ + /** @stable ICU 2.2 */ + USCRIPT_COPTIC = 7, /* Copt */ + /** @stable ICU 2.2 */ + USCRIPT_CYRILLIC = 8, /* Cyrl */ + /** @stable ICU 2.2 */ + USCRIPT_DESERET = 9, /* Dsrt */ + /** @stable ICU 2.2 */ + USCRIPT_DEVANAGARI = 10, /* Deva */ + /** @stable ICU 2.2 */ + USCRIPT_ETHIOPIC = 11, /* Ethi */ + /** @stable ICU 2.2 */ + USCRIPT_GEORGIAN = 12, /* Geor */ + /** @stable ICU 2.2 */ + USCRIPT_GOTHIC = 13, /* Goth */ + /** @stable ICU 2.2 */ + USCRIPT_GREEK = 14, /* Grek */ + /** @stable ICU 2.2 */ + USCRIPT_GUJARATI = 15, /* Gujr */ + /** @stable ICU 2.2 */ + USCRIPT_GURMUKHI = 16, /* Guru */ + /** @stable ICU 2.2 */ + USCRIPT_HAN = 17, /* Hani */ + /** @stable ICU 2.2 */ + USCRIPT_HANGUL = 18, /* Hang */ + /** @stable ICU 2.2 */ + USCRIPT_HEBREW = 19, /* Hebr */ + /** @stable ICU 2.2 */ + USCRIPT_HIRAGANA = 20, /* Hira */ + /** @stable ICU 2.2 */ + USCRIPT_KANNADA = 21, /* Knda */ + /** @stable ICU 2.2 */ + USCRIPT_KATAKANA = 22, /* Kana */ + /** @stable ICU 2.2 */ + USCRIPT_KHMER = 23, /* Khmr */ + /** @stable ICU 2.2 */ + USCRIPT_LAO = 24, /* Laoo */ + /** @stable ICU 2.2 */ + USCRIPT_LATIN = 25, /* Latn */ + /** @stable ICU 2.2 */ + USCRIPT_MALAYALAM = 26, /* Mlym */ + /** @stable ICU 2.2 */ + USCRIPT_MONGOLIAN = 27, /* Mong */ + /** @stable ICU 2.2 */ + USCRIPT_MYANMAR = 28, /* Mymr */ + /** @stable ICU 2.2 */ + 
USCRIPT_OGHAM = 29, /* Ogam */ + /** @stable ICU 2.2 */ + USCRIPT_OLD_ITALIC = 30, /* Ital */ + /** @stable ICU 2.2 */ + USCRIPT_ORIYA = 31, /* Orya */ + /** @stable ICU 2.2 */ + USCRIPT_RUNIC = 32, /* Runr */ + /** @stable ICU 2.2 */ + USCRIPT_SINHALA = 33, /* Sinh */ + /** @stable ICU 2.2 */ + USCRIPT_SYRIAC = 34, /* Syrc */ + /** @stable ICU 2.2 */ + USCRIPT_TAMIL = 35, /* Taml */ + /** @stable ICU 2.2 */ + USCRIPT_TELUGU = 36, /* Telu */ + /** @stable ICU 2.2 */ + USCRIPT_THAANA = 37, /* Thaa */ + /** @stable ICU 2.2 */ + USCRIPT_THAI = 38, /* Thai */ + /** @stable ICU 2.2 */ + USCRIPT_TIBETAN = 39, /* Tibt */ + /** Canadian_Aboriginal script. @stable ICU 2.6 */ + USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */ + /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ + USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL, + /** @stable ICU 2.2 */ + USCRIPT_YI = 41, /* Yiii */ + /* New scripts in Unicode 3.2 */ + /** @stable ICU 2.2 */ + USCRIPT_TAGALOG = 42, /* Tglg */ + /** @stable ICU 2.2 */ + USCRIPT_HANUNOO = 43, /* Hano */ + /** @stable ICU 2.2 */ + USCRIPT_BUHID = 44, /* Buhd */ + /** @stable ICU 2.2 */ + USCRIPT_TAGBANWA = 45, /* Tagb */ + + /* New scripts in Unicode 4 */ + /** @stable ICU 2.6 */ + USCRIPT_BRAILLE = 46, /* Brai */ + /** @stable ICU 2.6 */ + USCRIPT_CYPRIOT = 47, /* Cprt */ + /** @stable ICU 2.6 */ + USCRIPT_LIMBU = 48, /* Limb */ + /** @stable ICU 2.6 */ + USCRIPT_LINEAR_B = 49, /* Linb */ + /** @stable ICU 2.6 */ + USCRIPT_OSMANYA = 50, /* Osma */ + /** @stable ICU 2.6 */ + USCRIPT_SHAVIAN = 51, /* Shaw */ + /** @stable ICU 2.6 */ + USCRIPT_TAI_LE = 52, /* Tale */ + /** @stable ICU 2.6 */ + USCRIPT_UGARITIC = 53, /* Ugar */ + + /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ + USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */ + + /* New scripts in Unicode 4.1 */ + /** @stable ICU 3.4 */ + USCRIPT_BUGINESE = 55, /* Bugi */ + /** @stable ICU 3.4 */ + USCRIPT_GLAGOLITIC = 56, /* Glag */ + /** @stable ICU 3.4 */ + USCRIPT_KHAROSHTHI = 57, /* Khar 
*/ + /** @stable ICU 3.4 */ + USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */ + /** @stable ICU 3.4 */ + USCRIPT_NEW_TAI_LUE = 59, /* Talu */ + /** @stable ICU 3.4 */ + USCRIPT_TIFINAGH = 60, /* Tfng */ + /** @stable ICU 3.4 */ + USCRIPT_OLD_PERSIAN = 61, /* Xpeo */ + + /* New script codes from Unicode and ISO 15924 */ + /** @stable ICU 3.6 */ + USCRIPT_BALINESE = 62, /* Bali */ + /** @stable ICU 3.6 */ + USCRIPT_BATAK = 63, /* Batk */ + /** @stable ICU 3.6 */ + USCRIPT_BLISSYMBOLS = 64, /* Blis */ + /** @stable ICU 3.6 */ + USCRIPT_BRAHMI = 65, /* Brah */ + /** @stable ICU 3.6 */ + USCRIPT_CHAM = 66, /* Cham */ + /** @stable ICU 3.6 */ + USCRIPT_CIRTH = 67, /* Cirt */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */ + /** @stable ICU 3.6 */ + USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */ + /** @stable ICU 3.6 */ + USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */ + /** @stable ICU 3.6 */ + USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */ + /** @stable ICU 3.6 */ + USCRIPT_KHUTSURI = 72, /* Geok */ + /** @stable ICU 3.6 */ + USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */ + /** @stable ICU 3.6 */ + USCRIPT_TRADITIONAL_HAN = 74, /* Hant */ + /** @stable ICU 3.6 */ + USCRIPT_PAHAWH_HMONG = 75, /* Hmng */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_HUNGARIAN = 76, /* Hung */ + /** @stable ICU 3.6 */ + USCRIPT_HARAPPAN_INDUS = 77, /* Inds */ + /** @stable ICU 3.6 */ + USCRIPT_JAVANESE = 78, /* Java */ + /** @stable ICU 3.6 */ + USCRIPT_KAYAH_LI = 79, /* Kali */ + /** @stable ICU 3.6 */ + USCRIPT_LATIN_FRAKTUR = 80, /* Latf */ + /** @stable ICU 3.6 */ + USCRIPT_LATIN_GAELIC = 81, /* Latg */ + /** @stable ICU 3.6 */ + USCRIPT_LEPCHA = 82, /* Lepc */ + /** @stable ICU 3.6 */ + USCRIPT_LINEAR_A = 83, /* Lina */ + /** @stable ICU 4.6 */ + USCRIPT_MANDAIC = 84, /* Mand */ + /** @stable ICU 3.6 */ + USCRIPT_MANDAEAN = USCRIPT_MANDAIC, + /** @stable ICU 3.6 */ + USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */ + /** @stable ICU 4.6 */ + USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */ + /** 
@stable ICU 3.6 */ + USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS, + /** @stable ICU 3.6 */ + USCRIPT_NKO = 87, /* Nkoo */ + /** @stable ICU 3.6 */ + USCRIPT_ORKHON = 88, /* Orkh */ + /** @stable ICU 3.6 */ + USCRIPT_OLD_PERMIC = 89, /* Perm */ + /** @stable ICU 3.6 */ + USCRIPT_PHAGS_PA = 90, /* Phag */ + /** @stable ICU 3.6 */ + USCRIPT_PHOENICIAN = 91, /* Phnx */ + /** @stable ICU 52 */ + USCRIPT_MIAO = 92, /* Plrd */ + /** @stable ICU 3.6 */ + USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO, + /** @stable ICU 3.6 */ + USCRIPT_RONGORONGO = 93, /* Roro */ + /** @stable ICU 3.6 */ + USCRIPT_SARATI = 94, /* Sara */ + /** @stable ICU 3.6 */ + USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */ + /** @stable ICU 3.6 */ + USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */ + /** @stable ICU 3.6 */ + USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */ + /** @stable ICU 3.6 */ + USCRIPT_TENGWAR = 98, /* Teng */ + /** @stable ICU 3.6 */ + USCRIPT_VAI = 99, /* Vaii */ + /** @stable ICU 3.6 */ + USCRIPT_VISIBLE_SPEECH = 100,/* Visp */ + /** @stable ICU 3.6 */ + USCRIPT_CUNEIFORM = 101,/* Xsux */ + /** @stable ICU 3.6 */ + USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */ + /** @stable ICU 3.6 */ + USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ + + /** @stable ICU 3.8 */ + USCRIPT_CARIAN = 104,/* Cari */ + /** @stable ICU 3.8 */ + USCRIPT_JAPANESE = 105,/* Jpan */ + /** @stable ICU 3.8 */ + USCRIPT_LANNA = 106,/* Lana */ + /** @stable ICU 3.8 */ + USCRIPT_LYCIAN = 107,/* Lyci */ + /** @stable ICU 3.8 */ + USCRIPT_LYDIAN = 108,/* Lydi */ + /** @stable ICU 3.8 */ + USCRIPT_OL_CHIKI = 109,/* Olck */ + /** @stable ICU 3.8 */ + USCRIPT_REJANG = 110,/* Rjng */ + /** @stable ICU 3.8 */ + USCRIPT_SAURASHTRA = 111,/* Saur */ + /** Sutton SignWriting @stable ICU 3.8 */ + USCRIPT_SIGN_WRITING = 112,/* Sgnw */ + /** @stable ICU 3.8 */ + USCRIPT_SUNDANESE = 113,/* Sund */ + /** @stable ICU 3.8 */ + USCRIPT_MOON = 114,/* Moon */ + /** @stable ICU 3.8 */ + USCRIPT_MEITEI_MAYEK 
= 115,/* Mtei */ + + /** @stable ICU 4.0 */ + USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */ + /** @stable ICU 4.0 */ + USCRIPT_AVESTAN = 117,/* Avst */ + /** @stable ICU 4.0 */ + USCRIPT_CHAKMA = 118,/* Cakm */ + /** @stable ICU 4.0 */ + USCRIPT_KOREAN = 119,/* Kore */ + /** @stable ICU 4.0 */ + USCRIPT_KAITHI = 120,/* Kthi */ + /** @stable ICU 4.0 */ + USCRIPT_MANICHAEAN = 121,/* Mani */ + /** @stable ICU 4.0 */ + USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */ + /** @stable ICU 4.0 */ + USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */ + /** @stable ICU 4.0 */ + USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */ + /** @stable ICU 4.0 */ + USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */ + /** @stable ICU 4.0 */ + USCRIPT_SAMARITAN = 126,/* Samr */ + /** @stable ICU 4.0 */ + USCRIPT_TAI_VIET = 127,/* Tavt */ + /** @stable ICU 4.0 */ + USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */ + /** @stable ICU 4.0 */ + USCRIPT_SYMBOLS = 129,/* Zsym */ + + /** @stable ICU 4.4 */ + USCRIPT_BAMUM = 130,/* Bamu */ + /** @stable ICU 4.4 */ + USCRIPT_LISU = 131,/* Lisu */ + /** @stable ICU 4.4 */ + USCRIPT_NAKHI_GEBA = 132,/* Nkgb */ + /** @stable ICU 4.4 */ + USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */ + + /** @stable ICU 4.6 */ + USCRIPT_BASSA_VAH = 134,/* Bass */ + /** @stable ICU 54 */ + USCRIPT_DUPLOYAN = 135,/* Dupl */ +#ifndef U_HIDE_DEPRECATED_API + /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */ + USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN, +#endif /* U_HIDE_DEPRECATED_API */ + /** @stable ICU 4.6 */ + USCRIPT_ELBASAN = 136,/* Elba */ + /** @stable ICU 4.6 */ + USCRIPT_GRANTHA = 137,/* Gran */ + /** @stable ICU 4.6 */ + USCRIPT_KPELLE = 138,/* Kpel */ + /** @stable ICU 4.6 */ + USCRIPT_LOMA = 139,/* Loma */ + /** Mende Kikakui @stable ICU 4.6 */ + USCRIPT_MENDE = 140,/* Mend */ + /** @stable ICU 4.6 */ + USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */ + /** @stable ICU 4.6 */ + USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */ + /** @stable ICU 4.6 */ + USCRIPT_NABATAEAN = 143,/* Nbat */ + /** @stable ICU 
4.6 */ + USCRIPT_PALMYRENE = 144,/* Palm */ + /** @stable ICU 54 */ + USCRIPT_KHUDAWADI = 145,/* Sind */ + /** @stable ICU 4.6 */ + USCRIPT_SINDHI = USCRIPT_KHUDAWADI, + /** @stable ICU 4.6 */ + USCRIPT_WARANG_CITI = 146,/* Wara */ + + /** @stable ICU 4.8 */ + USCRIPT_AFAKA = 147,/* Afak */ + /** @stable ICU 4.8 */ + USCRIPT_JURCHEN = 148,/* Jurc */ + /** @stable ICU 4.8 */ + USCRIPT_MRO = 149,/* Mroo */ + /** @stable ICU 4.8 */ + USCRIPT_NUSHU = 150,/* Nshu */ + /** @stable ICU 4.8 */ + USCRIPT_SHARADA = 151,/* Shrd */ + /** @stable ICU 4.8 */ + USCRIPT_SORA_SOMPENG = 152,/* Sora */ + /** @stable ICU 4.8 */ + USCRIPT_TAKRI = 153,/* Takr */ + /** @stable ICU 4.8 */ + USCRIPT_TANGUT = 154,/* Tang */ + /** @stable ICU 4.8 */ + USCRIPT_WOLEAI = 155,/* Wole */ + + /** @stable ICU 49 */ + USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */ + /** @stable ICU 49 */ + USCRIPT_KHOJKI = 157,/* Khoj */ + /** @stable ICU 49 */ + USCRIPT_TIRHUTA = 158,/* Tirh */ + + /** @stable ICU 52 */ + USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */ + /** @stable ICU 52 */ + USCRIPT_MAHAJANI = 160,/* Mahj */ + + /** @stable ICU 54 */ + USCRIPT_AHOM = 161,/* Ahom */ + /** @stable ICU 54 */ + USCRIPT_HATRAN = 162,/* Hatr */ + /** @stable ICU 54 */ + USCRIPT_MODI = 163,/* Modi */ + /** @stable ICU 54 */ + USCRIPT_MULTANI = 164,/* Mult */ + /** @stable ICU 54 */ + USCRIPT_PAU_CIN_HAU = 165,/* Pauc */ + /** @stable ICU 54 */ + USCRIPT_SIDDHAM = 166,/* Sidd */ + + /** @stable ICU 58 */ + USCRIPT_ADLAM = 167,/* Adlm */ + /** @stable ICU 58 */ + USCRIPT_BHAIKSUKI = 168,/* Bhks */ + /** @stable ICU 58 */ + USCRIPT_MARCHEN = 169,/* Marc */ + /** @stable ICU 58 */ + USCRIPT_NEWA = 170,/* Newa */ + /** @stable ICU 58 */ + USCRIPT_OSAGE = 171,/* Osge */ + + /** @stable ICU 58 */ + USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */ + /** @stable ICU 58 */ + USCRIPT_JAMO = 173,/* Jamo */ + /** @stable ICU 58 */ + USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */ + + /** @stable ICU 60 */ + USCRIPT_MASARAM_GONDI = 175,/* Gonm */ + 
/** @stable ICU 60 */ + USCRIPT_SOYOMBO = 176,/* Soyo */ + /** @stable ICU 60 */ + USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UScriptCode value. + * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT). + * + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + USCRIPT_CODE_LIMIT = 178 +#endif // U_HIDE_DEPRECATED_API +} UScriptCode; + +/** + * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name. + * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym". + * Fills in USCRIPT_LATIN given "en" OR "en_US" + * If the required capacity is greater than the capacity of the destination buffer, + * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned. + * + * <p>Note: To search by short or long script alias only, use + * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does + * a fast lookup with no access of the locale data. + * + * @param nameOrAbbrOrLocale name of the script, as given in + * PropertyValueAliases.txt, or ISO 15924 code or locale + * @param fillIn the UScriptCode buffer to fill in the script code + * @param capacity the capacity (size) of UScriptCode buffer passed in. + * @param err the error status code. + * @return The number of script codes filled in the buffer passed in + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err); + +/** + * Returns the long Unicode script name, if there is one. + * Otherwise returns the 4-letter ISO 15924 script code. + * Returns "Malayalam" given USCRIPT_MALAYALAM. 
+ * + * @param scriptCode UScriptCode enum + * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code, + * or NULL if scriptCode is invalid + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +uscript_getName(UScriptCode scriptCode); + +/** + * Returns the 4-letter ISO 15924 script code, + * which is the same as the short Unicode script name if Unicode has names for the script. + * Returns "Mlym" given USCRIPT_MALAYALAM. + * + * @param scriptCode UScriptCode enum + * @return short script name (4-letter code), or NULL if scriptCode is invalid + * @stable ICU 2.4 + */ +U_STABLE const char* U_EXPORT2 +uscript_getShortName(UScriptCode scriptCode); + +/** + * Gets the script code associated with the given codepoint. + * Returns USCRIPT_MALAYALAM given 0x0D02 + * @param codepoint UChar32 codepoint + * @param err the error status code. + * @return The UScriptCode, or 0 if codepoint is invalid + * @stable ICU 2.4 + */ +U_STABLE UScriptCode U_EXPORT2 +uscript_getScript(UChar32 codepoint, UErrorCode *err); + +/** + * Do the Script_Extensions of code point c contain script sc? + * If c does not have explicit Script_Extensions, then this tests whether + * c has the Script property value sc. + * + * Some characters are commonly used in multiple scripts. + * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + * @param c code point + * @param sc script code + * @return TRUE if sc is in Script_Extensions(c) + * @stable ICU 49 + */ +U_STABLE UBool U_EXPORT2 +uscript_hasScript(UChar32 c, UScriptCode sc); + +/** + * Writes code point c's Script_Extensions as a list of UScriptCode values + * to the output scripts array and returns the number of script codes. + * - If c does have Script_Extensions, then the Script property value + * (normally Common or Inherited) is not included. + * - If c does not have Script_Extensions, then the one Script code is written to the output array. 
+ * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written. + * In other words, if the return value is 1, + * then the output array contains exactly c's single Script code. + * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes. + * + * Some characters are commonly used in multiple scripts. + * For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + * + * If there are more than capacity script codes to be written, then + * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned. + * (Usual ICU buffer handling behavior.) + * + * @param c code point + * @param scripts output script code array + * @param capacity capacity of the scripts array + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return number of script codes in c's Script_Extensions, or 1 for the single Script value, + * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity + * @stable ICU 49 + */ +U_STABLE int32_t U_EXPORT2 +uscript_getScriptExtensions(UChar32 c, + UScriptCode *scripts, int32_t capacity, + UErrorCode *errorCode); + +/** + * Script usage constants. + * See UAX #31 Unicode Identifier and Pattern Syntax. + * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers + * + * @stable ICU 51 + */ +typedef enum UScriptUsage { + /** Not encoded in Unicode. @stable ICU 51 */ + USCRIPT_USAGE_NOT_ENCODED, + /** Unknown script usage. @stable ICU 51 */ + USCRIPT_USAGE_UNKNOWN, + /** Candidate for Exclusion from Identifiers. @stable ICU 51 */ + USCRIPT_USAGE_EXCLUDED, + /** Limited Use script. @stable ICU 51 */ + USCRIPT_USAGE_LIMITED_USE, + /** Aspirational Use script. 
@stable ICU 51 */ + USCRIPT_USAGE_ASPIRATIONAL, + /** Recommended script. @stable ICU 51 */ + USCRIPT_USAGE_RECOMMENDED +} UScriptUsage; + +/** + * Writes the script sample character string. + * This string normally consists of one code point but might be longer. + * The string is empty if the script is not encoded. + * + * @param script script code + * @param dest output string array + * @param capacity number of UChars in the dest array + * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input + * @return the string length, even if U_BUFFER_OVERFLOW_ERROR + * @stable ICU 51 + */ +U_STABLE int32_t U_EXPORT2 +uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN +class UnicodeString; +U_NAMESPACE_END + +/** + * Returns the script sample character string. + * This string normally consists of one code point but might be longer. + * The string is empty if the script is not encoded. + * + * @param script script code + * @return the sample character string + * @stable ICU 51 + */ +U_COMMON_API icu::UnicodeString U_EXPORT2 +uscript_getSampleUnicodeString(UScriptCode script); + +#endif + +/** + * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax. + * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode. + * + * @param script script code + * @return script usage + * @see UScriptUsage + * @stable ICU 51 + */ +U_STABLE UScriptUsage U_EXPORT2 +uscript_getUsage(UScriptCode script); + +/** + * Returns TRUE if the script is written right-to-left. + * For example, Arab and Hebr. + * + * @param script script code + * @return TRUE if the script is right-to-left + * @stable ICU 51 + */ +U_STABLE UBool U_EXPORT2 +uscript_isRightToLeft(UScriptCode script); + +/** + * Returns TRUE if the script allows line breaks between letters (excluding hyphenation). 
+ * Such a script typically requires dictionary-based line breaking. + * For example, Hani and Thai. + * + * @param script script code + * @return TRUE if the script allows line breaks between letters + * @stable ICU 51 + */ +U_STABLE UBool U_EXPORT2 +uscript_breaksBetweenLetters(UScriptCode script); + +/** + * Returns TRUE if in modern (or most recent) usage of the script case distinctions are customary. + * For example, Latn and Cyrl. + * + * @param script script code + * @return TRUE if the script is cased + * @stable ICU 51 + */ +U_STABLE UBool U_EXPORT2 +uscript_isCased(UScriptCode script); + +#endif diff --git a/vendor/icu/include/unicode/uset.h b/vendor/icu/include/unicode/uset.h new file mode 100644 index 0000000000..3c1ee4906b --- /dev/null +++ b/vendor/icu/include/unicode/uset.h @@ -0,0 +1,1130 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2014, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: uset.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002mar07 +* created by: Markus W. Scherer +* +* C version of UnicodeSet. +*/ + + +/** + * \file + * \brief C API: Unicode Set + * + * <p>This is a C wrapper around the C++ UnicodeSet class.</p> + */ + +#ifndef __USET_H__ +#define __USET_H__ + +#include <unicode/utypes.h> +#include <unicode/uchar.h> +#include <unicode/localpointer.h> + +#ifndef UCNV_H +struct USet; +/** + * A UnicodeSet. Use the uset_* API to manipulate. Create with + * uset_open*, and destroy with uset_close. + * @stable ICU 2.4 + */ +typedef struct USet USet; +#endif + +/** + * Bitmask values to be passed to uset_openPatternOptions() or + * uset_applyPattern() taking an option parameter. 
+ * @stable ICU 2.4 + */ +enum { + /** + * Ignore white space within patterns unless quoted or escaped. + * @stable ICU 2.4 + */ + USET_IGNORE_SPACE = 1, + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This performs a full + * closure over case mappings, e.g. U+017F for s. + * + * The resulting set is a superset of the input for the code points but + * not for the strings. + * It performs a case mapping closure of the code points and adds + * full case folding strings for the code points, and reduces strings of + * the original set to their full case folding equivalents. + * + * This is designed for case-insensitive matches, for example + * in regular expressions. The full code point case closure allows checking of + * an input character directly against the closure set. + * Strings are matched by comparing the case-folded form from the closure + * set with an incremental case folding of the string in question. + * + * The closure set will also contain single code points if the original + * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). + * This is not necessary (that is, redundant) for the above matching method + * but results in the same closure sets regardless of whether the original + * set contained the code point or a string. + * + * @stable ICU 2.4 + */ + USET_CASE_INSENSITIVE = 2, + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, + * title-, and uppercase mappings as well as the case folding + * of each existing element in the set. + * @stable ICU 3.2 + */ + USET_ADD_CASE_MAPPINGS = 4 +}; + +/** + * Argument values for whether span() and similar functions continue while + * the current character is contained vs. not contained in the set. 
+ * + * The functionality is straightforward for sets with only single code points, + * without strings (which is the common case): + * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. + * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. + * - span() and spanBack() partition any string the same way when + * alternating between span(USET_SPAN_NOT_CONTAINED) and + * span(either "contained" condition). + * - Using a complemented (inverted) set and the opposite span conditions + * yields the same results. + * + * When a set contains multi-code point strings, then these statements may not + * be true, depending on the strings in the set (for example, whether they + * overlap with each other) and the string that is processed. + * For a set with strings: + * - The complement of the set contains the opposite set of code points, + * but the same set of strings. + * Therefore, complementing both the set and the span conditions + * may yield different results. + * - When starting spans at different positions in a string + * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different + * because a set string may start before the later position. + * - span(USET_SPAN_SIMPLE) may be shorter than + * span(USET_SPAN_CONTAINED) because it will not recursively try + * all possible paths. + * For example, with a set which contains the three strings "xy", "xya" and "ax", + * span("xyax", USET_SPAN_CONTAINED) will return 4 but + * span("xyax", USET_SPAN_SIMPLE) will return 3. + * span(USET_SPAN_SIMPLE) will never be longer than + * span(USET_SPAN_CONTAINED). + * - With either "contained" condition, span() and spanBack() may partition + * a string in different ways. + * For example, with a set which contains the two strings "ab" and "ba", + * and when processing the string "aba", + * span() will yield contained/not-contained boundaries of { 0, 2, 3 } + * while spanBack() will yield boundaries of { 0, 1, 3 }. 
+ * + * Note: If it is important to get the same boundaries whether iterating forward + * or backward through a string, then either only span() should be used and + * the boundaries cached for backward operation, or an ICU BreakIterator + * could be used. + * + * Note: Unpaired surrogates are treated like surrogate code points. + * Similarly, set strings match only on code point boundaries, + * never in the middle of a surrogate pair. + * Illegal UTF-8 sequences are treated like U+FFFD. + * When processing UTF-8 strings, malformed set strings + * (strings with unpaired surrogates which cannot be converted to UTF-8) + * are ignored. + * + * @stable ICU 3.8 + */ +typedef enum USetSpanCondition { + /** + * Continues a span() while there is no set element at the current position. + * Increments by one code point at a time. + * Stops before the first set element (character or string). + * (For code points only, this is like while contains(current)==FALSE). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of characters that are not in the set, + * and none of its strings overlap with the span. + * + * @stable ICU 3.8 + */ + USET_SPAN_NOT_CONTAINED = 0, + /** + * Spans the longest substring that is a concatenation of set elements (characters or strings). + * (For characters only, this is like while contains(current)==TRUE). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of set elements (characters or strings) that are in the set. + * + * If a set contains strings, then the span will be the longest substring for which there + * exists at least one non-overlapping concatenation of set elements (characters or strings). + * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>. + * (Java/ICU/Perl regex stops at the first match of an OR.) 
+ * + * @stable ICU 3.8 + */ + USET_SPAN_CONTAINED = 1, + /** + * Continues a span() while there is a set element at the current position. + * Increments by the longest matching element at each position. + * (For characters only, this is like while contains(current)==TRUE). + * + * When span() returns, the substring between where it started and the position + * it returned consists only of set elements (characters or strings) that are in the set. + * + * If a set only contains single characters, then this is the same + * as USET_SPAN_CONTAINED. + * + * If a set contains strings, then the span will be the longest substring + * with a match at each position with the longest single set element (character or string). + * + * Use this span condition together with other longest-match algorithms, + * such as ICU converters (ucnv_getUnicodeSet()). + * + * @stable ICU 3.8 + */ + USET_SPAN_SIMPLE = 2, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the last span condition. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + USET_SPAN_CONDITION_COUNT +#endif // U_HIDE_DEPRECATED_API +} USetSpanCondition; + +enum { + /** + * Capacity of USerializedSet::staticArray. + * Enough for any single-code point set. + * Also provides padding for nice sizeof(USerializedSet). + * @stable ICU 2.4 + */ + USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8 +}; + +/** + * A serialized form of a Unicode set. Limited manipulations are + * possible directly on a serialized set. See below. + * @stable ICU 2.4 + */ +typedef struct USerializedSet { + /** + * The serialized Unicode Set. + * @stable ICU 2.4 + */ + const uint16_t *array; + /** + * The length of the array that contains BMP characters. + * @stable ICU 2.4 + */ + int32_t bmpLength; + /** + * The total length of the array. + * @stable ICU 2.4 + */ + int32_t length; + /** + * A small buffer for the array to reduce memory allocations. 
+ * @stable ICU 2.4 + */ + uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]; +} USerializedSet; + +/********************************************************************* + * USet API + *********************************************************************/ + +/** + * Create an empty USet object. + * Equivalent to uset_open(1, 0). + * @return a newly created USet. The caller must call uset_close() on + * it when done. + * @stable ICU 4.2 + */ +U_STABLE USet* U_EXPORT2 +uset_openEmpty(void); + +/** + * Creates a USet object that contains the range of characters + * start..end, inclusive. If <code>start > end</code> + * then an empty set is created (same as using uset_openEmpty()). + * @param start first character of the range, inclusive + * @param end last character of the range, inclusive + * @return a newly created USet. The caller must call uset_close() on + * it when done. + * @stable ICU 2.4 + */ +U_STABLE USet* U_EXPORT2 +uset_open(UChar32 start, UChar32 end); + +/** + * Creates a set from the given pattern. See the UnicodeSet class + * description for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param patternLength the length of the pattern, or -1 if null + * terminated + * @param ec the error code + * @stable ICU 2.4 + */ +U_STABLE USet* U_EXPORT2 +uset_openPattern(const UChar* pattern, int32_t patternLength, + UErrorCode* ec); + +/** + * Creates a set from the given pattern. See the UnicodeSet class + * description for the syntax of the pattern language. + * @param pattern a string specifying what characters are in the set + * @param patternLength the length of the pattern, or -1 if null + * terminated + * @param options bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. 
+ * @param ec the error code + * @stable ICU 2.4 + */ +U_STABLE USet* U_EXPORT2 +uset_openPatternOptions(const UChar* pattern, int32_t patternLength, + uint32_t options, + UErrorCode* ec); + +/** + * Disposes of the storage used by a USet object. This function should + * be called exactly once for objects returned by uset_open(). + * @param set the object to dispose of + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_close(USet* set); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUSetPointer + * "Smart pointer" class, closes a USet via uset_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @stable ICU 4.4 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close); + +U_NAMESPACE_END + +#endif + +/** + * Returns a copy of this object. + * If this set is frozen, then the clone will be frozen as well. + * Use uset_cloneAsThawed() for a mutable clone of a frozen set. + * @param set the original set + * @return the newly allocated copy of the set + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_STABLE USet * U_EXPORT2 +uset_clone(const USet *set); + +/** + * Determines whether the set has been frozen (made immutable) or not. + * See the ICU4J Freezable interface for details. + * @param set the set + * @return TRUE/FALSE for whether the set has been frozen + * @see uset_freeze + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_STABLE UBool U_EXPORT2 +uset_isFrozen(const USet *set); + +/** + * Freeze the set (make it immutable). + * Once frozen, it cannot be unfrozen and is therefore thread-safe + * until it is deleted. + * See the ICU4J Freezable interface for details. + * Freezing the set may also make some operations faster, for example + * uset_contains() and uset_span(). + * A frozen set will not be modified. (It remains frozen.) 
+ * @param set the set + * @return the same set, now frozen + * @see uset_isFrozen + * @see uset_cloneAsThawed + * @stable ICU 3.8 + */ +U_STABLE void U_EXPORT2 +uset_freeze(USet *set); + +/** + * Clone the set and make the clone mutable. + * See the ICU4J Freezable interface for details. + * @param set the set + * @return the mutable clone + * @see uset_freeze + * @see uset_isFrozen + * @see uset_clone + * @stable ICU 3.8 + */ +U_STABLE USet * U_EXPORT2 +uset_cloneAsThawed(const USet *set); + +/** + * Causes the USet object to represent the range <code>start - end</code>. + * If <code>start > end</code> then this USet is set to an empty range. + * A frozen set will not be modified. + * @param set the object to set to the given range + * @param start first character in the set, inclusive + * @param end last character in the set, inclusive + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_set(USet* set, + UChar32 start, UChar32 end); + +/** + * Modifies the set to represent the set specified by the given + * pattern. See the UnicodeSet class description for the syntax of + * the pattern language. See also the User Guide chapter about UnicodeSet. + * <em>Empties the set passed before applying the pattern.</em> + * A frozen set will not be modified. + * @param set The set to which the pattern is to be applied. + * @param pattern A pointer to UChar string specifying what characters are in the set. + * The character at pattern[0] must be a '['. + * @param patternLength The length of the UChar string. -1 if NUL terminated. + * @param options A bitmask for options to apply to the pattern. + * Valid options are USET_IGNORE_SPACE and USET_CASE_INSENSITIVE. + * @param status Returns an error if the pattern cannot be parsed. + * @return Upon successful parse, the value is either + * the index of the character after the closing ']' + * of the parsed pattern. + * If the status code indicates failure, then the return value + * is the index of the error in the source. 
+ * + * @stable ICU 2.8 + */ +U_STABLE int32_t U_EXPORT2 +uset_applyPattern(USet *set, + const UChar *pattern, int32_t patternLength, + uint32_t options, + UErrorCode *status); + +/** + * Modifies the set to contain those code points which have the given value + * for the given binary or enumerated property, as returned by + * u_getIntPropertyValue. Prior contents of this set are lost. + * A frozen set will not be modified. + * + * @param set the object to contain the code points defined by the property + * + * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1 + * or UCHAR_INT_START..UCHAR_INT_LIMIT-1 + * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1. + * + * @param value a value in the range u_getIntPropertyMinValue(prop).. + * u_getIntPropertyMaxValue(prop), with one exception. If prop is + * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but + * rather a mask value produced by U_GET_GC_MASK(). This allows grouped + * categories such as [:L:] to be represented. + * + * @param ec error code input/output parameter + * + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_applyIntPropertyValue(USet* set, + UProperty prop, int32_t value, UErrorCode* ec); + +/** + * Modifies the set to contain those code points which have the + * given value for the given property. Prior contents of this + * set are lost. + * A frozen set will not be modified. + * + * @param set the object to contain the code points defined by the given + * property and value alias + * + * @param prop a string specifying a property alias, either short or long. + * The name is matched loosely. See PropertyAliases.txt for names and a + * description of loose matching. If the value string is empty, then this + * string is interpreted as either a General_Category value alias, a Script + * value alias, a binary property alias, or a special ID. 
Special IDs are + * matched loosely and correspond to the following sets: + * + * "ANY" = [\\u0000-\\U0010FFFF], + * "ASCII" = [\\u0000-\\u007F], + * "Assigned" = [:^Cn:]. + * + * @param propLength the length of the prop, or -1 if NULL + * + * @param value a string specifying a value alias, either short or long. + * The name is matched loosely. See PropertyValueAliases.txt for names + * and a description of loose matching. In addition to aliases listed, + * numeric values and canonical combining classes may be expressed + * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string + * may also be empty. + * + * @param valueLength the length of the value, or -1 if NULL + * + * @param ec error code input/output parameter + * + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_applyPropertyAlias(USet* set, + const UChar *prop, int32_t propLength, + const UChar *value, int32_t valueLength, + UErrorCode* ec); + +/** + * Return true if the given position, in the given pattern, appears + * to be the start of a UnicodeSet pattern. + * + * @param pattern a string specifying the pattern + * @param patternLength the length of the pattern, or -1 if NULL + * @param pos the given position + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_resemblesPattern(const UChar *pattern, int32_t patternLength, + int32_t pos); + +/** + * Returns a string representation of this set. If the result of + * calling this function is passed to a uset_openPattern(), it + * will produce another set that is equal to this one. + * @param set the set + * @param result the string to receive the rules, may be NULL + * @param resultCapacity the capacity of result, may be 0 if result is NULL + * @param escapeUnprintable if TRUE then convert unprintable + * character to their hex escape representations, \\uxxxx or + * \\Uxxxxxxxx. Unprintable characters are those other than + * U+000A, U+0020..U+007E. + * @param ec error code. 
+ * @return length of string, possibly larger than resultCapacity + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_toPattern(const USet* set, + UChar* result, int32_t resultCapacity, + UBool escapeUnprintable, + UErrorCode* ec); + +/** + * Adds the given character to the given USet. After this call, + * uset_contains(set, c) will return TRUE. + * A frozen set will not be modified. + * @param set the object to which to add the character + * @param c the character to add + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_add(USet* set, UChar32 c); + +/** + * Adds all of the elements in the specified set to this set if + * they're not already present. This operation effectively + * modifies this set so that its value is the <i>union</i> of the two + * sets. The behavior of this operation is unspecified if the specified + * collection is modified while the operation is in progress. + * A frozen set will not be modified. + * + * @param set the object to which to add the set + * @param additionalSet the source set whose elements are to be added to this set. + * @stable ICU 2.6 + */ +U_STABLE void U_EXPORT2 +uset_addAll(USet* set, const USet *additionalSet); + +/** + * Adds the given range of characters to the given USet. After this call, + * uset_containsRange(set, start, end) will return TRUE. + * A frozen set will not be modified. + * @param set the object to which to add the range + * @param start the first character of the range to add, inclusive + * @param end the last character of the range to add, inclusive + * @stable ICU 2.2 + */ +U_STABLE void U_EXPORT2 +uset_addRange(USet* set, UChar32 start, UChar32 end); + +/** + * Adds the given string to the given USet. After this call, + * uset_containsString(set, str, strLen) will return TRUE. + * A frozen set will not be modified. + * @param set the object to which to add the string + * @param str the string to add + * @param strLen the length of the string or -1 if null terminated. 
+ * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_addString(USet* set, const UChar* str, int32_t strLen); + +/** + * Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"} + * If this set already contains any particular character, it has no effect on that character. + * A frozen set will not be modified. + * @param set the object to which to add the characters + * @param str the source string + * @param strLen the length of the string or -1 if null terminated. + * @stable ICU 3.4 + */ +U_STABLE void U_EXPORT2 +uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen); + +/** + * Removes the given character from the given USet. After this call, + * uset_contains(set, c) will return FALSE. + * A frozen set will not be modified. + * @param set the object from which to remove the character + * @param c the character to remove + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_remove(USet* set, UChar32 c); + +/** + * Removes the given range of characters from the given USet. After this call, + * uset_contains(set, c) will return FALSE for every c in the range. + * A frozen set will not be modified. + * @param set the object from which to remove the range + * @param start the first character of the range to remove, inclusive + * @param end the last character of the range to remove, inclusive + * @stable ICU 2.2 + */ +U_STABLE void U_EXPORT2 +uset_removeRange(USet* set, UChar32 start, UChar32 end); + +/** + * Removes the given string from the given USet. After this call, + * uset_containsString(set, str, strLen) will return FALSE. + * A frozen set will not be modified. + * @param set the object from which to remove the string + * @param str the string to remove + * @param strLen the length of the string or -1 if null terminated. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_removeString(USet* set, const UChar* str, int32_t strLen); + +/** + * Removes from this set all of its elements that are contained in the + * specified set. 
This operation effectively modifies this + * set so that its value is the <i>asymmetric set difference</i> of + * the two sets. + * A frozen set will not be modified. + * @param set the object from which the elements are to be removed + * @param removeSet the object that defines which elements will be + * removed from this set + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_removeAll(USet* set, const USet* removeSet); + +/** + * Retain only the elements in this set that are contained in the + * specified range. If <code>start > end</code> then an empty range is + * retained, leaving the set empty. This is equivalent to + * a boolean logic AND, or a set INTERSECTION. + * A frozen set will not be modified. + * + * @param set the object for which to retain only the specified range + * @param start first character, inclusive, of range to be retained + * to this set. + * @param end last character, inclusive, of range to be retained + * to this set. + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_retain(USet* set, UChar32 start, UChar32 end); + +/** + * Retains only the elements in this set that are contained in the + * specified set. In other words, removes from this set all of + * its elements that are not contained in the specified set. This + * operation effectively modifies this set so that its value is + * the <i>intersection</i> of the two sets. + * A frozen set will not be modified. + * + * @param set the object on which to perform the retain + * @param retain set that defines which elements this set will retain + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_retainAll(USet* set, const USet* retain); + +/** + * Reallocate this object's internal structures to take up the least + * possible space, without changing this object's value. + * A frozen set will not be modified. + * + * @param set the object on which to perform the compact + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_compact(USet* set); + +/** + * Inverts this set. 
This operation modifies this set so that + * its value is its complement. This operation does not affect + * the multicharacter strings, if any. + * A frozen set will not be modified. + * @param set the set + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_complement(USet* set); + +/** + * Complements in this set all elements contained in the specified + * set. Any character in the other set will be removed if it is + * in this set, or will be added if it is not in this set. + * A frozen set will not be modified. + * + * @param set the set with which to complement + * @param complement set that defines which elements will be xor'ed + * from this set. + * @stable ICU 3.2 + */ +U_STABLE void U_EXPORT2 +uset_complementAll(USet* set, const USet* complement); + +/** + * Removes all of the elements from this set. This set will be + * empty after this call returns. + * A frozen set will not be modified. + * @param set the set + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_clear(USet* set); + +/** + * Close this set over the given attribute. For the attribute + * USET_CASE, the result is to modify this set so that: + * + * 1. For each character or string 'a' in this set, all strings or + * characters 'b' such that foldCase(a) == foldCase(b) are added + * to this set. + * + * 2. For each string 'e' in the resulting set, if e != + * foldCase(e), 'e' will be removed. + * + * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}] + * + * (Here foldCase(x) refers to the operation u_strFoldCase, and a + * == b denotes that the contents are the same, not pointer + * comparison.) + * + * A frozen set will not be modified. + * + * @param set the set + * + * @param attributes bitmask for attributes to close over. + * Currently only the USET_CASE bit is supported. Any undefined bits + * are ignored. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uset_closeOver(USet* set, int32_t attributes); + +/** + * Remove all strings from this set. 
+ * + * @param set the set + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +uset_removeAllStrings(USet* set); + +/** + * Returns TRUE if the given USet contains no characters and no + * strings. + * @param set the set + * @return true if set is empty + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_isEmpty(const USet* set); + +/** + * Returns TRUE if the given USet contains the given character. + * This function works faster with a frozen set. + * @param set the set + * @param c The codepoint to check for within the set + * @return true if set contains c + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_contains(const USet* set, UChar32 c); + +/** + * Returns TRUE if the given USet contains all characters c + * where start <= c && c <= end. + * @param set the set + * @param start the first character of the range to test, inclusive + * @param end the last character of the range to test, inclusive + * @return TRUE if set contains the range + * @stable ICU 2.2 + */ +U_STABLE UBool U_EXPORT2 +uset_containsRange(const USet* set, UChar32 start, UChar32 end); + +/** + * Returns TRUE if the given USet contains the given string. + * @param set the set + * @param str the string + * @param strLen the length of the string or -1 if null terminated. + * @return true if set contains str + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_containsString(const USet* set, const UChar* str, int32_t strLen); + +/** + * Returns the index of the given character within this set, where + * the set is ordered by ascending code point. If the character + * is not in this set, return -1. The inverse of this method is + * <code>charAt()</code>. 
+ * @param set the set + * @param c the character to obtain the index for + * @return an index from 0..size()-1, or -1 + * @stable ICU 3.2 + */ +U_STABLE int32_t U_EXPORT2 +uset_indexOf(const USet* set, UChar32 c); + +/** + * Returns the character at the given index within this set, where + * the set is ordered by ascending code point. If the index is + * out of range, return (UChar32)-1. The inverse of this method is + * <code>indexOf()</code>. + * @param set the set + * @param charIndex an index from 0..size()-1 to obtain the char for + * @return the character at the given index, or (UChar32)-1. + * @stable ICU 3.2 + */ +U_STABLE UChar32 U_EXPORT2 +uset_charAt(const USet* set, int32_t charIndex); + +/** + * Returns the number of characters and strings contained in the given + * USet. + * @param set the set + * @return a non-negative integer counting the characters and strings + * contained in set + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_size(const USet* set); + +/** + * Returns the number of items in this set. An item is either a range + * of characters or a single multicharacter string. + * @param set the set + * @return a non-negative integer counting the character ranges + * and/or strings contained in set + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_getItemCount(const USet* set); + +/** + * Returns an item of this set. An item is either a range of + * characters or a single multicharacter string. + * @param set the set + * @param itemIndex a non-negative integer in the range 0.. 
+ * uset_getItemCount(set)-1 + * @param start pointer to variable to receive first character + * in range, inclusive + * @param end pointer to variable to receive last character in range, + * inclusive + * @param str buffer to receive the string, may be NULL + * @param strCapacity capacity of str, or 0 if str is NULL + * @param ec error code + * @return the length of the string (>= 2), or 0 if the item is a + * range, in which case it is the range *start..*end, or -1 if + * itemIndex is out of range + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_getItem(const USet* set, int32_t itemIndex, + UChar32* start, UChar32* end, + UChar* str, int32_t strCapacity, + UErrorCode* ec); + +/** + * Returns true if set1 contains all the characters and strings + * of set2. It answers the question, 'Is set1 a superset of set2?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_containsAll(const USet* set1, const USet* set2); + +/** + * Returns true if this set contains all the characters + * of the given string. This does not check containment of grapheme + * clusters, like uset_containsString. + * @param set set of characters to be checked for containment + * @param str string containing codepoints to be checked for containment + * @param strLen the length of the string or -1 if null terminated. + * @return true if the test condition is met + * @stable ICU 3.4 + */ +U_STABLE UBool U_EXPORT2 +uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen); + +/** + * Returns true if set1 contains none of the characters and strings + * of set2. It answers the question, 'Is set1 a disjoint set of set2?' 
+ * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_containsNone(const USet* set1, const USet* set2); + +/** + * Returns true if set1 contains some of the characters and strings + * of set2. It answers the question, 'Do set1 and set2 have an intersection?' + * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_containsSome(const USet* set1, const USet* set2); + +/** + * Returns the length of the initial substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Similar to the strspn() C library function. + * Unpaired surrogates are treated according to contains() of their surrogate code points. + * This function works faster with a frozen set and with a non-negative string length argument. 
+ * @param set the set + * @param s start of the string + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the length of the initial substring according to the spanCondition; + * 0 if the start of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_STABLE int32_t U_EXPORT2 +uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the start of the trailing substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Unpaired surrogates are treated according to contains() of their surrogate code points. + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the start of the trailing substring according to the spanCondition; + * the string length if the end of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_STABLE int32_t U_EXPORT2 +uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the length of the initial substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Similar to the strspn() C library function. 
+ * Malformed byte sequences are treated according to contains(0xfffd). + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string (UTF-8) + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the length of the initial substring according to the spanCondition; + * 0 if the start of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_STABLE int32_t U_EXPORT2 +uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns the start of the trailing substring of the input string which + * consists only of characters and strings that are contained in this set + * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE), + * or only of characters and strings that are not contained + * in this set (USET_SPAN_NOT_CONTAINED). + * See USetSpanCondition for details. + * Malformed byte sequences are treated according to contains(0xfffd). + * This function works faster with a frozen set and with a non-negative string length argument. + * @param set the set + * @param s start of the string (UTF-8) + * @param length of the string; can be -1 for NUL-terminated + * @param spanCondition specifies the containment condition + * @return the start of the trailing substring according to the spanCondition; + * the string length if the end of the string does not fit the spanCondition + * @stable ICU 3.8 + * @see USetSpanCondition + */ +U_STABLE int32_t U_EXPORT2 +uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition); + +/** + * Returns true if set1 contains all of the characters and strings + * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?' 
+ * @param set1 set to be checked for containment + * @param set2 set to be checked for containment + * @return true if the test condition is met + * @stable ICU 3.2 + */ +U_STABLE UBool U_EXPORT2 +uset_equals(const USet* set1, const USet* set2); + +/********************************************************************* + * Serialized set API + *********************************************************************/ + +/** + * Serializes this set into an array of 16-bit integers. Serialization + * (currently) only records the characters in the set; multicharacter + * strings are ignored. + * + * The array + * has following format (each line is one 16-bit integer): + * + * length = (n+2*m) | (m!=0?0x8000:0) + * bmpLength = n; present if m!=0 + * bmp[0] + * bmp[1] + * ... + * bmp[n-1] + * supp-high[0] + * supp-low[0] + * supp-high[1] + * supp-low[1] + * ... + * supp-high[m-1] + * supp-low[m-1] + * + * The array starts with a header. After the header are n bmp + * code points, then m supplementary code points. Either n or m + * or both may be zero. n+2*m is always <= 0x7FFF. + * + * If there are no supplementary characters (if m==0) then the + * header is one 16-bit integer, 'length', with value n. + * + * If there are supplementary characters (if m!=0) then the header + * is two 16-bit integers. The first, 'length', has value + * (n+2*m)|0x8000. The second, 'bmpLength', has value n. + * + * After the header the code points are stored in ascending order. + * Supplementary code points are stored as most significant 16 + * bits followed by least significant 16 bits. + * + * @param set the set + * @param dest pointer to buffer of destCapacity 16-bit integers. + * May be NULL only if destCapacity is zero. + * @param destCapacity size of dest, or zero. Must not be negative. + * @param pErrorCode pointer to the error code. Will be set to + * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to + * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity. 
+ * @return the total length of the serialized format, including + * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other + * than U_BUFFER_OVERFLOW_ERROR. + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode); + +/** + * Given a serialized array, fill in the given serialized set object. + * @param fillSet pointer to result + * @param src pointer to start of array + * @param srcLength length of array + * @return true if the given array is valid, otherwise false + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength); + +/** + * Set the USerializedSet to contain the given character (and nothing + * else). + * @param fillSet pointer to result + * @param c The codepoint to set + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c); + +/** + * Returns TRUE if the given USerializedSet contains the given + * character. + * @param set the serialized set + * @param c The codepoint to check for within the set + * @return true if set contains c + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_serializedContains(const USerializedSet* set, UChar32 c); + +/** + * Returns the number of disjoint ranges of characters contained in + * the given serialized set. Ignores any strings contained in the + * set. + * @param set the serialized set + * @return a non-negative integer counting the character ranges + * contained in set + * @stable ICU 2.4 + */ +U_STABLE int32_t U_EXPORT2 +uset_getSerializedRangeCount(const USerializedSet* set); + +/** + * Returns a range of characters contained in the given serialized + * set. + * @param set the serialized set + * @param rangeIndex a non-negative integer in the range 0.. 
+ * uset_getSerializedRangeCount(set)-1 + * @param pStart pointer to variable to receive first character + * in range, inclusive + * @param pEnd pointer to variable to receive last character in range, + * inclusive + * @return true if rangeIndex is valid, otherwise false + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, + UChar32* pStart, UChar32* pEnd); + +#endif diff --git a/vendor/icu/include/unicode/ushape.h b/vendor/icu/include/unicode/ushape.h new file mode 100644 index 0000000000..80e356ca79 --- /dev/null +++ b/vendor/icu/include/unicode/ushape.h @@ -0,0 +1,476 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 2000-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: ushape.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000jun29 +* created by: Markus W. Scherer +*/ + +#ifndef __USHAPE_H__ +#define __USHAPE_H__ + +#include <unicode/utypes.h> + +/** + * \file + * \brief C API: Arabic shaping + * + */ + +/** + * Shape Arabic text on a character basis. + * + * <p>This function performs basic operations for "shaping" Arabic text. It is most + * useful for use with legacy data formats and legacy display technology + * (simple terminals). All operations are performed on Unicode characters.</p> + * + * <p>Text-based shaping means that some character code points in the text are + * replaced by others depending on the context. It transforms one kind of text + * into another. 
In comparison, modern displays for Arabic text select + * appropriate, context-dependent font glyphs for each text element, which means + * that they transform text into a glyph vector.</p> + * + * <p>Text transformations are necessary when modern display technology is not + * available or when text needs to be transformed to or from legacy formats that + * use "shaped" characters. Since the Arabic script is cursive, connecting + * adjacent letters to each other, computers select images for each letter based + * on the surrounding letters. This usually results in four images per Arabic + * letter: initial, middle, final, and isolated forms. In Unicode, on the other + * hand, letters are normally stored abstract, and a display system is expected + * to select the necessary glyphs. (This makes searching and other text + * processing easier because the same letter has only one code.) It is possible + * to mimic this with text transformations because there are characters in + * Unicode that are rendered as letters with a specific shape + * (or cursive connectivity). They were included for interoperability with + * legacy systems and codepages, and for unsophisticated display systems.</p> + * + * <p>A second kind of text transformations is supported for Arabic digits: + * For compatibility with legacy codepages that only include European digits, + * it is possible to replace one set of digits by another, changing the + * character code points. These operations can be performed for either + * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic + * digits (U+06f0...U+06f9).</p> + * + * <p>Some replacements may result in more or fewer characters (code points). + * By default, this means that the destination buffer may receive text with a + * length different from the source length. Some legacy systems rely on the + * length of the text to be constant. 
They expect extra spaces to be added + * or consumed either next to the affected character or at the end of the + * text.</p> + * + * <p>For details about the available operations, see the description of the + * <code>U_SHAPE_...</code> options.</p> + * + * @param source The input text. + * + * @param sourceLength The number of UChars in <code>source</code>. + * + * @param dest The destination buffer that will receive the results of the + * requested operations. It may be <code>NULL</code> only if + * <code>destSize</code> is 0. The source and destination must not + * overlap. + * + * @param destSize The size (capacity) of the destination buffer in UChars. + * If <code>destSize</code> is 0, then no output is produced, + * but the necessary buffer size is returned ("preflighting"). + * + * @param options This is a 32-bit set of flags that specify the operations + * that are performed on the input text. If no error occurs, + * then the result will always be written to the destination + * buffer. + * + * @param pErrorCode must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @return The number of UChars written to the destination buffer. + * If an error occurred, then no output was written, or it may be + * incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then + * the return value indicates the necessary destination buffer size. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_shapeArabic(const UChar *source, int32_t sourceLength, + UChar *dest, int32_t destSize, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Memory option: allow the result to have a different length than the source. + * Affects: LamAlef options + * @stable ICU 2.0 + */ +#define U_SHAPE_LENGTH_GROW_SHRINK 0 + +/** + * Memory option: allow the result to have a different length than the source. 
+ * Affects: LamAlef options + * This option is an alias to U_SHAPE_LENGTH_GROW_SHRINK + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_RESIZE 0 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces next to modified characters. + * @stable ICU 2.0 + */ +#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces next to modified characters. + * Affects: LamAlef options + * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_NEAR + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_NEAR 1 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces at the end of the text. + * @stable ICU 2.0 + */ +#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces at the end of the text. + * Affects: LamAlef options + * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_END + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_END 2 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces at the beginning of the text. + * @stable ICU 2.0 + */ +#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3 + +/** + * Memory option: the result must have the same length as the source. + * If more room is necessary, then try to consume spaces at the beginning of the text. + * Affects: LamAlef options + * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_BEGIN 3 + + +/** + * Memory option: the result must have the same length as the source. + * Shaping Mode: For each LAMALEF character found, expand LAMALEF using space at end. 
+ * If there is no space at end, use spaces at beginning of the buffer. If there + * is no space at beginning of the buffer, use spaces at the near (i.e. the space + * after the LAMALEF character). + * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) + * will be set in pErrorCode + * + * Deshaping Mode: Perform the same function as the flag equals U_SHAPE_LAMALEF_END. + * Affects: LamAlef options + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_AUTO 0x10000 + +/** Bit mask for memory options. @stable ICU 2.0 */ +#define U_SHAPE_LENGTH_MASK 0x10003 /* Changed old value 3 */ + + +/** + * Bit mask for LamAlef memory options. + * @stable ICU 4.2 + */ +#define U_SHAPE_LAMALEF_MASK 0x10003 /* updated */ + +/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */ +#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0 + +/** + * Direction indicator: + * the source is in visual RTL order, + * the rightmost displayed character stored first. + * This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL + * @stable ICU 4.2 + */ +#define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL 0 + +/** + * Direction indicator: + * the source is in visual LTR order, + * the leftmost displayed character stored first. + * @stable ICU 2.0 + */ +#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4 + +/** Bit mask for direction indicators. @stable ICU 2.0 */ +#define U_SHAPE_TEXT_DIRECTION_MASK 4 + + +/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */ +#define U_SHAPE_LETTERS_NOOP 0 + +/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */ +#define U_SHAPE_LETTERS_SHAPE 8 + +/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */ +#define U_SHAPE_LETTERS_UNSHAPE 0x10 + +/** + * Letter shaping option: replace abstract letter characters by "shaped" ones. 
+ * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters + * are always "shaped" into the isolated form instead of the medial form + * (selecting code points from the Arabic Presentation Forms-B block). + * @stable ICU 2.0 + */ +#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18 + + +/** Bit mask for letter shaping options. @stable ICU 2.0 */ +#define U_SHAPE_LETTERS_MASK 0x18 + + +/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */ +#define U_SHAPE_DIGITS_NOOP 0 + +/** + * Digit shaping option: + * Replace European digits (U+0030...) by Arabic-Indic digits. + * @stable ICU 2.0 + */ +#define U_SHAPE_DIGITS_EN2AN 0x20 + +/** + * Digit shaping option: + * Replace Arabic-Indic digits by European digits (U+0030...). + * @stable ICU 2.0 + */ +#define U_SHAPE_DIGITS_AN2EN 0x40 + +/** + * Digit shaping option: + * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent + * strongly directional character is an Arabic letter + * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br> + * The direction of "preceding" depends on the direction indicator option. + * For the first characters, the preceding strongly directional character + * (initial state) is assumed to be not an Arabic letter + * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]). + * @stable ICU 2.0 + */ +#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60 + +/** + * Digit shaping option: + * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent + * strongly directional character is an Arabic letter + * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br> + * The direction of "preceding" depends on the direction indicator option. + * For the first characters, the preceding strongly directional character + * (initial state) is assumed to be an Arabic letter. 
+ * @stable ICU 2.0 + */ +#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80 + +/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */ +#define U_SHAPE_DIGITS_RESERVED 0xa0 + +/** Bit mask for digit shaping options. @stable ICU 2.0 */ +#define U_SHAPE_DIGITS_MASK 0xe0 + + +/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */ +#define U_SHAPE_DIGIT_TYPE_AN 0 + +/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */ +#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100 + +/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */ +#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200 + +/** Bit mask for digit type options. @stable ICU 2.0 */ +#define U_SHAPE_DIGIT_TYPE_MASK 0x300 /* I need to change this from 0x3f00 to 0x300 */ + +/** + * Tashkeel aggregation option: + * Replaces any combination of U+0651 with one of + * U+064C, U+064D, U+064E, U+064F, U+0650 with + * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively. + * @stable ICU 3.6 + */ +#define U_SHAPE_AGGREGATE_TASHKEEL 0x4000 +/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */ +#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP 0 +/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */ +#define U_SHAPE_AGGREGATE_TASHKEEL_MASK 0x4000 + +/** + * Presentation form option: + * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B + * characters with U+06xx characters, before shaping. + * @stable ICU 3.6 + */ +#define U_SHAPE_PRESERVE_PRESENTATION 0x8000 +/** Presentation form option: + * Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with + * their unshaped counterparts in range U+06xx, before shaping. + * @stable ICU 3.6 + */ +#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0 +/** Bit mask for preserve presentation form. 
@stable ICU 3.6 */ +#define U_SHAPE_PRESERVE_PRESENTATION_MASK 0x8000 + +/* Seen Tail option */ +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: The SEEN family character will expand into two characters using space near + * the SEEN family character (i.e. the space after the character). + * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) + * will be set in pErrorCode + * + * De-shaping mode: Any Seen character followed by Tail character will be + * replaced by one cell Seen and a space will replace the Tail. + * Affects: Seen options + * @stable ICU 4.2 + */ +#define U_SHAPE_SEEN_TWOCELL_NEAR 0x200000 + +/** + * Bit mask for Seen memory options. + * @stable ICU 4.2 + */ +#define U_SHAPE_SEEN_MASK 0x700000 + +/* YehHamza option */ +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: The YEHHAMZA character will expand into two characters using space near it + * (i.e. the space after the character). + * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) + * will be set in pErrorCode + * + * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be + * replaced by one cell YehHamza and space will replace the Hamza. + * Affects: YehHamza options + * @stable ICU 4.2 + */ +#define U_SHAPE_YEHHAMZA_TWOCELL_NEAR 0x1000000 + + +/** + * Bit mask for YehHamza memory options. + * @stable ICU 4.2 + */ +#define U_SHAPE_YEHHAMZA_MASK 0x3800000 + +/* New Tashkeel options */ +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: Tashkeel characters will be replaced by spaces. + * Spaces will be placed at beginning of the buffer + * + * De-shaping mode: N/A + * Affects: Tashkeel options + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_BEGIN 0x40000 + +/** + * Memory option: the result must have the same length as the source. 
+ * Shaping mode: Tashkeel characters will be replaced by spaces. + * Spaces will be placed at end of the buffer + * + * De-shaping mode: N/A + * Affects: Tashkeel options + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_END 0x60000 + +/** + * Memory option: allow the result to have a different length than the source. + * Shaping mode: Tashkeel characters will be removed, buffer length will shrink. + * De-shaping mode: N/A + * + * Affects: Tashkeel options + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_RESIZE 0x80000 + +/** + * Memory option: the result must have the same length as the source. + * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent + * characters (i.e. shaped on Tatweel) or replaced by space if it is not connected. + * + * De-shaping mode: N/A + * Affects: Tashkeel options + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL 0xC0000 + +/** + * Bit mask for Tashkeel replacement with Space or Tatweel memory options. + * @stable ICU 4.2 + */ +#define U_SHAPE_TASHKEEL_MASK 0xE0000 + + +/* Space location Control options */ +/** + * This option affects the meaning of BEGIN and END options. If this option is not used the default + * for BEGIN and END will be as follows: + * The Default (for both Visual LTR, Visual RTL and Logical Text) + * 1. BEGIN always refers to the start address of physical memory. + * 2. END always refers to the end address of physical memory. + * + * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text. + * + * The effect on BEGIN and END Memory Options will be as follows: + * A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text + * (corresponding to the physical memory address end for Visual LTR text, Same as END in + * default behavior) + * B. BEGIN For Logical text: Same as BEGIN in default behavior. + * C. 
END For Visual LTR text: This will be the end (left side) of the visual text (corresponding + * to the physical memory address beginning for Visual LTR text, Same as BEGIN in default behavior). + * D. END For Logical text: Same as END in default behavior. + * Affects: All LamAlef BEGIN, END and AUTO options. + * @stable ICU 4.2 + */ +#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 0x4000000 + +/** + * Bit mask for swapping BEGIN and END for Visual LTR text + * @stable ICU 4.2 + */ +#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK 0x4000000 + +/** + * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73). + * If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B). + * De-shaping will not use this option as it will always search for both the new Unicode code point for the + * TAIL (i.e. 0xFE73) and the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the + * Seen-Family letter accordingly. + * + * Shaping Mode: Only shaping. + * De-shaping Mode: N/A. + * Affects: All Seen options + * @stable ICU 4.8 + */ +#define U_SHAPE_TAIL_NEW_UNICODE 0x8000000 + +/** + * Bit mask for new Unicode Tail option + * @stable ICU 4.8 + */ +#define U_SHAPE_TAIL_TYPE_MASK 0x8000000 + +#endif diff --git a/vendor/icu/include/unicode/ustring.h b/vendor/icu/include/unicode/ustring.h new file mode 100644 index 0000000000..1e9aacdb3c --- /dev/null +++ b/vendor/icu/include/unicode/ustring.h @@ -0,0 +1,1692 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1998-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* File ustring.h +* +* Modification History: +* +* Date Name Description +* 12/07/98 bertrand Creation. 
+****************************************************************************** +*/ + +#ifndef USTRING_H +#define USTRING_H + +#include <unicode/utypes.h> +#include <unicode/putil.h> +#include <unicode/uiter.h> + +/** + * \def UBRK_TYPEDEF_UBREAK_ITERATOR + * @internal + */ + +#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR +# define UBRK_TYPEDEF_UBREAK_ITERATOR +/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/ + typedef struct UBreakIterator UBreakIterator; +#endif + +/** + * \file + * \brief C API: Unicode string handling functions + * + * These C API functions provide general Unicode string handling. + * + * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h> + * functions. (For example, they do not check for bad arguments like NULL string pointers.) + * In some cases, only the thread-safe variant of such a function is implemented here + * (see u_strtok_r()). + * + * Other functions provide more Unicode-specific functionality like locale-specific + * upper/lower-casing and string comparison in code point order. + * + * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units. + * UTF-16 encodes each Unicode code point with either one or two UChar code units. + * (This is the default form of Unicode, and a forward-compatible extension of the original, + * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0 + * in 1996.) + * + * Some APIs accept a 32-bit UChar32 value for a single code point. + * + * ICU also handles 16-bit Unicode text with unpaired surrogates. + * Such text is not well-formed UTF-16. + * Code-point-related functions treat unpaired surrogates as surrogate code points, + * i.e., as separate units. + * + * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings), + * it is much more efficient even for random access because the code unit values + * for single-unit characters vs. lead units vs. 
trail units are completely disjoint. + * This means that it is easy to determine character (code point) boundaries from + * random offsets in the string. + * + * Unicode (UTF-16) string processing is optimized for the single-unit case. + * Although it is important to support supplementary characters + * (which use pairs of lead/trail code units called "surrogates"), + * their occurrence is rare. Almost all characters in modern use require only + * a single UChar code unit (i.e., their code point values are <=0xffff). + * + * For more details see the User Guide Strings chapter (http://icu-project.org/userguide/strings.html). + * For a discussion of the handling of unpaired surrogates see also + * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18. + */ + +/** + * \defgroup ustring_ustrlen String Length + * \ingroup ustring_strlen + */ +/*@{*/ +/** + * Determine the length of an array of UChar. + * + * @param s The array of UChars, NUL (U+0000) terminated. + * @return The number of UChars in <code>s</code>, minus the terminator. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strlen(const UChar *s); +/*@}*/ + +/** + * Count Unicode code points in the length UChar code units of the string. + * A code point may occupy either one or two UChar code units. + * Counting code points involves reading all code units. + * + * This function is basically the inverse of the U16_FWD_N() macro (see utf.h). + * + * @param s The input string. + * @param length The number of UChar code units to be checked, or -1 to count all + * code points before the first NUL (U+0000). + * @return The number of code points in the specified code units. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_countChar32(const UChar *s, int32_t length); + +/** + * Check if the string contains more Unicode code points than a certain number. + * This is more efficient than counting all code points in the entire string + * and comparing that number with a threshold. 
+ * This function may not need to scan the string at all if the length is known + * (not -1 for NUL-termination) and falls within a certain range, and + * never needs to count more than 'number+1' code points. + * Logically equivalent to (u_countChar32(s, length)>number). + * A Unicode code point may occupy either one or two UChar code units. + * + * @param s The input string. + * @param length The length of the string, or -1 if it is NUL-terminated. + * @param number The number of code points in the string is compared against + * the 'number' parameter. + * @return Boolean value for whether the string contains more Unicode code points + * than 'number'. Same as (u_countChar32(s, length)>number). + * @stable ICU 2.4 + */ +U_STABLE UBool U_EXPORT2 +u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number); + +/** + * Concatenate two ustrings. Appends a copy of <code>src</code>, + * including the null terminator, to <code>dst</code>. The initial copied + * character from <code>src</code> overwrites the null terminator in <code>dst</code>. + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strcat(UChar *dst, + const UChar *src); + +/** + * Concatenate two ustrings. + * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>. + * Adds a terminating NUL. + * If src is too long, then only <code>n-1</code> characters will be copied + * before the terminating NUL. + * If <code>n<=0</code> then dst is not modified. + * + * @param dst The destination string. + * @param src The source string (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to append; no-op if <=0. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strncat(UChar *dst, + const UChar *src, + int32_t n); + +/** + * Find the first occurrence of a substring in a string. 
+ * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search (NUL-terminated). + * @param substring The substring to find (NUL-terminated). + * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strrstr + * @see u_strFindFirst + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strstr(const UChar *s, const UChar *substring); + +/** + * Find the first occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search. + * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. + * @param substring The substring to find (NUL-terminated). + * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. + * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. 
+ * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); + +/** + * Find the first occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The BMP code point to find. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr32 + * @see u_memchr + * @see u_strstr + * @see u_strFindFirst + */ +U_STABLE UChar * U_EXPORT2 +u_strchr(const UChar *s, UChar c); + +/** + * Find the first occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The code point to find. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr + * @see u_memchr32 + * @see u_strstr + * @see u_strFindFirst + */ +U_STABLE UChar * U_EXPORT2 +u_strchr32(const UChar *s, UChar32 c); + +/** + * Find the last occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search (NUL-terminated). + * @param substring The substring to find (NUL-terminated). 
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindFirst + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrstr(const UChar *s, const UChar *substring); + +/** + * Find the last occurrence of a substring in a string. + * The substring is found at code point boundaries. + * That means that if the substring begins with + * a trail surrogate or ends with a lead surrogate, + * then it is found only if these surrogates stand alone in the text. + * Otherwise, the substring edge units would be matched against + * halves of surrogate pairs. + * + * @param s The string to search. + * @param length The length of s (number of UChars), or -1 if it is NUL-terminated. + * @param substring The substring to find (NUL-terminated). + * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated. + * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>, + * or <code>s</code> itself if the <code>substring</code> is empty, + * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength); + +/** + * Find the last occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The BMP code point to find. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. 
+ * @stable ICU 2.4 + * + * @see u_strrchr32 + * @see u_memrchr + * @see u_strrstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrchr(const UChar *s, UChar c); + +/** + * Find the last occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (NUL-terminated). + * @param c The code point to find. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr + * @see u_memrchr32 + * @see u_strrstr + * @see u_strFindLast + */ +U_STABLE UChar * U_EXPORT2 +u_strrchr32(const UChar *s, UChar32 c); + +/** + * Locates the first occurrence in the string <code>string</code> of any of the characters + * in the string <code>matchSet</code>. + * Works just like C's strpbrk but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return A pointer to the character in <code>string</code> that matches one of the + * characters in <code>matchSet</code>, or NULL if no such character is found. + * @stable ICU 2.0 + */ +U_STABLE UChar * U_EXPORT2 +u_strpbrk(const UChar *string, const UChar *matchSet); + +/** + * Returns the number of consecutive characters in <code>string</code>, + * beginning with the first, that do not occur somewhere in <code>matchSet</code>. + * Works just like C's strcspn but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return The number of initial characters in <code>string</code> that do not + * occur in <code>matchSet</code>.
+ * @see u_strspn + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcspn(const UChar *string, const UChar *matchSet); + +/** + * Returns the number of consecutive characters in <code>string</code>, + * beginning with the first, that occur somewhere in <code>matchSet</code>. + * Works just like C's strspn but with Unicode. + * + * @param string The string in which to search, NUL-terminated. + * @param matchSet A NUL-terminated string defining a set of code points + * for which to search in the text string. + * @return The number of initial characters in <code>string</code> that do + * occur in <code>matchSet</code>. + * @see u_strcspn + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strspn(const UChar *string, const UChar *matchSet); + +/** + * The string tokenizer API allows an application to break a string into + * tokens. Unlike strtok(), the state (the current pointer within the + * original string) is maintained in saveState. In the first call, the + * argument src is a pointer to the string. In subsequent calls to + * return successive tokens of that string, src must be specified as + * NULL. The value saveState is set by this function to maintain the + * function's position within the string, and on each subsequent call + * you must give this argument the same variable. This function does + * handle surrogate pairs. This function is similar to strtok_r(), + * the POSIX Threads Extension (1003.1c-1995) version. + * + * @param src String containing token(s). This string will be modified. + * After the first call to u_strtok_r(), this argument must + * be NULL to get to the next token. + * @param delim Set of delimiter characters (Unicode code points). + * @param saveState The current pointer within the original string, + * which is set by this function. The saveState + * parameter should be the address of a local variable of type + * UChar *. (i.e. define "UChar *myLocalSaveState" and use + * &myLocalSaveState for this parameter).
+ * @return A pointer to the next token found in src, or NULL + * when there are no more tokens. + * @stable ICU 2.0 + */ +U_STABLE UChar * U_EXPORT2 +u_strtok_r(UChar *src, + const UChar *delim, + UChar **saveState); + +/** + * Compare two Unicode strings for bitwise equality (code unit order). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative + * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive + * value if <code>s1</code> is bitwise greater than <code>s2</code>. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcmp(const UChar *s1, + const UChar *s2); + +/** + * Compare two Unicode strings in code point order. + * See u_strCompare for details. + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcmpCodePointOrder(const UChar *s1, const UChar *s2); + +/** + * Compare two Unicode strings (binary order). + * + * The comparison can be done in code unit order or in code point order. + * They differ only in UTF-16 when + * comparing supplementary code points (U+10000..U+10ffff) + * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). + * In code unit order, high BMP code points sort after supplementary code points + * because supplementary code points are stored as pairs of surrogates which are at U+d800..U+dfff. + * + * This function works with strings of different explicitly specified lengths + * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. + * NUL-terminated strings are possible with length arguments of -1. + * + * @param s1 First source string. + * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param codePointOrder Choose between code unit order (FALSE) + * and code point order (TRUE). + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_strCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + UBool codePointOrder); + +/** + * Compare two Unicode strings (binary order) + * as presented by UCharIterator objects. + * Works otherwise just like u_strCompare(). + * + * Both iterators are reset to their start positions. + * When the function returns, it is undefined where the iterators + * have stopped. + * + * @param iter1 First source string iterator. + * @param iter2 Second source string iterator. + * @param codePointOrder Choose between code unit order (FALSE) + * and code point order (TRUE). + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @see u_strCompare + * + * @stable ICU 2.6 + */ +U_STABLE int32_t U_EXPORT2 +u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to + * u_strCompare(u_strFoldCase(s1, options), + * u_strFoldCase(s2, options), + * (options&U_COMPARE_CODE_POINT_ORDER)!=0). + * + * The comparison can be done in UTF-16 code unit order or in code point order. + * They differ only when comparing supplementary code points (U+10000..U+10ffff) + * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff). + * In code unit order, high BMP code points sort after supplementary code points + * because supplementary code points are stored as pairs of surrogates which are at U+d800..U+dfff. + * + * This function works with strings of different explicitly specified lengths + * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. + * NUL-terminated strings are possible with length arguments of -1. + * + * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated. + * + * @param s2 Second source string. + * @param length2 Length of second source string, or -1 if NUL-terminated. + * + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * + * @return <0 or 0 or >0 as usual for string comparisons + * + * @stable ICU 2.2 + */ +U_STABLE int32_t U_EXPORT2 +u_strCaseCompare(const UChar *s1, int32_t length1, + const UChar *s2, int32_t length2, + uint32_t options, + UErrorCode *pErrorCode); + +/** + * Compare two ustrings for bitwise equality. + * Compares at most <code>n</code> characters. + * + * @param ucs1 A string to compare (can be NULL/invalid if n<=0). + * @param ucs2 A string to compare (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to compare; always returns 0 if n<=0. + * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative + * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive + * value if <code>s1</code> is bitwise greater than <code>s2</code>. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncmp(const UChar *ucs1, + const UChar *ucs2, + int32_t n); + +/** + * Compare two Unicode strings in code point order. + * This is different in UTF-16 from u_strncmp() if supplementary characters are present. + * For details, see u_strCompare(). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param n The maximum number of characters to compare. 
+ * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options), + * u_strFoldCase(s2, at most n, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param n The maximum number of characters of each string to case-fold and then compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options); + +/** + * Compare two strings case-insensitively using full case folding. + * This is equivalent to u_strcmp(u_strFoldCase(s1, length, options), + * u_strFoldCase(s2, length, options)). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param length The number of characters in each string to case-fold and then compare. + * @param options A bit set of options: + * - U_FOLD_CASE_DEFAULT or 0 is used for default options: + * Comparison in code unit order with default case folding. + * + * - U_COMPARE_CODE_POINT_ORDER + * Set to choose code point order instead of code unit order + * (see u_strCompare for details). + * + * - U_FOLD_CASE_EXCLUDE_SPECIAL_I + * + * @return A negative, zero, or positive integer indicating the comparison result. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options); + +/** + * Copy a ustring. Adds a null terminator. + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strcpy(UChar *dst, + const UChar *src); + +/** + * Copy a ustring. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * + * @param dst The destination string. + * @param src The source string (can be NULL/invalid if n<=0). + * @param n The maximum number of characters to copy; no-op if <=0. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strncpy(UChar *dst, + const UChar *src, + int32_t n); + +#if !UCONFIG_NO_CONVERSION + +/** + * Copy a byte string encoded in the default codepage to a ustring. + * Adds a null terminator.
+ * Performs a host byte to UChar conversion + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 u_uastrcpy(UChar *dst, + const char *src ); + +/** + * Copy a byte string encoded in the default codepage to a ustring. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * Performs a host byte to UChar conversion + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 u_uastrncpy(UChar *dst, + const char *src, + int32_t n); + +/** + * Copy ustring to a byte string encoded in the default codepage. + * Adds a null terminator. + * Performs a UChar to host byte conversion + * + * @param dst The destination string. + * @param src The source string. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE char* U_EXPORT2 u_austrcpy(char *dst, + const UChar *src ); + +/** + * Copy ustring to a byte string encoded in the default codepage. + * Copies at most <code>n</code> characters. The result will be null terminated + * if the length of <code>src</code> is less than <code>n</code>. + * Performs a UChar to host byte conversion + * + * @param dst The destination string. + * @param src The source string. + * @param n The maximum number of characters to copy. + * @return A pointer to <code>dst</code>. + * @stable ICU 2.0 + */ +U_STABLE char* U_EXPORT2 u_austrncpy(char *dst, + const UChar *src, + int32_t n ); + +#endif + +/** + * Synonym for memcpy(), but with UChars only. 
+ * @param dest The destination string + * @param src The source string (can be NULL/invalid if count<=0) + * @param count The number of characters to copy; no-op if <=0 + * @return A pointer to <code>dest</code> + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memcpy(UChar *dest, const UChar *src, int32_t count); + +/** + * Synonym for memmove(), but with UChars only. + * @param dest The destination string + * @param src The source string (can be NULL/invalid if count<=0) + * @param count The number of characters to move; no-op if <=0 + * @return A pointer to <code>dest</code> + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memmove(UChar *dest, const UChar *src, int32_t count); + +/** + * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>. + * + * @param dest The destination string. + * @param c The character to initialize the string. + * @param count The maximum number of characters to set. + * @return A pointer to <code>dest</code>. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_memset(UChar *dest, UChar c, int32_t count); + +/** + * Compare the first <code>count</code> UChars of each buffer. + * + * @param buf1 The first string to compare. + * @param buf2 The second string to compare. + * @param count The maximum number of UChars to compare. + * @return When buf1 < buf2, a negative number is returned. + * When buf1 == buf2, 0 is returned. + * When buf1 > buf2, a positive number is returned. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count); + +/** + * Compare two Unicode strings in code point order. + * This is different in UTF-16 from u_memcmp() if supplementary characters are present. + * For details, see u_strCompare(). + * + * @param s1 A string to compare. + * @param s2 A string to compare. + * @param count The maximum number of characters to compare. 
+ * @return a negative/zero/positive integer corresponding to whether + * the first string is less than/equal to/greater than the second one + * in code point order + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count); + +/** + * Find the first occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The BMP code point to find. + * @param count The length of the string. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr + * @see u_memchr32 + * @see u_strFindFirst + */ +U_STABLE UChar* U_EXPORT2 +u_memchr(const UChar *s, UChar c, int32_t count); + +/** + * Find the first occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The code point to find. + * @param count The length of the string. + * @return A pointer to the first occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.0 + * + * @see u_strchr32 + * @see u_memchr + * @see u_strFindFirst + */ +U_STABLE UChar* U_EXPORT2 +u_memchr32(const UChar *s, UChar32 c, int32_t count); + +/** + * Find the last occurrence of a BMP code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. 
+ * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The BMP code point to find. + * @param count The length of the string. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr + * @see u_memrchr32 + * @see u_strFindLast + */ +U_STABLE UChar* U_EXPORT2 +u_memrchr(const UChar *s, UChar c, int32_t count); + +/** + * Find the last occurrence of a code point in a string. + * A surrogate code point is found only if its match in the text is not + * part of a surrogate pair. + * A NUL character is found at the string terminator. + * + * @param s The string to search (contains <code>count</code> UChars). + * @param c The code point to find. + * @param count The length of the string. + * @return A pointer to the last occurrence of <code>c</code> in <code>s</code> + * or <code>NULL</code> if <code>c</code> is not in <code>s</code>. + * @stable ICU 2.4 + * + * @see u_strrchr32 + * @see u_memrchr + * @see u_strFindLast + */ +U_STABLE UChar* U_EXPORT2 +u_memrchr32(const UChar *s, UChar32 c, int32_t count); + +/** + * Unicode String literals in C. + * We need one macro to declare a variable for the string + * and to statically preinitialize it if possible, + * and a second macro to dynamically initialize such a string variable if necessary. + * + * The macros are defined for maximum performance. + * They work only for strings that contain "invariant characters", i.e., + * only latin letters, digits, and some punctuation. + * See utypes.h for details. + * + * A pair of macros for a single string must be used with the same + * parameters. + * The string parameter must be a C string literal. + * The length of the string, not including the terminating + * <code>NUL</code>, must be specified as a constant. 
+ * The U_STRING_DECL macro should be invoked exactly once for one + * such string variable before it is used. + * + * Usage: + * <pre> + * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11); + * U_STRING_DECL(ustringVar2, "jumps 5%", 8); + * static UBool didInit=FALSE; + * + * int32_t function() { + * if(!didInit) { + * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11); + * U_STRING_INIT(ustringVar2, "jumps 5%", 8); + * didInit=TRUE; + * } + * return u_strcmp(ustringVar1, ustringVar2); + * } + * </pre> + * + * Note that the macros will NOT consistently work if their argument is another <code>#define</code>. + * The following will not work on all platforms, don't use it. + * + * <pre> + * #define GLUCK "Mr. Gluck" + * U_STRING_DECL(var, GLUCK, 9) + * U_STRING_INIT(var, GLUCK, 9) + * </pre> + * + * Instead, use the string literal "Mr. Gluck" as the argument to both macro + * calls. + * + * + * @stable ICU 2.0 + */ +#if defined(U_DECLARE_UTF16) +# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs) + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) +# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY +# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=cs + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) +#else +# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1] + /**@stable ICU 2.0 */ +# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, (length)+1) +#endif + +/** + * Unescape a string of characters and write the resulting + * Unicode characters to the destination buffer.
The following escape + * sequences are recognized: + * + * \\uhhhh 4 hex digits; h in [0-9A-Fa-f] + * \\Uhhhhhhhh 8 hex digits + * \\xhh 1-2 hex digits + * \\x{h...} 1-8 hex digits + * \\ooo 1-3 octal digits; o in [0-7] + * \\cX control-X; X is masked with 0x1F + * + * as well as the standard ANSI C escapes: + * + * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, + * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + * + * Anything else following a backslash is generically escaped. For + * example, "[a\\-z]" returns "[a-z]". + * + * If an escape sequence is ill-formed, this method returns an empty + * string. An example of an ill-formed sequence is "\\u" followed by + * fewer than 4 hex digits. + * + * The above characters are recognized in the compiler's codepage, + * that is, they are coded as 'u', '\\', etc. Characters that are + * not parts of escape sequences are converted using u_charsToUChars(). + * + * This function is similar to UnicodeString::unescape() but not + * identical to it. The latter takes a source UnicodeString, so it + * does escape recognition but no conversion. + * + * @param src a zero-terminated string of invariant characters + * @param dest pointer to buffer to receive converted and unescaped + * text and, if there is room, a zero terminator. May be NULL for + * preflighting, in which case no UChars will be written, but the + * return value will still be valid. On error, an empty string is + * stored here (if possible). + * @param destCapacity the number of UChars that may be written at + * dest. Ignored if dest == NULL. + * @return the length of unescaped string. 
+ * @see u_unescapeAt + * @see UnicodeString#unescape() + * @see UnicodeString#unescapeAt() + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_unescape(const char *src, + UChar *dest, int32_t destCapacity); + +U_CDECL_BEGIN +/** + * Callback function for u_unescapeAt() that returns a character of + * the source text given an offset and a context pointer. The context + * pointer will be whatever is passed into u_unescapeAt(). + * + * @param offset the offset into the source text of the character to return. + * @param context an opaque pointer passed directly into u_unescapeAt() + * @return the character (UChar) of the source text at + * offset + * @see u_unescapeAt + * @stable ICU 2.0 + */ +typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context); +U_CDECL_END + +/** + * Unescape a single sequence. The character at offset-1 is assumed + * (without checking) to be a backslash. This method takes a callback + * pointer to a function that returns the UChar at a given offset. By + * varying this callback, ICU functions are able to unescape char* + * strings, UnicodeString objects, and UFILE pointers. + * + * If offset is out of range, or if the escape sequence is ill-formed, + * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape() + * for a list of recognized sequences. + * + * @param charAt callback function that returns a UChar of the source + * text given an offset and a context pointer. + * @param offset pointer to the offset that will be passed to charAt. + * The offset value will be updated upon return to point after the + * last parsed character of the escape sequence. On error the offset + * is unchanged. + * @param length the number of characters in the source text. The + * last character of the source text is considered to be at offset + * length-1. + * @param context an opaque pointer passed directly into charAt.
+ * @return the character represented by the escape sequence at + * offset, or (UChar32)0xFFFFFFFF on error. + * @see u_unescape() + * @see UnicodeString#unescape() + * @see UnicodeString#unescapeAt() + * @stable ICU 2.0 + */ +U_STABLE UChar32 U_EXPORT2 +u_unescapeAt(UNESCAPE_CHAR_AT charAt, + int32_t *offset, + int32_t length, + void *context); + +/** + * Uppercase the characters in a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strToUpper(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +/** + * Lowercase the characters in a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. 
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strToLower(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + const char *locale, + UErrorCode *pErrorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + +/** + * Titlecase a string. + * Casing is locale-dependent and context-sensitive. + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. + * + * The titlecase break iterator can be provided to customize for arbitrary + * styles, using rules and dictionaries beyond the standard iterators. + * It may be more efficient to always provide an iterator to avoid + * opening and closing one for each string. + * The standard titlecase iterator for the root locale implements the + * algorithm of Unicode TR 21. + * + * This function uses only the setText(), first() and next() methods of the + * provided break iterator. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. 
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param titleIter A break iterator to find the first characters of words + * that are to be titlecased. + * If none is provided (NULL), then a standard titlecase + * break iterator is opened. + * @param locale The locale to consider, or "" for the root locale or NULL for the default locale. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.1 + */ +U_STABLE int32_t U_EXPORT2 +u_strToTitle(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UBreakIterator *titleIter, + const char *locale, + UErrorCode *pErrorCode); + +#endif + +/** + * Case-folds the characters in a string. + * + * Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * The source string and the destination buffer are allowed to overlap. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the result + * without writing any of the result string. + * @param src The original string + * @param srcLength The length of the original string. 
If -1, then src must be zero-terminated. + * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The length of the result string. It may be greater than destCapacity. In that case, + * only some of the result was written to the destination buffer. + * @stable ICU 2.0 + */ +U_STABLE int32_t U_EXPORT2 +u_strFoldCase(UChar *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + uint32_t options, + UErrorCode *pErrorCode); + +#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION +/** + * Convert a UTF-16 string to a wchar_t string. + * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then + * this function simply calls the fast, dedicated function for that. + * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. 
+ * @stable ICU 2.0 + */ +U_STABLE wchar_t* U_EXPORT2 +u_strToWCS(wchar_t *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); +/** + * Convert a wchar_t string to UTF-16. + * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then + * this function simply calls the fast, dedicated function for that. + * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromWCS(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const wchar_t *src, + int32_t srcLength, + UErrorCode *pErrorCode); +#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */ + +/** + * Convert a UTF-16 string to UTF-8. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. 
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + * @see u_strToUTF8WithSub + * @see u_strFromUTF8 + */ +U_STABLE char* U_EXPORT2 +u_strToUTF8(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 
+ * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @stable ICU 2.0 + * @see u_strFromUTF8WithSub + * @see u_strFromUTF8Lenient + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF8(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-8. + * + * Same as u_strToUTF8() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. 
+ * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strToUTF8 + * @see u_strFromUTF8WithSub + * @stable ICU 3.6 + */ +U_STABLE char* U_EXPORT2 +u_strToUTF8WithSub(char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * + * Same as u_strFromUTF8() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. 
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF8 + * @see u_strFromUTF8Lenient + * @see u_strToUTF8WithSub + * @stable ICU 3.6 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF8WithSub(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-8 string to UTF-16. + * + * Same as u_strFromUTF8() except that this function is designed to be very fast, + * which it achieves by being lenient about malformed UTF-8 sequences. + * This function is intended for use in environments where UTF-8 text is + * expected to be well-formed. + * + * Its semantics are: + * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text. + * - The function will not read beyond the input string, nor write beyond + * the destCapacity. + * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not + * be well-formed UTF-16. + * The function will resynchronize to valid code point boundaries + * within a small number of code points after an illegal sequence. + * - Non-shortest forms are not detected and will result in "spoofing" output. + * + * For further performance improvement, if srcLength is given (>=0), + * then it must be destCapacity>=srcLength. 
+ * + * There is no inverse u_strToUTF8Lenient() function because there is practically + * no performance gain from not checking that a UTF-16 string is well-formed. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * Unlike for other ICU functions, if srcLength>=0 then it + * must be destCapacity>=srcLength. + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * Unlike for other ICU functions, if srcLength>=0 but + * destCapacity<srcLength, then *pDestLength will be set to srcLength + * (and U_BUFFER_OVERFLOW_ERROR will be set) + * regardless of the actual result length. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF8 + * @see u_strFromUTF8WithSub + * @see u_strToUTF8WithSub + * @stable ICU 3.6 + */ +U_STABLE UChar * U_EXPORT2 +u_strFromUTF8Lenient(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-32. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. 
+ * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @see u_strToUTF32WithSub + * @see u_strFromUTF32 + * @stable ICU 2.0 + */ +U_STABLE UChar32* U_EXPORT2 +u_strToUTF32(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-32 string to UTF-16. + * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. 
+ * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Must be a valid pointer to an error code value, + * which must not indicate a failure before the function call. + * @return The pointer to destination buffer. + * @see u_strFromUTF32WithSub + * @see u_strToUTF32 + * @stable ICU 2.0 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF32(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-16 string to UTF-32. + * + * Same as u_strToUTF32() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). 
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strToUTF32 + * @see u_strFromUTF32WithSub + * @stable ICU 4.2 + */ +U_STABLE UChar32* U_EXPORT2 +u_strToUTF32WithSub(UChar32 *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a UTF-32 string to UTF-16. + * + * Same as u_strFromUTF32() except for the additional subchar which is output for + * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code. + * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32(). + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. 
+ * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @see u_strFromUTF32 + * @see u_strToUTF32WithSub + * @stable ICU 4.2 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromUTF32WithSub(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar32 *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +/** + * Convert a 16-bit Unicode string to Java Modified UTF-8. + * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8 + * + * This function behaves according to the documentation for Java DataOutput.writeUTF() + * except that it does not encode the output length in the destination buffer + * and does not have an output length restriction. + * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String) + * + * The input string need not be well-formed UTF-16. + * (Therefore there is no subchar parameter.) + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of chars). 
If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. + * @stable ICU 4.4 + * @see u_strToUTF8WithSub + * @see u_strFromJavaModifiedUTF8WithSub + */ +U_STABLE char* U_EXPORT2 +u_strToJavaModifiedUTF8( + char *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode); + +/** + * Convert a Java Modified UTF-8 string to a 16-bit Unicode string. + * If the input string is not well-formed and no substitution char is specified, + * then the U_INVALID_CHAR_FOUND error code is set. + * + * This function behaves according to the documentation for Java DataInput.readUTF() + * except that it takes a length parameter rather than + * interpreting the first two input bytes as the length. + * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF() + * + * The output string may not be well-formed UTF-16. + * + * @param dest A buffer for the result string. The result will be zero-terminated if + * the buffer is large enough. + * @param destCapacity The size of the buffer (number of UChars). 
If it is 0, then + * dest may be NULL and the function will only return the length of the + * result without writing any of the result string (pre-flighting). + * @param pDestLength A pointer to receive the number of units written to the destination. If + * pDestLength!=NULL then *pDestLength is always set to the + * number of output units corresponding to the transformation of + * all the input units, even in case of a buffer overflow. + * @param src The original source string + * @param srcLength The length of the original string. If -1, then src must be zero-terminated. + * @param subchar The substitution character to use in place of an illegal input sequence, + * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead. + * A substitution character can be any valid Unicode code point (up to U+10FFFF) + * except for surrogate code points (U+D800..U+DFFF). + * The recommended value is U+FFFD "REPLACEMENT CHARACTER". + * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0. + * Set to 0 if no substitutions occur or subchar<0. + * pNumSubstitutions can be NULL. + * @param pErrorCode Pointer to a standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return The pointer to destination buffer. 
+ * @see u_strFromUTF8WithSub + * @see u_strFromUTF8Lenient + * @see u_strToJavaModifiedUTF8 + * @stable ICU 4.4 + */ +U_STABLE UChar* U_EXPORT2 +u_strFromJavaModifiedUTF8WithSub( + UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const char *src, + int32_t srcLength, + UChar32 subchar, int32_t *pNumSubstitutions, + UErrorCode *pErrorCode); + +#endif diff --git a/vendor/icu/include/unicode/utf.h b/vendor/icu/include/unicode/utf.h new file mode 100644 index 0000000000..9b85c7620c --- /dev/null +++ b/vendor/icu/include/unicode/utf.h @@ -0,0 +1,225 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2011, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: Code point macros + * + * This file defines macros for checking whether a code point is + * a surrogate or a non-character etc. + * + * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h + * and itself includes utf8.h and utf16.h after some + * common definitions. + * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be + * included explicitly if their definitions are used. + * + * utf8.h and utf16.h define macros for efficiently getting code points + * in and out of UTF-8/16 strings. + * utf16.h macros have "U16_" prefixes. + * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling. + * + * ICU mostly processes 16-bit Unicode strings. + * Most of the time, such strings are well-formed UTF-16. 
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU + * like regular code points where possible. + * (Pairs of surrogate code points are indistinguishable from supplementary + * code points encoded as pairs of surrogate code units.) + * + * In fact, almost all Unicode code points in normal text (>99%) + * are on the BMP (<=U+ffff) and even <=U+d7ff. + * ICU functions handle supplementary code points (U+10000..U+10ffff) + * but are optimized for the much more frequently occurring BMP code points. + * + * umachine.h defines UChar to be an unsigned 16-bit integer. + * Since ICU 59, ICU uses char16_t in C++, UChar only in C, + * and defines UChar=char16_t by default. See the UChar API docs for details. + * + * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit + * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1). + * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as + * the definition of UChar. For details see the documentation for UChar32 itself. + * + * utf.h defines a small number of C macros for single Unicode code points. + * These are simple checks for surrogates and non-characters. + * For actual Unicode character properties see uchar.h. + * + * By default, string operations must be done with error checking in case + * a string is not well-formed UTF-16 or UTF-8. + * + * The U16_ macros detect if a surrogate code unit is unpaired + * (lead unit without trail unit or vice versa) and just return the unit itself + * as the code point. + * + * The U8_ macros detect illegal byte sequences and return a negative value. + * Starting with ICU 60, the observable length of a single illegal byte sequence + * skipped by one of these macros follows the Unicode 6+ recommendation + * which is consistent with the W3C Encoding Standard. + * + * There are ..._OR_FFFD versions of both U16_ and U8_ macros + * that return U+FFFD for illegal code unit sequences.
+ * + * The regular "safe" macros require that the initial, passed-in string index + * is within bounds. They only check the index when they read more than one + * code unit. This is usually done with code similar to the following loop: + * <pre>while(i<length) { + * U16_NEXT(s, i, length, c); + * // use c + * }</pre> + * + * When it is safe to assume that text is well-formed UTF-16 + * (does not contain single, unpaired surrogates), then one can use + * U16_..._UNSAFE macros. + * These do not check for proper code unit sequences or truncated text and may + * yield wrong results or even cause a crash if they are used with "malformed" + * text. + * In practice, U16_..._UNSAFE macros will produce slightly less code but + * should not be faster because the processing is only different when a + * surrogate code unit is detected, which will be rare. + * + * Similarly for UTF-8, there are "safe" macros without a suffix, + * and U8_..._UNSAFE versions. + * The performance differences are much larger here because UTF-8 provides so + * many opportunities for malformed sequences. + * The unsafe UTF-8 macros are entirely implemented inside the macro definitions + * and are fast, while the safe UTF-8 macros call functions for some complicated cases. + * + * Unlike with UTF-16, malformed sequences cannot be expressed with distinct + * code point values (0..U+10ffff). They are indicated with negative values instead. + * + * For more information see the ICU User Guide Strings chapter + * (http://userguide.icu-project.org/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. 
+ * + * @stable ICU 2.4 + */ + +#ifndef __UTF_H__ +#define __UTF_H__ + +#include <unicode/umachine.h> +/* include the utfXX.h after the following definitions */ + +/* single-code point definitions -------------------------------------------- */ + +/** + * Is this code point a Unicode noncharacter? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff) + +/** + * Is c a Unicode code point value (0..U+10ffff) + * that can be assigned a character? + * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c))) + +/** + * Is this code point a BMP code point (U+0000..U+ffff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.8 + */ +#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff) + +/** + * Is this code point a supplementary code point (U+10000..U+10ffff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.8 + */ +#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff) + +/** + * Is this code point a lead surrogate (U+d800..U+dbff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code point a trail surrogate (U+dc00..U+dfff)? 
+ * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code point a surrogate (U+d800..U+dfff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a trail surrogate? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 4.2 + */ +#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) + +/* include the utfXX.h ------------------------------------------------------ */ + +#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS + +#include <unicode/utf8.h> +#include <unicode/utf16.h> + +/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */ +#include <unicode/utf_old.h> + +#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */ + +#endif /* __UTF_H__ */ diff --git a/vendor/icu/include/unicode/utf16.h b/vendor/icu/include/unicode/utf16.h new file mode 100644 index 0000000000..ffdf584ec6 --- /dev/null +++ b/vendor/icu/include/unicode/utf16.h @@ -0,0 +1,745 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf16.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep09 +* created by: Markus W. 
Scherer +*/ + +/** + * \file + * \brief C API: 16-bit Unicode handling macros + * + * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (http://userguide.icu-project.org/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. + */ + +#ifndef __UTF16_H__ +#define __UTF16_H__ + +#include <unicode/umachine.h> +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c)) + +/** + * Is this code unit a lead surrogate (U+d800..U+dbff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code unit a trail surrogate (U+dc00..U+dfff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code unit a surrogate (U+d800..U+dfff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a trail surrogate? 
+ * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 4.2 + */ +#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) + +/** + * Helper constant for U16_GET_SUPPLEMENTARY. + * @internal + */ +#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) + +/** + * Get a supplementary code point value (U+10000..U+10ffff) + * from its lead and trail surrogates. + * The result is undefined if the input values are not + * lead and trail surrogates. + * + * @param lead lead surrogate (U+d800..U+dbff) + * @param trail trail surrogate (U+dc00..U+dfff) + * @return supplementary code point (U+10000..U+10ffff) + * @stable ICU 2.4 + */ +#define U16_GET_SUPPLEMENTARY(lead, trail) \ + (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) + + +/** + * Get the lead surrogate (0xd800..0xdbff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return lead surrogate (U+d800..U+dbff) for supplementary + * @stable ICU 2.4 + */ +#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) + +/** + * Get the trail surrogate (0xdc00..0xdfff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return trail surrogate (U+dc00..U+dfff) for supplementary + * @stable ICU 2.4 + */ +#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +/** + * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) + * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). + * @param c 32-bit code point + * @return 1 or 2 + * @stable ICU 2.4 + */ +#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** + * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). + * @return 2 + * @stable ICU 2.4 + */ +#define U16_MAX_LENGTH 2 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. 
+ * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * The result is undefined if the offset points to a single, unpaired surrogate. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_GET + * @stable ICU 2.4 + */ +#define U16_GET_UNSAFE(s, i, c) { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ + } else { \ + (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ + } \ + } \ +} + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then + * c is set to that unpaired surrogate. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 
+ * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_GET_UNSAFE + * @stable ICU 2.4 + */ +#define U16_GET(s, start, i, length, c) { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } else { \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ + } \ +} + +#ifndef U_HIDE_DRAFT_API + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then + * c is set to U+FFFD. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. 
+ * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_GET_UNSAFE + * @draft ICU 60 + */ +#define U16_GET_OR_FFFD(s, start, i, length, c) { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } else { \ + (c)=0xfffd; \ + } \ + } else { \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ + } \ +} + +#endif // U_HIDE_DRAFT_API + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset points to a single, unpaired lead surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_NEXT + * @stable ICU 2.4 + */ +#define U16_NEXT_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ + } \ +} + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 
+ * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_NEXT(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_LEAD(c)) { \ + uint16_t __c2; \ + if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } \ +} + +#ifndef U_HIDE_DRAFT_API + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then c is set to U+FFFD. 
+ * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_NEXT_UNSAFE + * @draft ICU 60 + */ +#define U16_NEXT_OR_FFFD(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ +} + +#endif // U_HIDE_DRAFT_API + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s UChar * string buffer + * @param i string offset + * @param c code point to append + * @see U16_APPEND + * @stable ICU 2.4 + */ +#define U16_APPEND_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a surrogate pair is written, checks for sufficient space in the string. + * If the code point is not valid or a trail surrogate does not fit, + * then isError is set to TRUE. 
+ * + * @param s UChar * string buffer + * @param i string offset, must be i<capacity + * @param capacity size of the string buffer + * @param c code point to append + * @param isError output UBool set to TRUE if an error occurs, otherwise not modified + * @see U16_APPEND_UNSAFE + * @stable ICU 2.4 + */ +#define U16_APPEND(s, i, capacity, c, isError) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } else /* c>0x10ffff or not enough space */ { \ + (isError)=TRUE; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_FWD_1 + * @stable ICU 2.4 + */ +#define U16_FWD_1_UNSAFE(s, i) { \ + if(U16_IS_LEAD((s)[(i)++])) { \ + ++(i); \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @see U16_FWD_1_UNSAFE + * @stable ICU 2.4 + */ +#define U16_FWD_1(s, i, length) { \ + if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. 
+ * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_FWD_N + * @stable ICU 2.4 + */ +#define U16_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const UChar * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param n number of code points to skip + * @see U16_FWD_N_UNSAFE + * @stable ICU 2.4 + */ +#define U16_FWD_N(s, i, length, n) { \ + int32_t __N=(n); \ + while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U16_FWD_1(s, i, length); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_START + * @stable ICU 2.4 + */ +#define U16_SET_CP_START_UNSAFE(s, i) { \ + if(U16_IS_TRAIL((s)[i])) { \ + --(i); \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to the trail surrogate of a surrogate pair, + * then the offset is decremented. + * Otherwise, it is not modified. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 
+ * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i + * @see U16_SET_CP_START_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_START(s, start, i) { \ + if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-16. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind a single, unpaired trail surrogate. + * + * @param s const UChar * string + * @param i string offset + * @param c output UChar32 variable + * @see U16_PREV + * @stable ICU 2.4 + */ +#define U16_PREV_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then c is set to that unpaired surrogate. 
+ * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @param c output UChar32 variable + * @see U16_PREV_UNSAFE + * @stable ICU 2.4 + */ +#define U16_PREV(s, start, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_TRAIL(c)) { \ + uint16_t __c2; \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } \ + } \ +} + +#ifndef U_HIDE_DRAFT_API + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then c is set to U+FFFD. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @param c output UChar32 variable + * @see U16_PREV_UNSAFE + * @draft ICU 60 + */ +#define U16_PREV_OR_FFFD(s, start, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ +} + +#endif // U_HIDE_DRAFT_API + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. 
+ * + * @param s const UChar * string + * @param i string offset + * @see U16_BACK_1 + * @stable ICU 2.4 + */ +#define U16_BACK_1_UNSAFE(s, i) { \ + if(U16_IS_TRAIL((s)[--(i)])) { \ + --(i); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @see U16_BACK_1_UNSAFE + * @stable ICU 2.4 + */ +#define U16_BACK_1(s, start, i) { \ + if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ + --(i); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @param n number of code points to skip + * @see U16_BACK_N + * @stable ICU 2.4 + */ +#define U16_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U16_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 
+ * + * @param s const UChar * string + * @param start start of string + * @param i string offset, must be start<i + * @param n number of code points to skip + * @see U16_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U16_BACK_N(s, start, i, n) { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U16_BACK_1(s, start, i); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-16. + * + * @param s const UChar * string + * @param i string offset + * @see U16_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT_UNSAFE(s, i) { \ + if(U16_IS_LEAD((s)[(i)-1])) { \ + ++(i); \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind the lead surrogate of a surrogate pair, + * then the offset is incremented. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. 
+ * + * @param s const UChar * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, start<=i<=length + * @param length int32_t string length + * @see U16_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U16_SET_CP_LIMIT(s, start, i, length) { \ + if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ +} + +#endif diff --git a/vendor/icu/include/unicode/utf8.h b/vendor/icu/include/unicode/utf8.h new file mode 100644 index 0000000000..20bfc77179 --- /dev/null +++ b/vendor/icu/include/unicode/utf8.h @@ -0,0 +1,882 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf8.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +*/ + +/** + * \file + * \brief C API: 8-bit Unicode handling macros + * + * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings. + * + * For more information see utf.h and the ICU User Guide Strings chapter + * (http://userguide.icu-project.org/strings). + * + * <em>Usage:</em> + * ICU coding guidelines for if() statements should be followed when using these macros. + * Compound statements (curly braces {}) must be used for if-else-while... + * bodies and all macro statements should be terminated with semicolon. 
+ */ + +#ifndef __UTF8_H__ +#define __UTF8_H__ + +#include <unicode/umachine.h> +#ifndef __UTF_H__ +# include "unicode/utf.h" +#endif + +/* internal definitions ----------------------------------------------------- */ + +/** + * Counts the trail bytes for a UTF-8 lead byte. + * Returns 0 for 0..0xc1 as well as for 0xf5..0xff. + * leadByte might be evaluated multiple times. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * + * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. + * @internal + */ +#define U8_COUNT_TRAIL_BYTES(leadByte) \ + (U8_IS_LEAD(leadByte) ? \ + ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0) + +/** + * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence. + * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff. + * leadByte might be evaluated multiple times. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * + * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. + * @internal + */ +#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \ + (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)) + +/** + * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is called by public macros in this file and thus must remain stable. + * @internal + */ +#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) + +/** + * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. 
+ * Lead byte E0..EF bits 3..0 are used as byte index, + * first trail byte bits 7..5 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD3_AND_T1 + * @internal + */ +#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" + +/** + * Internal 3-byte UTF-8 validity check. + * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5))) + +/** + * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * First trail byte bits 7..4 are used as byte index, + * lead byte F0..F4 bits 2..0 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD4_AND_T1 + * @internal + */ +#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" + +/** + * Internal 4-byte UTF-8 validity check. + * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7))) + +/** + * Function for handling "next code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_STABLE UChar32 U_EXPORT2 +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); + +/** + * Function for handling "append code point" with error-checking. 
+ * + * This is internal since it is not meant to be called directly by external clients; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_STABLE int32_t U_EXPORT2 +utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError); + +/** + * Function for handling "previous code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_STABLE UChar32 U_EXPORT2 +utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); + +/** + * Function for handling "skip backward one code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clients; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this + * file and thus must remain stable, and should not be hidden when other internal + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +U_STABLE int32_t U_EXPORT2 +utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); + +/* single-code point definitions -------------------------------------------- */ + +/** + * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_SINGLE(c) (((c)&0x80)==0) + +/** + * Is this code unit (byte) a UTF-8 lead byte? 
(0xC2..0xF4) + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32) +// 0x32=0xf4-0xc2 + +/** + * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF) + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40) + +/** + * How many code units (bytes) are used for the UTF-8 encoding + * of this Unicode code point? + * @param c 32-bit code point + * @return 1..4, or 0 if c is a surrogate or not a Unicode code point + * @stable ICU 2.4 + */ +#define U8_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xd7ff ? 3 : \ + ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ + ((uint32_t)(c)<=0xffff ? 3 : 4)\ + ) \ + ) \ + ) \ + ) + +/** + * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). + * @return 4 + * @stable ICU 2.4 + */ +#define U8_MAX_LENGTH 4 + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * The result is undefined if the offset points to an illegal UTF-8 + * byte sequence. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_GET + * @stable ICU 2.4 + */ +#define U8_GET_UNSAFE(s, i, c) { \ + int32_t _u8_get_unsafe_index=(int32_t)(i); \ + U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \ + U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \ +} + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. 
+ * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to an illegal UTF-8 byte sequence, then + * c is set to a negative value. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset + * @param i int32_t string offset, must be start<=i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_GET_UNSAFE + * @stable ICU 2.4 + */ +#define U8_GET(s, start, i, length, c) { \ + int32_t _u8_get_index=(i); \ + U8_SET_CP_START(s, start, _u8_get_index); \ + U8_NEXT(s, _u8_get_index, length, c); \ +} + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * The offset may point to either the lead byte or one of the trail bytes + * for a code point, in which case the macro will read all of the bytes + * for the code point. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to an illegal UTF-8 byte sequence, then + * c is set to U+FFFD. + * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_GET() if that distinction is important. 
+ * + * @param s const uint8_t * string + * @param start int32_t starting string offset + * @param i int32_t string offset, must be start<=i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_GET + * @stable ICU 51 + */ +#define U8_GET_OR_FFFD(s, start, i, length, c) { \ + int32_t _u8_get_index=(i); \ + U8_SET_CP_START(s, start, _u8_get_index); \ + U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \ +} + +/* definitions with forward iteration --------------------------------------- */ + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * The result is undefined if the offset points to a trail byte + * or an illegal UTF-8 sequence. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_NEXT + * @stable ICU 2.4 + */ +#define U8_NEXT_UNSAFE(s, i, c) { \ + (c)=(uint8_t)(s)[(i)++]; \ + if(!U8_IS_SINGLE(c)) { \ + if((c)<0xe0) { \ + (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \ + } else if((c)<0xf0) { \ + /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ + (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \ + (i)+=2; \ + } else { \ + (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \ + (i)+=3; \ + } \ + } \ +} + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. 
+ * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * If the offset points to a trail byte or an illegal UTF-8 sequence, then + * c is set to a negative value. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL) + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * If the offset points to a trail byte or an illegal UTF-8 sequence, then + * c is set to U+FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_NEXT() if that distinction is important. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_NEXT + * @stable ICU 51 + */ +#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd) + +/** @internal */ +#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \ + (c)=(uint8_t)(s)[(i)++]; \ + if(!U8_IS_SINGLE(c)) { \ + uint8_t __t = 0; \ + if((i)!=(length) && \ + /* fetch/validate/assemble all but last trail byte */ \ + ((c)>=0xe0 ? \ + ((c)<0xf0 ? 
/* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \ + (__t&=0x3f, 1) \ + : /* U+10000..U+10FFFF */ \ + ((c)-=0xf0)<=4 && \ + U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \ + ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \ + (__t=(s)[i]-0x80)<=0x3f) && \ + /* valid second-to-last trail byte */ \ + ((c)=((c)<<6)|__t, ++(i)!=(length)) \ + : /* U+0080..U+07FF */ \ + (c)>=0xc2 && ((c)&=0x1f, 1)) && \ + /* last trail byte */ \ + (__t=(s)[i]-0x80)<=0x3f && \ + ((c)=((c)<<6)|__t, ++(i), 1)) { \ + } else { \ + (c)=(sub); /* ill-formed*/ \ + } \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const uint8_t * string buffer + * @param i string offset + * @param c code point to append + * @see U8_APPEND + * @stable ICU 2.4 + */ +#define U8_APPEND_UNSAFE(s, i, c) { \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else { \ + if(__uc<=0x7ff) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + } else { \ + if(__uc<=0xffff) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Safe" macro, checks for a valid code point. + * If a non-ASCII code point is written, checks for sufficient space in the string. + * If the code point is not valid or trail bytes do not fit, + * then isError is set to TRUE. 
+ * + * @param s const uint8_t * string buffer + * @param i int32_t string offset, must be i<capacity + * @param capacity int32_t size of the string buffer + * @param c UChar32 code point to append + * @param isError output UBool set to TRUE if an error occurs, otherwise not modified + * @see U8_APPEND_UNSAFE + * @stable ICU 2.4 + */ +#define U8_APPEND(s, i, capacity, c, isError) { \ + uint32_t __uc=(c); \ + if(__uc<=0x7f) { \ + (s)[(i)++]=(uint8_t)__uc; \ + } else if(__uc<=0x7ff && (i)+1<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \ + (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \ + (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \ + } else { \ + (isError)=TRUE; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_FWD_1 + * @stable ICU 2.4 + */ +#define U8_FWD_1_UNSAFE(s, i) { \ + (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \ +} + +/** + * Advance the string offset from one code point boundary to the next. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. 
+ * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @see U8_FWD_1_UNSAFE + * @stable ICU 2.4 + */ +#define U8_FWD_1(s, i, length) { \ + uint8_t __b=(s)[(i)++]; \ + if(U8_IS_LEAD(__b) && (i)!=(length)) { \ + uint8_t __t1=(s)[i]; \ + if((0xe0<=__b && __b<0xf0)) { \ + if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ + } else if(__b<0xe0) { \ + if(U8_IS_TRAIL(__t1)) { \ + ++(i); \ + } \ + } else /* c>=0xf0 */ { \ + if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ + } \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_FWD_N + * @stable ICU 2.4 + */ +#define U8_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Advance the string offset from one code point boundary to the n-th next one, + * i.e., move forward by n code points. + * (Post-incrementing iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. 
+ * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param n number of code points to skip + * @see U8_FWD_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_FWD_N(s, i, length, n) { \ + int32_t __N=(n); \ + while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ + U8_FWD_1(s, i, length); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_START + * @stable ICU 2.4 + */ +#define U8_SET_CP_START_UNSAFE(s, i) { \ + while(U8_IS_TRAIL((s)[i])) { --(i); } \ +} + +/** + * Adjust a random-access offset to a code point boundary + * at the start of a code point. + * If the offset points to a UTF-8 trail byte, + * then the offset is moved backward to the corresponding lead byte. + * Otherwise, it is not modified. + * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i]. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<=i + * @see U8_SET_CP_START_UNSAFE + * @see U8_TRUNCATE_IF_INCOMPLETE + * @stable ICU 2.4 + */ +#define U8_SET_CP_START(s, start, i) { \ + if(U8_IS_TRAIL((s)[(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (i)); \ + } \ +} + +#ifndef U_HIDE_DRAFT_API +/** + * If the string ends with a UTF-8 byte sequence that is valid so far + * but incomplete, then reduce the length of the string to end before + * the lead byte of that incomplete sequence. + * For example, if the string ends with E1 80, the length is reduced by 2. 
+ * + * In all other cases (the string ends with a complete sequence, or it is not + * possible for any further trail byte to extend the trailing sequence) + * the length remains unchanged. + * + * Useful for processing text split across multiple buffers + * (save the incomplete sequence for later) + * and for optimizing iteration + * (check for string length only once per character). + * + * "Safe" macro, checks for illegal sequences and for string boundaries. + * Unlike U8_SET_CP_START(), this macro never reads s[length]. + * + * (In UTF-16, simply check for U16_IS_LEAD(last code unit).) + * + * The expansion is a single brace-enclosed block, like the other statement macros in this file, + * so it cannot capture an else that follows the macro invocation. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param length int32_t string length (usually start<=length) + * @see U8_SET_CP_START + * @draft ICU 61 + */ +#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) { \ + if((length)>(start)) { \ + uint8_t __b1=(s)[(length)-1]; \ + if(U8_IS_SINGLE(__b1)) { \ + /* common ASCII character */ \ + } else if(U8_IS_LEAD(__b1)) { \ + --(length); \ + } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \ + uint8_t __b2=(s)[(length)-2]; \ + if(0xe0<=__b2 && __b2<=0xf4) { \ + if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \ + U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \ + (length)-=2; \ + } \ + } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \ + uint8_t __b3=(s)[(length)-3]; \ + if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \ + (length)-=3; \ + } \ + } \ + } \ + } \ +} +#endif // U_HIDE_DRAFT_API + +/* definitions with backward iteration -------------------------------------- */ + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Unsafe" macro, assumes well-formed UTF-8. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. 
+ * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * The result is undefined if the offset is behind an illegal UTF-8 sequence. + * + * @param s const uint8_t * string + * @param i string offset + * @param c output UChar32 variable + * @see U8_PREV + * @stable ICU 2.4 + */ +#define U8_PREV_UNSAFE(s, i, c) { \ + (c)=(uint8_t)(s)[--(i)]; \ + if(U8_IS_TRAIL(c)) { \ + uint8_t __b, __count=1, __shift=6; \ +\ + /* c is a trail byte */ \ + (c)&=0x3f; \ + for(;;) { \ + __b=(s)[--(i)]; \ + if(__b>=0xc0) { \ + U8_MASK_LEAD_BYTE(__b, __count); \ + (c)|=(UChar32)__b<<__shift; \ + break; \ + } else { \ + (c)|=(UChar32)(__b&0x3f)<<__shift; \ + ++__count; \ + __shift+=6; \ + } \ + } \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_PREV_UNSAFE + * @stable ICU 2.4 + */ +#define U8_PREV(s, start, i, c) { \ + (c)=(uint8_t)(s)[--(i)]; \ + if(!U8_IS_SINGLE(c)) { \ + (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) 
+ * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a multi-byte sequence, then the macro will read + * the whole sequence. + * If the offset is behind a lead byte, then that itself + * will be returned as the code point. + * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD. + * + * This macro does not distinguish between a real U+FFFD in the text + * and U+FFFD returned for an ill-formed sequence. + * Use U8_PREV() if that distinction is important. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @param c output UChar32 variable, set to U+FFFD in case of an error + * @see U8_PREV + * @stable ICU 51 + */ +#define U8_PREV_OR_FFFD(s, start, i, c) { \ + (c)=(uint8_t)(s)[--(i)]; \ + if(!U8_IS_SINGLE(c)) { \ + (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @see U8_BACK_1 + * @stable ICU 2.4 + */ +#define U8_BACK_1_UNSAFE(s, i) { \ + while(U8_IS_TRAIL((s)[--(i)])) {} \ +} + +/** + * Move the string offset from one code point boundary to the previous one. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. 
+ * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<i + * @see U8_BACK_1_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_1(s, start, i) { \ + if(U8_IS_TRAIL((s)[--(i)])) { \ + (i)=utf8_back1SafeBody(s, start, (i)); \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. + * + * @param s const uint8_t * string + * @param i string offset + * @param n number of code points to skip + * @see U8_BACK_N + * @stable ICU 2.4 + */ +#define U8_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + U8_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** + * Move the string offset from one code point boundary to the n-th one before it, + * i.e., move backward by n code points. + * (Pre-decrementing backward iteration.) + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * @param s const uint8_t * string + * @param start int32_t index of the start of the string + * @param i int32_t string offset, must be start<i + * @param n number of code points to skip + * @see U8_BACK_N_UNSAFE + * @stable ICU 2.4 + */ +#define U8_BACK_N(s, start, i, n) { \ + int32_t __N=(n); \ + while(__N>0 && (i)>(start)) { \ + U8_BACK_1(s, start, i); \ + --__N; \ + } \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Unsafe" macro, assumes well-formed UTF-8. 
+ * + * @param s const uint8_t * string + * @param i string offset + * @see U8_SET_CP_LIMIT + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT_UNSAFE(s, i) { \ + U8_BACK_1_UNSAFE(s, i); \ + U8_FWD_1_UNSAFE(s, i); \ +} + +/** + * Adjust a random-access offset to a code point boundary after a code point. + * If the offset is behind a partial multi-byte sequence, + * then the offset is incremented to behind the whole sequence. + * Otherwise, it is not modified. + * The input offset may be the same as the string length. + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * @param s const uint8_t * string + * @param start int32_t starting string offset (usually 0) + * @param i int32_t string offset, must be start<=i<=length + * @param length int32_t string length + * @see U8_SET_CP_LIMIT_UNSAFE + * @stable ICU 2.4 + */ +#define U8_SET_CP_LIMIT(s, start, i, length) { \ + if((start)<(i) && ((i)<(length) || (length)<0)) { \ + U8_BACK_1(s, start, i); \ + U8_FWD_1(s, i, length); \ + } \ +} + +#endif diff --git a/vendor/icu/include/unicode/utf_old.h b/vendor/icu/include/unicode/utf_old.h new file mode 100644 index 0000000000..ec34494681 --- /dev/null +++ b/vendor/icu/include/unicode/utf_old.h @@ -0,0 +1,1204 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2002-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: utf_old.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002sep21 +* created by: Markus W. 
Scherer +*/ + +/** + * \file + * \brief C API: Deprecated macros for Unicode string handling + */ + +/** + * + * The macros in utf_old.h are all deprecated and their use discouraged. + * Some of the design principles behind the set of UTF macros + * have changed or proved impractical. + * Almost all of the old "UTF macros" are at least renamed. + * If you are looking for a new equivalent to an old macro, please see the + * comment at the old one. + * + * Brief summary of reasons for deprecation: + * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing) + * was impractical. + * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing) + * was of little use and impractical. + * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE + * selection framework: UTF32_ macros (all trivial) + * and UTF_ default and intermediate macros (all aliases). + * - The selection framework also caused many macro aliases. + * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2). + * - Change of language in Unicode standard: + * Growing distinction between internal x-bit Unicode strings and external UTF-x + * forms, with the former more lenient. + * Suggests renaming of UTF16_ macros to U16_. + * - The prefix "UTF_" without a width number confused some users. + * - "Safe" append macros needed the addition of an error indicator output. + * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values + * to indicate error conditions. + * - The use of the "_CHAR" infix for code point operations confused some users. + * + * More details: + * + * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32 + * for string processing, and among unsafe/safe/strict default macros for that. + * + * It proved nearly impossible to write non-trivial, high-performance code + * that is UTF-generic. 
+ * Unsafe default macros would be dangerous for default string processing, + * and the main reason for the "strict" versions disappeared: + * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal. + * The only other conditions that "strict" checked for were non-characters, + * which are valid during processing. Only during text input/output should they + * be checked, and at that time other well-formedness checks may be + * necessary or useful as well. + * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR + * or U_IS_UNICODE_CHAR. + * + * The old UTF8_..._SAFE macros also used some normal Unicode code points + * to indicate malformed sequences. + * The new U8_ macros without suffix (for example U8_NEXT) use negative values instead. + * + * The entire contents of utf32.h were moved here without replacement + * because all those macros were trivial and + * were meaningful only in the framework of choosing the UTF size. + * + * See Jitterbug 2150 and its discussion on the ICU mailing list + * in September 2002. + * + * <hr> + * + * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation: + * + * <p>The original concept for these files was for ICU to allow, + * in principle, selecting which UTF (UTF-8/16/32) is used internally + * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type + * accordingly. UTF-16 was the default.</p> + * + * <p>This concept has been abandoned. + * A lot of the ICU source code assumes UChar strings are in UTF-16. + * This is especially true for low-level code like + * conversion, normalization, and collation. + * The utf.h header enforces the default of UTF-16. + * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p> + * + * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. 
If this matches wchar_t, then + * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p> + * + * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit + * Unicode code point (Unicode scalar value, 0..0x10ffff). + * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as + * the definition of UChar. For details see the documentation for UChar32 itself.</p> + * + * <p>utf.h also defines a number of C macros for handling single Unicode code points and + * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual + * implementations of those macros and then aliases one set of them (for UTF-16) for general use. + * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while + * the general alias macros always begin with UTF_...</p> + * + * <p>Many string operations can be done with or without error checking. + * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe" + * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause + * program failures if the strings are not well-formed. The safe macros have an additional, boolean + * parameter "strict". If strict is FALSE, then only illegal sequences are detected. + * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates). 
+ * Safe macros return special error code points for illegal/irregular sequences: + * Typically, U+ffff, or values that would result in a code unit sequence of the same length + * as the erroneous input sequence.<br> + * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and + * they do not have start/length parameters for boundary checking.</p> + * + * <p>Here, the macros are aliased in two steps: + * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are + * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures. + * Then, in a second step, the default, general alias macros are set to use either the unsafe or + * the safe/not strict (default) or the safe/strict macro; + * these general macros do not have a strictness parameter.</p> + * + * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict. + * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for + * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p> + * + * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix. + * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias. + * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds, + * then the _UNSAFE version may be used. + * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p> + * + * <hr> + * + * @deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead. + */ + +#ifndef __UTF_OLD_H__ +#define __UTF_OLD_H__ + +/** + * \def U_HIDE_OBSOLETE_UTF_OLD_H + * + * Hides the obsolete definitions in unicode/utf_old.h. 
+ * Recommended to be set to 1 at compile time to make sure + * the long-deprecated macros are no longer used. + * + * For reasons for the deprecation see the utf_old.h file comments. + * + * @internal + */ +#ifndef U_HIDE_OBSOLETE_UTF_OLD_H +# define U_HIDE_OBSOLETE_UTF_OLD_H 0 +#endif + +#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H + +#include <unicode/utf.h> +#include <unicode/utf8.h> +#include <unicode/utf16.h> + +/* Formerly utf.h, part 1 --------------------------------------------------- */ + +#ifdef U_USE_UTF_DEPRECATES +/** + * Unicode string and array offset and index type. + * ICU always counts Unicode code units (UChars) for + * string offsets, indexes, and lengths, not Unicode code points. + * + * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release. + */ +typedef int32_t UTextOffset; +#endif + +/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF_SIZE 16 + +/** + * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations + * with strict=FALSE. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_SAFE +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#undef UTF_UNSAFE +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#undef UTF_STRICT + +/** + * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8, + * which need 1 or 2 bytes in UTF-8: + * \code + * U+0015 = NAK = Negative Acknowledge, C0 control character + * U+009f = highest C1 control character + * \endcode + * + * These are used by UTF8_..._SAFE macros so that they can return an error value + * that needs the same number of code units (bytes) as were seen by + * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID(). + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. 
+ */ +#define UTF8_ERROR_VALUE_1 0x15 + +/** + * See documentation on UTF8_ERROR_VALUE_1 for details. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF8_ERROR_VALUE_2 0x9f + +/** + * Error value for all UTFs. This code point value will be set by macros with error + * checking if an error is detected. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_ERROR_VALUE 0xffff + +/** + * Is a given 32-bit code an error value + * as returned by one of the macros for any UTF? + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_IS_ERROR(c) \ + (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) + +/** + * This is a combined macro: Is c a valid Unicode value _and_ not an error code? + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_IS_VALID(c) \ + (UTF_IS_UNICODE_CHAR(c) && \ + (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) + +/** + * Is this code unit or code point a surrogate (U+d800..U+dfff)? + * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h. + */ +#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) + +/** + * Is a given 32-bit code point a Unicode noncharacter? + * + * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h. + */ +#define UTF_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ + (uint32_t)(c)<=0x10ffff) + +/** + * Is a given 32-bit value a Unicode code point value (0..U+10ffff) + * that can be assigned a character? 
+ * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h. + */ +#define UTF_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + ((uint32_t)(c)>0xdfff && \ + (uint32_t)(c)<=0x10ffff && \ + !UTF_IS_UNICODE_NONCHAR(c))) + +/* Formerly utf8.h ---------------------------------------------------------- */ + +/** +* \var utf8_countTrailBytes +* Internal array with numbers of trail bytes for any given byte used in +* lead byte position. +* +* This is internal since it is not meant to be used directly by external clients; +* however it is used by public macros in this file and thus must remain stable, +* and should not be hidden when other internal functions are hidden (otherwise +* public macros would fail to compile). +* @internal +*/ +#ifdef U_UTF8_IMPL +// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes. +#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) +U_CFUNC const uint8_t utf8_countTrailBytes[]; +#else +U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_IMPORT*/ +#endif + +/** + * Count the trail bytes for a UTF-8 lead byte. + * The argument is parenthesized before the uint8_t cast so that an expression + * argument (for example b+1) is cast as a whole rather than only its first operand; + * the result of the cast is the index into utf8_countTrailBytes. + * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h. + */ +#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)]) + +/** + * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. + * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.
+ */ +#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) + +/** Is this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */ +#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) +/** Is this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */ +#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) +/** Is this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */ +#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) + +/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */ +#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) + +/** + * Given the code point c, how many bytes are needed to encode it in UTF-8. + * ICU does not deal with code points >0x10ffff + * unless necessary for advancing in the byte stream. + * + * These length macros take into account that for values >0x10ffff + * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff + * with 3 bytes. + * Code point comparisons need to be in uint32_t because UChar32 + * may be a signed type, and negative values must be recognized. + * + * The "#if 1" branch is the one that is compiled: it yields 1..4 and returns 3 + * for any value outside 0..0x10ffff. The "#else" branch, which would also + * return 5 and 6, is disabled. + * + * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h. + */ +#if 1 +# define UTF8_CHAR_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ + ) \ + ) +#else +# define UTF8_CHAR_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xffff ? 3 : \ + ((uint32_t)(c)<=0x10ffff ? 4 : \ + ((uint32_t)(c)<=0x3ffffff ? 5 : \ + ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ + ) \ + ) \ + ) \ + ) \ + ) +#endif + +/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h.
*/ +#define UTF8_MAX_CHAR_LENGTH 4 + +/** Average number of code units compared to UTF-16; computed as (5*size)/2 with integer division. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF8_ARRAY_SIZE(size) ((5*(size))/2) + +/** Read into c the code point that contains byte index i; i itself is not modified because a local copy of it is moved back to the code point start and then read forward. @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */ +#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ + int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ + UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ + UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ +} + +/** Same approach as UTF8_GET_CHAR_UNSAFE, but built from the ..._SAFE macros with start, length and strict passed through; i is not modified. @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */ +#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ + int32_t _utf8_get_char_safe_index=(int32_t)(i); \ + UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ + UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ +} + +/** Read the code point that starts at s[i] into c and advance i past it. A lead byte in c0..f4 is combined with 1 to 3 following bytes as given by UTF8_COUNT_TRAIL_BYTES; there is no length parameter and no validation of the trail bytes. @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */ +#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ + if((uint8_t)((c)-0xc0)<0x35) { \ + uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ + UTF8_MASK_LEAD_BYTE(c, __count); \ + switch(__count) { \ + /* each following branch falls through to the next one */ \ + case 3: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + case 2: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + case 1: \ + (c)=((c)<<6)|((s)[(i)++]&0x3f); \ + /* no other branches to optimize switch() */ \ + break; \ + } \ + } \ +} + +/** Write c as 1 to 4 UTF-8 bytes starting at s[i] and advance i past them; there is no capacity parameter and no range check on c. @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */ +#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + if((uint32_t)(c)<=0x7ff) { \ + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + } else { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + } \ +} + +/** @deprecated ICU 2.4.
Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF8_FWD_1_UNSAFE(s, i) { \ + (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ +} + +/** Advance i by n code points by repeating UTF8_FWD_1_UNSAFE; does nothing when n<=0. @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF8_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF8_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** Decrement i for as long as s[i] is a trail byte; no lower bound is checked. @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ + while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ +} + +/** Read s[i] into c and advance i. Values below 0x80 are returned as is, lead bytes are handed to utf8_nextCharSafeBody(), and any other value yields UTF8_ERROR_VALUE_1. NOTE(review): the (c)>=0x80 test assumes s[i] yields a non-negative byte value - confirm that callers pass unsigned byte data. @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */ +#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ + (c)=(s)[(i)++]; \ + if((c)>=0x80) { \ + if(UTF8_IS_LEAD(c)) { \ + (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ + } else { \ + (c)=UTF8_ERROR_VALUE_1; \ + } \ + } \ +} + +/** Append c at s[i] and advance i. Values up to 0x7f are stored directly without consulting length; everything else is delegated to utf8_appendCharSafeBody(), whose return value becomes the new i. @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */ +#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */ +#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) + +/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */ +#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */ +#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h.
*/ +#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ + if(UTF8_IS_TRAIL(c)) { \ + uint8_t __b, __count=1, __shift=6; \ +\ + /* c is a trail byte */ \ + (c)&=0x3f; \ + for(;;) { \ + __b=(s)[--(i)]; \ + if(__b>=0xc0) { \ + UTF8_MASK_LEAD_BYTE(__b, __count); \ + (c)|=(UChar32)__b<<__shift; \ + break; \ + } else { \ + (c)|=(UChar32)(__b&0x3f)<<__shift; \ + ++__count; \ + __shift+=6; \ + } \ + } \ + } \ +} + +/** Decrement i until s[i] is not a trail byte (i is decremented at least once); no lower bound is checked. @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF8_BACK_1_UNSAFE(s, i) { \ + while(UTF8_IS_TRAIL((s)[--(i)])) {} \ +} + +/** Move i back by n code points by repeating UTF8_BACK_1_UNSAFE; does nothing when n<=0. @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF8_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF8_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** Step back one code point and then forward one code point, so that i ends up just past the code point that precedes it. @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ + UTF8_BACK_1_UNSAFE(s, i); \ + UTF8_FWD_1_UNSAFE(s, i); \ +} + +/** Decrement i and read s[i] into c. Values below 0x80 are returned as is, values in 80..bf are handed to utf8_prevCharSafeBody(), and anything above bf yields UTF8_ERROR_VALUE_1. @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */ +#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ + (c)=(s)[--(i)]; \ + if((c)>=0x80) { \ + if((c)<=0xbf) { \ + (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ + } else { \ + (c)=UTF8_ERROR_VALUE_1; \ + } \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */ +#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */ +#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) + +/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */ +#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) + +/* Formerly utf16.h --------------------------------------------------------- */ + +/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
*/ +#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) + +/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */ +#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) + +/** Assuming c is a surrogate, is it a first/lead surrogate? Only bit 10 (0x400) is tested. @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */ +#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) + +/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */ +#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) + +/** Get the UTF-32 value from the surrogate code units. No check is made that first and second really are a lead and a trail surrogate. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */ +#define UTF16_GET_PAIR_VALUE(first, second) \ + (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) + +/** Compute the first (lead) surrogate for a supplementary code point; the result is cast to UChar. @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ +#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) + +/** Compute the second (trail) surrogate for a supplementary code point; the result is cast to UChar. @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ +#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) + +/** Alias for UTF_FIRST_SURROGATE. @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */ +#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) + +/** Alias for UTF_SECOND_SURROGATE. @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */ +#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) + +/** Negation of UTF_IS_SURROGATE. @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */ +#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) + +/** Alias for UTF_IS_FIRST_SURROGATE. @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */ +#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) + +/** Alias for UTF_IS_SECOND_SURROGATE. @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */ +#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) + +/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4.
Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */ +#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) + +/** Number of UTF-16 code units for c: 1 for values up to 0xffff, otherwise 2. @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */ +#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** The maximum number of UTF-16 code units per code point. @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */ +#define UTF16_MAX_CHAR_LENGTH 2 + +/** Average number of code units compared to UTF-16; for UTF-16 itself this is size unchanged. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF16_ARRAY_SIZE(size) (size) + +/** + * Get a single code point from an offset that points to any + * of the code units that belong to that code point. + * Assume 0<=i<length. + * + * When s[i] is a surrogate, the macro reads s[i+1] (lead surrogate) or + * s[i-1] (trail surrogate) without checking bounds or whether that unit + * is the matching surrogate. i is not modified. + * + * This could be used for iteration together with + * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(), + * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and + * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that. + * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. + */ +#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[i]; \ + if(UTF_IS_SURROGATE(c)) { \ + if(UTF_IS_SURROGATE_FIRST(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ + } else { \ + (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ + } \ + } \ +} + +/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h.
*/ +#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ + (c)=(s)[i]; \ + if(UTF_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(UTF_IS_SURROGATE_FIRST(c)) { \ + if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ + (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched first surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else { \ + if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ + (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched second surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** Read the code point that starts at s[i] into c and advance i past it; when s[i] is a lead surrogate the next unit is consumed as its trail surrogate without any check. @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ +#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ + if(UTF_IS_FIRST_SURROGATE(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ + } \ +} + +/** Write c as one or two UTF-16 code units starting at s[i] and advance i past them; there is no capacity parameter and no range check on c. @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ +#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} + +/** Advance i by one code unit, and by a second one when the unit just passed is a lead surrogate. @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF16_FWD_1_UNSAFE(s, i) { \ + if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ + ++(i); \ + } \ +} + +/** Advance i by n code points by repeating UTF16_FWD_1_UNSAFE; does nothing when n<=0. @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF16_FWD_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF16_FWD_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** Decrement i once when s[i] is a trail surrogate; the preceding unit is not inspected. @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ + if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ + --(i); \ + } \ +} + +/** @deprecated ICU 2.4.
Use U16_NEXT instead, see utf_old.h. */ +#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ + (c)=(s)[(i)++]; \ + if(UTF_IS_FIRST_SURROGATE(c)) { \ + uint16_t __c2; \ + if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched first surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + /* unmatched second surrogate or other non-character */ \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** Append c at s[i] and advance i. length is only consulted when a surrogate pair has to be written; the single-unit write and the UTF_ERROR_VALUE writes are not bounds-checked. @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ +#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else if((uint32_t)(c)<=0x10ffff) { \ + if((i)+1<(length)) { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } else /* not enough space */ { \ + (s)[(i)++]=UTF_ERROR_VALUE; \ + } \ + } else /* c>0x10ffff, write error value */ { \ + (s)[(i)++]=UTF_ERROR_VALUE; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */ +#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ +#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ +#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) + +/** Decrement i and read s[i] into c; when that unit is a trail surrogate, i is decremented again and the unit found there is used as the lead surrogate without any check. @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ +#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ + if(UTF_IS_SECOND_SURROGATE(c)) { \ + (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ + } \ +} + +/** Decrement i by one code unit, and by a second one when the unit reached is a trail surrogate. @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF16_BACK_1_UNSAFE(s, i) { \ + if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ + --(i); \ + } \ +} + +/** @deprecated ICU 2.4.
Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF16_BACK_N_UNSAFE(s, i, n) { \ + int32_t __N=(n); \ + while(__N>0) { \ + UTF16_BACK_1_UNSAFE(s, i); \ + --__N; \ + } \ +} + +/** Increment i once when the unit before it, s[i-1], is a lead surrogate; s[i] itself is not inspected. @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ + if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ + ++(i); \ + } \ +} + +/** Decrement i and read s[i] into c. A trail surrogate is combined with a preceding lead surrogate only if i>start; with strict set, unmatched surrogates and non-characters are replaced by UTF_ERROR_VALUE. @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ +#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ + (c)=(s)[--(i)]; \ + if(UTF_IS_SECOND_SURROGATE(c)) { \ + uint16_t __c2; \ + if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ + /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \ + } else if(strict) {\ + /* unmatched second surrogate */ \ + (c)=UTF_ERROR_VALUE; \ + } \ + } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ + /* unmatched first surrogate or other non-character */ \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ +#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ +#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */ +#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) + +/* Formerly utf32.h --------------------------------------------------------- */ + +/* +* Old documentation: +* +* This file defines macros to deal with UTF-32 code units and code points. +* Signatures and semantics are the same as for the similarly named macros +* in utf16.h. +* utf32.h is included by utf.h after unicode/umachine.h +* and some common definitions. +* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
+* Compound statements (curly braces {}) must be used for if-else-while... +* bodies and all macro statements should be terminated with a semicolon.</p> +*/ + +/* internal definitions ----------------------------------------------------- */ + +/** Non-strict: is c in the range 0..0x10ffff? Strict: is c accepted by UTF_IS_UNICODE_CHAR? @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_SAFE(c, strict) \ + (!(strict) ? \ + (uint32_t)(c)<=0x10ffff : \ + UTF_IS_UNICODE_CHAR(c)) + +/* + * For the semantics of all of these macros, see utf16.h. + * The UTF-32 versions are trivial because any code point is + * encoded using exactly one code unit. + */ + +/* single-code point definitions -------------------------------------------- */ + +/* classes of code unit values */ + +/** Always 1; the argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_SINGLE(uchar) 1 +/** Always 0; the argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_LEAD(uchar) 0 +/** Always 0; the argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_IS_TRAIL(uchar) 0 + +/* number of code units per code point */ + +/** Always 0; the argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEED_MULTIPLE_UCHAR(c) 0 +/** Always 1; the argument is ignored. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_CHAR_LENGTH(c) 1 +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_MAX_CHAR_LENGTH 1 + +/* average number of code units compared to UTF-16 */ + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_ARRAY_SIZE(size) (size) + +/** Copy s[i] into c; i is not modified. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[i]; \ +} + +/** Copy s[i] into c and replace it by UTF_ERROR_VALUE when UTF32_IS_SAFE rejects it; start and length are not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ + (c)=(s)[i]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/* definitions with forward iteration --------------------------------------- */ + +/** Copy s[i] into c and increment i. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[(i)++]; \ +} + +/** @deprecated ICU 2.4.
Obsolete, see utf_old.h. */ +#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \ + (s)[(i)++]=(c); \ +} + +/** Increment i. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_1_UNSAFE(s, i) { \ + ++(i); \ +} + +/** Add n to i. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_N_UNSAFE(s, i, n) { \ + (i)+=(n); \ +} + +/** Expands to an empty block. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ +} + +/** Copy s[i] into c, increment i, and replace c by UTF_ERROR_VALUE when UTF32_IS_SAFE rejects it; length is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ + (c)=(s)[(i)++]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** Store c at s[i] and increment i; a value above 0x10ffff is replaced by 0xfffd (not by UTF_ERROR_VALUE). length is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ + if((uint32_t)(c)<=0x10ffff) { \ + (s)[(i)++]=(c); \ + } else /* c>0x10ffff, write 0xfffd */ { \ + (s)[(i)++]=0xfffd; \ + } \ +} + +/** Increment i; length is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_1_SAFE(s, i, length) { \ + ++(i); \ +} + +/** Add n to i and clamp the result to length. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_FWD_N_SAFE(s, i, length, n) { \ + if(((i)+=(n))>(length)) { \ + (i)=(length); \ + } \ +} + +/** Expands to an empty block. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ +} + +/* definitions with backward iteration -------------------------------------- */ + +/** Decrement i and copy s[i] into c. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ + (c)=(s)[--(i)]; \ +} + +/** Decrement i. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_1_UNSAFE(s, i) { \ + --(i); \ +} + +/** Subtract n from i. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_N_UNSAFE(s, i, n) { \ + (i)-=(n); \ +} + +/** Expands to an empty block. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ +} + +/** @deprecated ICU 2.4. Obsolete, see utf_old.h.
*/ +#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ + (c)=(s)[--(i)]; \ + if(!UTF32_IS_SAFE(c, strict)) { \ + (c)=UTF_ERROR_VALUE; \ + } \ +} + +/** Decrement i; start is not used. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_1_SAFE(s, start, i) { \ + --(i); \ +} + +/** Subtract n from i and clamp the result to start. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_BACK_N_SAFE(s, start, i, n) { \ + (i)-=(n); \ + if((i)<(start)) { \ + (i)=(start); \ + } \ +} + +/** Expands to an empty block. Note that the parameter list is (s, i, length) without a start parameter, unlike UTF8_SET_CHAR_LIMIT_SAFE and UTF16_SET_CHAR_LIMIT_SAFE. @deprecated ICU 2.4. Obsolete, see utf_old.h. */ +#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ +} + +/* Formerly utf.h, part 2 --------------------------------------------------- */ + +/** + * Estimate the number of code units for a string based on the number of UTF-16 code units. + * + * @deprecated ICU 2.4. Obsolete, see utf_old.h. + */ +#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) + +/* The UTF_..._SAFE and UTF_..._UNSAFE macros that follow forward to the UTF16_ macro with the same suffix and the same arguments. */ + +/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */ +#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */ +#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */ +#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */ +#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */ +#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */ +#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) + + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */ +#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
*/ +#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) + +/* Each UTF_..._SAFE and UTF_..._UNSAFE macro in this part forwards to the UTF16_ macro with the same suffix and the same arguments. */ + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */ +#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */ +#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) + + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */ +#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */ +#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) + + +/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */ +#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) + +/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */ +#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) + + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */ +#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */ +#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) + + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */ +#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) + +/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */ +#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) + + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */ +#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) + +/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
*/ +#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) + +/* Define default macros (UTF-16 "safe") ------------------------------------ */ + +/* Most macros in this section expand directly to a U16_... macro; UTF_NEED_MULTIPLE_UCHAR and UTF_APPEND_CHAR expand to UTF16_... macros defined earlier in this file. */ + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * Same as UTF16_IS_SINGLE. + * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h. + */ +#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) + +/** + * Is this code unit the first one of several (a lead surrogate)? + * Same as UTF16_IS_LEAD. + * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. + */ +#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) + +/** + * Is this code unit one of several but not the first one (a trail surrogate)? + * Same as UTF16_IS_TRAIL. + * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. + */ +#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) + +/** + * Does this code point require multiple code units (is it a supplementary code point)? + * Same as UTF16_NEED_MULTIPLE_UCHAR. + * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. + */ +#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) + +/** + * How many code units are used to encode this code point (1 or 2)? + * Same as UTF16_CHAR_LENGTH. + * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. + */ +#define UTF_CHAR_LENGTH(c) U16_LENGTH(c) + +/** + * How many code units are used at most for any Unicode code point (2)? + * Same as UTF16_MAX_CHAR_LENGTH. + * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. + */ +#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH + +/** + * Set c to the code point that contains the code unit i. + * i could point to the lead or the trail surrogate for the code point. + * i is not modified. + * Same as UTF16_GET_CHAR. + * \pre 0<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
+ */ +#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) + +/** + * Set c to the code point that starts at code unit i + * and advance i to beyond the code units of this code point (post-increment). + * i must point to the first code unit of a code point. + * Otherwise c is set to the trail unit (surrogate) itself. + * Same as UTF16_NEXT_CHAR. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h. + */ +#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) + +/** + * Append the code units of code point c to the string at index i + * and advance i to beyond the new code units (post-increment). + * The code units beginning at index i will be overwritten. + * Same as UTF16_APPEND_CHAR (this macro expands to UTF16_APPEND_CHAR_SAFE). + * \pre 0<=c<=0x10ffff + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. + */ +#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) + +/** + * Advance i to beyond the code units of the code point that begins at i. + * I.e., advance i by one code point. + * Same as UTF16_FWD_1. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. + */ +#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) + +/** + * Advance i to beyond the code units of the n code points where the first one begins at i. + * I.e., advance i by n code points. + * Same as UTF16_FWD_N. + * \pre 0<=i<length + * \post 0<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. + */ +#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) + +/** + * Take the random-access index i and adjust it so that it points to the beginning + * of a code point. + * The input index points to any code unit of a code point and is moved to point to + * the first code unit of the same code point. i is never incremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching + * lead surrogate, then i is decremented. Otherwise it is not modified. + * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index. + * Same as UTF16_SET_CHAR_START. + * \pre start<=i<length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. + */ +#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) + +/** + * Set c to the code point that has code units before i + * and move i backward (towards the beginning of the string) + * to the first code unit of this code point (pre-decrement). + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_PREV_CHAR. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h. + */ +#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) + +/** + * Move i backward (towards the beginning of the string) + * to the first code unit of the code point that has code units before i. + * I.e., move i backward by one code point. + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_BACK_1. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. + */ +#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) + +/** + * Move i backward (towards the beginning of the string) + * to the first code unit of the n code points that have code units before i. + * I.e., move i backward by n code points. + * i must point to the first code unit after the last unit of a code point (i==length is allowed). + * Same as UTF16_BACK_N. + * \pre start<i<=length + * \post start<=i<length + * + * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
+ */ +#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) + +/** + * Take the random-access index i and adjust it so that it points beyond + * a code point. The input index points beyond any code unit + * of a code point and is moved to point beyond the last code unit of the same + * code point. i is never decremented. + * In other words, if i points to a trail surrogate that is preceded by a matching + * lead surrogate, then i is incremented. Otherwise it is not modified. + * This can be used to start an iteration with UTF_PREV_CHAR() from a random index. + * Same as UTF16_SET_CHAR_LIMIT (this macro expands to U16_SET_CP_LIMIT with the same arguments). + * \pre start<i<=length + * \post start<i<=length + * + * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. + */ +#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) + +#endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H + +#endif diff --git a/vendor/icu/include/unicode/utypes.h b/vendor/icu/include/unicode/utypes.h new file mode 100644 index 0000000000..6e22fb0f92 --- /dev/null +++ b/vendor/icu/include/unicode/utypes.h @@ -0,0 +1,704 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 1996-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* FILE NAME : UTYPES.H (formerly ptypes.h) +* +* Date Name Description +* 12/11/96 helena Creation. +* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32, +* uint8, uint16, and uint32. +* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as +* well as C++. +* Modified to use memcpy() for uprv_arrayCopy() fns. +* 04/14/97 aliu Added TPlatformUtilities. +* 05/07/97 aliu Added import/export specifiers (replacing the old +* broken EXT_CLASS). Added version number for our +* code.
Cleaned up header. +* 6/20/97 helena Java class name change. +* 08/11/98 stephen UErrorCode changed from typedef to enum +* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3 +* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t +* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066) +* 04/20/99 stephen Cleaned up & reworked for autoconf. +* Renamed to utypes.h. +* 05/05/99 stephen Changed to use <inttypes.h> +* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here. +******************************************************************************* +*/ + +#ifndef UTYPES_H +#define UTYPES_H + + +#include <unicode/umachine.h> +#include <unicode/uversion.h> +#include <unicode/uconfig.h> +#include <float.h> + +#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS +# include "unicode/utf.h" +#endif + +/*! + * \file + * \brief Basic definitions for ICU, for both C and C++ APIs + * + * This file defines basic types, constants, and enumerations directly or + * indirectly by including other header files, especially utf.h for the + * basic character and string definitions and umachine.h for consistent + * integer and other types. 
+ */ + + +/** + * \def U_SHOW_CPLUSPLUS_API + * @internal + */ +#ifdef __cplusplus +# ifndef U_SHOW_CPLUSPLUS_API +# define U_SHOW_CPLUSPLUS_API 1 +# endif +#else +# undef U_SHOW_CPLUSPLUS_API +# define U_SHOW_CPLUSPLUS_API 0 +#endif + +/** @{ API visibility control */ + +/** + * \def U_HIDE_DRAFT_API + * Define this to 1 to request that draft API be "hidden" + * @internal + */ +/** + * \def U_HIDE_INTERNAL_API + * Define this to 1 to request that internal API be "hidden" + * @internal + */ +#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API) +#define U_HIDE_DRAFT_API 1 +#endif +#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API) +#define U_HIDE_INTERNAL_API 1 +#endif + +/** @} */ + +/*===========================================================================*/ +/* ICUDATA naming scheme */ +/*===========================================================================*/ + +/** + * \def U_ICUDATA_TYPE_LETTER + * + * This is a platform-dependent string containing one letter: + * - b for big-endian, ASCII-family platforms + * - l for little-endian, ASCII-family platforms + * - e for big-endian, EBCDIC-family platforms + * This letter is part of the common data file name. + * @stable ICU 2.0 + */ + +/** + * \def U_ICUDATA_TYPE_LITLETTER + * The non-string form of U_ICUDATA_TYPE_LETTER + * @stable ICU 2.0 + */ +#if U_CHARSET_FAMILY +# if U_IS_BIG_ENDIAN + /* EBCDIC - should always be BE */ +# define U_ICUDATA_TYPE_LETTER "e" +# define U_ICUDATA_TYPE_LITLETTER e +# else +# error "Don't know what to do with little endian EBCDIC!" +# define U_ICUDATA_TYPE_LETTER "x" +# define U_ICUDATA_TYPE_LITLETTER x +# endif +#else +# if U_IS_BIG_ENDIAN + /* Big-endian ASCII */ +# define U_ICUDATA_TYPE_LETTER "b" +# define U_ICUDATA_TYPE_LITLETTER b +# else + /* Little-endian ASCII */ +# define U_ICUDATA_TYPE_LETTER "l" +# define U_ICUDATA_TYPE_LITLETTER l +# endif +#endif + +/** + * A single string literal containing the icudata stub name. i.e. 
'icudt18e' for + * ICU 1.8.x on EBCDIC, etc. + * @stable ICU 2.0 + */ +#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER +#ifndef U_HIDE_INTERNAL_API +#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */ +#define U_USE_USRDATA 0 /**< @internal */ +#endif /* U_HIDE_INTERNAL_API */ + +/** + * U_ICUDATA_ENTRY_POINT is the name of the DLL entry point to the ICU data library. + * Defined as a literal, not a string. + * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string + * from the corresponding macro invocation, _before_ other macro substitutions. + * Nested \#defines are needed to get the actual version numbers rather than + * the literal text U_ICU_VERSION_MAJOR_NUM into the name. + * The net result will be something of the form + * \#define U_ICUDATA_ENTRY_POINT icudt19_dat + * @stable ICU 2.4 + */ +#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME) + +#ifndef U_HIDE_INTERNAL_API +/** + * Do not use. Note that it's OK for the 2nd argument to be undefined (literal). + * @internal + */ +#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff) + +/** + * Do not use. + * @internal + */ +#ifndef U_DEF_ICUDATA_ENTRY_POINT +/* affected by symbol renaming. See platform.h */ +#ifndef U_LIB_SUFFIX_C_NAME +#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat +#else +#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat +#endif +#endif +#endif /* U_HIDE_INTERNAL_API */ + +/** + * \def NULL + * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C. 
+ * @stable ICU 2.0 + */ +#ifndef NULL +#ifdef __cplusplus +#define NULL nullptr +#else +#define NULL ((void *)0) +#endif +#endif + +/*===========================================================================*/ +/* Calendar/TimeZone data types */ +/*===========================================================================*/ + +/** + * Date and Time data type. + * This is a primitive data type that holds the date and time + * as the number of milliseconds since 1970-jan-01, 00:00 UTC. + * UTC leap seconds are ignored. + * @stable ICU 2.0 + */ +typedef double UDate; + +/** The number of milliseconds per second @stable ICU 2.0 */ +#define U_MILLIS_PER_SECOND (1000) +/** The number of milliseconds per minute @stable ICU 2.0 */ +#define U_MILLIS_PER_MINUTE (60000) +/** The number of milliseconds per hour @stable ICU 2.0 */ +#define U_MILLIS_PER_HOUR (3600000) +/** The number of milliseconds per day @stable ICU 2.0 */ +#define U_MILLIS_PER_DAY (86400000) + +/** + * Maximum UDate value + * @stable ICU 4.8 + */ +#define U_DATE_MAX DBL_MAX + +/** + * Minimum UDate value + * @stable ICU 4.8 + */ +#define U_DATE_MIN -U_DATE_MAX + +/*===========================================================================*/ +/* Shared library/DLL import-export API control */ +/*===========================================================================*/ + +/* + * Control of symbol import/export. + * ICU is separated into three libraries. + */ + +/** + * \def U_COMBINED_IMPLEMENTATION + * Set to export library symbols from inside the ICU library + * when all of ICU is in a single library. + * This can be set as a compiler option while building ICU, and it + * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc. + * @stable ICU 2.0 + */ + +/** + * \def U_DATA_API + * Set to export library symbols from inside the stubdata library, + * and to import them from outside. 
+ * @stable ICU 3.0 + */ + +/** + * \def U_COMMON_API + * Set to export library symbols from inside the common library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_I18N_API + * Set to export library symbols from inside the i18n library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_LAYOUT_API + * Set to export library symbols from inside the layout engine library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_LAYOUTEX_API + * Set to export library symbols from inside the layout extensions library, + * and to import them from outside. + * @stable ICU 2.6 + */ + +/** + * \def U_IO_API + * Set to export library symbols from inside the ustdio library, + * and to import them from outside. + * @stable ICU 2.0 + */ + +/** + * \def U_TOOLUTIL_API + * Set to export library symbols from inside the toolutil library, + * and to import them from outside. + * @stable ICU 3.4 + */ + +#if defined(U_COMBINED_IMPLEMENTATION) +#define U_DATA_API U_EXPORT +#define U_COMMON_API U_EXPORT +#define U_I18N_API U_EXPORT +#define U_LAYOUT_API U_EXPORT +#define U_LAYOUTEX_API U_EXPORT +#define U_IO_API U_EXPORT +#define U_TOOLUTIL_API U_EXPORT +#elif defined(U_STATIC_IMPLEMENTATION) +#define U_DATA_API +#define U_COMMON_API +#define U_I18N_API +#define U_LAYOUT_API +#define U_LAYOUTEX_API +#define U_IO_API +#define U_TOOLUTIL_API +#elif defined(U_COMMON_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_EXPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_I18N_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_EXPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_LAYOUT_IMPLEMENTATION) +#define U_DATA_API 
U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_EXPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_LAYOUTEX_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_EXPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_IO_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_EXPORT +#define U_TOOLUTIL_API U_IMPORT +#elif defined(U_TOOLUTIL_IMPLEMENTATION) +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_EXPORT +#else +#define U_DATA_API U_IMPORT +#define U_COMMON_API U_IMPORT +#define U_I18N_API U_IMPORT +#define U_LAYOUT_API U_IMPORT +#define U_LAYOUTEX_API U_IMPORT +#define U_IO_API U_IMPORT +#define U_TOOLUTIL_API U_IMPORT +#endif + +/** + * \def U_STANDARD_CPP_NAMESPACE + * Control of C++ Namespace + * @stable ICU 2.0 + */ +#ifdef __cplusplus +#define U_STANDARD_CPP_NAMESPACE :: +#else +#define U_STANDARD_CPP_NAMESPACE +#endif + +/*===========================================================================*/ +/* UErrorCode */ +/*===========================================================================*/ + +/** + * Error code to replace exception handling, so that the code is compatible with all C++ compilers, + * and to use the same mechanism for C and C++. 
+ * + * \par + * ICU functions that take a reference (C++) or a pointer (C) to a UErrorCode + * first test if(U_FAILURE(errorCode)) { return immediately; } + * so that in a chain of such functions the first one that sets an error code + * causes the following ones to not perform any operations. + * + * \par + * Error codes should be tested using U_FAILURE() and U_SUCCESS(). + * @stable ICU 2.0 + */ +typedef enum UErrorCode { + /* The ordering of U_ERROR_WARNING_START vs. U_USING_FALLBACK_WARNING looks weird + * and is that way because the VC++ debugger displays the first encountered constant, + * which is not what the code is used for + */ + + U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */ + + U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */ + + U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */ + + U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */ + + U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */ + + U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */ + + U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */ + + U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */ + + U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */ + + U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. 
*/ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal UErrorCode warning value. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_ERROR_WARNING_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + U_ZERO_ERROR = 0, /**< No error, no warning. */ + + U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */ + U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */ + U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */ + U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */ + U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */ + U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */ + U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */ + U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */ + U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */ + U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */ + U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */ + U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. 
*/ + U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */ + U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */ + U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */ + U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */ + U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */ + U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */ + U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */ + U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */ + U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */ + U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */ + U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */ + U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource. + It is very possible that a circular alias definition has occurred */ + U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */ + U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */ + U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */ + U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */ + U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */ + U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */ + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest standard error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
+ */ + U_STANDARD_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10000 0x10100 are reserved for Transliterator. + */ + U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */ + U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */ + U_MALFORMED_RULE, /**< Elements of a rule are misplaced */ + U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/ + U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */ + U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/ + U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */ + U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */ + U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */ + U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */ + U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */ + U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */ + U_MISSING_OPERATOR, /**< A rule contains no operator */ + U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */ + U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */ + U_MULTIPLE_CURSORS, /**< More than one cursor */ + U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */ + U_TRAILING_BACKSLASH, /**< A dangling backslash */ + U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */ + U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */ + U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */ + U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */ + U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */ + U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */ + U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */ + U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to 
the RuleBasedTransliterator parser */ + U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */ + U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */ + U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */ + U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */ + U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */ + U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */ + U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */ + U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */ + U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */ + U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal Transliterator error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_PARSE_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10100 0x10200 are reserved for the formatting API. + */ + U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */ + U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */ + U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */ + U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. 
Use U_MULTIPLE_DECIMAL_SEPARATORS */ + U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */ + U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */ + U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */ + U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */ + U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */ + U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */ + U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */ + U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */ + U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */ + U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */ + U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */ + U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */ + U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */ + U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */ + U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */ + U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */ +#ifndef U_HIDE_DRAFT_API + U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @draft ICU 61 */ +#endif // U_HIDE_DRAFT_API +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal formatting API error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_FMT_PARSE_ERROR_LIMIT = 0x10113, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10200 0x102ff are reserved for BreakIterator. + */ + U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. 
*/ + U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */ + U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */ + U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */ + U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */ + U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */ + U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */ + U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */ + U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */ + U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */ + U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */ + U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */ + U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */ + U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */ + U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal BreakIterator error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_BRK_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors. + */ + U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */ + U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */ + U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */ + U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */ + U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */ + U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */ + U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. 
*/ + U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */ + U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */ + U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */ + U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */ + U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */ + U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */ + U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */ + U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/ +#ifndef U_HIDE_DEPRECATED_API + U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */ +#endif /* U_HIDE_DEPRECATED_API */ + U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */ + U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */ + U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */ + U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */ + U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */ + U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */ + U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal regular expression error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3, +#endif // U_HIDE_DEPRECATED_API + + /* + * Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes. 
+ */ + U_IDNA_PROHIBITED_ERROR=0x10400, + U_IDNA_ERROR_START=0x10400, + U_IDNA_UNASSIGNED_ERROR, + U_IDNA_CHECK_BIDI_ERROR, + U_IDNA_STD3_ASCII_RULES_ERROR, + U_IDNA_ACE_PREFIX_ERROR, + U_IDNA_VERIFICATION_ERROR, + U_IDNA_LABEL_TOO_LONG_ERROR, + U_IDNA_ZERO_LENGTH_LABEL_ERROR, + U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR, +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal IDNA error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_IDNA_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + /* + * Aliases for StringPrep + */ + U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR, + U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR, + U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR, + + /* + * Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes. + */ + U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */ + U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */ + U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */ +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal plug-in error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_PLUGIN_ERROR_LIMIT, +#endif // U_HIDE_DEPRECATED_API + +#ifndef U_HIDE_DEPRECATED_API + /** + * One more than the highest normal error code. + * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. + */ + U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT +#endif // U_HIDE_DEPRECATED_API +} UErrorCode; + +/* Use the following to determine if an UErrorCode represents */ +/* operational success or failure. */ + +#ifdef __cplusplus + /** + * Does the error code indicate success? + * @stable ICU 2.0 + */ + static + inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); } + /** + * Does the error code indicate a failure? 
+ * @stable ICU 2.0 + */ + static + inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); } +#else + /** + * Does the error code indicate success? + * @stable ICU 2.0 + */ +# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR) + /** + * Does the error code indicate a failure? + * @stable ICU 2.0 + */ +# define U_FAILURE(x) ((x)>U_ZERO_ERROR) +#endif + +/** + * Return a string for a UErrorCode value. + * The string will be the same as the name of the error code constant + * in the UErrorCode enum above. + * @stable ICU 2.0 + */ +U_STABLE const char * U_EXPORT2 +u_errorName(UErrorCode code); + + +#endif /* _UTYPES */ diff --git a/vendor/icu/include/unicode/uvernum.h b/vendor/icu/include/unicode/uvernum.h new file mode 100644 index 0000000000..0427bcb03d --- /dev/null +++ b/vendor/icu/include/unicode/uvernum.h @@ -0,0 +1,178 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uvernum.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* Updated by: Steven R. Loomis +* +*/ + +/** + * \file + * \brief C API: definitions of ICU version numbers + * + * This file is included by uversion.h and other files. This file contains only + * macros and definitions. The actual version numbers are defined here. 
+ */ + + /* + * IMPORTANT: When updating version, the following things need to be done: + * source/common/unicode/uvernum.h - this file: update major, minor, + * patchlevel, suffix, version, short version constants, namespace, + * renaming macro, and copyright + * + * The following files need to be updated as well, which can be done + * by running the UNIX makefile target 'update-windows-makefiles' in icu/source. + * + * + * source/common/common.vcxproj - update 'Output file name' on the link tab so + * that it contains the new major/minor combination + * source/i18n/i18n.vcxproj - same as for the common.vcxproj + * source/layoutex/layoutex.vcproj - same + * source/stubdata/stubdata.vcproj - same as for the common.vcxproj + * source/io/io.vcproj - same as for the common.vcxproj + * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains + * the new major/minor combination and the Unicode version. + */ + +#ifndef UVERNUM_H +#define UVERNUM_H + +/** The standard copyright notice that gets compiled into each library. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING \ + " Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html " + +/** The current ICU major version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_MAJOR_NUM 61 + +/** The current ICU minor version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_MINOR_NUM 1 + +/** The current ICU patchlevel version as an integer. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION_PATCHLEVEL_NUM 0 + +/** The current ICU build level version as an integer. + * This value is for use by ICU clients. It defaults to 0. 
+ * @stable ICU 4.0 + */ +#ifndef U_ICU_VERSION_BUILDLEVEL_NUM +#define U_ICU_VERSION_BUILDLEVEL_NUM 0 +#endif + +/** Glued version suffix for renamers + * This value will change in the subsequent releases of ICU + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SUFFIX _61 + +/** + * \def U_DEF2_ICU_ENTRY_POINT_RENAME + * @internal + */ +/** + * \def U_DEF_ICU_ENTRY_POINT_RENAME + * @internal + */ +/** Glued version suffix function for renamers + * This value will change in the subsequent releases of ICU. + * If a custom suffix (such as matching library suffixes) is desired, this can be modified. + * Note that if present, platform.h may contain an earlier definition of this macro. + * \def U_ICU_ENTRY_POINT_RENAME + * @stable ICU 4.2 + */ + +#ifndef U_ICU_ENTRY_POINT_RENAME +#ifdef U_HAVE_LIB_SUFFIX +#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z +#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) +#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME) +#else +#define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y +#define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y) +#define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX) +#endif +#endif + +/** The current ICU library version as a dotted-decimal string. The patchlevel + * only appears in this string if it is non-zero. + * This value will change in the subsequent releases of ICU + * @stable ICU 2.4 + */ +#define U_ICU_VERSION "61.1" + +/** + * The current ICU library major version number as a string, for library name suffixes. + * This value will change in subsequent releases of ICU. + * + * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers + * into one string without dots ("48"). + * Since ICU 49, it is the double-digit major ICU version number. 
+ * See http://userguide.icu-project.org/design#TOC-Version-Numbers-in-ICU + * + * @stable ICU 2.6 + */ +#define U_ICU_VERSION_SHORT "61" + +#ifndef U_HIDE_INTERNAL_API +/** Data version in ICU4C. + * @internal ICU 4.4 Internal Use Only + **/ +#define U_ICU_DATA_VERSION "61.1" +#endif /* U_HIDE_INTERNAL_API */ + +/*=========================================================================== + * ICU collation framework version information + * Version info that can be obtained from a collator is affected by these + * numbers in a secret and magic way. Please use collator version as whole + *=========================================================================== + */ + +/** + * Collation runtime version (sort key generator, strcoll). + * If the version is different, sort keys for the same string could be different. + * This value may change in subsequent releases of ICU. + * @stable ICU 2.4 + */ +#define UCOL_RUNTIME_VERSION 9 + +/** + * Collation builder code version. + * When this is different, the same tailoring might result + * in assigning different collation elements to code points. + * This value may change in subsequent releases of ICU. + * @stable ICU 2.4 + */ +#define UCOL_BUILDER_VERSION 9 + +#ifndef U_HIDE_DEPRECATED_API +/** + * Constant 1. + * This was intended to be the version of collation tailorings, + * but instead the tailoring data carries a version number. + * @deprecated ICU 54 + */ +#define UCOL_TAILORINGS_VERSION 1 +#endif /* U_HIDE_DEPRECATED_API */ + +#endif diff --git a/vendor/icu/include/unicode/uversion.h b/vendor/icu/include/unicode/uversion.h new file mode 100644 index 0000000000..e40070ec1f --- /dev/null +++ b/vendor/icu/include/unicode/uversion.h @@ -0,0 +1,201 @@ +// © 2016 and later: Unicode, Inc. and others. 
+// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2000-2011, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* +* file name: uversion.h +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Created by: Vladimir Weinstein +* +* Gets included by utypes.h and Windows .rc files +*/ + +/** + * \file + * \brief C API: API for accessing ICU version numbers. + */ +/*===========================================================================*/ +/* Main ICU version information */ +/*===========================================================================*/ + +#ifndef UVERSION_H +#define UVERSION_H + +#include <unicode/umachine.h> + +/* Actual version info lives in uvernum.h */ +#include <unicode/uvernum.h> + +/** Maximum length of the copyright string. + * @stable ICU 2.4 + */ +#define U_COPYRIGHT_STRING_LENGTH 128 + +/** An ICU version consists of up to 4 numbers from 0..255. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_LENGTH 4 + +/** In a string, ICU version fields are delimited by dots. + * @stable ICU 2.4 + */ +#define U_VERSION_DELIMITER '.' + +/** The maximum length of an ICU version string. + * @stable ICU 2.4 + */ +#define U_MAX_VERSION_STRING_LENGTH 20 + +/** The binary form of a version on ICU APIs is an array of 4 uint8_t. + * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)). + * @stable ICU 2.4 + */ +typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; + +/*===========================================================================*/ +/* C++ namespace if supported. Versioned unless versioning is disabled. */ +/*===========================================================================*/ + +/** + * \def U_NAMESPACE_BEGIN + * This is used to begin a declaration of a public ICU C++ API. 
+ * When not compiling for C++, it does nothing. + * When compiling for C++, it begins an extern "C++" linkage block (to protect + * against cases in which an external client includes ICU header files inside + * an extern "C" linkage block). + * + * It also begins a versioned-ICU-namespace block. + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_END + * This is used to end a declaration of a public ICU C++ API. + * When not compiling for C++, it does nothing. + * When compiling for C++, it ends the extern "C++" block begun by + * U_NAMESPACE_BEGIN. + * + * It also ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN. + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_USE + * This is used to specify that the rest of the code uses the + * public ICU C++ API namespace. + * This is not invoked by default (U_USING_ICU_NAMESPACE defaults to 0), and we recommend leaving it off: + * See the "Recommended Build Options" section of the ICU4C readme + * (http://source.icu-project.org/repos/icu/icu/trunk/readme.html#RecBuild) + * @stable ICU 2.4 + */ + +/** + * \def U_NAMESPACE_QUALIFIER + * This is used to qualify that a function or class is part of + * the public ICU C++ API namespace. + * + * This macro is unnecessary since ICU 49 requires namespace support. + * You can just use "icu::" instead. + * @stable ICU 2.4 + */ + +/* Define C++ namespace symbols. 
*/ +#ifdef __cplusplus +# if U_DISABLE_RENAMING +# define U_ICU_NAMESPACE icu + namespace U_ICU_NAMESPACE { } +# else +# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu) + namespace U_ICU_NAMESPACE { } + namespace icu = U_ICU_NAMESPACE; +# endif + +# define U_NAMESPACE_BEGIN extern "C++" { namespace U_ICU_NAMESPACE { +# define U_NAMESPACE_END } } +# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE; +# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE:: + +# ifndef U_USING_ICU_NAMESPACE +# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ + defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ + defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) +# define U_USING_ICU_NAMESPACE 0 +# else +# define U_USING_ICU_NAMESPACE 0 +# endif +# endif +# if U_USING_ICU_NAMESPACE + U_NAMESPACE_USE +# endif +#else +# define U_NAMESPACE_BEGIN +# define U_NAMESPACE_END +# define U_NAMESPACE_USE +# define U_NAMESPACE_QUALIFIER +#endif + +/*===========================================================================*/ +/* General version helper functions. Definitions in putil.c */ +/*===========================================================================*/ + +/** + * Parse a string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. + * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A string with dotted-decimal version information, + * with up to four non-negative number fields with + * values of up to 255 each. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +u_versionFromString(UVersionInfo versionArray, const char *versionString); + +/** + * Parse a Unicode string with dotted-decimal version information and + * fill in a UVersionInfo structure with the result. 
+ * Definition of this function lives in putil.c + * + * @param versionArray The destination structure for the version information. + * @param versionString A Unicode string with dotted-decimal version + * information, with up to four non-negative number + * fields with values of up to 255 each. + * @stable ICU 4.2 + */ +U_STABLE void U_EXPORT2 +u_versionFromUString(UVersionInfo versionArray, const UChar *versionString); + + +/** + * Write a string with dotted-decimal version information according + * to the input UVersionInfo. + * Definition of this function lives in putil.c + * + * @param versionArray The version information to be written as a string. + * @param versionString A string buffer that will be filled in with + * a string corresponding to the numeric version + * information in versionArray. + * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH. + * @stable ICU 2.4 + */ +U_STABLE void U_EXPORT2 +u_versionToString(const UVersionInfo versionArray, char *versionString); + +/** + * Gets the ICU release version. The version array stores the version information + * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02. + * Definition of this function lives in putil.c + * + * @param versionArray the version # information, the result will be filled in + * @stable ICU 2.0 + */ +U_STABLE void U_EXPORT2 +u_getVersion(UVersionInfo versionArray); +#endif |