diff options
author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2012-10-26 16:25:44 +0000 |
---|---|---|
committer | <> | 2012-11-12 12:15:52 +0000 |
commit | 58ed4748338f9466599adfc8a9171280ed99e23f (patch) | |
tree | 02027d99ded4fb56a64aa9489ac2eb487e7858ab /include/iprt/uni.h | |
download | VirtualBox-58ed4748338f9466599adfc8a9171280ed99e23f.tar.gz |
Imported from /home/lorry/working-area/delta_VirtualBox/VirtualBox-4.2.4.tar.bz2.VirtualBox-4.2.4
Diffstat (limited to 'include/iprt/uni.h')
-rw-r--r-- | include/iprt/uni.h | 397 |
1 files changed, 397 insertions, 0 deletions
diff --git a/include/iprt/uni.h b/include/iprt/uni.h new file mode 100644 index 00000000..6f0c23ec --- /dev/null +++ b/include/iprt/uni.h @@ -0,0 +1,397 @@ +/** @file + * IPRT - Unicode Code Points. + */ + +/* + * Copyright (C) 2006-2007 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL) only, as it comes in the "COPYING.CDDL" file of the + * VirtualBox OSE distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. + * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + */ + +#ifndef ___iprt_uni_h +#define ___iprt_uni_h + +/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points + * @ingroup grp_rt + * @{ + */ + +/** @def RTUNI_USE_WCTYPE + * Define RTUNI_USE_WCTYPE to not use the IPRT unicode data but the + * data which the C runtime library provides. */ +#ifdef DOXYGEN_RUNNING +# define RTUNI_USE_WCTYPE +#endif + +#include <iprt/types.h> +#ifdef RTUNI_USE_WCTYPE +# include <wctype.h> +#endif + +RT_C_DECLS_BEGIN + + + +#ifndef RTUNI_USE_WCTYPE +/** + * A unicode flags range. + * @internal + */ +typedef struct RTUNIFLAGSRANGE +{ + /** The first code point of the range. */ + RTUNICP BeginCP; + /** The last + 1 code point of the range. */ + RTUNICP EndCP; + /** Pointer to the array of case folded code points. */ + const uint8_t *pafFlags; +} RTUNIFLAGSRANGE; +/** Pointer to a flags range. + * @internal */ +typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE; +/** Pointer to a const flags range. + * @internal */ +typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE; + +/** + * A unicode case folded range. + * @internal + */ +typedef struct RTUNICASERANGE +{ + /** The first code point of the range. */ + RTUNICP BeginCP; + /** The last + 1 code point of the range. */ + RTUNICP EndCP; + /** Pointer to the array of case folded code points. */ + PCRTUNICP paFoldedCPs; +} RTUNICASERANGE; +/** Pointer to a case folded range. + * @internal */ +typedef RTUNICASERANGE *PRTUNICASERANGE; +/** Pointer to a const case folded range. + * @internal */ +typedef const RTUNICASERANGE *PCRTUNICASERANGE; + +/** @name Unicode Code Point Flags. + * @internal + * @{ */ +#define RTUNI_UPPER RT_BIT(0) +#define RTUNI_LOWER RT_BIT(1) +#define RTUNI_ALPHA RT_BIT(2) +#define RTUNI_XDIGIT RT_BIT(3) +#define RTUNI_DDIGIT RT_BIT(4) +#define RTUNI_WSPACE RT_BIT(5) +/*#define RTUNI_BSPACE RT_BIT(6) - later */ +/** When set, the codepoint requires further checking wrt NFC and NFD + * normalization. I.e. set when either of QC_NFD and QC_NFC are not Y. */ +#define RTUNI_QC_NFX RT_BIT(7) +/** @} */ + + +/** + * Array of flags ranges. + * @internal + */ +extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[]; + +/** + * Gets the flags for a unicode code point. + * + * @returns The flag mask. (RTUNI_*) + * @param CodePoint The unicode code point. + * @internal + */ +DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint) +{ + PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0]; + do + { + if (pCur->EndCP > CodePoint) + { + if (pCur->BeginCP <= CodePoint) + return pCur->pafFlags[CodePoint - pCur->BeginCP]; + break; + } + pCur++; + } while (pCur->EndCP != RTUNICP_MAX); + return 0; +} + + +/** + * Checks if a unicode code point is upper case. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint) +{ + return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0; +} + + +/** + * Checks if a unicode code point is lower case. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint) +{ + return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0; +} + + +/** + * Checks if a unicode code point is alphabetic. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint) +{ + return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0; +} + + +/** + * Checks if a unicode code point is a decimal digit. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint) +{ + return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0; +} + + +/** + * Checks if a unicode code point is a hexadecimal digit. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint) +{ + return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0; +} + + +/** + * Checks if a unicode code point is white space. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint) +{ + return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0; +} + + + +/** + * Array of uppercase ranges. + * @internal + */ +extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[]; + +/** + * Array of lowercase ranges. + * @internal + */ +extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[]; + + +/** + * Folds a unicode code point using the specified range array. + * + * @returns FOlded code point. + * @param CodePoint The unicode code point to fold. + * @param pCur The case folding range to use. + */ +DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur) +{ + do + { + if (pCur->EndCP > CodePoint) + { + if (pCur->BeginCP <= CodePoint) + CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP]; + break; + } + pCur++; + } while (pCur->EndCP != RTUNICP_MAX); + return CodePoint; +} + + +/** + * Folds a unicode code point to upper case. + * + * @returns Folded code point. + * @param CodePoint The unicode code point to fold. + */ +DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint) +{ + return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]); +} + + +/** + * Folds a unicode code point to lower case. + * + * @returns Folded code point. + * @param CodePoint The unicode code point to fold. + */ +DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint) +{ + return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]); +} + + +#else /* RTUNI_USE_WCTYPE */ + + +/** + * Checks if a unicode code point is upper case. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint) +{ + return !!iswupper(CodePoint); +} + + +/** + * Checks if a unicode code point is lower case. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint) +{ + return !!iswlower(CodePoint); +} + + +/** + * Checks if a unicode code point is alphabetic. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint) +{ + return !!iswalpha(CodePoint); +} + + +/** + * Checks if a unicode code point is a decimal digit. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint) +{ + return !!iswdigit(CodePoint); +} + + +/** + * Checks if a unicode code point is a hexadecimal digit. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint) +{ + return !!iswxdigit(CodePoint); +} + + +/** + * Checks if a unicode code point is white space. + * + * @returns true if it is. + * @returns false if it isn't. + * @param CodePoint The code point. + */ +DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint) +{ + return !!iswspace(CodePoint); +} + + +/** + * Folds a unicode code point to upper case. + * + * @returns Folded code point. + * @param CodePoint The unicode code point to fold. + */ +DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint) +{ + return towupper(CodePoint); +} + + +/** + * Folds a unicode code point to lower case. + * + * @returns Folded code point. + * @param CodePoint The unicode code point to fold. + */ +DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint) +{ + return towlower(CodePoint); +} + + +#endif /* RTUNI_USE_WCTYPE */ + + +/** + * Frees a unicode string. + * + * @param pusz The string to free. + */ +RTDECL(void) RTUniFree(PRTUNICP pusz); + + +RT_C_DECLS_END +/** @} */ + + +#endif + |