summaryrefslogtreecommitdiff
path: root/include/iprt/uni.h
diff options
context:
space:
mode:
authorLorry Tar Creator <lorry-tar-importer@baserock.org>2012-10-26 16:25:44 +0000
committer <>2012-11-12 12:15:52 +0000
commit58ed4748338f9466599adfc8a9171280ed99e23f (patch)
tree02027d99ded4fb56a64aa9489ac2eb487e7858ab /include/iprt/uni.h
downloadVirtualBox-58ed4748338f9466599adfc8a9171280ed99e23f.tar.gz
Imported from /home/lorry/working-area/delta_VirtualBox/VirtualBox-4.2.4.tar.bz2.VirtualBox-4.2.4
Diffstat (limited to 'include/iprt/uni.h')
-rw-r--r--include/iprt/uni.h397
1 files changed, 397 insertions, 0 deletions
diff --git a/include/iprt/uni.h b/include/iprt/uni.h
new file mode 100644
index 00000000..6f0c23ec
--- /dev/null
+++ b/include/iprt/uni.h
@@ -0,0 +1,397 @@
+/** @file
+ * IPRT - Unicode Code Points.
+ */
+
+/*
+ * Copyright (C) 2006-2007 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+#ifndef ___iprt_uni_h
+#define ___iprt_uni_h
+
+/** @defgroup grp_rt_uni RTUniCp - Unicode Code Points
+ * @ingroup grp_rt
+ * @{
+ */
+
+/** @def RTUNI_USE_WCTYPE
+ * Define RTUNI_USE_WCTYPE to not use the IPRT unicode data but the
+ * data which the C runtime library provides. */
+#ifdef DOXYGEN_RUNNING
+# define RTUNI_USE_WCTYPE
+#endif
+
+#include <iprt/types.h>
+#ifdef RTUNI_USE_WCTYPE
+# include <wctype.h>
+#endif
+
+RT_C_DECLS_BEGIN
+
+
+
+#ifndef RTUNI_USE_WCTYPE
+/**
+ * A unicode flags range.
+ * @internal
+ */
+typedef struct RTUNIFLAGSRANGE
+{
+ /** The first code point of the range. */
+ RTUNICP BeginCP;
+ /** The last + 1 code point of the range. */
+ RTUNICP EndCP;
+ /** Pointer to the array of case folded code points. */
+ const uint8_t *pafFlags;
+} RTUNIFLAGSRANGE;
+/** Pointer to a flags range.
+ * @internal */
+typedef RTUNIFLAGSRANGE *PRTUNIFLAGSRANGE;
+/** Pointer to a const flags range.
+ * @internal */
+typedef const RTUNIFLAGSRANGE *PCRTUNIFLAGSRANGE;
+
+/**
+ * A unicode case folded range.
+ * @internal
+ */
+typedef struct RTUNICASERANGE
+{
+ /** The first code point of the range. */
+ RTUNICP BeginCP;
+ /** The last + 1 code point of the range. */
+ RTUNICP EndCP;
+ /** Pointer to the array of case folded code points. */
+ PCRTUNICP paFoldedCPs;
+} RTUNICASERANGE;
+/** Pointer to a case folded range.
+ * @internal */
+typedef RTUNICASERANGE *PRTUNICASERANGE;
+/** Pointer to a const case folded range.
+ * @internal */
+typedef const RTUNICASERANGE *PCRTUNICASERANGE;
+
+/** @name Unicode Code Point Flags.
+ * @internal
+ * @{ */
+#define RTUNI_UPPER RT_BIT(0)
+#define RTUNI_LOWER RT_BIT(1)
+#define RTUNI_ALPHA RT_BIT(2)
+#define RTUNI_XDIGIT RT_BIT(3)
+#define RTUNI_DDIGIT RT_BIT(4)
+#define RTUNI_WSPACE RT_BIT(5)
+/*#define RTUNI_BSPACE RT_BIT(6) - later */
+/** When set, the codepoint requires further checking wrt NFC and NFD
+ * normalization. I.e. set when either of QC_NFD and QC_NFC are not Y. */
+#define RTUNI_QC_NFX RT_BIT(7)
+/** @} */
+
+
+/**
+ * Array of flags ranges.
+ * @internal
+ */
+extern RTDATADECL(const RTUNIFLAGSRANGE) g_aRTUniFlagsRanges[];
+
+/**
+ * Gets the flags for a unicode code point.
+ *
+ * @returns The flag mask. (RTUNI_*)
+ * @param CodePoint The unicode code point.
+ * @internal
+ */
+DECLINLINE(RTUNICP) rtUniCpFlags(RTUNICP CodePoint)
+{
+ PCRTUNIFLAGSRANGE pCur = &g_aRTUniFlagsRanges[0];
+ do
+ {
+ if (pCur->EndCP > CodePoint)
+ {
+ if (pCur->BeginCP <= CodePoint)
+ return pCur->pafFlags[CodePoint - pCur->BeginCP];
+ break;
+ }
+ pCur++;
+ } while (pCur->EndCP != RTUNICP_MAX);
+ return 0;
+}
+
+
+/**
+ * Checks if a unicode code point is upper case.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
+{
+ return (rtUniCpFlags(CodePoint) & RTUNI_UPPER) != 0;
+}
+
+
+/**
+ * Checks if a unicode code point is lower case.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
+{
+ return (rtUniCpFlags(CodePoint) & RTUNI_LOWER) != 0;
+}
+
+
+/**
+ * Checks if a unicode code point is alphabetic.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
+{
+ return (rtUniCpFlags(CodePoint) & RTUNI_ALPHA) != 0;
+}
+
+
+/**
+ * Checks if a unicode code point is a decimal digit.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
+{
+ return (rtUniCpFlags(CodePoint) & RTUNI_DDIGIT) != 0;
+}
+
+
+/**
+ * Checks if a unicode code point is a hexadecimal digit.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
+{
+ return (rtUniCpFlags(CodePoint) & RTUNI_XDIGIT) != 0;
+}
+
+
+/**
+ * Checks if a unicode code point is white space.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
+{
+ return (rtUniCpFlags(CodePoint) & RTUNI_WSPACE) != 0;
+}
+
+
+
+/**
+ * Array of uppercase ranges.
+ * @internal
+ */
+extern RTDATADECL(const RTUNICASERANGE) g_aRTUniUpperRanges[];
+
+/**
+ * Array of lowercase ranges.
+ * @internal
+ */
+extern RTDATADECL(const RTUNICASERANGE) g_aRTUniLowerRanges[];
+
+
+/**
+ * Folds a unicode code point using the specified range array.
+ *
+ * @returns FOlded code point.
+ * @param CodePoint The unicode code point to fold.
+ * @param pCur The case folding range to use.
+ */
+DECLINLINE(RTUNICP) rtUniCpFold(RTUNICP CodePoint, PCRTUNICASERANGE pCur)
+{
+ do
+ {
+ if (pCur->EndCP > CodePoint)
+ {
+ if (pCur->BeginCP <= CodePoint)
+ CodePoint = pCur->paFoldedCPs[CodePoint - pCur->BeginCP];
+ break;
+ }
+ pCur++;
+ } while (pCur->EndCP != RTUNICP_MAX);
+ return CodePoint;
+}
+
+
+/**
+ * Folds a unicode code point to upper case.
+ *
+ * @returns Folded code point.
+ * @param CodePoint The unicode code point to fold.
+ */
+DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
+{
+ return rtUniCpFold(CodePoint, &g_aRTUniUpperRanges[0]);
+}
+
+
+/**
+ * Folds a unicode code point to lower case.
+ *
+ * @returns Folded code point.
+ * @param CodePoint The unicode code point to fold.
+ */
+DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
+{
+ return rtUniCpFold(CodePoint, &g_aRTUniLowerRanges[0]);
+}
+
+
+#else /* RTUNI_USE_WCTYPE */
+
+
+/**
+ * Checks if a unicode code point is upper case.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsUpper(RTUNICP CodePoint)
+{
+ return !!iswupper(CodePoint);
+}
+
+
+/**
+ * Checks if a unicode code point is lower case.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsLower(RTUNICP CodePoint)
+{
+ return !!iswlower(CodePoint);
+}
+
+
+/**
+ * Checks if a unicode code point is alphabetic.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsAlphabetic(RTUNICP CodePoint)
+{
+ return !!iswalpha(CodePoint);
+}
+
+
+/**
+ * Checks if a unicode code point is a decimal digit.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsDecDigit(RTUNICP CodePoint)
+{
+ return !!iswdigit(CodePoint);
+}
+
+
+/**
+ * Checks if a unicode code point is a hexadecimal digit.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsHexDigit(RTUNICP CodePoint)
+{
+ return !!iswxdigit(CodePoint);
+}
+
+
+/**
+ * Checks if a unicode code point is white space.
+ *
+ * @returns true if it is.
+ * @returns false if it isn't.
+ * @param CodePoint The code point.
+ */
+DECLINLINE(bool) RTUniCpIsSpace(RTUNICP CodePoint)
+{
+ return !!iswspace(CodePoint);
+}
+
+
+/**
+ * Folds a unicode code point to upper case.
+ *
+ * @returns Folded code point.
+ * @param CodePoint The unicode code point to fold.
+ */
+DECLINLINE(RTUNICP) RTUniCpToUpper(RTUNICP CodePoint)
+{
+ return towupper(CodePoint);
+}
+
+
+/**
+ * Folds a unicode code point to lower case.
+ *
+ * @returns Folded code point.
+ * @param CodePoint The unicode code point to fold.
+ */
+DECLINLINE(RTUNICP) RTUniCpToLower(RTUNICP CodePoint)
+{
+ return towlower(CodePoint);
+}
+
+
+#endif /* RTUNI_USE_WCTYPE */
+
+
+/**
+ * Frees a unicode string.
+ *
+ * @param pusz The string to free.
+ */
+RTDECL(void) RTUniFree(PRTUNICP pusz);
+
+
+RT_C_DECLS_END
+/** @} */
+
+
+#endif
+