summaryrefslogtreecommitdiff
path: root/gcc/cppcharset.c
diff options
context:
space:
mode:
authorneil <neil@138bc75d-0d04-0410-961f-82ee72b054a4>2003-04-20 07:29:23 +0000
committerneil <neil@138bc75d-0d04-0410-961f-82ee72b054a4>2003-04-20 07:29:23 +0000
commit2cbf1359092416d271b02844198e7ad6e5e56284 (patch)
tree01f484dfa9250821cfb9113a0a797ca2bec80e59 /gcc/cppcharset.c
parent8d6d7930ebb0b4a3a39a502fdbca5842d1f226a4 (diff)
downloadgcc-2cbf1359092416d271b02844198e7ad6e5e56284.tar.gz
* Makefile.in (LIBCPP_OBJS): Add cppcharset.o.
(cppcharset.o): New target. * c-lex.c (is_extended_char): Move to cppcharset.c. (utf8_extend_token): Delete. * cppcharset.c: New file. * cpphash.h (_cpp_valid_ucn): New. * cpplex.c (lex_identifier): Update prototype. (continues_identifier_p): Rename forms_identifier_p. Handle UCN escapes. (maybe_read_ucs): Rename maybe_read_ucn. Update to use code in cppcharset.c. (lex_number, lex_identifier, cpp_parse_escape): Update. (_cpp_lex_direct): Update to handle UCNs. (cpp_avoid_paste): Don't paste to form a UCN. testsuite: * ucs.c: Update diagnostic messages. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@65845 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/cppcharset.c')
-rw-r--r--gcc/cppcharset.c591
1 files changed, 591 insertions, 0 deletions
diff --git a/gcc/cppcharset.c b/gcc/cppcharset.c
new file mode 100644
index 00000000000..900e4516fbc
--- /dev/null
+++ b/gcc/cppcharset.c
@@ -0,0 +1,591 @@
+/* CPP Library - charsets
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ Free Software Foundation, Inc.
+
+ Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "cpplib.h"
+#include "cpphash.h"
+
+static int ucn_valid_in_identifier PARAMS ((cpp_reader *, cppchar_t));
+
+/* [lex.charset]: The character designated by the universal character
+   name \UNNNNNNNN is that character whose character short name in
+   ISO/IEC 10646 is NNNNNNNN; the character designated by the
+   universal character name \uNNNN is that character whose character
+   short name in ISO/IEC 10646 is 0000NNNN.  If the hexadecimal value
+   for a universal character name is less than 0x20 or in the range
+   0x7F-0x9F (inclusive), or if the universal character name
+   designates a character in the basic source character set, then the
+   program is ill-formed.
+
+   *PSTR must be preceded by "\u" or "\U" (anything else aborts); it
+   is assumed that the buffer end is delimited by a non-hex digit.
+
+   Returns zero, leaving *PSTR untouched and emitting no diagnostic,
+   if UCNs are not part of the relevant standard (neither C++ nor C99)
+   or if the target is EBCDIC.
+
+   Otherwise up to 4 (for 'u') or 8 (for 'U') hex digits are consumed,
+   *PSTR is updated to point one beyond the last digit consumed (i.e.
+   one beyond the UCN, or at the first non-hex character), and a
+   non-zero value is returned whether the UCN is valid or not: the
+   value of the digits read, or 1 if that value is zero.  An error is
+   emitted if the UCN has too few digits, if its value is not a
+   permitted universal character, or, when IDENTIFIER_POS is non-zero,
+   if it is not permitted at that position of an identifier.
+
+   IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
+   an identifier, or 2 otherwise.
+*/
+
+cppchar_t
+_cpp_valid_ucn (pfile, pstr, identifier_pos)
+     cpp_reader *pfile;
+     const uchar **pstr;
+     int identifier_pos;
+{
+  cppchar_t result, c;
+  unsigned int length;
+  int spelling_len;
+  const uchar *str = *pstr;
+  /* BASE points at the backslash so diagnostics can quote the whole
+     escape as written.  */
+  const uchar *base = str - 2;
+
+  /* Only attempt to interpret a UCS for C++ and C99.  */
+  if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
+    return 0;
+
+  /* We don't accept UCNs for an EBCDIC target.  */
+  if (CPP_OPTION (pfile, EBCDIC))
+    return 0;
+
+  if (str[-1] == 'u')
+    length = 4;
+  else if (str[-1] == 'U')
+    length = 8;
+  else
+    abort ();
+
+  /* Accumulate hex digits; LENGTH counts the digits still missing.  */
+  result = 0;
+  do
+    {
+      c = *str;
+      if (!ISXDIGIT (c))
+        break;
+      str++;
+      result = (result << 4) + hex_value (c);
+    }
+  while (--length);
+
+  *pstr = str;
+
+  /* The precision argument of "%.*s" must be an int, whereas a
+     pointer difference has type ptrdiff_t; convert explicitly.  The
+     value is at most 10 (backslash, 'u'/'U' and eight digits).  */
+  spelling_len = (int) (str - base);
+
+  if (length)
+    /* Ran into a non-hex character before reading all the digits.  */
+    cpp_error (pfile, DL_ERROR, "incomplete universal character name %.*s",
+               spelling_len, base);
+  /* The standard permits $, @ and ` to be specified as UCNs.  We use
+     hex escapes so that this also works with EBCDIC hosts.  */
+  else if ((result < 0xa0
+            && (result != 0x24 && result != 0x40 && result != 0x60))
+           || (result & 0x80000000)
+           || (result >= 0xD800 && result <= 0xDFFF))
+    {
+      cpp_error (pfile, DL_ERROR, "%.*s is not a valid universal character",
+                 spelling_len, base);
+    }
+  else if (identifier_pos)
+    {
+      int validity = ucn_valid_in_identifier (pfile, result);
+
+      if (validity == 0)
+        cpp_error (pfile, DL_ERROR,
+                   "universal character %.*s is not valid in an identifier",
+                   spelling_len, base);
+      else if (validity == 2 && identifier_pos == 1)
+        cpp_error (pfile, DL_ERROR,
+   "universal character %.*s is not valid at the start of an identifier",
+                   spelling_len, base);
+    }
+
+  /* Zero is reserved for the "not interpreted as a UCN" returns
+     above, so never hand it back for a UCN that was consumed.  */
+  if (result == 0)
+    result = 1;
+
+  return result;
+}
+
+/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
+   the start of an identifier, and 0 if C is not valid in an
+   identifier.  We assume C has already gone through the checks of
+   _cpp_valid_ucn.
+
+   The ranges are tested in three groups: those consulted when the
+   c99 option is set or we are not pedantic, those consulted when the
+   cplusplus option is set or we are not pedantic, and finally those
+   consulted unconditionally.  The first matching range decides the
+   result, so the order of the tests matters where ranges overlap.  */
+static int
+ucn_valid_in_identifier (pfile, c)
+     cpp_reader *pfile;
+     cppchar_t c;
+{
+  /* None of the valid chars are outside the Basic Multilingual Plane (the
+     low 16 bits).  */
+  if (c > 0xffff)
+    return 0;
+
+  if (CPP_OPTION (pfile, c99) || !CPP_PEDANTIC (pfile))
+    {
+      /* Latin.  */
+      if (c == 0x00aa || c == 0x00ba || c == 0x207f || c == 0x1e9b)
+        return 1;
+
+      /* Greek.  */
+      if (c == 0x0386)
+        return 1;
+
+      /* Cyrillic.  */
+      if (c == 0x040c)
+        return 1;
+
+      /* Hebrew.  */
+      if ((c >= 0x05b0 && c <= 0x05b9)
+          || (c >= 0x05bb && c <= 0x05bd)
+          || c == 0x05bf
+          || (c >= 0x05c1 && c <= 0x05c2))
+        return 1;
+
+      /* Arabic.  */
+      if ((c >= 0x06d0 && c <= 0x06dc)
+          || c == 0x06e8
+          || (c >= 0x06ea && c <= 0x06ed))
+        return 1;
+
+      /* Devanagari */
+      if ((c >= 0x0901 && c <= 0x0903)
+          || (c >= 0x093e && c <= 0x094d)
+          || (c >= 0x0950 && c <= 0x0952)
+          || c == 0x0963)
+        return 1;
+
+      /* Bengali */
+      if ((c >= 0x0981 && c <= 0x0983)
+          || (c >= 0x09be && c <= 0x09c4)
+          || (c >= 0x09c7 && c <= 0x09c8)
+          || (c >= 0x09cb && c <= 0x09cd)
+          || (c >= 0x09e2 && c <= 0x09e3))
+        return 1;
+
+      /* Gurmukhi */
+      if (c == 0x0a02
+          || (c >= 0x0a3e && c <= 0x0a42)
+          || (c >= 0x0a47 && c <= 0x0a48)
+          || (c >= 0x0a4b && c <= 0x0a4d)
+          || (c == 0x0a74))
+        return 1;
+
+      /* Gujarati */
+      if ((c >= 0x0a81 && c <= 0x0a83)
+          || (c >= 0x0abd && c <= 0x0ac5)
+          || (c >= 0x0ac7 && c <= 0x0ac9)
+          || (c >= 0x0acb && c <= 0x0acd)
+          || (c == 0x0ad0))
+        return 1;
+
+      /* Oriya */
+      if ((c >= 0x0b01 && c <= 0x0b03)
+          || (c >= 0x0b3e && c <= 0x0b43)
+          || (c >= 0x0b47 && c <= 0x0b48)
+          || (c >= 0x0b4b && c <= 0x0b4d))
+        return 1;
+
+      /* Tamil.  The last range starts at 0x0bca; 0x0bc9 is not in the
+         C99 list.  */
+      if ((c >= 0x0b82 && c <= 0x0b83)
+          || (c >= 0x0bbe && c <= 0x0bc2)
+          || (c >= 0x0bc6 && c <= 0x0bc8)
+          || (c >= 0x0bca && c <= 0x0bcd))
+        return 1;
+
+      /* Telugu */
+      if ((c >= 0x0c01 && c <= 0x0c03)
+          || (c >= 0x0c3e && c <= 0x0c44)
+          || (c >= 0x0c46 && c <= 0x0c48)
+          || (c >= 0x0c4a && c <= 0x0c4d))
+        return 1;
+
+      /* Kannada */
+      if ((c >= 0x0c82 && c <= 0x0c83)
+          || (c >= 0x0cbe && c <= 0x0cc4)
+          || (c >= 0x0cc6 && c <= 0x0cc8)
+          || (c >= 0x0cca && c <= 0x0ccd)
+          || c == 0x0cde)
+        return 1;
+
+      /* Malayalam */
+      if ((c >= 0x0d02 && c <= 0x0d03)
+          || (c >= 0x0d3e && c <= 0x0d43)
+          || (c >= 0x0d46 && c <= 0x0d48)
+          || (c >= 0x0d4a && c <= 0x0d4d))
+        return 1;
+
+      /* Thai digits.  They lie inside the second Thai range below, and
+         are also listed under "Digits" further down.  C99 does not
+         allow a digit to start an identifier, so in C99 mode classify
+         them here, before the enclosing Thai range claims them as
+         fully valid.  Outside C99 mode the result is unchanged.  */
+      if (CPP_OPTION (pfile, c99) && c >= 0x0e50 && c <= 0x0e59)
+        return 2;
+
+      /* Thai */
+      if ((c >= 0x0e01 && c <= 0x0e3a)
+          || (c >= 0x0e40 && c <= 0x0e5b))
+        return 1;
+
+      /* Lao */
+      if ((c >= 0x0ead && c <= 0x0eae)
+          || (c >= 0x0eb0 && c <= 0x0eb9)
+          || (c >= 0x0ebb && c <= 0x0ebd)
+          || (c >= 0x0ec0 && c <= 0x0ec4)
+          || c == 0x0ec6
+          || (c >= 0x0ec8 && c <= 0x0ecd)
+          || (c >= 0x0edc && c <= 0x0edd))
+        return 1;
+
+      /* Tibetan.  */
+      if (c == 0x0f00
+          || (c >= 0x0f18 && c <= 0x0f19)
+          || c == 0x0f35
+          || c == 0x0f37
+          || c == 0x0f39
+          || (c >= 0x0f3e && c <= 0x0f47)
+          || (c >= 0x0f49 && c <= 0x0f69)
+          || (c >= 0x0f71 && c <= 0x0f84)
+          || (c >= 0x0f86 && c <= 0x0f8b)
+          || (c >= 0x0f90 && c <= 0x0f95)
+          || c == 0x0f97
+          || (c >= 0x0f99 && c <= 0x0fad)
+          || (c >= 0x0fb1 && c <= 0x0fb7)
+          || c == 0x0fb9)
+        return 1;
+
+      /* Katakana */
+      if ((c >= 0x30a1 && c <= 0x30f6)
+          || (c >= 0x30fb && c <= 0x30fc))
+        return 1;
+
+      /* CJK Unified Ideographs.  */
+      if (c >= 0x4e00 && c <= 0x9fa5)
+        return 1;
+
+      /* Hangul.  */
+      if (c >= 0xac00 && c <= 0xd7a3)
+        return 1;
+
+      /* Digits: valid in an identifier, but not as its first
+         character.  */
+      if ((c >= 0x0660 && c <= 0x0669)
+          || (c >= 0x06f0 && c <= 0x06f9)
+          || (c >= 0x0966 && c <= 0x096f)
+          || (c >= 0x09e6 && c <= 0x09ef)
+          || (c >= 0x0a66 && c <= 0x0a6f)
+          || (c >= 0x0ae6 && c <= 0x0aef)
+          || (c >= 0x0b66 && c <= 0x0b6f)
+          || (c >= 0x0be7 && c <= 0x0bef)
+          || (c >= 0x0c66 && c <= 0x0c6f)
+          || (c >= 0x0ce6 && c <= 0x0cef)
+          || (c >= 0x0d66 && c <= 0x0d6f)
+          || (c >= 0x0e50 && c <= 0x0e59)
+          || (c >= 0x0ed0 && c <= 0x0ed9)
+          || (c >= 0x0f20 && c <= 0x0f33))
+        return 2;
+
+      /* Special characters.  */
+      if (c == 0x00b5
+          || c == 0x00b7
+          || (c >= 0x02b0 && c <= 0x02b8)
+          || c == 0x02bb
+          || (c >= 0x02bd && c <= 0x02c1)
+          || (c >= 0x02d0 && c <= 0x02d1)
+          || (c >= 0x02e0 && c <= 0x02e4)
+          || c == 0x037a
+          || c == 0x0559
+          || c == 0x093d
+          || c == 0x0b3d
+          || c == 0x1fbe
+          || (c >= 0x203f && c <= 0x2040)
+          || c == 0x2102
+          || c == 0x2107
+          || (c >= 0x210a && c <= 0x2113)
+          || c == 0x2115
+          || (c >= 0x2118 && c <= 0x211d)
+          || c == 0x2124
+          || c == 0x2126
+          || c == 0x2128
+          || (c >= 0x212a && c <= 0x2131)
+          || (c >= 0x2133 && c <= 0x2138)
+          || (c >= 0x2160 && c <= 0x2182)
+          || (c >= 0x3005 && c <= 0x3007)
+          || (c >= 0x3021 && c <= 0x3029))
+        return 1;
+    }
+
+  if (CPP_OPTION (pfile, cplusplus) || !CPP_PEDANTIC (pfile))
+    {
+      /* Greek.  */
+      if (c == 0x0384)
+        return 1;
+
+      /* Cyrillic.  */
+      if (c == 0x040d)
+        return 1;
+
+      /* Hebrew.  */
+      if (c >= 0x05f3 && c <= 0x05f4)
+        return 1;
+
+      /* Lao.  */
+      if ((c >= 0x0ead && c <= 0x0eb0)
+          || (c == 0x0eb2)
+          || (c == 0x0eb3)
+          || (c == 0x0ebd)
+          || (c >= 0x0ec0 && c <= 0x0ec4)
+          || (c == 0x0ec6))
+        return 1;
+
+      /* Hiragana */
+      if (c == 0x3094
+          || (c >= 0x309d && c <= 0x309e))
+        return 1;
+
+      /* Katakana */
+      if ((c >= 0x30a1 && c <= 0x30fe))
+        return 1;
+
+      /* Hangul */
+      if ((c >= 0x1100 && c <= 0x1159)
+          || (c >= 0x1161 && c <= 0x11a2)
+          || (c >= 0x11a8 && c <= 0x11f9))
+        return 1;
+
+      /* CJK Unified Ideographs */
+      if ((c >= 0xf900 && c <= 0xfa2d)
+          || (c >= 0xfb1f && c <= 0xfb36)
+          || (c >= 0xfb38 && c <= 0xfb3c)
+          || (c == 0xfb3e)
+          || (c >= 0xfb40 && c <= 0xfb41)
+          || (c >= 0xfb42 && c <= 0xfb44)
+          || (c >= 0xfb46 && c <= 0xfbb1)
+          || (c >= 0xfbd3 && c <= 0xfd3f)
+          || (c >= 0xfd50 && c <= 0xfd8f)
+          || (c >= 0xfd92 && c <= 0xfdc7)
+          || (c >= 0xfdf0 && c <= 0xfdfb)
+          || (c >= 0xfe70 && c <= 0xfe72)
+          || (c == 0xfe74)
+          || (c >= 0xfe76 && c <= 0xfefc)
+          || (c >= 0xff21 && c <= 0xff3a)
+          || (c >= 0xff41 && c <= 0xff5a)
+          || (c >= 0xff66 && c <= 0xffbe)
+          || (c >= 0xffc2 && c <= 0xffc7)
+          || (c >= 0xffca && c <= 0xffcf)
+          || (c >= 0xffd2 && c <= 0xffd7)
+          || (c >= 0xffda && c <= 0xffdc)
+          || (c >= 0x4e00 && c <= 0x9fa5))
+        return 1;
+    }
+
+  /* The remaining ranges are consulted regardless of the language
+     options tested above.  */
+
+  /* Latin */
+  if ((c >= 0x00c0 && c <= 0x00d6)
+      || (c >= 0x00d8 && c <= 0x00f6)
+      || (c >= 0x00f8 && c <= 0x01f5)
+      || (c >= 0x01fa && c <= 0x0217)
+      || (c >= 0x0250 && c <= 0x02a8)
+      || (c >= 0x1e00 && c <= 0x1e9a)
+      || (c >= 0x1ea0 && c <= 0x1ef9))
+    return 1;
+
+  /* Greek */
+  if ((c >= 0x0388 && c <= 0x038a)
+      || (c == 0x038c)
+      || (c >= 0x038e && c <= 0x03a1)
+      || (c >= 0x03a3 && c <= 0x03ce)
+      || (c >= 0x03d0 && c <= 0x03d6)
+      || (c == 0x03da)
+      || (c == 0x03dc)
+      || (c == 0x03de)
+      || (c == 0x03e0)
+      || (c >= 0x03e2 && c <= 0x03f3)
+      || (c >= 0x1f00 && c <= 0x1f15)
+      || (c >= 0x1f18 && c <= 0x1f1d)
+      || (c >= 0x1f20 && c <= 0x1f45)
+      || (c >= 0x1f48 && c <= 0x1f4d)
+      || (c >= 0x1f50 && c <= 0x1f57)
+      || (c == 0x1f59)
+      || (c == 0x1f5b)
+      || (c == 0x1f5d)
+      || (c >= 0x1f5f && c <= 0x1f7d)
+      || (c >= 0x1f80 && c <= 0x1fb4)
+      || (c >= 0x1fb6 && c <= 0x1fbc)
+      || (c >= 0x1fc2 && c <= 0x1fc4)
+      || (c >= 0x1fc6 && c <= 0x1fcc)
+      || (c >= 0x1fd0 && c <= 0x1fd3)
+      || (c >= 0x1fd6 && c <= 0x1fdb)
+      || (c >= 0x1fe0 && c <= 0x1fec)
+      || (c >= 0x1ff2 && c <= 0x1ff4)
+      || (c >= 0x1ff6 && c <= 0x1ffc))
+    return 1;
+
+  /* Cyrillic */
+  if ((c >= 0x0401 && c <= 0x040c)
+      || (c >= 0x040f && c <= 0x044f)
+      || (c >= 0x0451 && c <= 0x045c)
+      || (c >= 0x045e && c <= 0x0481)
+      || (c >= 0x0490 && c <= 0x04c4)
+      || (c >= 0x04c7 && c <= 0x04c8)
+      || (c >= 0x04cb && c <= 0x04cc)
+      || (c >= 0x04d0 && c <= 0x04eb)
+      || (c >= 0x04ee && c <= 0x04f5)
+      || (c >= 0x04f8 && c <= 0x04f9))
+    return 1;
+
+  /* Armenian */
+  if ((c >= 0x0531 && c <= 0x0556)
+      || (c >= 0x0561 && c <= 0x0587))
+    return 1;
+
+  /* Hebrew */
+  if ((c >= 0x05d0 && c <= 0x05ea)
+      || (c >= 0x05f0 && c <= 0x05f2))
+    return 1;
+
+  /* Arabic */
+  if ((c >= 0x0621 && c <= 0x063a)
+      || (c >= 0x0640 && c <= 0x0652)
+      || (c >= 0x0670 && c <= 0x06b7)
+      || (c >= 0x06ba && c <= 0x06be)
+      || (c >= 0x06c0 && c <= 0x06ce)
+      || (c >= 0x06e5 && c <= 0x06e7))
+    return 1;
+
+  /* Devanagari */
+  if ((c >= 0x0905 && c <= 0x0939)
+      || (c >= 0x0958 && c <= 0x0962))
+    return 1;
+
+  /* Bengali */
+  if ((c >= 0x0985 && c <= 0x098c)
+      || (c >= 0x098f && c <= 0x0990)
+      || (c >= 0x0993 && c <= 0x09a8)
+      || (c >= 0x09aa && c <= 0x09b0)
+      || (c == 0x09b2)
+      || (c >= 0x09b6 && c <= 0x09b9)
+      || (c >= 0x09dc && c <= 0x09dd)
+      || (c >= 0x09df && c <= 0x09e1)
+      || (c >= 0x09f0 && c <= 0x09f1))
+    return 1;
+
+  /* Gurmukhi */
+  if ((c >= 0x0a05 && c <= 0x0a0a)
+      || (c >= 0x0a0f && c <= 0x0a10)
+      || (c >= 0x0a13 && c <= 0x0a28)
+      || (c >= 0x0a2a && c <= 0x0a30)
+      || (c >= 0x0a32 && c <= 0x0a33)
+      || (c >= 0x0a35 && c <= 0x0a36)
+      || (c >= 0x0a38 && c <= 0x0a39)
+      || (c >= 0x0a59 && c <= 0x0a5c)
+      || (c == 0x0a5e))
+    return 1;
+
+  /* Gujarati */
+  if ((c >= 0x0a85 && c <= 0x0a8b)
+      || (c == 0x0a8d)
+      || (c >= 0x0a8f && c <= 0x0a91)
+      || (c >= 0x0a93 && c <= 0x0aa8)
+      || (c >= 0x0aaa && c <= 0x0ab0)
+      || (c >= 0x0ab2 && c <= 0x0ab3)
+      || (c >= 0x0ab5 && c <= 0x0ab9)
+      || (c == 0x0ae0))
+    return 1;
+
+  /* Oriya */
+  if ((c >= 0x0b05 && c <= 0x0b0c)
+      || (c >= 0x0b0f && c <= 0x0b10)
+      || (c >= 0x0b13 && c <= 0x0b28)
+      || (c >= 0x0b2a && c <= 0x0b30)
+      || (c >= 0x0b32 && c <= 0x0b33)
+      || (c >= 0x0b36 && c <= 0x0b39)
+      || (c >= 0x0b5c && c <= 0x0b5d)
+      || (c >= 0x0b5f && c <= 0x0b61))
+    return 1;
+
+  /* Tamil */
+  if ((c >= 0x0b85 && c <= 0x0b8a)
+      || (c >= 0x0b8e && c <= 0x0b90)
+      || (c >= 0x0b92 && c <= 0x0b95)
+      || (c >= 0x0b99 && c <= 0x0b9a)
+      || (c == 0x0b9c)
+      || (c >= 0x0b9e && c <= 0x0b9f)
+      || (c >= 0x0ba3 && c <= 0x0ba4)
+      || (c >= 0x0ba8 && c <= 0x0baa)
+      || (c >= 0x0bae && c <= 0x0bb5)
+      || (c >= 0x0bb7 && c <= 0x0bb9))
+    return 1;
+
+  /* Telugu */
+  if ((c >= 0x0c05 && c <= 0x0c0c)
+      || (c >= 0x0c0e && c <= 0x0c10)
+      || (c >= 0x0c12 && c <= 0x0c28)
+      || (c >= 0x0c2a && c <= 0x0c33)
+      || (c >= 0x0c35 && c <= 0x0c39)
+      || (c >= 0x0c60 && c <= 0x0c61))
+    return 1;
+
+  /* Kannada */
+  if ((c >= 0x0c85 && c <= 0x0c8c)
+      || (c >= 0x0c8e && c <= 0x0c90)
+      || (c >= 0x0c92 && c <= 0x0ca8)
+      || (c >= 0x0caa && c <= 0x0cb3)
+      || (c >= 0x0cb5 && c <= 0x0cb9)
+      || (c >= 0x0ce0 && c <= 0x0ce1))
+    return 1;
+
+  /* Malayalam */
+  if ((c >= 0x0d05 && c <= 0x0d0c)
+      || (c >= 0x0d0e && c <= 0x0d10)
+      || (c >= 0x0d12 && c <= 0x0d28)
+      || (c >= 0x0d2a && c <= 0x0d39)
+      || (c >= 0x0d60 && c <= 0x0d61))
+    return 1;
+
+  /* Thai */
+  if ((c >= 0x0e01 && c <= 0x0e30)
+      || (c >= 0x0e32 && c <= 0x0e33)
+      || (c >= 0x0e40 && c <= 0x0e46)
+      || (c >= 0x0e4f && c <= 0x0e5b))
+    return 1;
+
+  /* Lao */
+  if ((c >= 0x0e81 && c <= 0x0e82)
+      || (c == 0x0e84)
+      || (c == 0x0e87)
+      || (c == 0x0e88)
+      || (c == 0x0e8a)
+      || (c == 0x0e8d)
+      || (c >= 0x0e94 && c <= 0x0e97)
+      || (c >= 0x0e99 && c <= 0x0e9f)
+      || (c >= 0x0ea1 && c <= 0x0ea3)
+      || (c == 0x0ea5)
+      || (c == 0x0ea7)
+      || (c == 0x0eaa)
+      || (c == 0x0eab))
+    return 1;
+
+  /* Georgian */
+  if ((c >= 0x10a0 && c <= 0x10c5)
+      || (c >= 0x10d0 && c <= 0x10f6))
+    return 1;
+
+  /* Hiragana */
+  if ((c >= 0x3041 && c <= 0x3093)
+      || (c >= 0x309b && c <= 0x309c))
+    return 1;
+
+  /* Bopomofo */
+  if ((c >= 0x3105 && c <= 0x312c))
+    return 1;
+
+  return 0;
+}