summaryrefslogtreecommitdiff
path: root/glib/guniprop.c
diff options
context:
space:
mode:
authorNoah Levitt <nlevitt@columbia.edu>2003-09-10 16:55:36 +0000
committerNoah Levitt <nlevitt@src.gnome.org>2003-09-10 16:55:36 +0000
commit8d91ba8c585f269229c768415b04fdd222e6fa72 (patch)
tree146c8d169345f392ea79b0c6f5bc26c825415b2d /glib/guniprop.c
parent896d38706b877c66183bf7c70c2b0b76db8009d5 (diff)
downloadglib-8d91ba8c585f269229c768415b04fdd222e6fa72.tar.gz
Unicode 4.0 special casing. (#114681)
2003-09-10 Noah Levitt <nlevitt@columbia.edu> * glib/gunicodeprivate.h: * glib/gunicollate.c: * glib/gunidecomp.c: * glib/guniprop.c: * tests/casemap.txt: * tests/gen-casemap-txt.pl: Unicode 4.0 special casing. (#114681) * glib/gunicodeprivate.h: Use a private header instead of extern function declarations (_g_utf8_normalize_wc, _g_unichar_combining_class).
Diffstat (limited to 'glib/guniprop.c')
-rw-r--r--glib/guniprop.c66
1 files changed, 63 insertions, 3 deletions
diff --git a/glib/guniprop.c b/glib/guniprop.c
index 9cef6e7cd..df0067633 100644
--- a/glib/guniprop.c
+++ b/glib/guniprop.c
@@ -27,6 +27,7 @@
#include "glib.h"
#include "gunichartables.h"
+#include "gunicodeprivate.h"
#define ATTR_TABLE(Page) (((Page) <= G_UNICODE_LAST_PAGE_PART1) \
? attr_table_part1[Page] \
@@ -737,6 +738,28 @@ g_utf8_strup (const gchar *str,
return result;
}
+/* traverses the string checking for characters with combining class == 230
+ * until a base character is found */
+static gboolean
+has_more_above (gchar *str)
+{
+ gchar *p = str;
+ gint combining_class;
+
+ while (*p)
+ {
+ combining_class = _g_unichar_combining_class (g_utf8_get_char (p));
+ if (combining_class == 230)
+ return TRUE;
+ else if (combining_class == 0)
+ break;
+
+ p = g_utf8_next_char (p);
+ }
+
+ return FALSE;
+}
+
static gsize
real_tolower (const gchar *str,
gssize max_len,
@@ -758,9 +781,46 @@ real_tolower (const gchar *str,
if (locale_type == LOCALE_TURKIC && c == 'I')
{
- /* I => LATIN SMALL LETTER DOTLESS I */
- len += g_unichar_to_utf8 (0x131, out_buffer ? out_buffer + len : NULL);
- }
+ if (g_utf8_get_char (p) == 0x0307)
+ {
+ /* I + COMBINING DOT ABOVE => i (U+0069) */
+ len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
+ p = g_utf8_next_char (p);
+ }
+ else
+ {
+ /* I => LATIN SMALL LETTER DOTLESS I */
+ len += g_unichar_to_utf8 (0x131, out_buffer ? out_buffer + len : NULL);
+ }
+ }
+ /* Introduce an explicit dot above when lowercasing capital I's and J's
+ * whenever there are more accents above. [SpecialCasing.txt] */
+ else if (locale_type == LOCALE_LITHUANIAN &&
+ (c == 0x00cc || c == 0x00cd || c == 0x0128))
+ {
+ len += g_unichar_to_utf8 (0x0069, out_buffer ? out_buffer + len : NULL);
+ len += g_unichar_to_utf8 (0x0307, out_buffer ? out_buffer + len : NULL);
+
+ switch (c)
+ {
+ case 0x00cc:
+ len += g_unichar_to_utf8 (0x0300, out_buffer ? out_buffer + len : NULL);
+ break;
+ case 0x00cd:
+ len += g_unichar_to_utf8 (0x0301, out_buffer ? out_buffer + len : NULL);
+ break;
+ case 0x0128:
+ len += g_unichar_to_utf8 (0x0303, out_buffer ? out_buffer + len : NULL);
+ break;
+ }
+ }
+ else if (locale_type == LOCALE_LITHUANIAN &&
+ (c == 'I' || c == 'J' || c == 0x012e) &&
+ has_more_above (p))
+ {
+ len += g_unichar_to_utf8 (g_unichar_tolower (c), out_buffer ? out_buffer + len : NULL);
+ len += g_unichar_to_utf8 (0x0307, out_buffer ? out_buffer + len : NULL);
+ }
else if (c == 0x03A3) /* GREEK CAPITAL LETTER SIGMA */
{
if ((max_len < 0 || p < str + max_len) && *p)