diff options
Diffstat (limited to 'tests/unicode-data.c')
-rw-r--r-- | tests/unicode-data.c | 270 |
1 files changed, 270 insertions, 0 deletions
diff --git a/tests/unicode-data.c b/tests/unicode-data.c new file mode 100644 index 000000000..1c2eef1d2 --- /dev/null +++ b/tests/unicode-data.c @@ -0,0 +1,270 @@ +/* GLib + * unicode-data.c: Test Unicode character data + * + * Copyright (C) 2019 Red Hat, Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "config.h" +#include <glib.h> +#include <string.h> +#include <locale.h> + +#ifndef G_OS_WIN32 +#include <unistd.h> +#endif + +#include "test-common.h" + +static const char * +char_type (GUnicodeType t) +{ + const char *names[] = { + "Cc", "Cf", "Cn", "Co", "Cs", "Ll", "Lm", "Lo", "Lt", + "Lu", "Mc", "Me", "Mn", "Nd", "Nl", "No", "Pc", "Pd", + "Pe", "Pf", "Pi", "Po", "Ps", "Sc", "Sk", "Sm", "So", + "Zl", "Zp", "Zs" + }; + return names[t]; +} + +static const char * +break_type (GUnicodeBreakType t) +{ + const char *names[] = { + "BK", "CR", "LF", "CM", "SG", "ZW", "IN", "GL", "CB", + "SP", "BA", "BB", "B2", "HY", "NS", "OP", "CL", "QU", + "EX", "ID", "NU", "IS", "SY", "AL", "PR", "PO", "SA", + "AI", "XX", "NL", "WJ", "JL", "JV", "JT", "H2", "H3", + "CP", "CJ", "HL", "RI", "EB", "EM", "ZWJ" + }; + return names[t]; +} + +static const char * +script_name (GUnicodeScript s) +{ + const char *names[] = { + "Zyyy", "Zinh", "Arab", "Armn", "Beng", "Bopo", "Cher", + "Copt", "Cyrl", "Dsrt", "Deva", "Ethi", "Geor", "Goth", + "Grek", "Gujr", "Guru", "Hani", "Hang", "Hebr", "Hira", + "Knda", "Kana", "Khmr", "Laoo", "Latn", "Mlym", "Mong", + "Mymr", "Ogam", "Ital", "Orya", "Runr", "Sinh", "Syrc", + "Taml", "Telu", "Thaa", "Thai", "Tibt", "Cans", "Yiii", + "Tglg", "Hano", "Buhd", "Tagb", "Brai", "Cprt", "Limb", + "Osma", "Shaw", "Linb", "Tale", "Ugar", "Talu", "Bugi", + "Glag", "Tfng", "Sylo", "Xpeo", "Khar", "Zzzz", "Bali", + "Xsux", "Phnx", "Phag", "Nkoo", "Kali", "Lepc", "Rjng", + "Sund", "Saur", "Cham", "Olck", "Vaii", "Cari", "Lyci", + "Lydi", "Avst", "Bamu", "Egyp", "Armi", "Phli", "Prti", + "Java", "Kthi", "Lisu", "Mtei", "Sarb", "Orkh", "Samr", + "Lana", "Tavt", "Batk", "Brah", "Mand", "Cakm", "Merc", + "Mero", "Plrd", "Shrd", "Sora", "Takr", "Bass", "Aghb", + "Dupl", "Elba", "Gran", "Khoj", "Sind", "Lina", "Mahj", + "Mani", "Mend", "Modi", "Mroo", "Nbat", "Narb", "Perm", + "Hmng", "Palm", "Pauc", "Phlp", "Sidd", "Tirh", "Wara", + "Ahom", "Hluw", "Hatr", "Mult", "Hung", "Sgnw", "Adlm", + "Bhks", "Marc", "Newa", "Osge", "Tang", "Gonm", "Nshu", + "Soyo", "Zanb", "Dogr", "Gong", "Rohg", "Maka", "Medf", + "Sogo", "Sogd", "Elym", "Nand", "Rohg", "Wcho" + }; + return names[s]; +} + +static void +test_file (const char *filename, GString *string) +{ + char *contents; + gsize length; + GError *error = NULL; + char *p; + GString *s1, *s2, *s3; + GUnicodeScript prev_script = -1; + int m; + + if (!g_file_get_contents (filename, &contents, &length, &error)) + { + g_error ("%s", error->message); + g_error_free (error); + return; + } + + g_string_append (string, "Text: "); + s1 = g_string_new ("Char type: "); + s2 = g_string_new ("Break type: "); + s3 = g_string_new ("Script: "); + + m = MAX (MAX (s1->len, s2->len), s3->len); + + g_string_append_printf (s1, "%*s", (int)(m - s1->len), ""); + g_string_append_printf (s2, "%*s", (int)(m - s2->len), ""); + g_string_append_printf (s3, "%*s", (int)(m - s3->len), ""); + g_string_append_printf (string, "%*s", (int)(m - strlen ("Text: ")), ""); + + for (p = contents; *p; p = g_utf8_next_char (p)) + { + gunichar ch = g_utf8_get_char (p); + const char *ctype = char_type (g_unichar_type (ch)); + const char *btype = break_type (g_unichar_break_type (ch)); + GUnicodeScript script = g_unichar_get_script (ch); + int c = strlen (ctype); + int b = strlen (btype); + int s = 0; + int t = 0; + + g_string_append_printf (s1, "%s", ctype); + g_string_append_printf (s2, "%s", btype); + + if (prev_script != script) + { + const char *str = script_name (script); + prev_script = script; + g_string_append (s3, str); + s = strlen (str); + } + + if (ch == 0x20) + { + g_string_append (string, "[ ]"); + t = 3; + } + else if (g_unichar_isgraph (ch) && + (ch != 0x2028) && + (ch != 0x2029)) + { + g_string_append_unichar (string, ch); + t = 1; + } + else + { + char *str = g_strdup_printf ("[%#04x]", ch); + g_string_append (string, str); + t = strlen (str); + g_free (str); + } + + m = MAX (t, MAX (MAX (c + 1, b + 1), s + 1)); + + g_string_append_printf (string, "%*s", m - t, ""); + g_string_append_printf (s1, "%*s", m - c, ""); + g_string_append_printf (s2, "%*s", m - b, ""); + g_string_append_printf (s3, "%*s", m - s, ""); + } + + g_string_append (string, "\n"); + g_string_append_len (string, s1->str, s1->len); + g_string_append (string, "\n"); + g_string_append_len (string, s2->str, s2->len); + g_string_append (string, "\n"); + g_string_append_len (string, s3->str, s3->len); + g_string_append (string, "\n"); + + g_string_free (s1, TRUE); + g_string_free (s2, TRUE); + g_string_free (s3, TRUE); + + g_free (contents); +} + +static gchar * +get_expected_filename (const gchar *filename) +{ + gchar *f, *p, *expected; + + f = g_strdup (filename); + p = strstr (f, ".chars"); + if (p) + *p = 0; + expected = g_strconcat (f, ".expected", NULL); + + g_free (f); + + return expected; +} + +static void +test_break (gconstpointer d) +{ + const char *filename = d; + char *expected_file; + GError *error = NULL; + GString *dump; + char *diff = NULL; + + expected_file = get_expected_filename (filename); + + dump = g_string_sized_new (0); + + test_file (filename, dump); + + diff = diff_with_file (expected_file, dump->str, dump->len, &error); + g_assert_no_error (error); + + if (diff && diff[0]) + { + g_printerr ("Contents don't match expected contents:\n%s", diff); + g_test_fail (); + g_free (diff); + } + + g_string_free (dump, TRUE); + g_free (expected_file); +} + +int +main (int argc, char *argv[]) +{ + GDir *dir; + GError *error = NULL; + const gchar *name; + gchar *path; + + g_setenv ("LC_ALL", "en_US.UTF-8", TRUE); + setlocale (LC_ALL, ""); + + g_test_init (&argc, &argv, NULL); + + /* allow to easily generate expected output for new test cases */ + if (argc > 1) + { + GString *string; + + string = g_string_sized_new (0); + test_file (argv[1], string); + g_print ("%s", string->str); + + return 0; + } + + path = g_test_build_filename (G_TEST_DIST, "chars", NULL); + dir = g_dir_open (path, 0, &error); + g_free (path); + g_assert_no_error (error); + while ((name = g_dir_read_name (dir)) != NULL) + { + if (!strstr (name, "chars")) + continue; + + path = g_strdup_printf ("/chars/%s", name); + g_test_add_data_func_full (path, g_test_build_filename (G_TEST_DIST, "chars", name, NULL), + test_break, g_free); + g_free (path); + } + g_dir_close (dir); + + return g_test_run (); +} |