diff options
author | Daiki Ueno <ueno@gnu.org> | 2015-05-21 13:03:50 +0900 |
---|---|---|
committer | Daiki Ueno <ueno@gnu.org> | 2015-05-22 16:28:26 +0900 |
commit | c95aacaa91bae90038715fed8b3bb365907c0783 (patch) | |
tree | 7996c1459c74b9f5f0473fbc5d4b7a086d802517 | |
parent | e4863193c05ee219079e3adbc04bfe4c068c57c3 (diff) | |
download | gettext-wip/ueno/cldr-plural.tar.gz |
gettext-tools: Add a new utility cldr-plural (branch: wip/ueno/cldr-plural)
* Makefile.am (noinst_PROGRAMS): Add cldr-plural.
(install-exec-local): Install cldr-plural in $(libexecdir).
(cldr_plural_SOURCES): New variable.
(cldr_plural_LDADD): New variable.
* cldr-plural-exp.h: New file.
* cldr-plural-exp.c: New file.
* cldr-plural.y: New file.
-rw-r--r-- | gettext-tools/src/Makefile.am | 6 |
-rw-r--r-- | gettext-tools/src/cldr-plural-exp.c | 666 |
-rw-r--r-- | gettext-tools/src/cldr-plural-exp.h | 133 |
-rw-r--r-- | gettext-tools/src/cldr-plural.y | 465 |
4 files changed, 1269 insertions, 1 deletion
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 9a23be0c5..715e39e54 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -29,7 +29,7 @@ msgcmp msgfmt msgmerge msgunfmt xgettext \ msgattrib msgcat msgcomm msgconv msgen msgexec msgfilter msggrep msginit msguniq \ recode-sr-latin -noinst_PROGRAMS = hostname urlget +noinst_PROGRAMS = hostname urlget cldr-plural lib_LTLIBRARIES = libgettextsrc.la @@ -236,6 +236,9 @@ endif recode_sr_latin_SOURCES = recode-sr-latin.c filter-sr-latin.c hostname_SOURCES = hostname.c urlget_SOURCES = urlget.c +cldr_plural_SOURCES = cldr-plural-exp.c cldr-plural.y +# FIXME: Change @MSGMERGE_LIBM@ to a better name. +cldr_plural_LDADD = $(LDADD) @MSGMERGE_LIBM@ # How to build libgettextsrc.la. # Need ../gnulib-lib/libgettextlib.la. @@ -464,6 +467,7 @@ install-exec-local: $(MKDIR_P) $(DESTDIR)$(pkglibdir) $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(INSTALL_PROGRAM) hostname$(EXEEXT) $(DESTDIR)$(pkglibdir)/hostname$(EXEEXT) $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(INSTALL_PROGRAM) urlget$(EXEEXT) $(DESTDIR)$(pkglibdir)/urlget$(EXEEXT) + $(INSTALL_PROGRAM_ENV) $(LIBTOOL) --mode=install $(INSTALL_PROGRAM) cldr-plural$(EXEEXT) $(DESTDIR)$(pkglibdir)/cldr-plural$(EXEEXT) $(INSTALL_SCRIPT) user-email $(DESTDIR)$(pkglibdir)/user-email $(INSTALL_SCRIPT) $(srcdir)/project-id $(DESTDIR)$(pkglibdir)/project-id diff --git a/gettext-tools/src/cldr-plural-exp.c b/gettext-tools/src/cldr-plural-exp.c new file mode 100644 index 000000000..7f56d233c --- /dev/null +++ b/gettext-tools/src/cldr-plural-exp.c @@ -0,0 +1,666 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "unistr.h" +#include "xalloc.h" +#include <math.h> + +#include "cldr-plural-exp.h" +#include "cldr-plural.h" + +/* The grammar of Unicode CLDR plural rules is defined at: + http://unicode.org/reports/tr35/tr35-numbers.html#Plural_rules_syntax + + This implementation only supports the "preferred" form, which + doesn't support obsolete keywords "in", "is", "not", and "within". + + Unlike gettext, CLDR allows an unsigned decimal value as an + operand, in addition to unsigned integers. For simplicity, we + treat decimal relations as if it has a constant truth value. 
+ + The implementation is largely based on the idea of Michele Locati's + cldr-to-gettext-plural-rules: + https://github.com/mlocati/cldr-to-gettext-plural-rules */ + +void +cldr_plural_range_free (struct cldr_plural_range_ty *range) +{ + if (range->start != range->end) + free (range->start); + free (range->end); + free (range); +} + +void +cldr_plural_range_list_free (struct cldr_plural_range_list_ty *ranges) +{ + while (ranges->nitems-- > 0) + cldr_plural_range_free (ranges->items[ranges->nitems]); + free (ranges->items); + free (ranges); +} + +void +cldr_plural_condition_free (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND + || condition->type == CLDR_PLURAL_CONDITION_OR) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + cldr_plural_relation_free (condition->value.relation); + free (condition); +} + +void +cldr_plural_relation_free (struct cldr_plural_relation_ty *relation) +{ + free (relation->expression); + cldr_plural_range_list_free (relation->ranges); + free (relation); +} + +static void +cldr_plural_rule_free (struct cldr_plural_rule_ty *rule) +{ + free (rule->name); + cldr_plural_condition_free (rule->condition); + free (rule); +} + +static void +cldr_plural_rule_list_free (struct cldr_plural_rule_list_ty *rules) +{ + while (rules->nitems-- > 0) + cldr_plural_rule_free (rules->items[rules->nitems]); + free (rules->items); + free (rules); +} + +static struct cldr_plural_rule_list_ty * +cldr_plural_parse (const char *input) +{ + struct cldr_plural_parse_args arg; + + memset (&arg, 0, sizeof (struct cldr_plural_parse_args)); + arg.cp = input; + arg.cp_end = input + strlen (input); + arg.result = XMALLOC (struct cldr_plural_rule_list_ty); + memset (arg.result, 0, sizeof (struct cldr_plural_rule_list_ty)); + + if (yyparse (&arg) != 0) + return NULL; + + return 
arg.result; +} + +#define OPERAND_ZERO_P(o) \ + (((o)->type == CLDR_PLURAL_OPERAND_INTEGER \ + && (o)->value.ival == 0) \ + || ((o)->type == CLDR_PLURAL_OPERAND_DECIMAL \ + && (o)->value.dval.d == 0)) + +static enum cldr_plural_condition +eval_relation (struct cldr_plural_relation_ty *relation) +{ + switch (relation->expression->operand) + { + case 'n': case 'i': + { + /* Coerce decimal values in ranges into integers. */ + size_t i; + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (range->start->type == CLDR_PLURAL_OPERAND_DECIMAL) + { + range->start->type = CLDR_PLURAL_OPERAND_INTEGER; + range->start->value.ival = ceil (range->start->value.dval.d); + } + if (range->end->type == CLDR_PLURAL_OPERAND_DECIMAL) + { + range->end->type = CLDR_PLURAL_OPERAND_INTEGER; + range->end->value.ival = floor (range->end->value.dval.d); + } + } + relation->expression->operand = 'i'; + } + break; + case 'f': case 't': + case 'v': case 'w': + { + /* Since plural expression in gettext only supports unsigned + integer, turn relations whose operand is either 'f', 't', + 'v', or 'w' into a constant truth value. */ + /* FIXME: check mod? 
*/ + size_t i; + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if ((relation->type == CLDR_PLURAL_RELATION_EQUAL + && (!OPERAND_ZERO_P (range->start) + || !OPERAND_ZERO_P (range->end))) + || (relation->type == CLDR_PLURAL_RELATION_NOT_EQUAL + && (OPERAND_ZERO_P (range->start) + || OPERAND_ZERO_P (range->end)))) + return CLDR_PLURAL_CONDITION_FALSE; + } + return CLDR_PLURAL_CONDITION_TRUE; + } + break; + } + return CLDR_PLURAL_CONDITION_RELATION; +} + +static void +eval_condition (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND) + { + eval_condition (condition->value.conditions[0]); + eval_condition (condition->value.conditions[1]); + + if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE + || condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_FALSE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE + && condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_TRUE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[1]; + cldr_plural_condition_free (condition->value.conditions[0]); + condition->type = condition->value.conditions[1]->type; + condition->value = condition->value.conditions[1]->value; + free (original); + } + else if (condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[0]; + 
cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = condition->value.conditions[0]->type; + condition->value = condition->value.conditions[0]->value; + free (original); + } + } + else if (condition->type == CLDR_PLURAL_CONDITION_OR) + { + eval_condition (condition->value.conditions[0]); + eval_condition (condition->value.conditions[1]); + + if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_TRUE + || condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_TRUE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_TRUE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE + && condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_condition_free (condition->value.conditions[0]); + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = CLDR_PLURAL_CONDITION_FALSE; + } + else if (condition->value.conditions[0]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[1]; + cldr_plural_condition_free (condition->value.conditions[0]); + condition->type = condition->value.conditions[1]->type; + condition->value = condition->value.conditions[1]->value; + free (original); + } + else if (condition->value.conditions[1]->type + == CLDR_PLURAL_CONDITION_FALSE) + { + struct cldr_plural_condition_ty *original + = condition->value.conditions[0]; + cldr_plural_condition_free (condition->value.conditions[1]); + condition->type = condition->value.conditions[0]->type; + condition->value = condition->value.conditions[0]->value; + free (original); + } + } + else + { + enum cldr_plural_condition value = + eval_relation (condition->value.relation); + if (value == CLDR_PLURAL_CONDITION_TRUE + || value == CLDR_PLURAL_CONDITION_FALSE) + { + cldr_plural_relation_free 
(condition->value.relation); + condition->type = value; + } + } +} + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +static int +find_largest_modulus (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND + || condition->type == CLDR_PLURAL_CONDITION_OR) + { + int modulus0 = + find_largest_modulus (condition->value.conditions[0]); + int modulus1 = + find_largest_modulus (condition->value.conditions[1]); + return MAX (modulus0, modulus1); + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + return condition->value.relation->expression->mod; + else + return 0; +} + +static bool +apply_condition (struct cldr_plural_condition_ty *condition, int value) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND) + return apply_condition (condition->value.conditions[0], value) + && apply_condition (condition->value.conditions[1], value); + else if (condition->type == CLDR_PLURAL_CONDITION_OR) + return apply_condition (condition->value.conditions[0], value) + || apply_condition (condition->value.conditions[1], value); + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + { + struct cldr_plural_relation_ty *relation + = condition->value.relation; + int number = value; + size_t i; + + if (relation->expression->mod > 0) + number %= relation->expression->mod; + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (range->start->value.ival <= number + && number <= range->end->value.ival) + return relation->type == CLDR_PLURAL_RELATION_EQUAL; + } + return relation->type != CLDR_PLURAL_RELATION_EQUAL; + } + return false; +} + +static void +print_expression (struct cldr_plural_expression_ty *expression, bool space) +{ + if (expression->mod == 0) + printf ("n"); + else + printf (space ? 
"n %% %d" : "n%%%d", expression->mod); +} + +static void +print_relation (struct cldr_plural_relation_ty *relation, + enum cldr_plural_condition parent, bool space) +{ + if (relation->type == CLDR_PLURAL_RELATION_EQUAL) + { + size_t i; + if (parent == CLDR_PLURAL_CONDITION_AND + && relation->ranges->nitems > 1) + putchar ('('); + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (i > 0) + printf (" || "); + if (range->start->value.ival == range->end->value.ival) + { + print_expression (relation->expression, space); + printf (space && relation->ranges->nitems == 1 + ? " == %d" : "==%d", + range->start->value.ival); + } + else if (range->start->value.ival == 0) + { + print_expression (relation->expression, false); + printf ("<=%d", range->end->value.ival); + } + else + { + if (parent == CLDR_PLURAL_CONDITION_OR + || relation->ranges->nitems > 1) + putchar ('('); + print_expression (relation->expression, false); + printf (">=%d", range->start->value.ival); + printf (" && "); + print_expression (relation->expression, false); + printf ("<=%d", range->end->value.ival); + if (parent == CLDR_PLURAL_CONDITION_OR + || relation->ranges->nitems > 1) + putchar (')'); + } + } + if (parent == CLDR_PLURAL_CONDITION_AND + && relation->ranges->nitems > 1) + putchar (')'); + } + else + { + size_t i; + if (parent == CLDR_PLURAL_CONDITION_OR + && relation->ranges->nitems > 1) + putchar ('('); + for (i = 0; i < relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = relation->ranges->items[i]; + if (i > 0) + printf (" && "); + if (range->start->value.ival == range->end->value.ival) + { + print_expression (relation->expression, space); + printf (space && relation->ranges->nitems == 1 + ? 
" != %d" : "!=%d", range->start->value.ival); + } + else if (range->start->value.ival == 0) + { + print_expression (relation->expression, false); + printf (">%d", range->end->value.ival); + } + else + { + if (parent == CLDR_PLURAL_CONDITION_AND + || relation->ranges->nitems > 1) + putchar ('('); + print_expression (relation->expression, false); + printf ("<%d", range->start->value.ival); + printf (" || "); + print_expression (relation->expression, false); + printf (">%d", range->end->value.ival); + if (parent == CLDR_PLURAL_CONDITION_AND + || relation->ranges->nitems > 1) + putchar (')'); + } + } + if (parent == CLDR_PLURAL_CONDITION_OR + && relation->ranges->nitems > 1) + putchar (')'); + } +} + +static bool +print_condition (struct cldr_plural_condition_ty *condition, + enum cldr_plural_condition parent, bool space) +{ + if (condition->type == CLDR_PLURAL_CONDITION_AND) + { + if (parent == CLDR_PLURAL_CONDITION_OR) + putchar ('('); + print_condition (condition->value.conditions[0], + CLDR_PLURAL_CONDITION_AND, false); + printf (" && "); + print_condition (condition->value.conditions[1], + CLDR_PLURAL_CONDITION_AND, false); + if (parent == CLDR_PLURAL_CONDITION_OR) + putchar (')'); + return true; + } + else if (condition->type == CLDR_PLURAL_CONDITION_OR) + { + if (parent == CLDR_PLURAL_CONDITION_AND) + putchar ('('); + print_condition (condition->value.conditions[0], + CLDR_PLURAL_CONDITION_OR, false); + printf (" || "); + print_condition (condition->value.conditions[1], + CLDR_PLURAL_CONDITION_OR, false); + if (parent == CLDR_PLURAL_CONDITION_AND) + putchar (')'); + return true; + } + else if (condition->type == CLDR_PLURAL_CONDITION_RELATION) + { + print_relation (condition->value.relation, parent, space); + return true; + } + return false; +} + +#define RULE_PRINTABLE_P(r) \ + ((r)->condition->type != CLDR_PLURAL_CONDITION_TRUE \ + && (r)->condition->type != CLDR_PLURAL_CONDITION_FALSE) + +/* Convert n == N into n != N. 
*/ +static bool +print_condition_negation (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_RELATION + && condition->value.relation->type == CLDR_PLURAL_RELATION_EQUAL + && condition->value.relation->ranges->nitems == 1 + && condition->value.relation->ranges->items[0]->start + == condition->value.relation->ranges->items[0]->end) + { + printf ("nplurals=2; plural=(n != %d);\n", + condition->value.relation->ranges->items[0]->start->value.ival); + return true; + } + return false; +} + +/* Convert n == 0,...,N into n > N. */ +static bool +print_condition_greater (struct cldr_plural_condition_ty *condition) +{ + if (condition->type == CLDR_PLURAL_CONDITION_RELATION + && condition->value.relation->type == CLDR_PLURAL_RELATION_EQUAL) + { + int last = -1; + size_t i; + for (i = 0; i < condition->value.relation->ranges->nitems; i++) + { + struct cldr_plural_range_ty *range = + condition->value.relation->ranges->items[i]; + if (range->start->type != CLDR_PLURAL_OPERAND_INTEGER + || range->end->type != CLDR_PLURAL_OPERAND_INTEGER + || range->start->value.ival != last + 1) + break; + last = range->end->value.ival; + } + if (i == condition->value.relation->ranges->nitems) + { + struct cldr_plural_range_ty *range = + condition->value.relation->ranges->items[i - 1]; + printf ("nplurals=2; plural=(n > %d);\n", + range->end->value.ival); + return true; + } + } + return false; +} + +typedef bool (*print_condition_function_ty) (struct cldr_plural_condition_ty *); +static print_condition_function_ty print_condition_functions[] = + { + print_condition_negation, + print_condition_greater + }; + +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + +static void +process_rule_list (struct cldr_plural_rule_list_ty *rules) +{ + size_t i; + size_t count; + size_t nplurals; + int modulus_max = 0; + + /* Prune trivial conditions. 
*/ + for (i = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + eval_condition (rule->condition); + } + + /* Find the largest modulus. */ + for (i = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + int modulus = find_largest_modulus (rule->condition); + if (modulus > modulus_max) + modulus_max = modulus; + } + + if (modulus_max > 0) + { + bool *values = XNMALLOC (modulus_max, bool); + + memset (values, 0, sizeof (bool) * modulus_max); + for (i = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + int j; + + for (j = 0; j < modulus_max; j++) + { + bool result = apply_condition (rule->condition, j + 1); + if (result) + values[j] = true; + } + + /* Check if all bits are set. Then we can omit one more rule. */ + for (j = 0; j < modulus_max; j++) + if (values[j] == false) + break; + if (j == modulus_max) + break; + } + + free (values); + + while (i < rules->nitems) + cldr_plural_rule_free (rules->items[--rules->nitems]); + } + + for (i = 0, nplurals = 1; i < rules->nitems; i++) + if (RULE_PRINTABLE_P (rules->items[i])) + nplurals++; + + /* Special case when rules is empty. */ + if (nplurals == 1) + { + printf ("nplurals=1; plural=0;\n"); + return; + } + + /* If we have only one printable rule, apply some heuristics. */ + if (nplurals == 2) + { + struct cldr_plural_condition_ty *condition; + size_t j; + + for (j = 0; j < rules->nitems; j++) + if (RULE_PRINTABLE_P (rules->items[j])) + break; + + condition = rules->items[j]->condition; + for (j = 0; j < SIZEOF (print_condition_functions); j++) + if (print_condition_functions[j] (condition)) + return; + } + + /* If there are more printable rules, build a tertiary operator. 
*/ + printf ("nplurals=%zu; plural=(", nplurals); + for (i = 0, count = 0; i < rules->nitems; i++) + { + struct cldr_plural_rule_ty *rule = rules->items[i]; + if (print_condition (rule->condition, + CLDR_PLURAL_CONDITION_FALSE, + nplurals == 2) + && rules->nitems > 1) + { + bool printable_left = false; + size_t j; + + for (j = i + 1; j < rules->nitems; j++) + if (RULE_PRINTABLE_P (rules->items[j])) + printable_left = true; + + if (i < rules->nitems - 1 && printable_left) + printf (" ? %zu : ", count++); + } + } + if (rules->nitems > 1) + printf (" ? %zu : %zu", count, count + 1); + printf (");\n"); +} + +int +main (int argc, char **argv) +{ + char *line = NULL; + size_t line_size = 0; + + for (;;) + { + int line_len; + struct cldr_plural_rule_list_ty *result; + + line_len = getline (&line, &line_size, stdin); + if (line_len < 0) + break; + if (line_len > 0 && line[line_len - 1] == '\n') + line[--line_len] = '\0'; + + result = cldr_plural_parse (line); + if (result) + { + process_rule_list (result); + cldr_plural_rule_list_free (result); + } + } + + free (line); + return 0; +} diff --git a/gettext-tools/src/cldr-plural-exp.h b/gettext-tools/src/cldr-plural-exp.h new file mode 100644 index 000000000..1c0c70c1a --- /dev/null +++ b/gettext-tools/src/cldr-plural-exp.h @@ -0,0 +1,133 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _CLDR_PLURAL_EXP_H +#define _CLDR_PLURAL_EXP_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +enum cldr_plural_operand + { + CLDR_PLURAL_OPERAND_INTEGER, + CLDR_PLURAL_OPERAND_DECIMAL + }; + +struct cldr_plural_operand_ty +{ + enum cldr_plural_operand type; + union + { + int ival; + struct + { + double d; + int nfractions; + } dval; + } value; +}; + +enum cldr_plural_relation + { + CLDR_PLURAL_RELATION_EQUAL, + CLDR_PLURAL_RELATION_NOT_EQUAL + }; + +struct cldr_plural_range_ty +{ + struct cldr_plural_operand_ty *start; + struct cldr_plural_operand_ty *end; +}; + +struct cldr_plural_range_list_ty +{ + struct cldr_plural_range_ty **items; + size_t nitems; + size_t nitems_max; +}; + +struct cldr_plural_expression_ty +{ + /* 'n', 'i', 'f', 't', 'v', 'w' */ + int operand; + + /* 0 if not given */ + int mod; +}; + +struct cldr_plural_relation_ty +{ + struct cldr_plural_expression_ty *expression; + enum cldr_plural_relation type; + struct cldr_plural_range_list_ty *ranges; +}; + +enum cldr_plural_condition + { + CLDR_PLURAL_CONDITION_AND, + CLDR_PLURAL_CONDITION_OR, + CLDR_PLURAL_CONDITION_RELATION, + CLDR_PLURAL_CONDITION_TRUE, + CLDR_PLURAL_CONDITION_FALSE + }; + +struct cldr_plural_condition_ty +{ + enum cldr_plural_condition type; + union + { + struct cldr_plural_relation_ty *relation; + struct cldr_plural_condition_ty *conditions[2]; + } value; +}; + +struct cldr_plural_rule_ty +{ + char *name; + struct cldr_plural_condition_ty *condition; +}; + +struct cldr_plural_rule_list_ty +{ + struct cldr_plural_rule_ty **items; + size_t nitems; + size_t nitems_max; +}; + +struct cldr_plural_parse_args +{ + const char *cp; + const char *cp_end; + struct cldr_plural_rule_list_ty *result; +}; + +extern void +cldr_plural_range_free (struct cldr_plural_range_ty *range); +extern void +cldr_plural_range_list_free (struct 
cldr_plural_range_list_ty *ranges); +extern void +cldr_plural_condition_free (struct cldr_plural_condition_ty *condition); +extern void +cldr_plural_relation_free (struct cldr_plural_relation_ty *relation); + +#ifdef __cplusplus +} +#endif + +#endif /* _CLDR_PLURAL_EXP_H */ diff --git a/gettext-tools/src/cldr-plural.y b/gettext-tools/src/cldr-plural.y new file mode 100644 index 000000000..9db4a67fc --- /dev/null +++ b/gettext-tools/src/cldr-plural.y @@ -0,0 +1,465 @@ +/* Unicode CLDR plural rule parser and converter + Copyright (C) 2015 Free Software Foundation, Inc. + + This file was written by Daiki Ueno <ueno@gnu.org>, 2015. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +%{ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include "unistr.h" +#include "xalloc.h" + +#include "cldr-plural-exp.h" +#include "cldr-plural.h" + +/* Prototypes for local functions. */ +static int yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg); +static void yyerror (struct cldr_plural_parse_args *arg, const char *str); + +/* Allocation of expressions. 
*/ + +static struct cldr_plural_rule_ty * +new_rule (char *name, struct cldr_plural_condition_ty *condition) +{ + struct cldr_plural_rule_ty *result = + XMALLOC (struct cldr_plural_rule_ty); + result->name = name; + result->condition = condition; + return result; +} + +static struct cldr_plural_condition_ty * +new_leaf_condition (struct cldr_plural_relation_ty *relation) +{ + struct cldr_plural_condition_ty *result = + XMALLOC (struct cldr_plural_condition_ty); + result->type = CLDR_PLURAL_CONDITION_RELATION; + result->value.relation = relation; + return result; +} + +static struct cldr_plural_condition_ty * +new_branch_condition (enum cldr_plural_condition type, + struct cldr_plural_condition_ty *condition0, + struct cldr_plural_condition_ty *condition1) +{ + struct cldr_plural_condition_ty *result = + XMALLOC (struct cldr_plural_condition_ty); + result->type = type; + result->value.conditions[0] = condition0; + result->value.conditions[1] = condition1; + return result; +} + +static struct cldr_plural_relation_ty * +new_relation (struct cldr_plural_expression_ty *expression, + enum cldr_plural_relation type, + struct cldr_plural_range_list_ty *ranges) +{ + struct cldr_plural_relation_ty *result = + XMALLOC (struct cldr_plural_relation_ty); + result->expression = expression; + result->type = type; + result->ranges = ranges; + return result; +} + +static struct cldr_plural_expression_ty * +new_expression (int operand, int mod) +{ + struct cldr_plural_expression_ty *result = + XMALLOC (struct cldr_plural_expression_ty); + result->operand = operand; + result->mod = mod; + return result; +} + +static struct cldr_plural_range_list_ty * +add_range (struct cldr_plural_range_list_ty *ranges, + struct cldr_plural_range_ty *range) +{ + if (ranges->nitems == ranges->nitems_max) + { + ranges->nitems_max = ranges->nitems_max * 2 + 1; + ranges->items = xrealloc (ranges->items, + sizeof (struct cldr_plural_range_ty *) + * ranges->nitems_max); + } + ranges->items[ranges->nitems++] 
= range; + return ranges; +} + +static struct cldr_plural_range_ty * +new_range (struct cldr_plural_operand_ty *start, + struct cldr_plural_operand_ty *end) +{ + struct cldr_plural_range_ty *result = + XMALLOC (struct cldr_plural_range_ty); + result->start = start; + result->end = end; + return result; +} +%} + +%parse-param {struct cldr_plural_parse_args *arg} +%lex-param {struct cldr_plural_parse_args *arg} +%define api.pure full + +%union { + char *sval; + struct cldr_plural_condition_ty *cval; + struct cldr_plural_relation_ty *lval; + struct cldr_plural_expression_ty *eval; + struct cldr_plural_range_ty *gval; + struct cldr_plural_operand_ty *oval; + struct cldr_plural_range_list_ty *rval; + int ival; +} + +%destructor { free ($$); } <sval> +%destructor { cldr_plural_condition_free ($$); } <cval> +%destructor { cldr_plural_relation_free ($$); } <lval> +%destructor { free ($$); } <eval> +%destructor { cldr_plural_range_free ($$); } <gval> +%destructor { free ($$); } <oval> +%destructor { cldr_plural_range_list_free ($$); } <rval> +%destructor { } <ival> + +%token AND OR RANGE ELLIPSIS OTHER AT_INTEGER AT_DECIMAL +%token<sval> KEYWORD +%token<oval> INTEGER DECIMAL +%token<ival> OPERAND +%type<cval> condition and_condition +%type<lval> relation +%type<eval> expression +%type<gval> range range_or_integer +%type<rval> range_list + +%% + +rules: rule + | rules ';' rule + ; + +rule: KEYWORD ':' condition samples + { + struct cldr_plural_rule_ty *rule = new_rule ($1, $3); + struct cldr_plural_rule_list_ty *result = arg->result; + if (result->nitems == result->nitems_max) + { + result->nitems_max = result->nitems_max * 2 + 1; + result->items = xrealloc (result->items, + sizeof (struct cldr_plural_rule_ty *) + * result->nitems_max); + } + result->items[result->nitems++] = rule; + } + | OTHER ':' samples + ; + +condition: and_condition + { + $$ = $1; + } + | condition OR and_condition + { + $$ = new_branch_condition (CLDR_PLURAL_CONDITION_OR, $1, $3); + } + ; + 
+and_condition: relation + { + $$ = new_leaf_condition ($1); + } + | and_condition AND relation + { + $$ = new_branch_condition (CLDR_PLURAL_CONDITION_AND, + $1, + new_leaf_condition ($3)); + } + ; + +relation: expression '=' range_list + { + $$ = new_relation ($1, CLDR_PLURAL_RELATION_EQUAL, $3); + } + | expression '!' range_list + { + $$ = new_relation ($1, CLDR_PLURAL_RELATION_NOT_EQUAL, $3); + } + ; + +expression: OPERAND + { + $$ = new_expression ($1, 0); + } + | OPERAND '%' INTEGER + { + $$ = new_expression ($1, $3->value.ival); + } + ; + +range_list: range_or_integer + { + struct cldr_plural_range_list_ty *ranges = + XMALLOC (struct cldr_plural_range_list_ty); + memset (ranges, 0, sizeof (struct cldr_plural_range_list_ty)); + $$ = add_range (ranges, $1); + } + | range_list ',' range_or_integer + { + $$ = add_range ($1, $3); + } + ; + +range_or_integer: range + { + $$ = $1; + } + | INTEGER + { + $$ = new_range ($1, $1); + } + ; + +range: INTEGER RANGE INTEGER + { + $$ = new_range ($1, $3); + } + ; + +/* FIXME: collect samples */ +samples: at_integer at_decimal + ; + +at_integer: %empty + | AT_INTEGER sample_list + ; + +at_decimal: %empty + | AT_DECIMAL sample_list + ; + +sample_list: sample_list1 sample_ellipsis + ; +sample_list1: sample_range + | sample_list1 ',' sample_range + ; +sample_ellipsis: %empty + | ',' ELLIPSIS + ; + +sample_range: DECIMAL + | DECIMAL '~' DECIMAL + | INTEGER + | INTEGER '~' INTEGER + ; + +%% + +static int +yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg) +{ + const char *exp = arg->cp; + ucs4_t uc; + int length; + int result; + static char *buffer; + static size_t bufmax; + size_t bufpos; + + while (1) + { + if (exp[0] == '\0') + { + arg->cp = exp; + return YYEOF; + } + + if (exp[0] != ' ' && exp[0] != '\t') + break; + + ++exp; + } + + length = u8_mbtouc (&uc, (const uint8_t *) exp, arg->cp_end - exp); + if (uc == 0x2026) + { + arg->cp = exp + length; + return ELLIPSIS; + } + else if (strncmp ("...", exp, 3) == 0) + { + 
arg->cp = exp + 3; + return ELLIPSIS; + } + else if (strncmp ("..", exp, 2) == 0) + { + arg->cp = exp + 2; + return RANGE; + } + else if (strncmp ("other", exp, 5) == 0) + { + arg->cp = exp + 5; + return OTHER; + } + else if (strncmp ("@integer", exp, 8) == 0) + { + arg->cp = exp + 8; + return AT_INTEGER; + } + else if (strncmp ("@decimal", exp, 8) == 0) + { + arg->cp = exp + 8; + return AT_DECIMAL; + } + + result = *exp++; + switch (result) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + unsigned long int ival = result - '0'; + + while (exp[0] >= '0' && exp[0] <= '9') + { + ival *= 10; + ival += exp[0] - '0'; + ++exp; + } + + lval->oval = XMALLOC (struct cldr_plural_operand_ty); + if (exp[0] == '.' && exp[1] >= '0' && exp[1] <= '9') + { + double dval = ival; + int denominator = 10, nfractions = 0; + ++exp; + while (exp[0] >= '0' && exp[0] <= '9') + { + dval += (exp[0] - '0') / (double) denominator; + denominator *= 10; + ++nfractions; + ++exp; + } + lval->oval->type = CLDR_PLURAL_OPERAND_DECIMAL; + lval->oval->value.dval.d = dval; + lval->oval->value.dval.nfractions = nfractions; + result = DECIMAL; + } + else + { + lval->oval->type = CLDR_PLURAL_OPERAND_INTEGER; + lval->oval->value.ival = ival; + result = INTEGER; + } + } + break; + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + bufpos = 0; + for (;;) + { + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos++] = result; + result = *exp; + switch (result) + { + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 
'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + ++exp; + continue; + default: + break; + } + break; + } + + if (bufpos >= bufmax) + { + bufmax = 2 * bufmax + 10; + buffer = xrealloc (buffer, bufmax); + } + buffer[bufpos] = '\0'; + + /* Operands. */ + if (bufpos == 1) + { + switch (buffer[0]) + { + case 'n': case 'i': case 'f': case 't': case 'v': case 'w': + arg->cp = exp; + lval->ival = buffer[0]; + return OPERAND; + default: + break; + } + } + + /* Keywords. */ + if (strcmp (buffer, "and") == 0) + { + arg->cp = exp; + return AND; + } + else if (strcmp (buffer, "or") == 0) + { + arg->cp = exp; + return OR; + } + + lval->sval = xstrdup (buffer); + result = KEYWORD; + break; + case '!': + if (exp[0] == '=') + { + ++exp; + result = '!'; + } + else + result = YYERRCODE; + break; + default: + break; + } + + arg->cp = exp; + + return result; +} + +static void +yyerror (struct cldr_plural_parse_args *arg, char const *s) +{ + fprintf (stderr, "%s\n", s); +} |