summaryrefslogtreecommitdiff
path: root/locale/programs
diff options
context:
space:
mode:
Diffstat (limited to 'locale/programs')
-rw-r--r-- locale/programs/charmap.c 50
-rw-r--r-- locale/programs/charset.c 59
-rw-r--r-- locale/programs/charset.h 17
-rw-r--r-- locale/programs/ld-collate.c 16
-rw-r--r-- locale/programs/ld-ctype.c 38
-rw-r--r-- locale/programs/linereader.c 2
-rw-r--r-- locale/programs/localedef.c 17
-rw-r--r-- locale/programs/locfile-kw.gperf 3
-rw-r--r-- locale/programs/locfile-kw.h 160
-rw-r--r-- locale/programs/locfile-token.h 5
-rw-r--r-- locale/programs/locfile.h 3
-rw-r--r-- locale/programs/repertoire.c 323
-rw-r--r-- locale/programs/repertoire.h 38
-rw-r--r-- locale/programs/stringtrans.c 5
14 files changed, 565 insertions, 171 deletions
diff --git a/locale/programs/charmap.c b/locale/programs/charmap.c
index 0cd62fbfd4..7114a237a0 100644
--- a/locale/programs/charmap.c
+++ b/locale/programs/charmap.c
@@ -33,6 +33,8 @@
#include "error.h"
#include "linereader.h"
#include "charset.h"
+#include "locfile.h"
+#include "repertoire.h"
/* Uncomment following line for production version. */
@@ -209,6 +211,8 @@ parse_charmap (const char *filename)
memset (result, '\0', sizeof (struct charset_t));
/* The default DEFAULT_WIDTH is 1. */
result->width_default = 1;
+ /* Let the user override the repertoire map we use. */
+ result->repertoiremap = repertoiremap;
#define obstack_chunk_alloc malloc
#define obstack_chunk_free free
@@ -265,6 +269,17 @@ parse_charmap (const char *filename)
lr_ignore_rest (cmfile, 1);
+ /* Read the repertoire map now. */
+ if (result->repertoiremap == NULL)
+ /* This is fatal. */
+ error (4, 0, _("no repertoire map specified: cannot proceed"));
+
+ result->repertoire = repertoire_read (result->repertoiremap);
+ if (result->repertoire == NULL)
+ /* This is also fatal. */
+ error (4, errno, _("cannot read repertoire map `%s'"),
+ result->repertoiremap);
+
state = 2;
continue;
}
@@ -273,7 +288,7 @@ parse_charmap (const char *filename)
&& nowtok != tok_mb_cur_min && nowtok != tok_escape_char
&& nowtok != tok_comment_char && nowtok != tok_g0esc
&& nowtok != tok_g1esc && nowtok != tok_g2esc
- && nowtok != tok_g3esc)
+ && nowtok != tok_g3esc && nowtok != tok_repertoiremap)
{
lr_error (cmfile, _("syntax error in prolog: %s"),
_("illegal definition"));
@@ -305,6 +320,18 @@ parse_charmap (const char *filename)
lr_ignore_rest (cmfile, 1);
continue;
+ case tok_repertoiremap:
+ if (arg->tok != tok_ident)
+ goto badarg;
+
+ if (result->repertoiremap == NULL)
+ result->repertoiremap = obstack_copy0 (&result->mem_pool,
+ arg->val.str.start,
+ arg->val.str.len);
+
+ lr_ignore_rest (cmfile, 1);
+ continue;
+
case tok_mb_cur_max:
case tok_mb_cur_min:
if (arg->tok != tok_number)
@@ -437,14 +464,14 @@ argument to <%s> must be a single character"),
continue;
}
- if (nowtok == tok_charcode)
- /* Write char value in table. */
- charset_new_char (cmfile, result, now->val.charcode.nbytes,
- now->val.charcode.val, from_name, to_name);
+ if (now->val.charcode.nbytes < result->mb_cur_min)
+ lr_error (cmfile, _("too few bytes in character encoding"));
+ else if (now->val.charcode.nbytes > result->mb_cur_max)
+ lr_error (cmfile, _("too many bytes in character encoding"));
else
- /* Determine ISO 10646 value and write into table. */
- charset_new_unicode (cmfile, result, now->val.charcode.nbytes,
- now->val.charcode.val, from_name, to_name);
+ charset_new_char (cmfile, &result->char_table,
+ now->val.charcode.nbytes,
+ now->val.charcode.val, from_name, to_name);
/* Ignore trailing comment silently. */
lr_ignore_rest (cmfile, 0);
@@ -466,8 +493,7 @@ argument to <%s> must be a single character"),
continue;
}
- /* If the previous line was not completely correct free the
- used memory. */
+ /* Copy the to-name to a safe place. */
to_name = (char *) obstack_copy0 (&result->mem_pool,
cmfile->token.val.str.start,
cmfile->token.val.str.len);
@@ -694,7 +720,7 @@ new_width (struct linereader *cmfile, struct charset_t *result,
{
unsigned int from_val, to_val;
- from_val = charset_find_value (result, from, strlen (from));
+ from_val = charset_find_value (&result->char_table, from, strlen (from));
if ((wchar_t) from_val == ILLEGAL_CHAR_VALUE)
{
lr_error (cmfile, _("unknown character `%s'"), from);
@@ -705,7 +731,7 @@ new_width (struct linereader *cmfile, struct charset_t *result,
to_val = from_val;
else
{
- to_val = charset_find_value (result, to, strlen (to));
+ to_val = charset_find_value (&result->char_table, to, strlen (to));
if ((wchar_t) to_val == ILLEGAL_CHAR_VALUE)
{
lr_error (cmfile, _("unknown character `%s'"), to);
diff --git a/locale/programs/charset.c b/locale/programs/charset.c
index fdacf25328..767fafb93a 100644
--- a/locale/programs/charset.c
+++ b/locale/programs/charset.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -23,6 +23,8 @@
#include <alloca.h>
#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -31,49 +33,24 @@
#include "charset.h"
-static void
-insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
- unsigned int value, const char *from, const char *to);
-
-
-void
-charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes,
- unsigned int value, const char *from, const char *to)
-{
- if (bytes < cs->mb_cur_min)
- lr_error (lr, _("too few bytes in character encoding"));
- else if (bytes > cs->mb_cur_max)
- lr_error (lr, _("too many bytes in character encoding"));
- else
- insert_char (lr, cs, bytes, value, from, to);
-}
-
-
-void
-charset_new_unicode (struct linereader *lr, struct charset_t *cs, int bytes,
- unsigned int value, const char *from, const char *to)
-{
- /* For now: perhaps <Uxxxx> support will be removed again... */
- insert_char (lr, cs, bytes, value, from, to);
-}
-
-
unsigned int
-charset_find_value (const struct charset_t *cs, const char *name, size_t len)
+charset_find_value (const hash_table *ht, const char *name, size_t len)
{
void *result;
- if (find_entry ((hash_table *) &cs->char_table, name, len, &result) < 0)
+ if (find_entry ((hash_table *) ht, name, len, &result) < 0)
return ILLEGAL_CHAR_VALUE;
return (unsigned int) ((unsigned long int) result);
}
-static void
-insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
- unsigned int value, const char *from, const char *to)
+void
+charset_new_char (struct linereader *lr, hash_table *ht, int bytes,
+ unsigned int value, const char *from, const char *to)
{
+ char *from_end;
+ char *to_end;
const char *cp;
char *buf;
int prefix_len, len1, len2;
@@ -81,7 +58,7 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
if (to == NULL)
{
- if (insert_entry (&cs->char_table, from, strlen (from),
+ if (insert_entry (ht, from, strlen (from),
(void *) (unsigned long int) value)
< 0)
lr_error (lr, _("duplicate character name `%s'"), from);
@@ -111,8 +88,16 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0)
goto illegal_range;
- from_nr = strtoul (&from[prefix_len], NULL, 10);
- to_nr = strtoul (&to[prefix_len], NULL, 10);
+ errno = 0; /* Reset so an ERANGE seen below stems from strtoul. */
+ from_nr = strtoul (&from[prefix_len], &from_end, 10);
+ if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE)
+ || ((to_nr = strtoul (&to[prefix_len], &to_end, 10)) == ULONG_MAX
+ && errno == ERANGE)
+ || *to_end != '\0') /* Reject trailing non-digit characters. */
+ {
+ lr_error (lr, _("<%s> and <%s> are illegal names for range"), from, to);
+ return;
+ }
if (from_nr > to_nr)
{
@@ -127,7 +112,7 @@ insert_char (struct linereader *lr, struct charset_t *cs, int bytes,
{
sprintf (&buf[prefix_len], "%0*d", len1 - prefix_len, cnt);
- if (insert_entry (&cs->char_table, buf, len1,
+ if (insert_entry (ht, buf, len1,
(void *) (unsigned long int) (value + (cnt - from_nr)))
< 0)
lr_error (lr, _("duplicate character name `%s'"), buf);
diff --git a/locale/programs/charset.h b/locale/programs/charset.h
index 82c4ef0c57..db93f16306 100644
--- a/locale/programs/charset.h
+++ b/locale/programs/charset.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -22,6 +22,7 @@
#include <obstack.h>
+#include "repertoire.h"
#include "simple-hash.h"
#include "linereader.h"
@@ -36,6 +37,9 @@ struct width_rule
struct charset_t
{
+ const char *repertoiremap;
+ struct repertoire_t *repertoire;
+
const char *code_set_name;
int mb_cur_min;
int mb_cur_max;
@@ -63,14 +67,11 @@ extern int be_quiet;
struct charset_t *charmap_read (const char *filename);
/* Prototypes for function to insert new character. */
-void charset_new_char (struct linereader *lr, struct charset_t *cs, int bytes,
+void charset_new_char (struct linereader *lr, hash_table *ht, int bytes,
unsigned int value, const char *from, const char *to);
-void charset_new_unicode (struct linereader *lr, struct charset_t *cs,
- int bytes, unsigned int value, const char *from,
- const char *to);
-
-unsigned int charset_find_value (const struct charset_t *__cs,
- const char *__name, size_t __len);
+/* Return the value stored under the given key in the hashing table. */
+unsigned int charset_find_value (const hash_table *ht,
+ const char *name, size_t len);
#endif /* charset.h */
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 57b97767af..a92ff1154a 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -212,7 +212,7 @@ collate_finish (struct localedef_t *locale, struct charset_t *charset)
void *ptmp;
unsigned int value = 0;
- wch = charset_find_value (charset, patch->token, toklen);
+ wch = charset_find_value (&charset->char_table, patch->token, toklen);
if (wch != ILLEGAL_CHAR_VALUE)
{
element_t *runp;
@@ -1054,7 +1054,8 @@ collate_element_to (struct linereader *lr, struct localedef_t *locale,
collate->combine_token = NULL;
}
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if ((wchar_t) value != ILLEGAL_CHAR_VALUE)
{
lr_error (lr, _("symbol for multicharacter collating element "
@@ -1181,7 +1182,8 @@ collate_symbol (struct linereader *lr, struct localedef_t *locale,
wchar_t value;
void *not_used;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if (value != ILLEGAL_CHAR_VALUE)
{
lr_error (lr, _("symbol for multicharacter collating element "
@@ -1268,7 +1270,7 @@ collate_order_elem (struct linereader *lr, struct localedef_t *locale,
{
case tok_bsymbol:
/* We have a string to find in one of the three hashing tables. */
- value = charset_find_value (charset, code->val.str.start,
+ value = charset_find_value (&charset->char_table, code->val.str.start,
code->val.str.len);
if (value != ILLEGAL_CHAR_VALUE)
{
@@ -1533,7 +1535,8 @@ collate_weight_bsymbol (struct linereader *lr, struct localedef_t *locale,
assert (code->tok == tok_bsymbol);
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if (value != ILLEGAL_CHAR_VALUE)
{
element_t *runp;
@@ -1706,7 +1709,8 @@ collate_simple_weight (struct linereader *lr, struct localedef_t *locale,
return -1;
}
- wch = charset_find_value (charset, startp, putp - startp);
+ wch = charset_find_value (&charset->char_table, startp,
+ putp - startp);
if (wch != ILLEGAL_CHAR_VALUE)
{
element_t *pelem;
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index f2f32cc214..2f9a9a2caf 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -308,7 +308,7 @@ character %s'%s' in class `%s' must not be in class `%s'"),
}
/* ... and now test <SP> as a special case. */
- space_value = charset_find_value (charset, "SP", 2);
+ space_value = charset_find_value (&charset->char_table, "SP", 2);
if ((wchar_t) space_value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -634,7 +634,8 @@ ctype_class_from (struct linereader *lr, struct localedef_t *locale,
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
unsigned int value;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
ctype->last_class_char = value;
@@ -656,7 +657,8 @@ ctype_class_to (struct linereader *lr, struct localedef_t *locale,
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
unsigned int value, cnt;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
/* In the LC_CTYPE category it is no error when a character is
not found. This has to be ignored silently. */
@@ -750,7 +752,8 @@ ctype_map_from (struct linereader *lr, struct localedef_t *locale,
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
unsigned int value;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
/* In the LC_CTYPE category it is no error when a character is
@@ -770,7 +773,8 @@ ctype_map_to (struct linereader *lr, struct localedef_t *locale,
struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
unsigned int value;
- value = charset_find_value (charset, code->val.str.start, code->val.str.len);
+ value = charset_find_value (&charset->char_table, code->val.str.start,
+ code->val.str.len);
if ((wchar_t) ctype->from_map_char == ILLEGAL_CHAR_VALUE
|| (wchar_t) value == ILLEGAL_CHAR_VALUE)
@@ -948,7 +952,7 @@ set_class_defaults (struct locale_ctype_t *ctype, struct charset_t *charset)
unsigned int value;
tmp[0] = ch;
- value = charset_find_value (charset, tmp, 1);
+ value = charset_find_value (&charset->char_table, tmp, 1);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1013,7 +1017,7 @@ character `%s' not defined while needed as default value"),
{
unsigned int value;
- value = charset_find_value (charset, "space", 5);
+ value = charset_find_value (&charset->char_table, "space", 5);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1024,7 +1028,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "form-feed", 9);
+ value = charset_find_value (&charset->char_table, "form-feed", 9);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1035,7 +1039,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "newline", 7);
+ value = charset_find_value (&charset->char_table, "newline", 7);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1046,7 +1050,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "carriage-return", 15);
+ value = charset_find_value (&charset->char_table, "carriage-return", 15);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1057,7 +1061,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "tab", 3);
+ value = charset_find_value (&charset->char_table, "tab", 3);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1068,7 +1072,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_space);
- value = charset_find_value (charset, "vertical-tab", 12);
+ value = charset_find_value (&charset->char_table, "vertical-tab", 12);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1097,7 +1101,7 @@ character `%s' not defined while needed as default value"),
{
unsigned int value;
- value = charset_find_value (charset, "space", 5);
+ value = charset_find_value (&charset->char_table, "space", 5);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1108,7 +1112,7 @@ character `%s' not defined while needed as default value"),
else
ELEM (ctype, class_collection, , value) |= BIT (tok_blank);
- value = charset_find_value (charset, "tab", 3);
+ value = charset_find_value (&charset->char_table, "tab", 3);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1149,7 +1153,7 @@ character `%s' not defined while needed as default value"),
if ((ctype->class_collection[cnt] & mask) != 0)
ctype->class_collection[cnt] |= BIT (tok_print);
- space = charset_find_value (charset, "space", 5);
+ space = charset_find_value (&charset->char_table, "space", 5);
if (space == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1178,7 +1182,7 @@ character `%s' not defined while needed as default value"),
tmp[1] = (char) ch;
- value_from = charset_find_value (charset, &tmp[1], 1);
+ value_from = charset_find_value (&charset->char_table, &tmp[1], 1);
if ((wchar_t) value_from == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
@@ -1190,7 +1194,7 @@ character `%s' not defined while needed as default value"),
/* This conversion is implementation defined. */
tmp[1] = (char) (ch + ('A' - 'a'));
- value_to = charset_find_value (charset, &tmp[1], 1);
+ value_to = charset_find_value (&charset->char_table, &tmp[1], 1);
if ((wchar_t) value_to == ILLEGAL_CHAR_VALUE)
{
if (!be_quiet)
diff --git a/locale/programs/linereader.c b/locale/programs/linereader.c
index 6692164b60..4406e1ab72 100644
--- a/locale/programs/linereader.c
+++ b/locale/programs/linereader.c
@@ -524,7 +524,7 @@ get_string (struct linereader *lr, const struct charset_t *charset)
if (lr->translate_strings)
{
- value = charset_find_value (charset, &buf[startidx],
+ value = charset_find_value (&charset->char_table, &buf[startidx],
bufact - startidx);
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
illegal_string = 1;
diff --git a/locale/programs/localedef.c b/locale/programs/localedef.c
index 32437c12f5..831c36e429 100644
--- a/locale/programs/localedef.c
+++ b/locale/programs/localedef.c
@@ -81,8 +81,8 @@ static const char *charmap_file;
/* Name of the locale definition file. */
static const char *input_file;
-/* Name of the UCS file. */
-static const char *ucs_csn;
+/* Name of the repertoire map file. */
+const char *repertoiremap;
/* Name and version of program. */
@@ -99,9 +99,7 @@ static const struct argp_option options[] =
{ "charmap", 'f', "FILE", 0,
N_("Symbolic character names defined in FILE") },
{ "inputfile", 'i', "FILE", 0, N_("Source definitions are found in FILE") },
- { "code-set-name", 'u', "NAME", OPTION_HIDDEN,
- N_("Specify code set for mapping ISO 10646 elements") },
- { "repertoire-map", 'u', NAME, 0,
+ { "repertoire-map", 'u', "FILE", 0,
N_("file containing mapping from symbolic names to UCS4 values") },
{ NULL, 0, NULL, 0, N_("Output control:") },
@@ -355,7 +353,7 @@ parse_opt (int key, char *arg, struct argp_state *state)
input_file = arg;
break;
case 'u':
- ucs_csn = arg;
+ repertoiremap = arg;
break;
case 'v':
verbose = 1;
@@ -377,10 +375,11 @@ more_help (int key, const char *text, void *input)
case ARGP_KEY_HELP_EXTRA:
/* We print some extra information. */
asprintf (&cp, gettext ("\
-System's directory for character maps: %s\n\
- locale files : %s\n\
+System's directory for character maps : %s\n\
+ repertoire maps: %s\n\
+ locale path : %s\n\
%s"),
- CHARMAP_PATH, LOCALE_PATH, gettext ("\
+ CHARMAP_PATH, REPERTOIREMAP_PATH, LOCALE_PATH, gettext ("\
Report bugs using the `glibcbug' script to <bugs@gnu.org>.\n"));
return cp;
default:
diff --git a/locale/programs/locfile-kw.gperf b/locale/programs/locfile-kw.gperf
index 38150ad46b..991e9dd6f9 100644
--- a/locale/programs/locfile-kw.gperf
+++ b/locale/programs/locfile-kw.gperf
@@ -1,5 +1,5 @@
%{
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -26,6 +26,7 @@ struct keyword_t ;
%%
escape_char, tok_escape_char, 0
comment_char, tok_comment_char, 0
+repertoiremap, tok_repertoiremap, 0
LC_CTYPE, tok_lc_ctype, 0
END, tok_end, 0
copy, tok_copy, 0
diff --git a/locale/programs/locfile-kw.h b/locale/programs/locfile-kw.h
index 0fb0b5ca5f..bd80618e77 100644
--- a/locale/programs/locfile-kw.h
+++ b/locale/programs/locfile-kw.h
@@ -1,6 +1,6 @@
/* C code produced by gperf version 2.5 (GNU C++ version) */
/* Command-line: gperf -acCgopt -k1,2,5,$ -N locfile_hash programs/locfile-kw.gperf */
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -24,12 +24,12 @@
#include "locfile-token.h"
struct keyword_t ;
-#define TOTAL_KEYWORDS 73
+#define TOTAL_KEYWORDS 74
#define MIN_WORD_LENGTH 3
#define MAX_WORD_LENGTH 17
#define MIN_HASH_VALUE 3
-#define MAX_HASH_VALUE 132
-/* maximum key range = 130, duplicates = 0 */
+#define MAX_HASH_VALUE 178
+/* maximum key range = 176, duplicates = 0 */
#ifdef __GNUC__
inline
@@ -39,19 +39,19 @@ hash (register const char *str, register int len)
{
static const unsigned char asso_values[] =
{
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 133, 133, 133,
- 133, 133, 133, 133, 133, 133, 133, 0, 0, 0,
- 0, 0, 133, 0, 133, 133, 0, 133, 0, 20,
- 133, 133, 0, 0, 0, 5, 133, 133, 133, 5,
- 133, 133, 133, 133, 133, 5, 133, 0, 60, 0,
- 15, 10, 20, 40, 5, 20, 133, 0, 45, 40,
- 10, 0, 0, 133, 15, 50, 0, 30, 0, 10,
- 15, 15, 133, 133, 133, 133, 133, 133,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 179, 179, 179,
+ 179, 179, 179, 179, 179, 179, 179, 0, 0, 0,
+ 0, 0, 179, 0, 179, 179, 0, 179, 0, 45,
+ 179, 179, 0, 0, 0, 5, 179, 179, 179, 10,
+ 179, 179, 179, 179, 179, 5, 179, 0, 5, 0,
+ 15, 20, 5, 20, 40, 20, 179, 25, 15, 50,
+ 10, 0, 0, 179, 45, 50, 0, 30, 0, 5,
+ 10, 60, 179, 179, 179, 179, 179, 179,
};
register int hval = len;
@@ -88,102 +88,110 @@ locfile_hash (register const char *str, register int len)
{"",},
{"t_fmt", tok_t_fmt, 0},
{"LC_MESSAGES", tok_lc_messages, 0},
- {"",},
- {"charconv", tok_charconv, 0},
+ {"",}, {"",},
{"UNDEFINED", tok_undefined, 0},
{"LC_NUMERIC", tok_lc_numeric, 0},
{"",},
{"collating-element", tok_collating_element, 0},
{"position", tok_position, 0},
- {"copy", tok_copy, 0},
- {"print", tok_print, 0},
{"",},
- {"toupper", tok_toupper, 0},
+ {"alpha", tok_alpha, 0},
+ {"",}, {"",},
{"positive_sign", tok_positive_sign, 0},
{"",},
{"d_fmt", tok_d_fmt, 0},
+ {"",},
+ {"forward", tok_forward, 0},
{"",}, {"",},
- {"era", tok_era, 0},
- {"p_sep_by_space", tok_p_sep_by_space, 0},
- {"LC_COLLATE", tok_lc_collate, 0},
- {"noexpr", tok_noexpr, 0},
- {"tolower", tok_tolower, 0},
- {"day", tok_day, 0},
- {"era_t_fmt", tok_era_t_fmt, 0},
+ {"abmon", tok_abmon, 0},
+ {"collating-symbol", tok_collating_symbol, 0},
+ {"d_t_fmt", tok_d_t_fmt, 0},
+ {"backward", tok_backward, 0},
+ {"",},
{"punct", tok_punct, 0},
- {"LC_MONETARY", tok_lc_monetary, 0},
- {"comment_char", tok_comment_char, 0},
+ {"",}, {"",}, {"",},
+ {"p_sep_by_space", tok_p_sep_by_space, 0},
+ {"digit", tok_digit, 0},
+ {"",}, {"",}, {"",}, {"",},
+ {"cntrl", tok_cntrl, 0},
+ {"p_sign_posn", tok_p_sign_posn, 0},
{"",},
+ {"charconv", tok_charconv, 0},
{"n_sep_by_space", tok_n_sep_by_space, 0},
- {"digit", tok_digit, 0},
- {"order_start", tok_order_start, 0},
- {"forward", tok_forward, 0},
+ {"print", tok_print, 0},
+ {"xdigit", tok_xdigit, 0},
+ {"toupper", tok_toupper, 0},
{"negative_sign", tok_negative_sign, 0},
{"",},
- {"nostr", tok_nostr, 0},
- {"yesstr", tok_yesstr, 0},
- {"d_t_fmt", tok_d_t_fmt, 0},
- {"",},
- {"era_d_fmt", tok_era_d_fmt, 0},
- {"alpha", tok_alpha, 0},
- {"era_d_t_fmt", tok_era_d_t_fmt, 0},
+ {"LC_COLLATE", tok_lc_collate, 0},
+ {"n_sign_posn", tok_n_sign_posn, 0},
+ {"tolower", tok_tolower, 0},
+ {"",}, {"",},
+ {"int_curr_symbol", tok_int_curr_symbol, 0},
+ {"noexpr", tok_noexpr, 0},
{"",},
{"mon", tok_mon, 0},
- {"order_end", tok_order_end, 0},
+ {"copy", tok_copy, 0},
{"t_fmt_ampm", tok_t_fmt_ampm, 0},
- {"xdigit", tok_xdigit, 0},
+ {"LC_MONETARY", tok_lc_monetary, 0},
{"mon_thousands_sep", tok_mon_thousands_sep, 0},
- {"",}, {"",}, {"",},
- {"collating-symbol", tok_collating_symbol, 0},
- {"yesexpr", tok_yesexpr, 0},
- {"era_year", tok_era_year, 0},
- {"charclass", tok_charclass, 0},
- {"upper", tok_upper, 0},
- {"p_sign_posn", tok_p_sign_posn, 0},
+ {"era", tok_era, 0},
+ {"",}, {"",}, {"",}, {"",},
+ {"p_cs_precedes", tok_p_cs_precedes, 0},
+ {"era_t_fmt", tok_era_t_fmt, 0},
+ {"blank", tok_blank, 0},
{"",},
- {"thousands_sep", tok_thousands_sep, 0},
+ {"comment_char", tok_comment_char, 0},
+ {"day", tok_day, 0},
{"",},
- {"graph", tok_graph, 0},
+ {"currency_symbol", tok_currency_symbol, 0},
{"",},
{"mon_decimal_point", tok_mon_decimal_point, 0},
- {"p_cs_precedes", tok_p_cs_precedes, 0},
+ {"n_cs_precedes", tok_n_cs_precedes, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",},
+ {"era_d_fmt", tok_era_d_fmt, 0},
+ {"alt_digits", tok_alt_digits, 0},
+ {"era_d_t_fmt", tok_era_d_t_fmt, 0},
{"",},
- {"space", tok_space, 0},
- {"n_sign_posn", tok_n_sign_posn, 0},
+ {"grouping", tok_grouping, 0},
{"",},
+ {"space", tok_space, 0},
+ {"",}, {"",},
{"decimal_point", tok_decimal_point, 0},
+ {"charclass", tok_charclass, 0},
+ {"int_frac_digits", tok_int_frac_digits, 0},
+ {"order_start", tok_order_start, 0},
+ {"mon_grouping", tok_mon_grouping, 0},
+ {"thousands_sep", tok_thousands_sep, 0},
{"from", tok_from, 0},
+ {"nostr", tok_nostr, 0},
+ {"",}, {"",}, {"",}, {"",},
{"lower", tok_lower, 0},
- {"",}, {"",},
- {"n_cs_precedes", tok_n_cs_precedes, 0},
+ {"",}, {"",}, {"",},
+ {"order_end", tok_order_end, 0},
{"",},
- {"abmon", tok_abmon, 0},
- {"escape_char", tok_escape_char, 0},
+ {"frac_digits", tok_frac_digits, 0},
{"",}, {"",}, {"",},
- {"int_curr_symbol", tok_int_curr_symbol, 0},
+ {"alnum", tok_alnum, 0},
{"",}, {"",},
- {"backward", tok_backward, 0},
+ {"repertoiremap", tok_repertoiremap, 0},
{"",},
+ {"upper", tok_upper, 0},
+ {"escape_char", tok_escape_char, 0},
+ {"",}, {"",}, {"",},
{"abday", tok_abday, 0},
- {"",}, {"",}, {"",}, {"",},
- {"currency_symbol", tok_currency_symbol, 0},
- {"frac_digits", tok_frac_digits, 0},
- {"",},
- {"grouping", tok_grouping, 0},
+ {"yesstr", tok_yesstr, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
{"",},
- {"cntrl", tok_cntrl, 0},
- {"",}, {"",}, {"",}, {"",},
- {"blank", tok_blank, 0},
- {"",}, {"",}, {"",}, {"",},
- {"int_frac_digits", tok_int_frac_digits, 0},
- {"",}, {"",}, {"",}, {"",},
- {"alt_digits", tok_alt_digits, 0},
+ {"yesexpr", tok_yesexpr, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"graph", tok_graph, 0},
{"",}, {"",}, {"",}, {"",},
{"am_pm", tok_am_pm, 0},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
{"",}, {"",}, {"",}, {"",},
- {"alnum", tok_alnum, 0},
- {"",},
- {"mon_grouping", tok_mon_grouping, 0},
+ {"era_year", tok_era_year, 0},
};
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
diff --git a/locale/programs/locfile-token.h b/locale/programs/locfile-token.h
index 6d1543c320..7845b4ba76 100644
--- a/locale/programs/locfile-token.h
+++ b/locale/programs/locfile-token.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -49,6 +49,8 @@ enum token_t
tok_g2esc,
tok_g3esc,
+ tok_charids,
+
tok_code_set_name,
tok_mb_cur_max,
tok_mb_cur_min,
@@ -56,6 +58,7 @@ enum token_t
tok_width,
tok_width_variable,
tok_width_default,
+ tok_repertoiremap,
tok_lc_ctype,
tok_copy,
diff --git a/locale/programs/locfile.h b/locale/programs/locfile.h
index 604e726e20..697af64979 100644
--- a/locale/programs/locfile.h
+++ b/locale/programs/locfile.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -55,6 +55,7 @@ struct localedef_t
/* Declared in localedef.c. */
extern int be_quiet;
+extern const char *repertoiremap;
/* Found in localedef.c. */
void def_to_process (const char *name, int category);
diff --git a/locale/programs/repertoire.c b/locale/programs/repertoire.c
new file mode 100644
index 0000000000..1f219ec81f
--- /dev/null
+++ b/locale/programs/repertoire.c
@@ -0,0 +1,323 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <errno.h>
+#include <error.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "linereader.h"
+#include "charset.h"
+#include "repertoire.h"
+#include "simple-hash.h"
+
+
+extern void *xmalloc (size_t __n);
+
+
+/* Simple keyword hashing for the repertoiremap. */
+static struct repertoire_t *parse_repertoiremap (const char *filename);
+static const struct keyword_t *repertoiremap_hash (const char *str, int len);
+
+
+/* Read the repertoire map FILENAME and return the parsed table.
+
+   FILENAME is used as given when it is readable by the effective user.
+   Otherwise a relative name is looked up below REPERTOIREMAP_PATH, the
+   same directory parse_repertoiremap falls back to.  An unreadable
+   absolute name yields NULL without any diagnostic from this function.
+
+   Returns NULL when no map could be parsed; unless BE_QUIET is set a
+   message is printed in that case.  */
+struct repertoire_t *
+repertoire_read (const char *filename)
+{
+  const char *pathnfile;
+  struct repertoire_t *result = NULL;
+
+  if (euidaccess (filename, R_OK) >= 0)
+    pathnfile = filename;
+  else if (filename[0] != '/')
+    {
+      /* `sizeof' already counts the terminating NUL; the additional
+         byte is for the '/' separator.  */
+      char *cp = xmalloc (strlen (filename) + sizeof REPERTOIREMAP_PATH + 1);
+      stpcpy (stpcpy (stpcpy (cp, REPERTOIREMAP_PATH), "/"), filename);
+
+      /* NOTE(review): CP is never freed.  Whether the line reader keeps
+         a reference to the name is not visible from this file, so the
+         buffer is deliberately left alone here.  */
+      pathnfile = (const char *) cp;
+    }
+  else
+    pathnfile = NULL;
+
+  if (pathnfile != NULL)
+    {
+      result = parse_repertoiremap (pathnfile);
+
+      if (result == NULL && !be_quiet)
+        error (0, errno, _("repertoire map file `%s' not found"), filename);
+    }
+
+  return result;
+}
+
+
+/* Parse the repertoire map file FILENAME.
+
+   The file may start with `escape_char' and `comment_char' settings and
+   an optional `CHARIDS' line.  Each following line names a symbol, or a
+   range `<from>...<to>', followed by a <Uxxxx> or <Uxxxxxxxx> value; the
+   rest of the line is ignored.  `END CHARIDS' terminates the list.
+
+   If FILENAME cannot be opened and contains no slash it is tried again
+   below REPERTOIREMAP_PATH.
+
+   Returns NULL if the file cannot be opened or the hash table cannot be
+   set up.  Otherwise the (possibly incomplete) table is returned; syntax
+   errors are reported through lr_error and the offending line is
+   skipped.  */
+static struct repertoire_t *
+parse_repertoiremap (const char *filename)
+{
+  struct linereader *cmfile;
+  struct repertoire_t *result;
+  int state;
+  char *from_name = NULL;
+  char *to_name = NULL;
+
+  /* Determine path.  */
+  cmfile = lr_open (filename, repertoiremap_hash);
+  if (cmfile == NULL)
+    {
+      if (strchr (filename, '/') == NULL)
+        {
+          /* Look in the system's repertoire map directory.  */
+          char *buf = xmalloc (strlen (filename) + 1
+                               + sizeof (REPERTOIREMAP_PATH));
+
+          stpcpy (stpcpy (stpcpy (buf, REPERTOIREMAP_PATH), "/"), filename);
+          cmfile = lr_open (buf, repertoiremap_hash);
+
+          /* NOTE(review): BUF is released only on failure.  Whether
+             lr_open keeps a reference on success is not visible from
+             this file.  */
+          if (cmfile == NULL)
+            free (buf);
+        }
+
+      if (cmfile == NULL)
+        return NULL;
+    }
+
+  /* Allocate room for result.  */
+  result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t));
+  memset (result, '\0', sizeof (struct repertoire_t));
+
+#define obstack_chunk_alloc malloc
+#define obstack_chunk_free free
+  obstack_init (&result->mem_pool);
+
+  if (init_hash (&result->char_table, 256))
+    {
+      /* Give back everything acquired so far: the obstack's initial
+         chunk, the open line reader and the result itself.  */
+      obstack_free (&result->mem_pool, NULL);
+      lr_close (cmfile);
+      free (result);
+      return NULL;
+    }
+
+  /* We use a state machine to describe the repertoire map file format:
+       1  prolog, no character definition seen yet
+       2  start of a definition line
+       3  symbolic name read, ellipsis or value expected
+       4  ellipsis read, end-of-range name expected
+       5  end-of-range name read, value expected
+      90  `END' read, `CHARIDS' expected.  */
+  state = 1;
+  while (1)
+    {
+      /* What's on?  */
+      struct token *now = lr_token (cmfile, NULL);
+      enum token_t nowtok = now->tok;
+      struct token *arg;
+
+      if (nowtok == tok_eof)
+        break;
+
+      switch (state)
+        {
+        case 1:
+          /* We haven't yet read any character definition.  This is where
+             we accept escape_char and comment_char definitions.  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_escape_char || nowtok == tok_comment_char)
+            {
+              /* We know that we need an argument.  */
+              arg = lr_token (cmfile, NULL);
+
+              if (arg->tok != tok_ident)
+                {
+                  lr_error (cmfile, _("syntax error in prolog: %s"),
+                            _("bad argument"));
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (arg->val.str.len != 1)
+                {
+                  lr_error (cmfile, _("\
+argument to <%s> must be a single character"),
+                            nowtok == tok_escape_char ? "escape_char"
+                                                      : "comment_char");
+
+                  lr_ignore_rest (cmfile, 0);
+                  continue;
+                }
+
+              if (nowtok == tok_escape_char)
+                cmfile->escape_char = *arg->val.str.start;
+              else
+                cmfile->comment_char = *arg->val.str.start;
+
+              lr_ignore_rest (cmfile, 1);
+              continue;
+            }
+
+          if (nowtok == tok_charids)
+            {
+              lr_ignore_rest (cmfile, 1);
+
+              state = 2;
+              continue;
+            }
+
+          /* Otherwise we start reading the character definitions.  */
+          state = 2;
+          /* FALLTHROUGH */
+
+        case 2:
+          /* We are now in the body.  Each line starts with a symbolic
+             name, optionally followed by an ellipsis and a second
+             symbolic name, and then the value.  */
+          if (nowtok == tok_eol)
+            /* Ignore empty lines.  */
+            continue;
+
+          if (nowtok == tok_end)
+            {
+              state = 90;
+              continue;
+            }
+
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile,
+                        _("syntax error in repertoire map definition: %s"),
+                        _("no symbolic name given"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* If the previous line was not completely correct free the
+             used memory.  */
+          if (from_name != NULL)
+            obstack_free (&result->mem_pool, from_name);
+
+          from_name = (char *) obstack_copy0 (&result->mem_pool,
+                                              now->val.str.start,
+                                              now->val.str.len);
+          to_name = NULL;
+
+          state = 3;
+          continue;
+
+        case 3:
+          /* We have two possibilities: We can see an ellipsis or an
+             encoding value.  */
+          if (nowtok == tok_ellipsis)
+            {
+              state = 4;
+              continue;
+            }
+          /* FALLTHROUGH */
+
+        case 5:
+          /* We expect a value of the form <Uxxxx> or <Uxxxxxxxx> where
+             the xxx mean a hexadecimal value.  Whatever happens, the
+             next token starts a new definition.  */
+          state = 2;
+
+          errno = 0;
+          if (nowtok != tok_ucs2 && nowtok != tok_ucs4)
+            {
+              lr_error (cmfile,
+                        _("syntax error in repertoire map definition: %s"),
+                        _("no <Uxxxx> or <Uxxxxxxxx> value given"));
+
+              lr_ignore_rest (cmfile, 0);
+              continue;
+            }
+
+          /* We've found a new valid definition.  */
+          charset_new_char (cmfile, &result->char_table, 4,
+                            now->val.charcode.val, from_name, to_name);
+
+          /* Ignore the rest of the line.  */
+          lr_ignore_rest (cmfile, 0);
+
+          /* The names now belong to the table; do not free them when
+             the next line is started.  */
+          from_name = NULL;
+          to_name = NULL;
+
+          continue;
+
+        case 4:
+          if (nowtok != tok_bsymbol)
+            {
+              lr_error (cmfile,
+                        _("syntax error in repertoire map definition: %s"),
+                        _("no symbolic name given for end of range"));
+
+              lr_ignore_rest (cmfile, 0);
+              state = 2;
+              continue;
+            }
+
+          /* Copy the to-name in a safe place.  */
+          to_name = (char *) obstack_copy0 (&result->mem_pool,
+                                            cmfile->token.val.str.start,
+                                            cmfile->token.val.str.len);
+
+          state = 5;
+          continue;
+
+        case 90:
+          if (nowtok != tok_charids)
+            lr_error (cmfile, _("\
+`%1$s' definition does not end with `END %1$s'"), "CHARIDS");
+
+          lr_ignore_rest (cmfile, nowtok == tok_charids);
+          break;
+        }
+
+      /* Only state 90 gets here: the list is finished.  */
+      break;
+    }
+
+  if (state != 2 && state != 90 && !be_quiet)
+    error (0, 0, _("%s: premature end of file"), cmfile->fname);
+
+  lr_close (cmfile);
+
+  return result;
+}
+
+
+/* Keyword lookup for repertoire map files; handed to lr_open as the
+   keyword hashing function.
+
+   Returns the table entry whose name equals the LEN bytes at STR, or
+   NULL if they do not spell one of the known keywords.  Only LEN bytes
+   of STR are examined, so STR need not be NUL terminated.  */
+static const struct keyword_t *
+repertoiremap_hash (const char *str, int len)
+{
+  /* The element count is taken from the initializer.  A declared size
+     of zero cannot hold the four entries that are looked up below.  */
+  static const struct keyword_t wordlist[] =
+  {
+    {"escape_char", tok_escape_char, 1},
+    {"comment_char", tok_comment_char, 1},
+    {"CHARIDS", tok_charids, 0},
+    {"END", tok_end, 0},
+  };
+  size_t cnt;
+
+  /* The table is tiny, a linear scan is sufficient.  A negative LEN
+     converts to a huge value and therefore never matches.  */
+  for (cnt = 0; cnt < sizeof (wordlist) / sizeof (wordlist[0]); ++cnt)
+    if (strlen (wordlist[cnt].name) == (size_t) len
+        && memcmp (wordlist[cnt].name, str, len) == 0)
+      return &wordlist[cnt];
+
+  return NULL;
+}
diff --git a/locale/programs/repertoire.h b/locale/programs/repertoire.h
new file mode 100644
index 0000000000..7befeb4e0d
--- /dev/null
+++ b/locale/programs/repertoire.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 1998 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+#ifndef _REPERTOIREMAP_H
+#define _REPERTOIREMAP_H 1
+
+#include <obstack.h>
+
+#include "simple-hash.h"
+#include "linereader.h"
+
+/* In-memory form of a repertoire map as returned by repertoire_read.  */
+struct repertoire_t
+{
+ /* Obstack the parser copies the symbolic character names into.  */
+ struct obstack mem_pool;
+ /* Hash table the parser enters every character definition into.  */
+ hash_table char_table;
+};
+
+
+/* Prototypes for repertoire map handling functions. */
+struct repertoire_t *repertoire_read (const char *filename);
+
+#endif /* repertoire.h */
diff --git a/locale/programs/stringtrans.c b/locale/programs/stringtrans.c
index 34b107e295..b810129678 100644
--- a/locale/programs/stringtrans.c
+++ b/locale/programs/stringtrans.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997 Free Software Foundation, Inc.
+/* Copyright (C) 1996, 1997, 1998 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
@@ -92,7 +92,8 @@ translate_string (char *str, struct charset_t *charset)
return NULL;
}
- value = charset_find_value (charset, str + 1, tp - (str + 1));
+ value = charset_find_value (&charset->char_table, str + 1,
+ tp - (str + 1));
if ((wchar_t) value == ILLEGAL_CHAR_VALUE)
{
free (buf);