diff options
Diffstat (limited to 'REORG.TODO/locale/programs')
38 files changed, 22677 insertions, 0 deletions
diff --git a/REORG.TODO/locale/programs/3level.h b/REORG.TODO/locale/programs/3level.h new file mode 100644 index 0000000000..15e192dc49 --- /dev/null +++ b/REORG.TODO/locale/programs/3level.h @@ -0,0 +1,328 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Bruno Haible <haible@clisp.cons.org>, 2000. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <stdint.h> + +/* Construction of sparse 3-level tables. + See wchar-lookup.h or coll-lookup.h for their structure and the + meaning of p and q. + + Before including this file, set + TABLE to the name of the structure to be defined + ELEMENT to the type of every entry + DEFAULT to the default value for empty entries + ITERATE if you want the TABLE_iterate function to be defined + NO_ADD_LOCALE if you don't want the add_locale_TABLE function + to be defined + + This will define + + struct TABLE; + void TABLE_init (struct TABLE *t); + ELEMENT TABLE_get (struct TABLE *t, uint32_t wc); + void TABLE_add (struct TABLE *t, uint32_t wc, ELEMENT value); + void TABLE_iterate (struct TABLE *t, + void (*fn) (uint32_t wc, ELEMENT value)); + void add_locale_TABLE (struct locale_file *file, struct TABLE *t); +*/ + +#define CONCAT(a,b) CONCAT1(a,b) +#define CONCAT1(a,b) a##b + +struct TABLE +{ + /* Parameters. */ + unsigned int p; + unsigned int q; + /* Working representation. 
*/ + size_t level1_alloc; + size_t level1_size; + uint32_t *level1; + size_t level2_alloc; + size_t level2_size; + uint32_t *level2; + size_t level3_alloc; + size_t level3_size; + ELEMENT *level3; + /* Size of compressed representation. */ + size_t result_size; +}; + +/* Initialize. Assumes t->p and t->q have already been set. */ +static inline void +CONCAT(TABLE,_init) (struct TABLE *t) +{ + t->level1 = NULL; + t->level1_alloc = t->level1_size = 0; + t->level2 = NULL; + t->level2_alloc = t->level2_size = 0; + t->level3 = NULL; + t->level3_alloc = t->level3_size = 0; +} + +/* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless + whether 'int' is 16 bit, 32 bit, or 64 bit. */ +#define EMPTY ((uint32_t) ~0) + +/* Retrieve an entry. */ +static inline ELEMENT +__attribute ((always_inline)) +CONCAT(TABLE,_get) (struct TABLE *t, uint32_t wc) +{ + uint32_t index1 = wc >> (t->q + t->p); + if (index1 < t->level1_size) + { + uint32_t lookup1 = t->level1[index1]; + if (lookup1 != EMPTY) + { + uint32_t index2 = ((wc >> t->p) & ((1 << t->q) - 1)) + + (lookup1 << t->q); + uint32_t lookup2 = t->level2[index2]; + if (lookup2 != EMPTY) + { + uint32_t index3 = (wc & ((1 << t->p) - 1)) + + (lookup2 << t->p); + ELEMENT lookup3 = t->level3[index3]; + + return lookup3; + } + } + } + return DEFAULT; +} + +/* Add one entry. 
*/ +static void +CONCAT(TABLE,_add) (struct TABLE *t, uint32_t wc, ELEMENT value) +{ + uint32_t index1 = wc >> (t->q + t->p); + uint32_t index2 = (wc >> t->p) & ((1 << t->q) - 1); + uint32_t index3 = wc & ((1 << t->p) - 1); + size_t i, i1, i2; + + if (value == CONCAT(TABLE,_get) (t, wc)) + return; + + if (index1 >= t->level1_size) + { + if (index1 >= t->level1_alloc) + { + size_t alloc = 2 * t->level1_alloc; + if (alloc <= index1) + alloc = index1 + 1; + t->level1 = (uint32_t *) xrealloc ((char *) t->level1, + alloc * sizeof (uint32_t)); + t->level1_alloc = alloc; + } + while (index1 >= t->level1_size) + t->level1[t->level1_size++] = EMPTY; + } + + if (t->level1[index1] == EMPTY) + { + if (t->level2_size == t->level2_alloc) + { + size_t alloc = 2 * t->level2_alloc + 1; + t->level2 = (uint32_t *) xrealloc ((char *) t->level2, + (alloc << t->q) * sizeof (uint32_t)); + t->level2_alloc = alloc; + } + i1 = t->level2_size << t->q; + i2 = (t->level2_size + 1) << t->q; + for (i = i1; i < i2; i++) + t->level2[i] = EMPTY; + t->level1[index1] = t->level2_size++; + } + + index2 += t->level1[index1] << t->q; + + if (t->level2[index2] == EMPTY) + { + if (t->level3_size == t->level3_alloc) + { + size_t alloc = 2 * t->level3_alloc + 1; + t->level3 = (ELEMENT *) xrealloc ((char *) t->level3, + (alloc << t->p) * sizeof (ELEMENT)); + t->level3_alloc = alloc; + } + i1 = t->level3_size << t->p; + i2 = (t->level3_size + 1) << t->p; + for (i = i1; i < i2; i++) + t->level3[i] = DEFAULT; + t->level2[index2] = t->level3_size++; + } + + index3 += t->level2[index2] << t->p; + + t->level3[index3] = value; +} + +#ifdef ITERATE +/* Apply a function to all entries in the table. 
*/ +static void +CONCAT(TABLE,_iterate) (struct TABLE *t, + void (*fn) (uint32_t wc, ELEMENT value)) +{ + uint32_t index1; + for (index1 = 0; index1 < t->level1_size; index1++) + { + uint32_t lookup1 = t->level1[index1]; + if (lookup1 != EMPTY) + { + uint32_t lookup1_shifted = lookup1 << t->q; + uint32_t index2; + for (index2 = 0; index2 < (1 << t->q); index2++) + { + uint32_t lookup2 = t->level2[index2 + lookup1_shifted]; + if (lookup2 != EMPTY) + { + uint32_t lookup2_shifted = lookup2 << t->p; + uint32_t index3; + for (index3 = 0; index3 < (1 << t->p); index3++) + { + ELEMENT lookup3 = t->level3[index3 + lookup2_shifted]; + if (lookup3 != DEFAULT) + fn ((((index1 << t->q) + index2) << t->p) + index3, + lookup3); + } + } + } + } + } +} +#endif + +#ifndef NO_ADD_LOCALE +/* Finalize and shrink. */ +static void +CONCAT(add_locale_,TABLE) (struct locale_file *file, struct TABLE *t) +{ + size_t i, j, k; + uint32_t reorder3[t->level3_size]; + uint32_t reorder2[t->level2_size]; + uint32_t level2_offset, level3_offset, last_offset; + + /* Uniquify level3 blocks. */ + k = 0; + for (j = 0; j < t->level3_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (ELEMENT)) == 0) + break; + /* Relocate block j to block i. */ + reorder3[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level3[i << t->p], &t->level3[j << t->p], + (1 << t->p) * sizeof (ELEMENT)); + k++; + } + } + t->level3_size = k; + + for (i = 0; i < (t->level2_size << t->q); i++) + if (t->level2[i] != EMPTY) + t->level2[i] = reorder3[t->level2[i]]; + + /* Uniquify level2 blocks. */ + k = 0; + for (j = 0; j < t->level2_size; j++) + { + for (i = 0; i < k; i++) + if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)) == 0) + break; + /* Relocate block j to block i. 
*/ + reorder2[j] = i; + if (i == k) + { + if (i != j) + memcpy (&t->level2[i << t->q], &t->level2[j << t->q], + (1 << t->q) * sizeof (uint32_t)); + k++; + } + } + t->level2_size = k; + + for (i = 0; i < t->level1_size; i++) + if (t->level1[i] != EMPTY) + t->level1[i] = reorder2[t->level1[i]]; + + /* Create and fill the resulting compressed representation. */ + last_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t) + + (t->level3_size << t->p) * sizeof (ELEMENT); + t->result_size = LOCFILE_ALIGN_UP (last_offset); + + level2_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t); + level3_offset = + 5 * sizeof (uint32_t) + + t->level1_size * sizeof (uint32_t) + + (t->level2_size << t->q) * sizeof (uint32_t); + + start_locale_structure (file); + add_locale_uint32 (file, t->q + t->p); + add_locale_uint32 (file, t->level1_size); + add_locale_uint32 (file, t->p); + add_locale_uint32 (file, (1 << t->q) - 1); + add_locale_uint32 (file, (1 << t->p) - 1); + + for (i = 0; i < t->level1_size; i++) + add_locale_uint32 + (file, + t->level1[i] == EMPTY + ? 0 + : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset); + + for (i = 0; i < (t->level2_size << t->q); i++) + add_locale_uint32 + (file, + t->level2[i] == EMPTY + ? 
0 + : (t->level2[i] << t->p) * sizeof (ELEMENT) + level3_offset); + + if (sizeof (ELEMENT) == 1) + add_locale_raw_data (file, t->level3, t->level3_size << t->p); + else if (sizeof (ELEMENT) == sizeof (uint32_t)) + add_locale_uint32_array (file, (uint32_t *) t->level3, + t->level3_size << t->p); + else + abort (); + align_locale_data (file, LOCFILE_ALIGN); + end_locale_structure (file); + + if (t->level1_alloc > 0) + free (t->level1); + if (t->level2_alloc > 0) + free (t->level2); + if (t->level3_alloc > 0) + free (t->level3); +} +#endif + +#undef EMPTY +#undef TABLE +#undef ELEMENT +#undef DEFAULT +#undef ITERATE +#undef NO_ADD_LOCALE diff --git a/REORG.TODO/locale/programs/charmap-dir.c b/REORG.TODO/locale/programs/charmap-dir.c new file mode 100644 index 0000000000..e55ab86e28 --- /dev/null +++ b/REORG.TODO/locale/programs/charmap-dir.c @@ -0,0 +1,309 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#include <dirent.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <libintl.h> +#include <spawn.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/stat.h> + +#include "localedef.h" +#include "charmap-dir.h" + +/* The data type of a charmap directory being traversed. 
*/ +struct charmap_dir +{ + DIR *dir; + /* The directory pathname, ending in a slash. */ + char *directory; + size_t directory_len; + /* Scratch area used for returning pathnames. */ + char *pathname; + size_t pathname_size; +}; + +/* Starts a charmap directory traversal. + Returns a CHARMAP_DIR, or NULL if the directory doesn't exist. */ +CHARMAP_DIR * +charmap_opendir (const char *directory) +{ + struct charmap_dir *cdir; + DIR *dir; + size_t len; + int add_slash; + + dir = opendir (directory); + if (dir == NULL) + { + WITH_CUR_LOCALE (error (1, errno, gettext ("\ +cannot read character map directory `%s'"), directory)); + return NULL; + } + + cdir = (struct charmap_dir *) xmalloc (sizeof (struct charmap_dir)); + cdir->dir = dir; + + len = strlen (directory); + add_slash = (len == 0 || directory[len - 1] != '/'); + cdir->directory = (char *) xmalloc (len + add_slash + 1); + memcpy (cdir->directory, directory, len); + if (add_slash) + cdir->directory[len] = '/'; + cdir->directory[len + add_slash] = '\0'; + cdir->directory_len = len + add_slash; + + cdir->pathname = NULL; + cdir->pathname_size = 0; + + return cdir; +} + +/* Reads the next directory entry. + Returns its charmap name, or NULL if past the last entry or upon error. + The storage returned may be overwritten by a later charmap_readdir + call on the same CHARMAP_DIR. 
*/ +const char * +charmap_readdir (CHARMAP_DIR *cdir) +{ + for (;;) + { + struct dirent64 *dirent; + size_t len; + size_t size; + char *filename; + mode_t mode; + + dirent = readdir64 (cdir->dir); + if (dirent == NULL) + return NULL; + if (strcmp (dirent->d_name, ".") == 0) + continue; + if (strcmp (dirent->d_name, "..") == 0) + continue; + + len = strlen (dirent->d_name); + + size = cdir->directory_len + len + 1; + if (size > cdir->pathname_size) + { + free (cdir->pathname); + if (size < 2 * cdir->pathname_size) + size = 2 * cdir->pathname_size; + cdir->pathname = (char *) xmalloc (size); + cdir->pathname_size = size; + } + + stpcpy (stpcpy (cdir->pathname, cdir->directory), dirent->d_name); + filename = cdir->pathname + cdir->directory_len; + +#ifdef _DIRENT_HAVE_D_TYPE + if (dirent->d_type != DT_UNKNOWN && dirent->d_type != DT_LNK) + mode = DTTOIF (dirent->d_type); + else +#endif + { + struct stat64 statbuf; + + if (stat64 (cdir->pathname, &statbuf) < 0) + continue; + + mode = statbuf.st_mode; + } + + if (!S_ISREG (mode)) + continue; + + /* For compressed charmaps, the canonical charmap name does not + include the extension. */ + if (len > 3 && memcmp (&filename[len - 3], ".gz", 3) == 0) + filename[len - 3] = '\0'; + else if (len > 4 && memcmp (&filename[len - 4], ".bz2", 4) == 0) + filename[len - 4] = '\0'; + + return filename; + } +} + +/* Finishes a charmap directory traversal, and frees the resources + attached to the CHARMAP_DIR. */ +int +charmap_closedir (CHARMAP_DIR *cdir) +{ + DIR *dir = cdir->dir; + + free (cdir->directory); + free (cdir->pathname); + free (cdir); + return closedir (dir); +} + +/* Creates a subprocess decompressing the given pathname, and returns + a stream reading its output (the decompressed data). 
*/ +static +FILE * +fopen_uncompressed (const char *pathname, const char *compressor) +{ + int pfd; + + pfd = open (pathname, O_RDONLY); + if (pfd >= 0) + { + struct stat64 statbuf; + int fd[2]; + + if (fstat64 (pfd, &statbuf) >= 0 + && S_ISREG (statbuf.st_mode) + && pipe (fd) >= 0) + { + char *argv[4] + = { (char *) compressor, (char *) "-d", (char *) "-c", NULL }; + posix_spawn_file_actions_t actions; + + if (posix_spawn_file_actions_init (&actions) == 0) + { + if (posix_spawn_file_actions_adddup2 (&actions, + fd[1], STDOUT_FILENO) == 0 + && posix_spawn_file_actions_addclose (&actions, fd[1]) == 0 + && posix_spawn_file_actions_addclose (&actions, fd[0]) == 0 + && posix_spawn_file_actions_adddup2 (&actions, + pfd, STDIN_FILENO) == 0 + && posix_spawn_file_actions_addclose (&actions, pfd) == 0 + && posix_spawnp (NULL, compressor, &actions, NULL, + argv, environ) == 0) + { + posix_spawn_file_actions_destroy (&actions); + close (fd[1]); + close (pfd); + return fdopen (fd[0], "r"); + } + posix_spawn_file_actions_destroy (&actions); + } + close (fd[1]); + close (fd[0]); + } + close (pfd); + } + return NULL; +} + +/* Opens a charmap for reading, given its name (not an alias name). */ +FILE * +charmap_open (const char *directory, const char *name) +{ + size_t dlen = strlen (directory); + int add_slash = (dlen == 0 || directory[dlen - 1] != '/'); + size_t nlen = strlen (name); + char *pathname; + char *p; + FILE *stream; + + pathname = alloca (dlen + add_slash + nlen + 5); + p = stpcpy (pathname, directory); + if (add_slash) + *p++ = '/'; + p = stpcpy (p, name); + + stream = fopen (pathname, "rm"); + if (stream != NULL) + return stream; + + memcpy (p, ".gz", 4); + stream = fopen_uncompressed (pathname, "gzip"); + if (stream != NULL) + return stream; + + memcpy (p, ".bz2", 5); + stream = fopen_uncompressed (pathname, "bzip2"); + if (stream != NULL) + return stream; + + return NULL; +} + +/* An empty alias list. Avoids the need to return NULL from + charmap_aliases. 
*/ +static char *empty[1]; + +/* Returns a NULL terminated list of alias names of a charmap. */ +char ** +charmap_aliases (const char *directory, const char *name) +{ + FILE *stream; + char **aliases; + size_t naliases; + + stream = charmap_open (directory, name); + if (stream == NULL) + return empty; + + aliases = NULL; + naliases = 0; + + while (!feof (stream)) + { + char *alias = NULL; + char junk[BUFSIZ]; + + if (fscanf (stream, " <code_set_name> %ms", &alias) == 1 + || fscanf (stream, "%% alias %ms", &alias) == 1) + { + aliases = (char **) xrealloc (aliases, + (naliases + 2) * sizeof (char *)); + aliases[naliases++] = alias; + } + + /* Read the rest of the line. */ + if (fgets (junk, sizeof junk, stream) != NULL) + { + if (strstr (junk, "CHARMAP") != NULL) + /* We cannot expect more aliases from now on. */ + break; + + while (strchr (junk, '\n') == NULL + && fgets (junk, sizeof junk, stream) != NULL) + continue; + } + } + + fclose (stream); + + if (naliases == 0) + return empty; + + aliases[naliases] = NULL; + return aliases; +} + +/* Frees an alias list returned by charmap_aliases. */ +void +charmap_free_aliases (char **aliases) +{ + if (aliases != empty) + { + char **p; + + for (p = aliases; *p; p++) + free (*p); + + free (aliases); + } +} diff --git a/REORG.TODO/locale/programs/charmap-dir.h b/REORG.TODO/locale/programs/charmap-dir.h new file mode 100644 index 0000000000..c27d7fe614 --- /dev/null +++ b/REORG.TODO/locale/programs/charmap-dir.h @@ -0,0 +1,46 @@ +/* Copyright (C) 2000-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _CHARMAP_DIR_H +#define _CHARMAP_DIR_H 1 + +/* The data type of a charmap directory being traversed. */ +typedef struct charmap_dir CHARMAP_DIR; + +/* Starts a charmap directory traversal. + Returns a CHARMAP_DIR, or NULL if the directory doesn't exist. */ +extern CHARMAP_DIR *charmap_opendir (const char *directory); + +/* Reads the next directory entry. + Returns its charmap name, or NULL if past the last entry or upon error. + The storage returned may be overwritten by a later charmap_readdir + call on the same CHARMAP_DIR. */ +extern const char *charmap_readdir (CHARMAP_DIR *dir); + +/* Finishes a charmap directory traversal, and frees the resources + attached to the CHARMAP_DIR. */ +extern int charmap_closedir (CHARMAP_DIR *dir); + +/* Returns a NULL terminated list of alias names of a charmap. */ +extern char **charmap_aliases (const char *directory, const char *name); + +/* Frees an alias list returned by charmap_aliases. */ +extern void charmap_free_aliases (char **aliases); + +/* Opens a charmap for reading, given its name (not an alias name). */ +extern FILE *charmap_open (const char *directory, const char *name); + +#endif /* _CHARMAP_DIR_H */ diff --git a/REORG.TODO/locale/programs/charmap-kw.gperf b/REORG.TODO/locale/programs/charmap-kw.gperf new file mode 100644 index 0000000000..0ebdfeb26e --- /dev/null +++ b/REORG.TODO/locale/programs/charmap-kw.gperf @@ -0,0 +1,42 @@ +%{ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.org>. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#include "locfile-token.h" +%} +struct keyword_t ; +%% +code_set_name, tok_code_set_name, 1 +mb_cur_max, tok_mb_cur_max, 1 +mb_cur_min, tok_mb_cur_min, 1 +escape_char, tok_escape_char, 1 +comment_char, tok_comment_char, 1 +g0esc, tok_g0esc, 1 +g1esc, tok_g1esc, 1 +g2esc, tok_g2esc, 1 +g3esc, tok_g3esc, 1 +escseq, tok_escseq, 1 +addset, tok_addset, 1 +include, tok_include, 1 +CHARMAP, tok_charmap, 0 +END, tok_end, 0 +WIDTH, tok_width, 0 +WIDTH_VARIABLE, tok_width_variable, 0 +WIDTH_DEFAULT, tok_width_default, 0 diff --git a/REORG.TODO/locale/programs/charmap-kw.h b/REORG.TODO/locale/programs/charmap-kw.h new file mode 100644 index 0000000000..9e2969c4a1 --- /dev/null +++ b/REORG.TODO/locale/programs/charmap-kw.h @@ -0,0 +1,195 @@ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -acCgopt -k'1,2,5,9,$' -L ANSI-C -N charmap_hash charmap-kw.gperf */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' 
== 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. */ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "charmap-kw.gperf" + +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.org>. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#include "locfile-token.h" +#line 24 "charmap-kw.gperf" +struct keyword_t ; + +#define TOTAL_KEYWORDS 17 +#define MIN_WORD_LENGTH 3 +#define MAX_WORD_LENGTH 14 +#define MIN_HASH_VALUE 3 +#define MAX_HASH_VALUE 35 +/* maximum key range = 33, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register unsigned int len) +{ + static const unsigned char asso_values[] = + { + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 25, 20, + 15, 10, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 5, 0, 0, + 5, 36, 0, 0, 36, 36, 36, 5, 0, 36, + 0, 36, 0, 36, 0, 36, 36, 0, 36, 36, + 36, 36, 36, 36, 36, 0, 36, 5, 0, 0, + 5, 0, 36, 5, 0, 0, 36, 36, 36, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 36, 36, + 0, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36, 36, 36, 36, 36, + 36, 36, 36, 36, 36, 36 + }; + register int hval = len; + + switch (hval) + { + default: + 
hval += asso_values[(unsigned char)str[8]]; + /*FALLTHROUGH*/ + case 8: + case 7: + case 6: + case 5: + hval += asso_values[(unsigned char)str[4]]; + /*FALLTHROUGH*/ + case 4: + case 3: + case 2: + hval += asso_values[(unsigned char)str[1]]; + /*FALLTHROUGH*/ + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct keyword_t * +charmap_hash (register const char *str, register unsigned int len) +{ + static const struct keyword_t wordlist[] = + { + {""}, {""}, {""}, +#line 39 "charmap-kw.gperf" + {"END", tok_end, 0}, + {""}, +#line 40 "charmap-kw.gperf" + {"WIDTH", tok_width, 0}, +#line 35 "charmap-kw.gperf" + {"escseq", tok_escseq, 1}, +#line 37 "charmap-kw.gperf" + {"include", tok_include, 1}, + {""}, {""}, +#line 28 "charmap-kw.gperf" + {"mb_cur_min", tok_mb_cur_min, 1}, +#line 29 "charmap-kw.gperf" + {"escape_char", tok_escape_char, 1}, +#line 30 "charmap-kw.gperf" + {"comment_char", tok_comment_char, 1}, +#line 26 "charmap-kw.gperf" + {"code_set_name", tok_code_set_name, 1}, +#line 41 "charmap-kw.gperf" + {"WIDTH_VARIABLE", tok_width_variable, 0}, +#line 27 "charmap-kw.gperf" + {"mb_cur_max", tok_mb_cur_max, 1}, +#line 36 "charmap-kw.gperf" + {"addset", tok_addset, 1}, +#line 38 "charmap-kw.gperf" + {"CHARMAP", tok_charmap, 0}, +#line 42 "charmap-kw.gperf" + {"WIDTH_DEFAULT", tok_width_default, 0}, + {""}, +#line 34 "charmap-kw.gperf" + {"g3esc", tok_g3esc, 1}, + {""}, {""}, {""}, {""}, +#line 33 "charmap-kw.gperf" + {"g2esc", tok_g2esc, 1}, + {""}, {""}, {""}, {""}, +#line 32 "charmap-kw.gperf" + {"g1esc", tok_g1esc, 1}, + {""}, {""}, {""}, {""}, +#line 31 "charmap-kw.gperf" + {"g0esc", tok_g0esc, 1} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE 
&& key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') + return &wordlist[key]; + } + } + return 0; +} diff --git a/REORG.TODO/locale/programs/charmap.c b/REORG.TODO/locale/programs/charmap.c new file mode 100644 index 0000000000..129aefffc1 --- /dev/null +++ b/REORG.TODO/locale/programs/charmap.c @@ -0,0 +1,1104 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <ctype.h> +#include <errno.h> +#include <libintl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <error.h> +#include <stdint.h> + +#include "localedef.h" +#include "linereader.h" +#include "charmap.h" +#include "charmap-dir.h" + +#include <assert.h> + + +/* Define the lookup function. */ +#include "charmap-kw.h" + + +/* Prototypes for local functions. 
*/ +static struct charmap_t *parse_charmap (struct linereader *cmfile, + int verbose, int be_quiet); +static void new_width (struct linereader *cmfile, struct charmap_t *result, + const char *from, const char *to, + unsigned long int width); +static void charmap_new_char (struct linereader *lr, struct charmap_t *cm, + size_t nbytes, unsigned char *bytes, + const char *from, const char *to, + int decimal_ellipsis, int step); + + +bool enc_not_ascii_compatible; + + +#ifdef NEED_NULL_POINTER +static const char *null_pointer; +#endif + +static struct linereader * +cmlr_open (const char *directory, const char *name, kw_hash_fct_t hf) +{ + FILE *fp; + + fp = charmap_open (directory, name); + if (fp == NULL) + return NULL; + else + { + size_t dlen = strlen (directory); + int add_slash = (dlen == 0 || directory[dlen - 1] != '/'); + size_t nlen = strlen (name); + char *pathname; + char *p; + + pathname = alloca (dlen + add_slash + nlen + 1); + p = stpcpy (pathname, directory); + if (add_slash) + *p++ = '/'; + stpcpy (p, name); + + return lr_create (fp, pathname, hf); + } +} + +struct charmap_t * +charmap_read (const char *filename, int verbose, int error_not_found, + int be_quiet, int use_default) +{ + struct charmap_t *result = NULL; + + if (filename != NULL) + { + struct linereader *cmfile; + + /* First try the name as found in the parameter. */ + cmfile = lr_open (filename, charmap_hash); + if (cmfile == NULL) + { + /* No successful. So start looking through the directories + in the I18NPATH if this is a simple name. 
*/ + if (strchr (filename, '/') == NULL) + { + char *i18npath = getenv ("I18NPATH"); + if (i18npath != NULL && *i18npath != '\0') + { + const size_t pathlen = strlen (i18npath); + char i18npathbuf[pathlen + 1]; + char path[pathlen + sizeof ("/charmaps")]; + char *next; + i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1); + + while (cmfile == NULL + && (next = strsep (&i18npath, ":")) != NULL) + { + stpcpy (stpcpy (path, next), "/charmaps"); + cmfile = cmlr_open (path, filename, charmap_hash); + + if (cmfile == NULL) + /* Try without the "/charmaps" part. */ + cmfile = cmlr_open (next, filename, charmap_hash); + } + } + + if (cmfile == NULL) + /* Try the default directory. */ + cmfile = cmlr_open (CHARMAP_PATH, filename, charmap_hash); + } + } + + if (cmfile != NULL) + result = parse_charmap (cmfile, verbose, be_quiet); + + if (result == NULL && error_not_found) + WITH_CUR_LOCALE (error (0, errno, _("\ +character map file `%s' not found"), filename)); + } + + if (result == NULL && filename != NULL && strchr (filename, '/') == NULL) + { + /* OK, one more try. We also accept the names given to the + character sets in the files. Sometimes they differ from the + file name. 
*/ + CHARMAP_DIR *dir; + + dir = charmap_opendir (CHARMAP_PATH); + if (dir != NULL) + { + const char *dirent; + + while ((dirent = charmap_readdir (dir)) != NULL) + { + char **aliases; + char **p; + int found; + + aliases = charmap_aliases (CHARMAP_PATH, dirent); + found = 0; + for (p = aliases; *p; p++) + if (strcasecmp (*p, filename) == 0) + { + found = 1; + break; + } + charmap_free_aliases (aliases); + + if (found) + { + struct linereader *cmfile; + + cmfile = cmlr_open (CHARMAP_PATH, dirent, charmap_hash); + if (cmfile != NULL) + result = parse_charmap (cmfile, verbose, be_quiet); + + break; + } + } + + charmap_closedir (dir); + } + } + + if (result == NULL && DEFAULT_CHARMAP != NULL) + { + struct linereader *cmfile; + + cmfile = cmlr_open (CHARMAP_PATH, DEFAULT_CHARMAP, charmap_hash); + if (cmfile != NULL) + result = parse_charmap (cmfile, verbose, be_quiet); + + if (result == NULL) + WITH_CUR_LOCALE (error (4, errno, _("\ +default character map file `%s' not found"), DEFAULT_CHARMAP)); + } + + if (result != NULL && result->code_set_name == NULL) + /* The input file does not specify a code set name. This + shouldn't happen but we should cope with it. */ + result->code_set_name = basename (filename); + + /* Test of ASCII compatibility of locale encoding. + + Verify that the encoding to be used in a locale is ASCII compatible, + at least for the graphic characters, excluding the control characters, + '$' and '@'. This constraint comes from an ISO C 99 restriction. + + ISO C 99 section 7.17.(2) (about wchar_t): + the null character shall have the code value zero and each member of + the basic character set shall have a code value equal to its value + when used as the lone character in an integer character constant. 
+ ISO C 99 section 5.2.1.(3): + Both the basic source and basic execution character sets shall have + the following members: the 26 uppercase letters of the Latin alphabet + A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + the 26 lowercase letters of the Latin alphabet + a b c d e f g h i j k l m n o p q r s t u v w x y z + the 10 decimal digits + 0 1 2 3 4 5 6 7 8 9 + the following 29 graphic characters + ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~ + the space character, and control characters representing horizontal + tab, vertical tab, and form feed. + + Therefore, for all members of the "basic character set", the 'char' code + must have the same value as the 'wchar_t' code, which in glibc is the + same as the Unicode code, which for all of the enumerated characters + is identical to the ASCII code. */ + if (result != NULL && use_default) + { + static const char basic_charset[] = + { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + '!', '"', '#', '%', '&', '\'', '(', ')', '*', '+', ',', '-', + '.', '/', ':', ';', '<', '=', '>', '?', '[', '\\', ']', '^', + '_', '{', '|', '}', '~', ' ', '\t', '\v', '\f', '\0' + }; + int failed = 0; + const char *p = basic_charset; + + do + { + struct charseq *seq = charmap_find_symbol (result, p, 1); + + if (seq == NULL || seq->ucs4 != (uint32_t) *p) + failed = 1; + } + while (*p++ != '\0'); + + if (failed) + { + WITH_CUR_LOCALE (fprintf (stderr, _("\ +character map `%s' is not ASCII compatible, locale not ISO C compliant\n"), + result->code_set_name)); + enc_not_ascii_compatible = true; + } + } + + return result; +} + + +static struct charmap_t * +parse_charmap (struct linereader *cmfile, int verbose, int be_quiet) +{ + struct charmap_t 
*result; + int state; + enum token_t expected_tok = tok_error; + const char *expected_str = NULL; + char *from_name = NULL; + char *to_name = NULL; + enum token_t ellipsis = 0; + int step = 1; + + /* We don't want symbolic names in string to be translated. */ + cmfile->translate_strings = 0; + + /* Allocate room for result. */ + result = (struct charmap_t *) xmalloc (sizeof (struct charmap_t)); + memset (result, '\0', sizeof (struct charmap_t)); + /* The default DEFAULT_WIDTH is 1. */ + result->width_default = 1; + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + obstack_init (&result->mem_pool); + + if (init_hash (&result->char_table, 256) + || init_hash (&result->byte_table, 256)) + { + free (result); + return NULL; + } + + /* We use a state machine to describe the charmap description file + format. */ + state = 1; + while (1) + { + /* What's on? */ + struct token *now = lr_token (cmfile, NULL, NULL, NULL, verbose); + enum token_t nowtok = now->tok; + struct token *arg; + + if (nowtok == tok_eof) + break; + + switch (state) + { + case 1: + /* The beginning. We expect the special declarations, EOL or + `CHARMAP'. */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_charmap) + { + from_name = NULL; + to_name = NULL; + + /* We have to set up the real work. Fill in some + default values. 
*/ + if (result->mb_cur_max == 0) + result->mb_cur_max = 1; + if (result->mb_cur_min == 0) + result->mb_cur_min = result->mb_cur_max; + if (result->mb_cur_min > result->mb_cur_max) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: <mb_cur_max> must be greater than <mb_cur_min>\n"), + cmfile->fname)); + + result->mb_cur_min = result->mb_cur_max; + } + + lr_ignore_rest (cmfile, 1); + + state = 2; + continue; + } + + if (nowtok != tok_code_set_name && nowtok != tok_mb_cur_max + && nowtok != tok_mb_cur_min && nowtok != tok_escape_char + && nowtok != tok_comment_char && nowtok != tok_g0esc + && nowtok != tok_g1esc && nowtok != tok_g2esc + && nowtok != tok_g3esc && nowtok != tok_repertoiremap + && nowtok != tok_include) + { + lr_error (cmfile, _("syntax error in prolog: %s"), + _("invalid definition")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* We know that we need an argument. */ + arg = lr_token (cmfile, NULL, NULL, NULL, verbose); + + switch (nowtok) + { + case tok_code_set_name: + case tok_repertoiremap: + if (arg->tok != tok_ident && arg->tok != tok_string) + { + badarg: + lr_error (cmfile, _("syntax error in prolog: %s"), + _("bad argument")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_code_set_name) + result->code_set_name = obstack_copy0 (&result->mem_pool, + arg->val.str.startmb, + arg->val.str.lenmb); + else + result->repertoiremap = obstack_copy0 (&result->mem_pool, + arg->val.str.startmb, + arg->val.str.lenmb); + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_mb_cur_max: + case tok_mb_cur_min: + if (arg->tok != tok_number) + goto badarg; + + if (verbose + && ((nowtok == tok_mb_cur_max + && result->mb_cur_max != 0) + || (nowtok == tok_mb_cur_max + && result->mb_cur_max != 0))) + lr_error (cmfile, _("duplicate definition of <%s>"), + nowtok == tok_mb_cur_min + ? 
"mb_cur_min" : "mb_cur_max"); + + if (arg->val.num < 1) + { + lr_error (cmfile, + _("value for <%s> must be 1 or greater"), + nowtok == tok_mb_cur_min + ? "mb_cur_min" : "mb_cur_max"); + + lr_ignore_rest (cmfile, 0); + continue; + } + if ((nowtok == tok_mb_cur_max && result->mb_cur_min != 0 + && (int) arg->val.num < result->mb_cur_min) + || (nowtok == tok_mb_cur_min && result->mb_cur_max != 0 + && (int) arg->val.num > result->mb_cur_max)) + { + lr_error (cmfile, _("\ +value of <%s> must be greater or equal than the value of <%s>"), + "mb_cur_max", "mb_cur_min"); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_mb_cur_max) + result->mb_cur_max = arg->val.num; + else + result->mb_cur_min = arg->val.num; + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_escape_char: + case tok_comment_char: + if (arg->tok != tok_ident) + goto badarg; + + if (arg->val.str.lenmb != 1) + { + lr_error (cmfile, _("\ +argument to <%s> must be a single character"), + nowtok == tok_escape_char ? "escape_char" + : "comment_char"); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_escape_char) + cmfile->escape_char = *arg->val.str.startmb; + else + cmfile->comment_char = *arg->val.str.startmb; + + lr_ignore_rest (cmfile, 1); + continue; + + case tok_g0esc: + case tok_g1esc: + case tok_g2esc: + case tok_g3esc: + case tok_escseq: + lr_ignore_rest (cmfile, 0); /* XXX */ + continue; + + case tok_include: + lr_error (cmfile, _("\ +character sets with locking states are not supported")); + exit (4); + + default: + /* Cannot happen. */ + assert (! "Should not happen"); + } + break; + + case 2: + /* We have seen `CHARMAP' and now are in the body. Each line + must have the format "%s %s %s\n" or "%s...%s %s %s\n". */ + if (nowtok == tok_eol) + /* Ignore empty lines. 
*/ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_charmap; + expected_str = "CHARMAP"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* If the previous line was not completely correct free the + used memory. */ + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + if (nowtok == tok_bsymbol) + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + from_name = (char *) obstack_finish (&result->mem_pool); + } + to_name = NULL; + + state = 3; + continue; + + case 3: + /* We have two possibilities: We can see an ellipsis or an + encoding value. */ + if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 + || nowtok == tok_ellipsis2 || nowtok == tok_ellipsis4_2 + || nowtok == tok_ellipsis2_2) + { + ellipsis = nowtok; + if (nowtok == tok_ellipsis4_2) + { + step = 2; + nowtok = tok_ellipsis4; + } + else if (nowtok == tok_ellipsis2_2) + { + step = 2; + nowtok = tok_ellipsis2; + } + state = 4; + continue; + } + /* FALLTHROUGH */ + + case 5: + if (nowtok != tok_charcode) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", _("invalid encoding given")); + + lr_ignore_rest (cmfile, 0); + + state = 2; + continue; + } + + if (now->val.charcode.nbytes < result->mb_cur_min) + lr_error (cmfile, _("too few bytes in character encoding")); + else if (now->val.charcode.nbytes > result->mb_cur_max) + lr_error (cmfile, _("too many bytes in character encoding")); + else + charmap_new_char (cmfile, result, now->val.charcode.nbytes, + now->val.charcode.bytes, from_name, to_name, + ellipsis != tok_ellipsis2, step); + + /* Ignore trailing comment silently. 
*/ + lr_ignore_rest (cmfile, 0); + + from_name = NULL; + to_name = NULL; + ellipsis = tok_none; + step = 1; + + state = 2; + continue; + + case 4: + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "CHARMAP", + _("no symbolic name given for end of range")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + /* Copy the to-name in a safe place. */ + if (nowtok == tok_bsymbol) + to_name = (char *) obstack_copy0 (&result->mem_pool, + cmfile->token.val.str.startmb, + cmfile->token.val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + to_name = (char *) obstack_finish (&result->mem_pool); + } + + state = 5; + continue; + + case 90: + if (nowtok != expected_tok) + lr_error (cmfile, _("\ +%1$s: definition does not end with `END %1$s'"), expected_str); + + lr_ignore_rest (cmfile, nowtok == expected_tok); + state = 91; + continue; + + case 91: + /* Waiting for WIDTH... */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_width_default) + { + state = 92; + continue; + } + + if (nowtok == tok_width) + { + lr_ignore_rest (cmfile, 1); + state = 93; + continue; + } + + if (nowtok == tok_width_variable) + { + lr_ignore_rest (cmfile, 1); + state = 98; + continue; + } + + lr_error (cmfile, _("\ +only WIDTH definitions are allowed to follow the CHARMAP definition")); + + lr_ignore_rest (cmfile, 0); + continue; + + case 92: + if (nowtok != tok_number) + lr_error (cmfile, _("value for %s must be an integer"), + "WIDTH_DEFAULT"); + else + result->width_default = now->val.num; + + lr_ignore_rest (cmfile, nowtok == tok_number); + + state = 91; + continue; + + case 93: + /* We now expect `END WIDTH' or lines of the format "%s %d\n" or + "%s...%s %d\n". */ + if (nowtok == tok_eol) + /* ignore empty lines. 
*/ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_width; + expected_str = "WIDTH"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + continue; + } + + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + if (nowtok == tok_bsymbol) + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + from_name = (char *) obstack_finish (&result->mem_pool); + } + + to_name = NULL; + + state = 94; + continue; + + case 94: + if (nowtok == tok_ellipsis3) + { + state = 95; + continue; + } + + case 96: + if (nowtok != tok_number) + lr_error (cmfile, _("value for %s must be an integer"), + "WIDTH"); + else + { + /* Store width for chars. */ + new_width (cmfile, result, from_name, to_name, now->val.num); + + from_name = NULL; + to_name = NULL; + } + + lr_ignore_rest (cmfile, nowtok == tok_number); + + state = 93; + continue; + + case 95: + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH", _("no symbolic name given for end of range")); + + lr_ignore_rest (cmfile, 0); + + state = 93; + continue; + } + + if (nowtok == tok_bsymbol) + to_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + to_name = (char *) obstack_finish (&result->mem_pool); + } + + state = 96; + continue; + + case 98: + /* We now expect `END WIDTH_VARIABLE' or lines of the format + "%s\n" or "%s...%s\n". */ + if (nowtok == tok_eol) + /* ignore empty lines. 
*/ + continue; + + if (nowtok == tok_end) + { + expected_tok = tok_width_variable; + expected_str = "WIDTH_VARIABLE"; + state = 90; + continue; + } + + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH_VARIABLE", _("no symbolic name given")); + + lr_ignore_rest (cmfile, 0); + + continue; + } + + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + if (nowtok == tok_bsymbol) + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + from_name = (char *) obstack_finish (&result->mem_pool); + } + to_name = NULL; + + state = 99; + continue; + + case 99: + if (nowtok == tok_ellipsis3) + state = 100; + + /* Store info. */ + from_name = NULL; + + /* Warn */ + state = 98; + continue; + + case 100: + if (nowtok != tok_bsymbol && nowtok != tok_ucs4) + { + lr_error (cmfile, _("syntax error in %s definition: %s"), + "WIDTH_VARIABLE", + _("no symbolic name given for end of range")); + lr_ignore_rest (cmfile, 0); + continue; + } + + if (nowtok == tok_bsymbol) + to_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + else + { + obstack_printf (&result->mem_pool, "U%08X", + cmfile->token.val.ucs4); + obstack_1grow (&result->mem_pool, '\0'); + to_name = (char *) obstack_finish (&result->mem_pool); + } + + /* XXX Enter value into table. 
*/ + + lr_ignore_rest (cmfile, 1); + + state = 98; + continue; + + default: + WITH_CUR_LOCALE (error (5, 0, _("%s: error in state machine"), + __FILE__)); + /* NOTREACHED */ + } + break; + } + + if (state != 91 && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"), + cmfile->fname)); + + lr_close (cmfile); + + return result; +} + + +static void +new_width (struct linereader *cmfile, struct charmap_t *result, + const char *from, const char *to, unsigned long int width) +{ + struct charseq *from_val; + struct charseq *to_val; + + from_val = charmap_find_value (result, from, strlen (from)); + if (from_val == NULL) + { + lr_error (cmfile, _("unknown character `%s'"), from); + return; + } + + if (to == NULL) + to_val = from_val; + else + { + to_val = charmap_find_value (result, to, strlen (to)); + if (to_val == NULL) + { + lr_error (cmfile, _("unknown character `%s'"), to); + return; + } + + /* Make sure the number of bytes for the end points of the range + is correct. 
*/ + if (from_val->nbytes != to_val->nbytes) + { + lr_error (cmfile, _("\ +number of bytes for byte sequence of beginning and end of range not the same: %d vs %d"), + from_val->nbytes, to_val->nbytes); + return; + } + } + + if (result->nwidth_rules >= result->nwidth_rules_max) + { + size_t new_size = result->nwidth_rules + 32; + struct width_rule *new_rules = + (struct width_rule *) obstack_alloc (&result->mem_pool, + (new_size + * sizeof (struct width_rule))); + + memcpy (new_rules, result->width_rules, + result->nwidth_rules_max * sizeof (struct width_rule)); + + result->width_rules = new_rules; + result->nwidth_rules_max = new_size; + } + + result->width_rules[result->nwidth_rules].from = from_val; + result->width_rules[result->nwidth_rules].to = to_val; + result->width_rules[result->nwidth_rules].width = (unsigned int) width; + ++result->nwidth_rules; +} + + +struct charseq * +charmap_find_value (const struct charmap_t *cm, const char *name, size_t len) +{ + void *result; + + return (find_entry ((hash_table *) &cm->char_table, name, len, &result) + < 0 ? NULL : (struct charseq *) result); +} + + +static void +charmap_new_char (struct linereader *lr, struct charmap_t *cm, + size_t nbytes, unsigned char *bytes, + const char *from, const char *to, + int decimal_ellipsis, int step) +{ + hash_table *ht = &cm->char_table; + hash_table *bt = &cm->byte_table; + struct obstack *ob = &cm->mem_pool; + char *from_end; + char *to_end; + const char *cp; + int prefix_len, len1, len2; + unsigned int from_nr, to_nr, cnt; + struct charseq *newp; + + len1 = strlen (from); + + if (to == NULL) + { + newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); + newp->nbytes = nbytes; + memcpy (newp->bytes, bytes, nbytes); + newp->name = from; + + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + if ((from[0] == 'U' || from[0] == 'P') && (len1 == 5 || len1 == 9)) + { + /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where + xxxx and xxxxxxxx are hexadecimal numbers. 
In this case + we use the value of xxxx or xxxxxxxx as the UCS4 value of + this character and we don't have to consult the repertoire + map. + + If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx + and xxxxxxxx also give the code point in UCS4 but this must + be in the private, i.e., unassigned, area. This should be + used for characters which do not (yet) have an equivalent + in ISO 10646 and Unicode. */ + char *endp; + + errno = 0; + newp->ucs4 = strtoul (from + 1, &endp, 16); + if (endp - from != len1 + || (newp->ucs4 == ~((uint32_t) 0) && errno == ERANGE) + || newp->ucs4 >= 0x80000000) + /* This wasn't successful. Signal this name cannot be a + correct UCS value. */ + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + } + + insert_entry (ht, from, len1, newp); + insert_entry (bt, newp->bytes, nbytes, newp); + /* Please note that it isn't a bug if a symbol is defined more + than once. All later definitions are simply discarded. */ + return; + } + + /* We have a range: the names must have names with equal prefixes + and an equal number of digits, where the second number is greater + or equal than the first. */ + len2 = strlen (to); + + if (len1 != len2) + { + illegal_range: + lr_error (lr, _("invalid names for character range")); + return; + } + + cp = &from[len1 - 1]; + if (decimal_ellipsis) + while (isdigit (*cp) && cp >= from) + --cp; + else + while (isxdigit (*cp) && cp >= from) + { + if (!isdigit (*cp) && !isupper (*cp)) + lr_error (lr, _("\ +hexadecimal range format should use only capital characters")); + --cp; + } + + prefix_len = (cp - from) + 1; + + if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) + goto illegal_range; + + errno = 0; + from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16); + if (*from_end != '\0' || (from_nr == UINT_MAX && errno == ERANGE) + || ((to_nr = strtoul (&to[prefix_len], &to_end, + decimal_ellipsis ? 
10 : 16)) == UINT_MAX + && errno == ERANGE) + || *to_end != '\0') + { + lr_error (lr, _("<%s> and <%s> are invalid names for range"), from, to); + return; + } + + if (from_nr > to_nr) + { + lr_error (lr, _("upper limit in range is smaller than lower limit")); + return; + } + + for (cnt = from_nr; cnt <= to_nr; cnt += step) + { + char *name_end; + obstack_printf (ob, decimal_ellipsis ? "%.*s%0*d" : "%.*s%0*X", + prefix_len, from, len1 - prefix_len, cnt); + obstack_1grow (ob, '\0'); + name_end = obstack_finish (ob); + + newp = (struct charseq *) obstack_alloc (ob, sizeof (*newp) + nbytes); + newp->nbytes = nbytes; + memcpy (newp->bytes, bytes, nbytes); + newp->name = name_end; + + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + if ((name_end[0] == 'U' || name_end[0] == 'P') + && (len1 == 5 || len1 == 9)) + { + /* Maybe the name is of the form `Uxxxx' or `Uxxxxxxxx' where + xxxx and xxxxxxxx are hexadecimal numbers. In this case + we use the value of xxxx or xxxxxxxx as the UCS4 value of + this character and we don't have to consult the repertoire + map. + + If the name is of the form `Pxxxx' or `Pxxxxxxxx' the xxxx + and xxxxxxxx also give the code point in UCS4 but this must + be in the private, i.e., unassigned, area. This should be + used for characters which do not (yet) have an equivalent + in ISO 10646 and Unicode. */ + char *endp; + + errno = 0; + newp->ucs4 = strtoul (name_end + 1, &endp, 16); + if (endp - name_end != len1 + || (newp->ucs4 == ~((uint32_t) 0) && errno == ERANGE) + || newp->ucs4 >= 0x80000000) + /* This wasn't successful. Signal this name cannot be a + correct UCS value. */ + newp->ucs4 = UNINITIALIZED_CHAR_VALUE; + } + + insert_entry (ht, name_end, len1, newp); + insert_entry (bt, newp->bytes, nbytes, newp); + /* Please note we don't examine the return value since it is no error + if we have two definitions for a symbol. */ + + /* Increment the value in the byte sequence. 
*/ + if (++bytes[nbytes - 1] == '\0') + { + int b = nbytes - 2; + + do + if (b < 0) + { + lr_error (lr, + _("resulting bytes for range not representable.")); + return; + } + while (++bytes[b--] == 0); + } + } +} + + +struct charseq * +charmap_find_symbol (const struct charmap_t *cm, const char *bytes, + size_t nbytes) +{ + void *result; + + return (find_entry ((hash_table *) &cm->byte_table, bytes, nbytes, &result) + < 0 ? NULL : (struct charseq *) result); +} diff --git a/REORG.TODO/locale/programs/charmap.h b/REORG.TODO/locale/programs/charmap.h new file mode 100644 index 0000000000..5d6b48f59c --- /dev/null +++ b/REORG.TODO/locale/programs/charmap.h @@ -0,0 +1,84 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _CHARMAP_H +#define _CHARMAP_H + +#include <obstack.h> +#include <stdbool.h> +#include <stdint.h> + +#include "repertoire.h" +#include "simple-hash.h" + + +/* One rule of the WIDTH section: the characters from FROM to TO have display width WIDTH. new_width stores TO equal to FROM for a single character. */ struct width_rule +{ + struct charseq *from; + struct charseq *to; + unsigned int width; +}; + + +/* In-memory form of a character map as filled in by parse_charmap. */ struct charmap_t +{ + const char *code_set_name; /* Code set name, normally the argument of <code_set_name>. */ + const char *repertoiremap; /* Argument of <repertoiremap>, if given. */ + int mb_cur_min; /* Fewest bytes accepted for one character encoding. */ + int mb_cur_max; /* Most bytes accepted for one character encoding. */ + + struct width_rule *width_rules; /* Rules collected from the WIDTH section. */ + size_t nwidth_rules; /* Number of entries of width_rules in use. */ + size_t nwidth_rules_max; /* Number of entries allocated for width_rules. */ + unsigned int width_default; /* WIDTH_DEFAULT value; parse_charmap starts it at 1. */ + + struct obstack mem_pool; /* Holds the names, charseq records and width rules. */ + hash_table char_table; /* charseq records keyed by symbolic name. */ + hash_table byte_table; /* charseq records keyed by byte sequence. */ + hash_table ucs4_table; /* NOTE(review): not used by the charmap.c code shown here -- confirm where it is filled. */ +}; + + +/* This is the structure used for entries in the hash table. It represents + the sequence of bytes used for the coded character. */ +struct charseq +{ + const char *name; /* Symbolic name of the character. */ + uint32_t ucs4; /* UCS4 value, or UNINITIALIZED_CHAR_VALUE if not derived from the name. */ + int nbytes; /* Number of bytes in BYTES. */ + unsigned char bytes[0]; /* The encoding; allocated together with the structure. */ +}; + + +/* True if the encoding is not ASCII compatible. */ +extern bool enc_not_ascii_compatible; + + +/* Prototypes for charmap handling functions. */ +extern struct charmap_t *charmap_read (const char *filename, int verbose, + int error_not_found, int be_quiet, + int use_default); + +/* Return the value stored under the given key in the hashing table. + NAME is the symbolic name and LEN its length; NULL is returned if + there is no such entry. */ +extern struct charseq *charmap_find_value (const struct charmap_t *charmap, + const char *name, size_t len); + +/* Return symbol for given multibyte sequence. Despite its name, NAME + points to the bytes of the sequence and LEN is their number; NULL is + returned if there is no such entry. */ +extern struct charseq *charmap_find_symbol (const struct charmap_t *charmap, + const char *name, size_t len); + +#endif /* charmap.h */ diff --git a/REORG.TODO/locale/programs/config.h b/REORG.TODO/locale/programs/config.h new file mode 100644 index 0000000000..5b416be0d8 --- /dev/null +++ b/REORG.TODO/locale/programs/config.h @@ -0,0 +1,35 @@ +/* Configuration for localedef program. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _LD_CONFIG_H +#define _LD_CONFIG_H 1 + +/* Use the internal textdomain used for libc messages. */ +#define PACKAGE _libc_intl_domainname +#ifndef VERSION +/* Get libc version number. */ +#include "../../version.h" +#endif + +#define DEFAULT_CHARMAP "ANSI_X3.4-1968" /* ASCII */ + +/* This must be one higher than the last used LC_xxx category value. */ +#define __LC_LAST 13 + +#include_next <config.h> +#endif diff --git a/REORG.TODO/locale/programs/ld-address.c b/REORG.TODO/locale/programs/ld-address.c new file mode 100644 index 0000000000..2488a5ce5c --- /dev/null +++ b/REORG.TODO/locale/programs/ld-address.c @@ -0,0 +1,545 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <byteswap.h> +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* Table of country codes, one entry per DEFINE_COUNTRY_CODE line of iso-3166.def: the two-letter code, the three-letter code and the number. address_finish looks up country_num here and compares country_ab2 and country_ab3 with the entry found. */ static struct +{ + const char ab2[3]; + const char ab3[4]; + uint32_t num; +} iso3166[] = +{ +#define DEFINE_COUNTRY_CODE(Name, Ab2, Ab3, Num) \ + { #Ab2, #Ab3, Num }, +#include "iso-3166.def" +}; + + +/* Table of language codes, one entry per line of iso-639.def: the abbreviation AB, the terminology code TERM and the library code LIB. The DEFINE_LANGUAGE_CODE3 and DEFINE_LANGUAGE_CODE2 forms leave AB, respectively AB and LIB, as the empty string. address_finish checks lang_term, lang_ab and lang_lib against it. */ static struct +{ + const char ab[3]; + const char term[4]; + const char lib[4]; +} iso639[] = +{ +#define DEFINE_LANGUAGE_CODE(Name, Ab, Term, Lib) \ + { #Ab, #Term, #Lib }, +#define DEFINE_LANGUAGE_CODE3(Name, Term, Lib) \ + { "", #Term, #Lib }, +#define DEFINE_LANGUAGE_CODE2(Name, Term) \ + { "", #Term, "" }, +#include "iso-639.def" +}; + + +/* The real definition of the struct for the LC_ADDRESS locale.
*/ +struct locale_address_t +{ + const char *postal_fmt; + const char *country_name; + const char *country_post; + const char *country_ab2; + const char *country_ab3; + uint32_t country_num; + const char *country_car; + const char *country_isbn; + const char *lang_name; + const char *lang_ab; + const char *lang_term; + const char *lang_lib; +}; + + +static void +address_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_ADDRESS].address = + (struct locale_address_t *) xcalloc (1, + sizeof (struct locale_address_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +address_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_address_t *address = locale->categories[LC_ADDRESS].address; + size_t cnt; + int helper; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (address == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_ADDRESS] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_ADDRESS, from->copy_name[LC_ADDRESS], + from->repertoire_name, charmap); + while (from->categories[LC_ADDRESS].address == NULL + && from->copy_name[LC_ADDRESS] != NULL); + + address = locale->categories[LC_ADDRESS].address + = from->categories[LC_ADDRESS].address; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (address == NULL) + { + if (! 
be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_ADDRESS")); + address_startup (NULL, locale, 0); + address = locale->categories[LC_ADDRESS].address; + nothing = 1; + } + } + + if (address->postal_fmt == NULL) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "postal_fmt")); + /* Use as the default value the value of the i18n locale. */ + address->postal_fmt = "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"; + } + else + { + /* We must check whether the format string contains only the allowed + escape sequences. Last checked against ISO 30112 WD10 [2014]. */ + const char *cp = address->postal_fmt; + + if (*cp == '\0') + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "postal_fmt")); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (*++cp == 'R') + /* Romanize-flag. */ + ++cp; + if (strchr ("nafdbshNtreClzTSc%", *cp) == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid escape `%%%c' sequence in field `%s'"), + "LC_ADDRESS", *cp, "postal_fmt")); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (address->cat == NULL) \ + { \ + if (verbose && ! nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_ADDRESS", #cat)); \ + address->cat = ""; \ + } + + TEST_ELEM (country_name); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_post); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_car); + /* XXX Test against list of defined codes. */ + TEST_ELEM (country_isbn); + TEST_ELEM (lang_name); + + helper = 1; + if (address->lang_term == NULL) + { + if (verbose && ! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "lang_term")); + address->lang_term = ""; + cnt = sizeof (iso639) / sizeof (iso639[0]); + } + else if (address->lang_term[0] == '\0') + { + if (verbose) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_term")); + cnt = sizeof (iso639) / sizeof (iso639[0]); + } + else + { + /* Look for this language in the table. */ + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_term, iso639[cnt].term) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: terminology language code `%s' not defined"), + "LC_ADDRESS", address->lang_term)); + } + + if (address->lang_ab == NULL) + { + if ((cnt == sizeof (iso639) / sizeof (iso639[0]) + || iso639[cnt].ab[0] != '\0') + && verbose && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "lang_ab")); + address->lang_ab = ""; + } + else if (address->lang_ab[0] == '\0') + { + if ((cnt == sizeof (iso639) / sizeof (iso639[0]) + || iso639[cnt].ab[0] != '\0') + && verbose) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_ab")); + } + else if (cnt < sizeof (iso639) / sizeof (iso639[0]) + && iso639[cnt].ab[0] == '\0') + { + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be defined"), + "LC_ADDRESS", "lang_ab")); + + address->lang_ab = ""; + } + else + { + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + { + helper = 2; + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_ab, iso639[cnt].ab) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: language abbreviation `%s' not defined"), + "LC_ADDRESS", address->lang_ab)); + } + else + if (strcmp (iso639[cnt].ab, address->lang_ab) != 0 + && iso639[cnt].ab[0] != '\0') + WITH_CUR_LOCALE 
(error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "lang_ab", "lang_term")); + } + + if (address->lang_lib == NULL) + /* This is no error. */ + address->lang_lib = address->lang_term; + else if (address->lang_lib[0] == '\0') + { + if (verbose) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_ADDRESS", "lang_lib")); + } + else + { + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + { + for (cnt = 0; cnt < sizeof (iso639) / sizeof (iso639[0]); ++cnt) + if (strcmp (address->lang_lib, iso639[cnt].lib) == 0) + break; + if (cnt == sizeof (iso639) / sizeof (iso639[0])) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: language abbreviation `%s' not defined"), + "LC_ADDRESS", address->lang_lib)); + } + else + if (strcmp (iso639[cnt].ab, address->lang_ab) != 0) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), "LC_ADDRESS", "lang_lib", + helper == 1 ? "lang_term" : "lang_ab")); + } + + if (address->country_num == 0) + { + if (verbose && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_num")); + cnt = sizeof (iso3166) / sizeof (iso3166[0]); + } + else + { + for (cnt = 0; cnt < sizeof (iso3166) / sizeof (iso3166[0]); ++cnt) + if (address->country_num == iso3166[cnt].num) + break; + + if (cnt == sizeof (iso3166) / sizeof (iso3166[0])) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: numeric country code `%d' not valid"), + "LC_ADDRESS", address->country_num)); + } + + if (address->country_ab2 == NULL) + { + if (verbose && ! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_ab2")); + address->country_ab2 = " "; + } + else if (cnt != sizeof (iso3166) / sizeof (iso3166[0]) + && strcmp (address->country_ab2, iso3166[cnt].ab2) != 0) + WITH_CUR_LOCALE (error (0, 0, + _("%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "country_ab2", "country_num")); + + if (address->country_ab3 == NULL) + { + if (verbose && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_ADDRESS", "country_ab3")); + address->country_ab3 = " "; + } + else if (cnt != sizeof (iso3166) / sizeof (iso3166[0]) + && strcmp (address->country_ab3, iso3166[cnt].ab3) != 0) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: `%s' value does not match `%s' value"), + "LC_ADDRESS", "country_ab3", "country_num")); +} + + +void +address_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_address_t *address = locale->categories[LC_ADDRESS].address; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_ADDRESS)); + add_locale_string (&file, address->postal_fmt); + add_locale_string (&file, address->country_name); + add_locale_string (&file, address->country_post); + add_locale_string (&file, address->country_ab2); + add_locale_string (&file, address->country_ab3); + add_locale_string (&file, address->country_car); + add_locale_uint32 (&file, address->country_num); + add_locale_string (&file, address->country_isbn); + add_locale_string (&file, address->lang_name); + add_locale_string (&file, address->lang_ab); + add_locale_string (&file, address->lang_term); + add_locale_string (&file, address->lang_lib); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_ADDRESS, "LC_ADDRESS", &file); +} + + +/* The parser for the LC_ADDRESS section of the locale definition. 
*/ +void +address_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_address_t *address; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* The rest of the line containing `LC_ADDRESS' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_address, + LC_ADDRESS, "LC_ADDRESS", ignore_content); + return; + } + + /* Prepare the data structures. */ + address_startup (ldfile, result, ignore_content); + address = result->categories[LC_ADDRESS].address; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (address->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_ADDRESS", #cat); \ + address->cat = ""; \ + } \ + else if (!ignore_content) \ + address->cat = arg->val.str.startmb; \ + break + + STR_ELEM (postal_fmt); + STR_ELEM (country_name); + STR_ELEM (country_post); + STR_ELEM (country_ab2); + STR_ELEM (country_ab3); + STR_ELEM (country_car); + STR_ELEM (lang_name); + STR_ELEM (lang_ab); + STR_ELEM (lang_term); + STR_ELEM (lang_lib); + +#define INT_STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_string && arg->tok != tok_number) \ + goto err_label; \ + if (address->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \ + else if (!ignore_content && arg->tok == tok_string \ + && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_ADDRESS", #cat); \ + address->cat = ""; \ + } \ + else if (!ignore_content) \ + { \ + if (arg->tok == tok_string) \ + address->cat = arg->val.str.startmb; \ + else \ + { \ + char *numbuf = (char *) xmalloc (21); \ + snprintf (numbuf, 21, "%ld", arg->val.num); \ + address->cat = numbuf; \ + } \ + } \ + break + + INT_STR_ELEM (country_isbn); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_number) \ + goto err_label; \ + else if (address->cat != 0) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_ADDRESS", #cat); \ + else if (!ignore_content) \ + address->cat = arg->val.num; \ + break + + INT_ELEM (country_num); + + case tok_end: + /* Next we assume `LC_ADDRESS'. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_ADDRESS"); + else if (arg->tok != tok_lc_address) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_ADDRESS"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_address); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_ADDRESS"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_ADDRESS"); +} diff --git a/REORG.TODO/locale/programs/ld-collate.c b/REORG.TODO/locale/programs/ld-collate.c new file mode 100644 index 0000000000..cec848cb7c --- /dev/null +++ b/REORG.TODO/locale/programs/ld-collate.c @@ -0,0 +1,3978 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <error.h> +#include <stdlib.h> +#include <wchar.h> +#include <stdint.h> +#include <sys/param.h> + +#include "localedef.h" +#include "charmap.h" +#include "localeinfo.h" +#include "linereader.h" +#include "locfile.h" +#include "elem-hash.h" + +/* Uncomment the following line in the production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + +static inline void +__attribute ((always_inline)) +obstack_int32_grow (struct obstack *obstack, int32_t data) +{ + assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack))); + data = maybe_swap_uint32 (data); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow (obstack, data); + else + obstack_grow (obstack, &data, sizeof (int32_t)); +} + +static inline void +__attribute ((always_inline)) +obstack_int32_grow_fast (struct obstack *obstack, int32_t data) +{ + assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack))); + data = maybe_swap_uint32 (data); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (obstack, data); + else + obstack_grow (obstack, &data, sizeof (int32_t)); +} + +/* Forward declaration. */ +struct element_t; + +/* Data type for list of strings. */ +struct section_list +{ + /* Successor in the known_sections list. */ + struct section_list *def_next; + /* Successor in the sections list. */ + struct section_list *next; + /* Name of the section. */ + const char *name; + /* First element of this section. 
*/ + struct element_t *first; + /* Last element of this section. */ + struct element_t *last; + /* These are the rules for this section. */ + enum coll_sort_rule *rules; + /* Index of the rule set in the appropriate section of the output file. */ + int ruleidx; +}; + +struct element_t; + +struct element_list_t +{ + /* Number of elements. */ + int cnt; + + struct element_t **w; +}; + +/* Data type for collating element. */ +struct element_t +{ + const char *name; + + const char *mbs; + size_t nmbs; + const uint32_t *wcs; + size_t nwcs; + int *mborder; + int wcorder; + + /* The following is a bit mask which bits are set if this element is + used in the appropriate level. Interesting for the singlebyte + weight computation. + + XXX The type here restricts the number of levels to 32. It could + be changed if necessary but I doubt this is necessary. */ + unsigned int used_in_level; + + struct element_list_t *weights; + + /* Nonzero if this is a real character definition. */ + int is_character; + + /* Order of the character in the sequence. This information will + be used in range expressions. */ + int mbseqorder; + int wcseqorder; + + /* Where does the definition come from. */ + const char *file; + size_t line; + + /* Which section does this belong to. */ + struct section_list *section; + + /* Predecessor and successor in the order list. */ + struct element_t *last; + struct element_t *next; + + /* Next element in multibyte output list. */ + struct element_t *mbnext; + struct element_t *mblast; + + /* Next element in wide character output list. */ + struct element_t *wcnext; + struct element_t *wclast; +}; + +/* Special element value. */ +#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1) +#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2) +#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3) + +/* Data type for collating symbol. */ +struct symbol_t +{ + const char *name; + + /* Point to place in the order list. 
*/ + struct element_t *order; + + /* Where does the definition come from. */ + const char *file; + size_t line; +}; + +/* Sparse table of struct element_t *. */ +#define TABLE wchead_table +#define ELEMENT struct element_t * +#define DEFAULT NULL +#define ITERATE +#define NO_ADD_LOCALE +#include "3level.h" + +/* Sparse table of int32_t. */ +#define TABLE collidx_table +#define ELEMENT int32_t +#define DEFAULT 0 +#include "3level.h" + +/* Sparse table of uint32_t. */ +#define TABLE collseq_table +#define ELEMENT uint32_t +#define DEFAULT ~((uint32_t) 0) +#include "3level.h" + + +/* Simple name list for the preprocessor. */ +struct name_list +{ + struct name_list *next; + char str[0]; +}; + + +/* The real definition of the struct for the LC_COLLATE locale. */ +struct locale_collate_t +{ + int col_weight_max; + int cur_weight_max; + + /* List of known scripts. */ + struct section_list *known_sections; + /* List of used sections. */ + struct section_list *sections; + /* Current section using definition. */ + struct section_list *current_section; + /* There always can be an unnamed section. */ + struct section_list unnamed_section; + /* Flag whether the unnamed section has been defined. */ + bool unnamed_section_defined; + /* To make handling of errors easier we have another section. */ + struct section_list error_section; + /* Sometimes we are defining the values for collating symbols before + the first actual section. */ + struct section_list symbol_section; + + /* Start of the order list. */ + struct element_t *start; + + /* The undefined element. */ + struct element_t undefined; + + /* This is the cursor for `reorder_after' insertions. */ + struct element_t *cursor; + + /* This value is used when handling ellipsis. */ + struct element_t ellipsis_weight; + + /* Known collating elements. */ + hash_table elem_table; + + /* Known collating symbols. */ + hash_table sym_table; + + /* Known collation sequences. 
*/ + hash_table seq_table; + + struct obstack mempool; + + /* The LC_COLLATE category is a bit special as it is sometimes possible + that the definitions from more than one input file contains information. + Therefore we keep all relevant input in a list. */ + struct locale_collate_t *next; + + /* Arrays with heads of the list for each of the leading bytes in + the multibyte sequences. */ + struct element_t *mbheads[256]; + + /* Arrays with heads of the list for each of the leading bytes in + the multibyte sequences. */ + struct wchead_table wcheads; + + /* The arrays with the collation sequence order. */ + unsigned char mbseqorder[256]; + struct collseq_table wcseqorder; + + /* State of the preprocessor. */ + enum + { + else_none = 0, + else_ignore, + else_seen + } + else_action; +}; + + +/* We have a few global variables which are used for reading all + LC_COLLATE category descriptions in all files. */ +static uint32_t nrules; + +/* List of defined preprocessor symbols. */ +static struct name_list *defined; + + +/* We need UTF-8 encoding of numbers. 
*/ +static inline int +__attribute ((always_inline)) +utf8_encode (char *buf, int val) +{ + int retval; + + if (val < 0x80) + { + *buf++ = (char) val; + retval = 1; + } + else + { + int step; + + for (step = 2; step < 6; ++step) + if ((val & (~(uint32_t)0 << (5 * step + 1))) == 0) + break; + retval = step; + + *buf = (unsigned char) (~0xff >> step); + --step; + do + { + buf[step] = 0x80 | (val & 0x3f); + val >>= 6; + } + while (--step > 0); + *buf |= val; + } + + return retval; +} + + +static struct section_list * +make_seclist_elem (struct locale_collate_t *collate, const char *string, + struct section_list *next) +{ + struct section_list *newp; + + newp = (struct section_list *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->next = next; + newp->name = string; + newp->first = NULL; + newp->last = NULL; + + return newp; +} + + +static struct element_t * +new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen, + const uint32_t *wcs, const char *name, size_t namelen, + int is_character) +{ + struct element_t *newp; + + newp = (struct element_t *) obstack_alloc (&collate->mempool, + sizeof (*newp)); + newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool, + name, namelen); + if (mbs != NULL) + { + newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen); + newp->nmbs = mbslen; + } + else + { + newp->mbs = NULL; + newp->nmbs = 0; + } + if (wcs != NULL) + { + size_t nwcs = wcslen ((wchar_t *) wcs); + uint32_t zero = 0; + /* Handle <U0000> as a single character. */ + if (nwcs == 0) + nwcs = 1; + obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t)); + obstack_grow (&collate->mempool, &zero, sizeof (uint32_t)); + newp->wcs = (uint32_t *) obstack_finish (&collate->mempool); + newp->nwcs = nwcs; + } + else + { + newp->wcs = NULL; + newp->nwcs = 0; + } + newp->mborder = NULL; + newp->wcorder = 0; + newp->used_in_level = 0; + newp->is_character = is_character; + + /* Will be assigned later. 
XXX */ + newp->mbseqorder = 0; + newp->wcseqorder = 0; + + /* Will be allocated later. */ + newp->weights = NULL; + + newp->file = NULL; + newp->line = 0; + + newp->section = collate->current_section; + + newp->last = NULL; + newp->next = NULL; + + newp->mbnext = NULL; + newp->mblast = NULL; + + newp->wcnext = NULL; + newp->wclast = NULL; + + return newp; +} + + +static struct symbol_t * +new_symbol (struct locale_collate_t *collate, const char *name, size_t len) +{ + struct symbol_t *newp; + + newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp)); + + newp->name = obstack_copy0 (&collate->mempool, name, len); + newp->order = NULL; + + newp->file = NULL; + newp->line = 0; + + return newp; +} + + +/* Test whether this name is already defined somewhere. */ +static int +check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, const char *symbol, + size_t symbol_len) +{ + void *ignore = NULL; + + if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined in charmap"), + (int) symbol_len, symbol); + return 1; + } + + if (repertoire != NULL + && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore) + == 0)) + { + lr_error (ldfile, _("`%.*s' already defined in repertoire"), + (int) symbol_len, symbol); + return 1; + } + + if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined as collating symbol"), + (int) symbol_len, symbol); + return 1; + } + + if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0) + { + lr_error (ldfile, _("`%.*s' already defined as collating element"), + (int) symbol_len, symbol); + return 1; + } + + return 0; +} + + +/* Read the direction specification. 
*/ +static void +read_directions (struct linereader *ldfile, struct token *arg, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, struct localedef_t *result) +{ + int cnt = 0; + int max = nrules ?: 10; + enum coll_sort_rule *rules = calloc (max, sizeof (*rules)); + int warned = 0; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + while (1) + { + int valid = 0; + + if (arg->tok == tok_forward) + { + if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "forward", cnt + 1); + } + } + else + rules[cnt] |= sort_forward; + + valid = 1; + } + else if (arg->tok == tok_backward) + { + if (rules[cnt] & sort_forward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `forward' and `backward' are mutually excluding each other"), + "LC_COLLATE"); + warned = 1; + } + } + else if (rules[cnt] & sort_backward) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "backward", cnt + 1); + } + } + else + rules[cnt] |= sort_backward; + + valid = 1; + } + else if (arg->tok == tok_position) + { + if (rules[cnt] & sort_position) + { + if (! warned) + { + lr_error (ldfile, _("\ +%s: `%s' mentioned more than once in definition of weight %d"), + "LC_COLLATE", "position", cnt + 1); + } + } + else + rules[cnt] |= sort_position; + + valid = 1; + } + + if (valid) + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + + if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma + || arg->tok == tok_semicolon) + { + if (! valid && ! 
warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + + /* See whether we have to increment the counter. */ + if (arg->tok != tok_comma && rules[cnt] != 0) + { + /* Add the default `forward' if we have seen only `position'. */ + if (rules[cnt] == sort_position) + rules[cnt] = sort_position | sort_forward; + + ++cnt; + } + + if (arg->tok == tok_eof || arg->tok == tok_eol) + /* End of line or file, so we exit the loop. */ + break; + + if (nrules == 0) + { + /* See whether we have enough room in the array. */ + if (cnt == max) + { + max += 10; + rules = (enum coll_sort_rule *) xrealloc (rules, + max + * sizeof (*rules)); + memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules)); + } + } + else + { + if (cnt == nrules) + { + /* There must not be any more rule. */ + if (! warned) + { + lr_error (ldfile, _("\ +%s: too many rules; first entry only had %d"), + "LC_COLLATE", nrules); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + break; + } + } + } + else + { + if (! warned) + { + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + warned = 1; + } + } + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + } + + if (nrules == 0) + { + /* Now we know how many rules we have. */ + nrules = cnt; + rules = (enum coll_sort_rule *) xrealloc (rules, + nrules * sizeof (*rules)); + } + else + { + if (cnt < nrules) + { + /* Not enough rules in this specification. */ + if (! warned) + lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE"); + + do + rules[cnt] = sort_forward; + while (++cnt < nrules); + } + } + + collate->current_section->rules = rules; +} + + +static struct element_t * +find_element (struct linereader *ldfile, struct locale_collate_t *collate, + const char *str, size_t len) +{ + void *result = NULL; + + /* Search for the entries among the collation sequences already define. */ + if (find_entry (&collate->seq_table, str, len, &result) != 0) + { + /* Nope, not define yet. 
So we see whether it is a + collation symbol. */ + void *ptr; + + if (find_entry (&collate->sym_table, str, len, &ptr) == 0) + { + /* It's a collation symbol. */ + struct symbol_t *sym = (struct symbol_t *) ptr; + result = sym->order; + + if (result == NULL) + result = sym->order = new_element (collate, NULL, 0, NULL, + NULL, 0, 0); + } + else if (find_entry (&collate->elem_table, str, len, &result) != 0) + { + /* It's also no collation element. So it is a character + element defined later. */ + result = new_element (collate, NULL, 0, NULL, str, len, 1); + /* Insert it into the sequence table. */ + insert_entry (&collate->seq_table, str, len, result); + } + } + + return (struct element_t *) result; +} + + +static void +unlink_element (struct locale_collate_t *collate) +{ + if (collate->cursor == collate->start) + { + assert (collate->cursor->next == NULL); + assert (collate->cursor->last == NULL); + collate->cursor = NULL; + } + else + { + if (collate->cursor->next != NULL) + collate->cursor->next->last = collate->cursor->last; + if (collate->cursor->last != NULL) + collate->cursor->last->next = collate->cursor->next; + collate->cursor = collate->cursor->last; + } +} + + +static void +insert_weights (struct linereader *ldfile, struct element_t *elem, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, struct localedef_t *result, + enum token_t ellipsis) +{ + int weight_cnt; + struct token *arg; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Initialize all the fields. */ + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + + elem->last = collate->cursor; + elem->next = collate->cursor ? 
collate->cursor->next : NULL; + if (collate->cursor != NULL && collate->cursor->next != NULL) + collate->cursor->next->last = elem; + if (collate->cursor != NULL) + collate->cursor->next = elem; + if (collate->start == NULL) + { + assert (collate->cursor == NULL); + collate->start = elem; + } + + elem->section = collate->current_section; + + if (collate->current_section->first == NULL) + collate->current_section->first = elem; + if (collate->current_section->last == collate->cursor) + collate->current_section->last = elem; + + collate->cursor = elem; + + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t)); + memset (elem->weights, '\0', nrules * sizeof (struct element_list_t)); + + weight_cnt = 0; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + do + { + if (arg->tok == tok_eof || arg->tok == tok_eol) + break; + + if (arg->tok == tok_ignore) + { + /* The weight for this level has to be ignored. We use the + null pointer to indicate this. 
*/ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = NULL; + elem->weights[weight_cnt].cnt = 1; + } + else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4) + { + char ucs4str[10]; + struct element_t *val; + char *symstr; + size_t symlen; + + if (arg->tok == tok_bsymbol) + { + symstr = arg->val.str.startmb; + symlen = arg->val.str.lenmb; + } + else + { + snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4); + symstr = ucs4str; + symlen = 9; + } + + val = find_element (ldfile, collate, symstr, symlen); + if (val == NULL) + break; + + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = val; + elem->weights[weight_cnt].cnt = 1; + } + else if (arg->tok == tok_string) + { + /* Split the string up in the individual characters and put + the element definitions in the list. */ + const char *cp = arg->val.str.startmb; + int cnt = 0; + struct element_t *charelem; + struct element_t **weights = NULL; + int max = 0; + + if (*cp == '\0') + { + lr_error (ldfile, _("%s: empty weight string not allowed"), + "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + + do + { + if (*cp == '<') + { + /* Ahh, it's a bsymbol or an UCS4 value. If it's + the latter we have to unify the name. */ + const char *startp = ++cp; + size_t len; + + while (*cp != '>') + { + if (*cp == ldfile->escape_char) + ++cp; + if (*cp == '\0') + /* It's a syntax error. 
*/ + goto syntax; + + ++cp; + } + + if (cp - startp == 5 && startp[0] == 'U' + && isxdigit (startp[1]) && isxdigit (startp[2]) + && isxdigit (startp[3]) && isxdigit (startp[4])) + { + unsigned int ucs4 = strtoul (startp + 1, NULL, 16); + char *newstr; + + newstr = (char *) xmalloc (10); + snprintf (newstr, 10, "U%08X", ucs4); + startp = newstr; + + len = 9; + } + else + len = cp - startp; + + charelem = find_element (ldfile, collate, startp, len); + ++cp; + } + else + { + /* People really shouldn't use characters directly in + the string. Especially since it's not really clear + what this means. We interpret all characters in the + string as if that would be bsymbols. Otherwise we + would have to match back to bsymbols somehow and this + is normally not what people normally expect. */ + charelem = find_element (ldfile, collate, cp++, 1); + } + + if (charelem == NULL) + { + /* We ignore the rest of the line. */ + lr_ignore_rest (ldfile, 0); + break; + } + + /* Add the pointer. */ + if (cnt >= max) + { + struct element_t **newp; + max += 10; + newp = (struct element_t **) + alloca (max * sizeof (struct element_t *)); + memcpy (newp, weights, cnt * sizeof (struct element_t *)); + weights = newp; + } + weights[cnt++] = charelem; + } + while (*cp != '\0'); + + /* Now store the information. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + cnt * sizeof (struct element_t *)); + memcpy (elem->weights[weight_cnt].w, weights, + cnt * sizeof (struct element_t *)); + elem->weights[weight_cnt].cnt = cnt; + + /* We don't need the string anymore. */ + free (arg->val.str.startmb); + } + else if (ellipsis != tok_none + && (arg->tok == tok_ellipsis2 + || arg->tok == tok_ellipsis3 + || arg->tok == tok_ellipsis4)) + { + /* It must be the same ellipsis as used in the initial column. 
*/ + if (arg->tok != ellipsis) + lr_error (ldfile, _("\ +%s: weights must use the same ellipsis symbol as the name"), + "LC_COLLATE"); + + /* The weight for this level will depend on the element + iterating over the range. Put a placeholder. */ + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2; + elem->weights[weight_cnt].cnt = 1; + } + else + { + syntax: + /* It's a syntax error. */ + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + /* This better should be the end of the line or a semicolon. */ + if (arg->tok == tok_semicolon) + /* OK, ignore this and read the next token. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + else if (arg->tok != tok_eof && arg->tok != tok_eol) + { + /* It's a syntax error. */ + lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + break; + } + } + while (++weight_cnt < nrules); + + if (weight_cnt < nrules) + { + /* This means the rest of the line uses the current element as + the weight. */ + do + { + elem->weights[weight_cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, sizeof (struct element_t *)); + if (ellipsis == tok_none) + elem->weights[weight_cnt].w[0] = elem; + else + elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2; + elem->weights[weight_cnt].cnt = 1; + } + while (++weight_cnt < nrules); + } + else + { + if (arg->tok == tok_ignore || arg->tok == tok_bsymbol) + { + /* Too many rule values. 
*/ + lr_error (ldfile, _("%s: too many values"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof); + } +} + + /* Resolve the symbol SYMSTR (SYMLEN bytes) to a collation element and + append it to the collation order. The name is first looked up in + CHARMAP and REPERTOIRE. If neither knows it, the collating-element + and collating-symbol tables are consulted and a placeholder element is + created when none exists yet; otherwise the element is taken from, or + added to, the sequence table. Returns 1 after reporting an error if + the element is already linked into the order, else passes it to + insert_weights and returns 0. */ +static int +insert_value (struct linereader *ldfile, const char *symstr, size_t symlen, + const struct charmap_t *charmap, struct repertoire_t *repertoire, + struct localedef_t *result) +{ + /* First find out what kind of symbol this is. */ + struct charseq *seq; + uint32_t wc; + struct element_t *elem = NULL; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Try to find the character in the charmap. */ + seq = charmap_find_value (charmap, symstr, symlen); + + /* Determine the wide character. The repertoire lookup is cached in + the charmap entry when there is one. */ + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, symstr, symlen); + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) + { + /* It's no character, so look through the collation elements and + symbol list. */ + void *ptr = elem; + if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0) + { + /* Note: this local shadows the `result' parameter for the + rest of this block. */ void *result; + struct symbol_t *sym = NULL; + + /* It's also no collation element. Therefore it's either a + collating symbol or it's a character which is not + supported by the character set. In the latter case we + simply create a dummy entry. */ + if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0) + { + /* It's a collation symbol. */ + sym = (struct symbol_t *) result; + + elem = sym->order; + } + + if (elem == NULL) + { + elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0); + + if (sym != NULL) + sym->order = elem; + else + /* Enter a fake element in the sequence table. This + won't cause anything in the output since there is + no multibyte or wide character associated with + it. */ + insert_entry (&collate->seq_table, symstr, symlen, elem); + } + } + else + /* Copy the result back. 
*/ + elem = ptr; + } + else + { + /* Otherwise the symbol stands for a character. */ + void *ptr = elem; + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0) + { + uint32_t wcs[2] = { wc, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, + seq != NULL ? (char *) seq->bytes : NULL, + seq != NULL ? seq->nbytes : 0, + wc == ILLEGAL_CHAR_VALUE ? NULL : wcs, + symstr, symlen, 1); + + /* And add it to the table. */ + if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0) + /* This cannot happen. */ + assert (! "Internal error"); + } + else + { + /* Copy the result back. */ + elem = ptr; + + /* Maybe the character was used before the definition. In this case + we have to insert the byte sequences now. */ + if (elem->mbs == NULL && seq != NULL) + { + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + elem->nmbs = seq->nbytes; + } + + if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE) + { + uint32_t wcs[2] = { wc, 0 }; + + elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs)); + elem->nwcs = 1; + } + } + } + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL || elem == collate->cursor) + { + lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"), + (int) symlen, symstr, elem->file, elem->line); + lr_ignore_rest (ldfile, 0); + return 1; + } + + insert_weights (ldfile, elem, charmap, repertoire, result, tok_none); + + return 0; +} + + /* Expand an ellipsis in the collation order. The element at the + cursor, once the ellipsis placeholder has been unlinked, is the start + of the range; SYMSTR, when not NULL, is inserted through insert_value + and becomes its end. ELLIPSIS selects the form: tok_ellipsis3 + enumerates the byte sequences between both ends, the other forms + enumerate numbered symbol names (base 16 for tok_ellipsis2, base 10 + otherwise). Every generated element takes its weights from + collate->ellipsis_weight. */ +static void +handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen, + enum token_t ellipsis, const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct localedef_t *result) +{ + struct element_t *startp; + struct element_t *endp; + struct locale_collate_t *collate = result->categories[LC_COLLATE].collate; + + /* Unlink the entry added for the ellipsis. */ + unlink_element (collate); + startp = collate->cursor; + + /* Process and add the end-entry. 
*/ + if (symstr != NULL + && insert_value (ldfile, symstr, symlen, charmap, repertoire, result)) + /* Something went wrong with inserting the to-value. This means + we cannot process the ellipsis. */ + return; + + /* Reset the cursor. */ + collate->cursor = startp; + + /* Now we have to handle many different situations: + - we have to distinguish between the three different ellipsis forms + - the ellipsis may be at the beginning, in the middle, or at the end. + + NOTE(review): the cursor is dereferenced unconditionally here, so + STARTP == NULL faults before the NULL checks further down are + reached -- confirm whether callers guarantee a non-NULL cursor. */ + endp = collate->cursor->next; + assert (symstr == NULL || endp != NULL); + + /* XXX The following is probably very wrong since also collating symbols + can appear in ranges. But do we want/can refine the test for that? */ +#if 0 + /* Both, the start and the end symbol, must stand for characters. */ + if ((startp != NULL && (startp->name == NULL || ! startp->is_character)) + || (endp != NULL && (endp->name == NULL|| ! endp->is_character))) + { + lr_error (ldfile, _("\ +%s: the start and the end symbol of a range must stand for characters"), + "LC_COLLATE"); + return; + } +#endif + + if (ellipsis == tok_ellipsis3) + { + /* One requirement we make here: the length of the byte + sequences for the first and end character must be the same. + This is mainly to prevent unwanted effects and this is often + not what is wanted. + + ENDP may be NULL when no end symbol was given (SYMSTR == NULL) + and the checks below also allow for STARTP == NULL, so neither + pointer may be dereferenced here without a test. With no byte + sequence available the length is zero. */ + size_t len = ((startp != NULL && startp->mbs != NULL) + ? startp->nmbs + : ((endp != NULL && endp->mbs != NULL) + ? endp->nmbs : 0)); + char mbcnt[len + 1]; + char mbend[len + 1]; + + /* Well, this should be caught somewhere else already. Just to + make sure. */ + assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0); + assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0); + + if (startp != NULL && endp != NULL + && startp->mbs != NULL && endp->mbs != NULL + && startp->nmbs != endp->nmbs) + { + lr_error (ldfile, _("\ +%s: byte sequences of first and last character must have the same length"), + "LC_COLLATE"); + return; + } + + /* Determine whether we have to generate multibyte sequences. 
*/ + if ((startp == NULL || startp->mbs != NULL) + && (endp == NULL || endp->mbs != NULL)) + { + int cnt; + int ret; + + /* Prepare the beginning byte sequence. This is either from the + beginning byte sequence or it is all nulls if it was an + initial ellipsis. */ + if (startp == NULL || startp->mbs == NULL) + memset (mbcnt, '\0', len); + else + { + memcpy (mbcnt, startp->mbs, len); + + /* And increment it so that the value is the first one we will + try to insert. */ + for (cnt = len - 1; cnt >= 0; --cnt) + if (++mbcnt[cnt] != '\0') + break; + } + mbcnt[len] = '\0'; + + /* And the end sequence. */ + if (endp == NULL || endp->mbs == NULL) + memset (mbend, '\0', len); + else + memcpy (mbend, endp->mbs, len); + mbend[len] = '\0'; + + /* Test whether we have a correct range. */ + ret = memcmp (mbcnt, mbend, len); + if (ret >= 0) + { + if (ret > 0) + lr_error (ldfile, _("%s: byte sequence of first character of \ +range is not lower than that of the last character"), "LC_COLLATE"); + return; + } + + /* Generate the byte sequences data. */ + while (1) + { + struct charseq *seq; + + /* Quite a bit of work ahead. We have to find the character + definition for the byte sequence and then determine the + wide character belonging to it. */ + seq = charmap_find_symbol (charmap, mbcnt, len); + if (seq != NULL) + { + struct element_t *elem; + size_t namelen; + + /* I don't think this can ever happen. */ + assert (seq->name != NULL); + namelen = strlen (seq->name); + + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + namelen); + + /* Now we are ready to insert the new value in the + sequence. Find out whether the element is + already known. */ + void *ptr; + if (find_entry (&collate->seq_table, seq->name, namelen, + &ptr) != 0) + { + uint32_t wcs[2] = { seq->ucs4, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, mbcnt, len, + seq->ucs4 == ILLEGAL_CHAR_VALUE + ? 
NULL : wcs, seq->name, + namelen, 1); + + /* And add it to the table. */ + if (insert_entry (&collate->seq_table, seq->name, + namelen, elem) != 0) + /* This cannot happen. */ + assert (! "Internal error"); + } + else + /* Copy the result. */ + elem = ptr; + + /* Test whether this element is not already in the list. */ + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +order for `%.*s' already defined at %s:%Zu"), + (int) namelen, seq->name, + elem->file, elem->line); + goto increment; + } + + /* Enqueue the new element. */ + elem->last = collate->cursor; + if (collate->cursor == NULL) + elem->next = NULL; + else + { + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + } + if (collate->start == NULL) + { + assert (collate->cursor == NULL); + collate->start = elem; + } + collate->cursor = elem; + + /* Add the weight value. We take them from the + `ellipsis_weights' member of `collate'. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simply use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } + } + + /* Increment for the next round. */ + increment: + for (cnt = len - 1; cnt >= 0; --cnt) + if (++mbcnt[cnt] != '\0') + break; + + /* Find out whether this was all. */ + if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0) + /* Yep, that's all. 
*/ + break; + } + } + } + else + { + /* For symbolic range we naturally must have a beginning and an + end specified by the user. */ + if (startp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not directly follow `order_start'"), + "LC_COLLATE"); + else if (endp == NULL) + lr_error (ldfile, _("\ +%s: symbolic range ellipsis must not be directly followed by `order_end'"), + "LC_COLLATE"); + else + { + /* Determine the range. To do so we have to determine the + common prefix of the both names and then the numeric + values of both ends. */ + size_t lenfrom = strlen (startp->name); + size_t lento = strlen (endp->name); + char buf[lento + 1]; + int preflen = 0; + long int from; + long int to; + char *cp; + int base = ellipsis == tok_ellipsis2 ? 16 : 10; + + if (lenfrom != lento) + { + invalid_range: + lr_error (ldfile, _("\ +`%s' and `%.*s' are not valid names for symbolic range"), + startp->name, (int) lento, endp->name); + return; + } + + while (startp->name[preflen] == endp->name[preflen]) + if (startp->name[preflen] == '\0') + /* Nothing to be done. The start and end point are identical + and while inserting the end point we have already given + the user an error message. */ + return; + else + ++preflen; + + errno = 0; + from = strtol (startp->name + preflen, &cp, base); + if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; + + errno = 0; + to = strtol (endp->name + preflen, &cp, base); + if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0') + goto invalid_range; + + /* Copy the prefix. */ + memcpy (buf, startp->name, preflen); + + /* Loop over all values. */ + for (++from; from < to; ++from) + { + struct element_t *elem = NULL; + struct charseq *seq; + uint32_t wc; + int cnt; + + /* Generate the name. */ + sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX", + (int) (lenfrom - preflen), from); + + /* Look whether this name is already defined. 
*/ + void *ptr; + if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0) + { + /* Copy back the result. */ + elem = ptr; + + if (elem->next != NULL || (collate->cursor != NULL + && elem->next == collate->cursor)) + { + lr_error (ldfile, _("\ +%s: order for `%.*s' already defined at %s:%Zu"), + "LC_COLLATE", (int) lenfrom, buf, + elem->file, elem->line); + continue; + } + + if (elem->name == NULL) + { + lr_error (ldfile, _("%s: `%s' must be a character"), + "LC_COLLATE", buf); + continue; + } + } + + if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL)) + { + /* Search for a character of this name. */ + seq = charmap_find_value (charmap, buf, lenfrom); + if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + { + wc = repertoire_find_value (repertoire, buf, lenfrom); + + if (seq != NULL) + seq->ucs4 = wc; + } + else + wc = seq->ucs4; + + if (wc == ILLEGAL_CHAR_VALUE && seq == NULL) + /* We don't know anything about a character with this + name. XXX Should we warn? */ + continue; + + if (elem == NULL) + { + uint32_t wcs[2] = { wc, 0 }; + + /* We have to allocate an entry. */ + elem = new_element (collate, + seq != NULL + ? (char *) seq->bytes : NULL, + seq != NULL ? seq->nbytes : 0, + wc == ILLEGAL_CHAR_VALUE + ? NULL : wcs, buf, lenfrom, 1); + } + else + { + /* Update the element. */ + if (seq != NULL) + { + elem->mbs = obstack_copy0 (&collate->mempool, + seq->bytes, seq->nbytes); + elem->nmbs = seq->nbytes; + } + + if (wc != ILLEGAL_CHAR_VALUE) + { + uint32_t zero = 0; + + obstack_grow (&collate->mempool, + &wc, sizeof (uint32_t)); + obstack_grow (&collate->mempool, + &zero, sizeof (uint32_t)); + elem->wcs = obstack_finish (&collate->mempool); + elem->nwcs = 1; + } + } + + elem->file = ldfile->fname; + elem->line = ldfile->lineno; + elem->section = collate->current_section; + } + + /* Enqueue the new element. 
*/ + elem->last = collate->cursor; + elem->next = collate->cursor->next; + elem->last->next = elem; + if (elem->next != NULL) + elem->next->last = elem; + collate->cursor = elem; + + /* Now add the weights. They come from the `ellipsis_weight' + member of `collate'. A level whose only weight is the + ELEMENT_ELLIPSIS2 placeholder gets the element itself. */ + elem->weights = (struct element_list_t *) + obstack_alloc (&collate->mempool, + nrules * sizeof (struct element_list_t)); + for (cnt = 0; cnt < nrules; ++cnt) + if (collate->ellipsis_weight.weights[cnt].cnt == 1 + && (collate->ellipsis_weight.weights[cnt].w[0] + == ELEMENT_ELLIPSIS2)) + { + elem->weights[cnt].w = (struct element_t **) + obstack_alloc (&collate->mempool, + sizeof (struct element_t *)); + elem->weights[cnt].w[0] = elem; + elem->weights[cnt].cnt = 1; + } + else + { + /* Simply use the weight from `ellipsis_weight'. */ + elem->weights[cnt].w = + collate->ellipsis_weight.weights[cnt].w; + elem->weights[cnt].cnt = + collate->ellipsis_weight.weights[cnt].cnt; + } + } + } + } +} + + /* Prepare LOCALE and LDFILE for reading an LC_COLLATE section. Unless + IGNORE_CONTENT is set or the category data already exists, either + allocate and initialise fresh collation state (COPY_LOCALE == NULL) + or share the state of COPY_LOCALE. In every case string translation + and wide-string return are switched off in LDFILE. */ +static void +collate_startup (struct linereader *ldfile, struct localedef_t *locale, + struct localedef_t *copy_locale, int ignore_content) +{ + if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL) + { + struct locale_collate_t *collate; + + if (copy_locale == NULL) + { + collate = locale->categories[LC_COLLATE].collate = + (struct locale_collate_t *) + xcalloc (1, sizeof (struct locale_collate_t)); + + /* Init the various data structures. */ + init_hash (&collate->elem_table, 100); + init_hash (&collate->sym_table, 100); + init_hash (&collate->seq_table, 500); + obstack_init (&collate->mempool); + + collate->col_weight_max = -1; + } + else + /* Reuse the copy_locale's data structures. 
*/ + collate = locale->categories[LC_COLLATE].collate = + copy_locale->categories[LC_COLLATE].collate; + } + + ldfile->translate_strings = 0; + ldfile->return_widestr = 0; +} + + /* Post-process the parsed LC_COLLATE data of LOCALE; the comment at + the top of the body describes the work. When the locale has no + collation data this only reports the missing category (unless + be_quiet is set) and returns. */ +void +collate_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + /* Now is the time when we can assign the individual collation + values for all the symbols. We have possibly different values + for the wide- and the multibyte-character symbols. This is done + since it might make a difference in the encoding if there is in + some cases no multibyte-character but there are wide-characters. + (The other way around it is not important since the encoded + collation value in the wide-character case is 32 bits wide and + therefore requires no encoding). + + The lowest collation value assigned is 2. Zero is reserved for + the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm' + functions and 1 is used to separate the individual passes for the + different rules. + + We also have to construct a list with all the bytes/words which + can come first in a sequence, followed by all the elements which + also start with this byte/word. The order is reverse which has + among others the important effect that longer strings are located + first in the list. This is required for the output data since + the algorithm used in `strcoll' etc depends on this. + + The multibyte case is easy. We simply sort into an array with + 256 elements. */ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + int mbact[nrules]; + int wcact; + int mbseqact; + int wcseqact; + struct element_t *runp; + int i; + int need_undefined = 0; + struct section_list *sect; + int ruleidx; + int nr_wide_elems = 0; + + if (collate == NULL) + { + /* No data, no check. */ + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"), + "LC_COLLATE")); + return; + } + + /* If this assertion is hit change the type in `element_t'. 
*/ + assert (nrules <= sizeof (runp->used_in_level) * 8); + + /* Make sure that the `position' rule is used either in all sections + or in none. */ + for (i = 0; i < nrules; ++i) + for (sect = collate->sections; sect != NULL; sect = sect->next) + if (sect != collate->current_section + && sect->rules != NULL + && ((sect->rules[i] & sort_position) + != (collate->current_section->rules[i] & sort_position))) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: `position' must be used for a specific level in all sections or none"), + "LC_COLLATE")); + break; + } + + /* Find out which elements are used at which level. At the same + time we find out whether we have any undefined symbols. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL) + { + for (i = 0; i < nrules; ++i) + { + int j; + + for (j = 0; j < runp->weights[i].cnt; ++j) + /* A NULL pointer as the weight means IGNORE. */ + if (runp->weights[i].w[j] != NULL) + { + if (runp->weights[i].w[j]->weights == NULL) + { + WITH_CUR_LOCALE (error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s' not defined"), + runp->weights[i].w[j]->name)); + + need_undefined = 1; + runp->weights[i].w[j] = &collate->undefined; + } + else + /* Set the bit for the level. */ + runp->weights[i].w[j]->used_in_level |= 1 << i; + } + } + } + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Walk through the list of defined sequences and assign weights. Also + create the data structure which will allow generating the single byte + character based tables. + + Since at each time only the weights for each of the rules are + only compared to other weights for this rule it is possible to + assign more compact weight values than simply counting all + weights in sequence. We can assign weights from 3, one for each + rule individually and only for those elements, which are actually + used for this rule. + + Why is this important? It is not for the wide char table. 
But + it is for the singlebyte output since here larger numbers have to + be encoded to make it possible to emit the value as a byte + string. */ + for (i = 0; i < nrules; ++i) + mbact[i] = 2; + wcact = 2; + mbseqact = 0; + wcseqact = 0; + runp = collate->start; + while (runp != NULL) + { + /* Determine the order. */ + if (runp->used_in_level != 0) + { + runp->mborder = (int *) obstack_alloc (&collate->mempool, + nrules * sizeof (int)); + + for (i = 0; i < nrules; ++i) + if ((runp->used_in_level & (1 << i)) != 0) + runp->mborder[i] = mbact[i]++; + else + runp->mborder[i] = 0; + } + + if (runp->mbs != NULL) + { + struct element_t **eptr; + struct element_t *lastp = NULL; + + /* Find the point where to insert in the list. */ + eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]]; + while (*eptr != NULL) + { + if ((*eptr)->nmbs < runp->nmbs) + break; + + if ((*eptr)->nmbs == runp->nmbs) + { + int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs); + + if (c == 0) + { + /* This should not happen. It means that we have + to symbols with the same byte sequence. It is + of course an error. */ + WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file, + (*eptr)->line, + _("\ +symbol `%s' has the same encoding as"), (*eptr)->name); + error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s'"), + runp->name)); + goto dont_insert; + } + else if (c < 0) + /* Insert it here. */ + break; + } + + /* To the next entry. */ + lastp = *eptr; + eptr = &(*eptr)->mbnext; + } + + /* Set the pointers. */ + runp->mbnext = *eptr; + runp->mblast = lastp; + if (*eptr != NULL) + (*eptr)->mblast = runp; + *eptr = runp; + dont_insert: + ; + } + + if (runp->used_in_level) + { + runp->wcorder = wcact++; + + /* We take the opportunity to count the elements which have + wide characters. 
*/ + ++nr_wide_elems; + } + + if (runp->is_character) + { + if (runp->nmbs == 1) + collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++; + + runp->wcseqorder = wcseqact++; + } + else if (runp->mbs != NULL && runp->weights != NULL) + /* This is for collation elements. */ + runp->wcseqorder = wcseqact++; + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Find out whether any of the `mbheads' entries is unset. In this + case we use the UNDEFINED entry. */ + for (i = 1; i < 256; ++i) + if (collate->mbheads[i] == NULL) + { + need_undefined = 1; + collate->mbheads[i] = &collate->undefined; + } + + /* Now to the wide character case. */ + collate->wcheads.p = 6; + collate->wcheads.q = 10; + wchead_table_init (&collate->wcheads); + + collate->wcseqorder.p = 6; + collate->wcseqorder.q = 10; + collseq_table_init (&collate->wcseqorder); + + /* Start adding. */ + runp = collate->start; + while (runp != NULL) + { + if (runp->wcs != NULL) + { + struct element_t *e; + struct element_t **eptr; + struct element_t *lastp; + + /* Insert the collation sequence value. */ + if (runp->is_character) + collseq_table_add (&collate->wcseqorder, runp->wcs[0], + runp->wcseqorder); + + /* Find the point where to insert in the list. */ + e = wchead_table_get (&collate->wcheads, runp->wcs[0]); + eptr = &e; + lastp = NULL; + while (*eptr != NULL) + { + if ((*eptr)->nwcs < runp->nwcs) + break; + + if ((*eptr)->nwcs == runp->nwcs) + { + int c = wmemcmp ((wchar_t *) (*eptr)->wcs, + (wchar_t *) runp->wcs, runp->nwcs); + + if (c == 0) + { + /* This should not happen. It means that we have + two symbols with the same byte sequence. It is + of course an error. */ + WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file, + (*eptr)->line, + _("\ +symbol `%s' has the same encoding as"), (*eptr)->name); + error_at_line (0, 0, runp->file, + runp->line, + _("symbol `%s'"), + runp->name)); + goto dont_insertwc; + } + else if (c < 0) + /* Insert it here. 
*/ + break; + } + + /* To the next entry. */ + lastp = *eptr; + eptr = &(*eptr)->wcnext; + } + + /* Set the pointers. */ + runp->wcnext = *eptr; + runp->wclast = lastp; + if (*eptr != NULL) + (*eptr)->wclast = runp; + *eptr = runp; + if (eptr == &e) + wchead_table_add (&collate->wcheads, runp->wcs[0], e); + dont_insertwc: + ; + } + + /* Up to the next entry. */ + runp = runp->next; + } + + /* Now determine whether the UNDEFINED entry is needed and if yes, + whether it was defined. */ + collate->undefined.used_in_level = need_undefined ? ~0ul : 0; + if (collate->undefined.file == NULL) + { + if (need_undefined) + { + /* This seems not to be enforced by recent standards. Don't + emit an error, simply append UNDEFINED at the end. */ + if (0) + WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'"))); + + /* Add UNDEFINED at the end. */ + collate->undefined.mborder = + (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int)); + + for (i = 0; i < nrules; ++i) + collate->undefined.mborder[i] = mbact[i]++; + } + + /* In any case we will need the definition for the wide character + case. But we will not complain that it is missing since the + specification strangely enough does not seem to account for + this. */ + collate->undefined.wcorder = wcact++; + } + + /* Finally, try to unify the rules for the sections. Whenever the rules + for a section are the same as those for another section give the + ruleset the same index. Since there are never many section we can + use an O(n^2) algorithm here. */ + sect = collate->sections; + while (sect != NULL && sect->rules == NULL) + sect = sect->next; + + /* Bail out if we have no sections because of earlier errors. 
*/ + if (sect == NULL) + { + WITH_CUR_LOCALE (error (EXIT_FAILURE, 0, + _("too many errors; giving up"))); + return; + } + + ruleidx = 0; + do + { + struct section_list *osect = collate->sections; + + /* Look for an earlier section with identical rules. */ + while (osect != sect) + if (osect->rules != NULL + && memcmp (osect->rules, sect->rules, + nrules * sizeof (osect->rules[0])) == 0) + break; + else + osect = osect->next; + + if (osect == sect) + sect->ruleidx = ruleidx++; + else + sect->ruleidx = osect->ruleidx; + + /* Next section. */ + do + sect = sect->next; + while (sect != NULL && sect->rules == NULL); + } + while (sect != NULL); + /* We are currently not prepared for more than 128 rulesets. But this + should never really be a problem. */ + assert (ruleidx <= 128); +} + + /* Append the multibyte weight record of ELEM to POOL. For each of the + `nrules' levels one length byte is written, followed by the + utf8_encode output for the `mborder' value of every weight that is + not IGNORE (a NULL pointer). The return value is the object size of + POOL on entry combined with the low seven bits of the rule index of + the element's section, shifted into bits 24 to 30. For the UNDEFINED + element nothing is written and 0 is returned. */ +static int32_t +output_weight (struct obstack *pool, struct locale_collate_t *collate, + struct element_t *elem) +{ + size_t cnt; + int32_t retval; + + /* Optimize the use of UNDEFINED. */ + if (elem == &collate->undefined) + /* The weights are already inserted. */ + return 0; + + /* This byte can start exactly one collation element and this is + a single byte. We can directly give the index to the weights. */ + retval = obstack_object_size (pool); + + /* Construct the weight. */ + for (cnt = 0; cnt < nrules; ++cnt) + { + /* Seven bytes are reserved per weight for the encoded value. */ + char buf[elem->weights[cnt].cnt * 7]; + int len = 0; + int i; + + for (i = 0; i < elem->weights[cnt].cnt; ++i) + /* Encode the weight value. We do nothing for IGNORE entries. */ + if (elem->weights[cnt].w[i] != NULL) + len += utf8_encode (&buf[len], + elem->weights[cnt].w[i]->mborder[cnt]); + + /* And add the buffer content, preceded by its length. + NOTE(review): the length goes into a single byte; presumably + LEN never exceeds 255 -- confirm. */ + obstack_1grow (pool, len); + obstack_grow (pool, buf, len); + } + + return retval | ((elem->section->ruleidx & 0x7f) << 24); +} + + /* Wide-character counterpart of output_weight. The record consists of + 32-bit words: per level the number of non-IGNORE weights followed by + their `wcorder' values. The returned index counts 32-bit words and + carries the section's rule index in the same bits as above. */ +static int32_t +output_weightwc (struct obstack *pool, struct locale_collate_t *collate, + struct element_t *elem) +{ + size_t cnt; + int32_t retval; + + /* Optimize the use of UNDEFINED. */ + if (elem == &collate->undefined) + /* The weights are already inserted. 
*/ + return 0; + + /* Remember where the record starts. The index is counted in + 32-bit words, not in bytes. */ + retval = obstack_object_size (pool) / sizeof (int32_t); + + /* Construct the weight. */ + for (cnt = 0; cnt < nrules; ++cnt) + { + int32_t buf[elem->weights[cnt].cnt]; + int i; + int32_t j; + + /* Collect the order values; IGNORE entries (NULL) are skipped. */ + for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i) + if (elem->weights[cnt].w[i] != NULL) + buf[j++] = elem->weights[cnt].w[i]->wcorder; + + /* And add the buffer content, preceded by the number of words. */ + obstack_int32_grow (pool, j); + + obstack_grow (pool, buf, j * sizeof (int32_t)); + /* Presumably converts the J words just added to the byte order of + the output -- confirm in the helper. */ + maybe_swap_uint32_obstack (pool, j); + } + + return retval | ((elem->section->ruleidx & 0x7f) << 24); +} + +/* State used by add_to_tablewc, which only receives the character and + the element as arguments. If localedef is ever threaded, this would + need to be a __thread variable. */ +static struct +{ + struct obstack *weightpool; + struct obstack *extrapool; + struct obstack *indpool; + struct locale_collate_t *collate; + struct collidx_table *tablewc; +} atwc; + +static void add_to_tablewc (uint32_t ch, struct element_t *runp); + /* Add the entry for the wide character CH, whose list of elements + starts at RUNP, to atwc.tablewc. A lone single-character element has + its weight index stored directly. Otherwise the negated word offset + into atwc.extrapool is stored and the list is written there. */ +static void +add_to_tablewc (uint32_t ch, struct element_t *runp) +{ + if (runp->wcnext == NULL && runp->nwcs == 1) + { + int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate, + runp); + collidx_table_add (atwc.tablewc, ch, weigthidx); + } + else + { + /* As for the singlebyte table, we recognize sequences and + compress them. */ + + collidx_table_add (atwc.tablewc, ch, + -(obstack_object_size (atwc.extrapool) + / sizeof (uint32_t))); + + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Find out whether this is a single entry or we have more than + one consecutive entry. 
*/ + if (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)) + { + int i; + struct element_t *series_startp = runp; + struct element_t *curp; + + /* Now add first the initial byte sequence. */ + added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t); + if (sizeof (int32_t) == sizeof (int)) + obstack_make_room (atwc.extrapool, added); + + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. */ + obstack_int32_grow_fast (atwc.extrapool, + -(obstack_object_size (atwc.indpool) + / sizeof (int32_t))); + obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1); + + do + runp = runp->wcnext; + while (runp->wcnext != NULL + && runp->nwcs == runp->wcnext->nwcs + && wmemcmp ((wchar_t *) runp->wcs, + (wchar_t *)runp->wcnext->wcs, + runp->nwcs - 1) == 0 + && (runp->wcs[runp->nwcs - 1] + == runp->wcnext->wcs[runp->nwcs - 1] + 1)); + + /* Now walk backward from here to the beginning. */ + curp = runp; + + for (i = 1; i < runp->nwcs; ++i) + obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]); + + /* Now find the end of the consecutive sequence and + add all the indeces in the indirect pool. */ + do + { + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + curp); + obstack_int32_grow (atwc.indpool, weightidx); + + curp = curp->wclast; + } + while (curp != series_startp); + + /* Add the final weight. */ + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + curp); + obstack_int32_grow (atwc.indpool, weightidx); + + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < curp->nwcs; ++i) + obstack_int32_grow (atwc.extrapool, curp->wcs[i]); + } + else + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). 
*/ + int i; + + /* Output the weight info. */ + weightidx = output_weightwc (atwc.weightpool, atwc.collate, + runp); + + assert (runp->nwcs > 0); + added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t); + if (sizeof (int) == sizeof (int32_t)) + obstack_make_room (atwc.extrapool, added); + + obstack_int32_grow_fast (atwc.extrapool, weightidx); + obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1); + for (i = 1; i < runp->nwcs; ++i) + obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]); + } + + /* Next entry. */ + runp = runp->wcnext; + } + while (runp != NULL); + } +} +/* Serialize the LC_COLLATE category of LOCALE and write it to OUTPUT_PATH + via write_locale_data. The record always starts with nrules. When the + locale has no collation data the remaining fields are emitted empty + (a zero word for _NL_COLLATE_SYMB_HASH_SIZEMB) and the function returns. + Otherwise it emits, in this order: the ruleset table; the 256-entry + multibyte table followed by the weight, extra and indirect pools; three + empty fields; the wide character table followed by the same three + pools; the hash table of collating element names with its string data; + the mbseqorder and wcseqorder tables; and the code set name taken from + CHARMAP. The three obstacks are local and freed before returning. */ +void +collate_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate; + const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE); + struct locale_file file; + size_t ch; + int32_t tablemb[256]; + struct obstack weightpool; + struct obstack extrapool; + struct obstack indirectpool; + struct section_list *sect; + struct collidx_table tablewc; + uint32_t elem_size; + uint32_t *elem_table; + int i; + struct element_t *runp; + + init_locale_data (&file, nelems); + add_locale_uint32 (&file, nrules); + + /* If we have no LC_COLLATE data emit only the number of rules as zero. */ + if (collate == NULL) + { + size_t idx; + for (idx = 1; idx < nelems; idx++) + { + /* The words have to be handled specially. */ + if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB)) + add_locale_uint32 (&file, 0); + else + add_locale_empty (&file); + } + write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file); + return; + } + + obstack_init (&weightpool); + obstack_init (&extrapool); + obstack_init (&indirectpool); + + /* Since we are using the sign of an integer to mark indirection the + offsets in the arrays we are indirectly referring to must not be + zero since -0 == 0. Therefore we add a bit of dummy content. 
*/ + obstack_int32_grow (&extrapool, 0); + obstack_int32_grow (&indirectpool, 0); + + /* Prepare the ruleset table. */ + for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next) + if (sect->rules != NULL && sect->ruleidx == i) + { + int j; + + obstack_make_room (&weightpool, nrules); + + for (j = 0; j < nrules; ++j) + obstack_1grow_fast (&weightpool, sect->rules[j]); + ++i; + } + /* And align the output. */ + i = (nrules * i) % LOCFILE_ALIGN; + if (i > 0) + do + obstack_1grow (&weightpool, '\0'); + while (++i < LOCFILE_ALIGN); + + add_locale_raw_obstack (&file, &weightpool); + + /* Generate the 8-bit table. Walk through the lists of sequences + starting with the same byte and add them one after the other to + the table. In case we have more than one sequence starting with + the same byte we have to use extra indirection. + + First add a record for the NUL byte. This entry will never be used + so it does not matter. */ + tablemb[0] = 0; + + /* Now insert the `UNDEFINED' value if it is used. Since this value + will probably be used more than once it is good to store the + weights only once. */ + if (collate->undefined.used_in_level != 0) + output_weight (&weightpool, collate, &collate->undefined); + + for (ch = 1; ch < 256; ++ch) + if (collate->mbheads[ch]->mbnext == NULL + && collate->mbheads[ch]->nmbs <= 1) + { + tablemb[ch] = output_weight (&weightpool, collate, + collate->mbheads[ch]); + } + else + { + /* The entries in the list are sorted by length and then + alphabetically. This is the order in which we will add the + elements to the collation table. This allows simply walking + the table in sequence and stopping at the first matching + entry. Since the longer sequences are coming first in the + list they have the possibility to match first, just as it + has to be. In the worst case we are walking to the end of + the list where we put, if no singlebyte sequence is defined + in the locale definition, the weights for UNDEFINED. 
+ + To reduce the length of the search list we compress them a bit. + This happens by collecting sequences of consecutive byte + sequences in one entry (having a begin and an end byte sequence) + and add only one index into the weight table. We can find the + consecutive entries since they are also consecutive in the list. */ + struct element_t *runp = collate->mbheads[ch]; + struct element_t *lastp; + + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + + tablemb[ch] = -obstack_object_size (&extrapool); + + do + { + /* Store the current index in the weight table. We know that + the current position in the `extrapool' is aligned on a + 32-bit address. */ + int32_t weightidx; + int added; + + /* Find out whether this is a single entry or we have more than + one consecutive entry. */ + if (runp->mbnext != NULL + && runp->nmbs == runp->mbnext->nmbs + && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0 + && (runp->mbs[runp->nmbs - 1] + == runp->mbnext->mbs[runp->nmbs - 1] + 1)) + { + int i; + struct element_t *series_startp = runp; + struct element_t *curp; + + /* Compute how much space we will need. */ + added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + + 2 * (runp->nmbs - 1)); + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + obstack_make_room (&extrapool, added); + + /* More than one consecutive entry. We mark this by having + a negative index into the indirect table. */ + obstack_int32_grow_fast (&extrapool, + -(obstack_object_size (&indirectpool) + / sizeof (int32_t))); + + /* Now search first the end of the series. */ + do + runp = runp->mbnext; + while (runp->mbnext != NULL + && runp->nmbs == runp->mbnext->nmbs + && memcmp (runp->mbs, runp->mbnext->mbs, + runp->nmbs - 1) == 0 + && (runp->mbs[runp->nmbs - 1] + == runp->mbnext->mbs[runp->nmbs - 1] + 1)); + + /* Now walk backward from here to the beginning. 
*/ + curp = runp; + + assert (runp->nmbs <= 256); + obstack_1grow_fast (&extrapool, curp->nmbs - 1); + for (i = 1; i < curp->nmbs; ++i) + obstack_1grow_fast (&extrapool, curp->mbs[i]); + + /* Now find the end of the consecutive sequence and + add all the indices in the indirect pool. */ + do + { + weightidx = output_weight (&weightpool, collate, curp); + obstack_int32_grow (&indirectpool, weightidx); + + curp = curp->mblast; + } + while (curp != series_startp); + + /* Add the final weight. */ + weightidx = output_weight (&weightpool, collate, curp); + obstack_int32_grow (&indirectpool, weightidx); + + /* And add the end byte sequence. Without length this + time. */ + for (i = 1; i < curp->nmbs; ++i) + obstack_1grow_fast (&extrapool, curp->mbs[i]); + } + else + { + /* A single entry. Simply add the index and the length and + string (except for the first character which is already + tested for). */ + int i; + + /* Output the weight info. */ + weightidx = output_weight (&weightpool, collate, runp); + + added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + + runp->nmbs - 1); + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + obstack_make_room (&extrapool, added); + + obstack_int32_grow_fast (&extrapool, weightidx); + assert (runp->nmbs <= 256); + obstack_1grow_fast (&extrapool, runp->nmbs - 1); + + for (i = 1; i < runp->nmbs; ++i) + obstack_1grow_fast (&extrapool, runp->mbs[i]); + } + + /* Add alignment bytes if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))) + obstack_1grow_fast (&extrapool, '\0'); + + /* Next entry. */ + lastp = runp; + runp = runp->mbnext; + } + while (runp != NULL); + + assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))); + + /* If the final entry in the list is not a single character we + add an UNDEFINED entry here. 
*/ + if (lastp->nmbs != 1) + { + int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1); + obstack_make_room (&extrapool, added); + + obstack_int32_grow_fast (&extrapool, 0); + /* XXX What rule? We just pick the first. */ + obstack_1grow_fast (&extrapool, 0); + /* Length is zero. */ + obstack_1grow_fast (&extrapool, 0); + + /* Add alignment bytes if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool))) + obstack_1grow_fast (&extrapool, '\0'); + } + } + + /* Add padding to the tables if necessary. */ + while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool))) + obstack_1grow (&weightpool, 0); + + /* Now add the four tables. */ + add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256); + add_locale_raw_obstack (&file, &weightpool); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_obstack (&file, &indirectpool); + + /* Now the same for the wide character table. We need to store some + more information here. */ + add_locale_empty (&file); + add_locale_empty (&file); + add_locale_empty (&file); + + /* Since we are using the sign of an integer to mark indirection the + offsets in the arrays we are indirectly referring to must not be + zero since -0 == 0. Therefore we add a bit of dummy content. */ + obstack_int32_grow (&extrapool, 0); + obstack_int32_grow (&indirectpool, 0); + + /* Now insert the `UNDEFINED' value if it is used. Since this value + will probably be used more than once it is good to store the + weights only once. */ + if (output_weightwc (&weightpool, collate, &collate->undefined) != 0) + abort (); + + /* Generate the table. Walk through the lists of sequences starting + with the same wide character and add them one after the other to + the table. In case we have more than one sequence starting with + the same byte we have to use extra indirection. 
*/ + tablewc.p = 6; + tablewc.q = 10; + collidx_table_init (&tablewc); + + atwc.weightpool = &weightpool; + atwc.extrapool = &extrapool; + atwc.indpool = &indirectpool; + atwc.collate = collate; + atwc.tablewc = &tablewc; + + wchead_table_iterate (&collate->wcheads, add_to_tablewc); + /* Reset the callback state read by add_to_tablewc; it pointed at the + local pools and table set up above. */ + memset (&atwc, 0, sizeof (atwc)); + + /* Now add the four tables. */ + add_locale_collidx_table (&file, &tablewc); + add_locale_raw_obstack (&file, &weightpool); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_obstack (&file, &indirectpool); + + /* Finally write the table with collation element names out. It is + a hash table with a simple function which gets the name of the + character as the input. One character might have many names. The + value associated with the name is an index into the weight table + where we are then interested in the first-level weight value. + + To determine how large the table should be we are counting the + elements we have to put in. Since we are using internal chaining + using a secondary hash function we have to make the table a bit + larger to avoid extremely long search times. We can achieve + good results with a 40% larger table than there are entries. */ + elem_size = 0; + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character) + /* Yep, the element really counts. */ + ++elem_size; + + runp = runp->next; + } + /* Add 40% and find the next prime number. */ + elem_size = next_prime (elem_size * 1.4); + + /* Allocate the table. Each entry consists of two words: the hash + value and an index in a secondary table which provides the index + into the weight table and the string itself (so that a match can + be determined). */ + elem_table = (uint32_t *) obstack_alloc (&extrapool, + elem_size * 2 * sizeof (uint32_t)); + memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t)); + + /* Now add the elements. 
*/ + runp = collate->start; + while (runp != NULL) + { + if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character) + { + /* Compute the hash value of the name. */ + uint32_t namelen = strlen (runp->name); + uint32_t hash = elem_hash (runp->name, namelen); + size_t idx = hash % elem_size; +#ifndef NDEBUG + size_t start_idx = idx; +#endif + + if (elem_table[idx * 2] != 0) + { + /* The spot is already taken. Try iterating using the value + from the secondary hashing function. */ + size_t iter = hash % (elem_size - 2) + 1; + + do + { + idx += iter; + if (idx >= elem_size) + idx -= elem_size; + assert (idx != start_idx); + } + while (elem_table[idx * 2] != 0); + } + /* This is the spot where we will insert the value. */ + elem_table[idx * 2] = hash; + elem_table[idx * 2 + 1] = obstack_object_size (&extrapool); + + /* The string itself including length. */ + obstack_1grow (&extrapool, namelen); + obstack_grow (&extrapool, runp->name, namelen); + + /* And the multibyte representation. */ + obstack_1grow (&extrapool, runp->nmbs); + obstack_grow (&extrapool, runp->mbs, runp->nmbs); + + /* And align again to 32 bits. */ + if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0) + obstack_grow (&extrapool, "\0\0", + (sizeof (int32_t) + - ((1 + namelen + 1 + runp->nmbs) + % sizeof (int32_t)))); + + /* Now some 32-bit values: multibyte collation sequence, + wide char string (including length), and wide char + collation sequence. */ + obstack_int32_grow (&extrapool, runp->mbseqorder); + + obstack_int32_grow (&extrapool, runp->nwcs); + obstack_grow (&extrapool, runp->wcs, + runp->nwcs * sizeof (uint32_t)); + maybe_swap_uint32_obstack (&extrapool, runp->nwcs); + + obstack_int32_grow (&extrapool, runp->wcseqorder); + } + + runp = runp->next; + } + + /* Prepare to write out this data. 
*/ + add_locale_uint32 (&file, elem_size); + add_locale_uint32_array (&file, elem_table, 2 * elem_size); + add_locale_raw_obstack (&file, &extrapool); + add_locale_raw_data (&file, collate->mbseqorder, 256); + add_locale_collseq_table (&file, &collate->wcseqorder); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file); + + obstack_free (&weightpool, NULL); + obstack_free (&extrapool, NULL); + obstack_free (&indirectpool, NULL); +} + +/* Skip the tokens of a conditional section read from LDFILE. Returns + tok_eof or tok_end as soon as one of them is read, and tok_endif after + ignoring the rest of that line. When TO_ENDIF is zero the function + also returns on tok_else (rest of the line ignored) and on tok_elifdef + or tok_elifndef (rest of the line left unread). A nested ifdef/ifndef + is reported as an error and skipped by a recursive call that passes + tok_endif as the flag; a tok_else seen while TO_ENDIF is nonzero is + reported as a duplicate. COLLATE is only handed on to the recursion. */ +static enum token_t +skip_to (struct linereader *ldfile, struct locale_collate_t *collate, + const struct charmap_t *charmap, int to_endif) +{ + while (1) + { + struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0); + enum token_t nowtok = now->tok; + + if (nowtok == tok_eof || nowtok == tok_end) + return nowtok; + + if (nowtok == tok_ifdef || nowtok == tok_ifndef) + { + lr_error (ldfile, _("%s: nested conditionals not supported"), + "LC_COLLATE"); + nowtok = skip_to (ldfile, collate, charmap, tok_endif); + if (nowtok == tok_eof || nowtok == tok_end) + return nowtok; + } + else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else)) + { + lr_ignore_rest (ldfile, 1); + return nowtok; + } + else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef)) + { + /* Do not read the rest of the line. 
*/ + return nowtok; + } + else if (nowtok == tok_else) + { + lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE"); + } + + lr_ignore_rest (ldfile, 0); + } +} + + +void +collate_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_collate_t *collate; + struct token *now; + struct token *arg = NULL; + enum token_t nowtok; + enum token_t was_ellipsis = tok_none; + struct localedef_t *copy_locale = NULL; + /* Parsing state: + 0 - start + 1 - between `order-start' and `order-end' + 2 - after `order-end' + 3 - after `reorder-after', waiting for `reorder-end' + 4 - after `reorder-end' + 5 - after `reorder-sections-after', waiting for `reorder-sections-end' + 6 - after `reorder-sections-end' + */ + int state = 0; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_COLLATE' must be free. */ + lr_ignore_rest (ldfile, 1); + + while (1) + { + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + if (nowtok != tok_define) + break; + + if (ignore_content) + lr_ignore_rest (ldfile, 0); + else + { + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + else + { + /* Simply add the new symbol. 
*/ + struct name_list *newsym = xmalloc (sizeof (*newsym) + + arg->val.str.lenmb + 1); + memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb); + newsym->str[arg->val.str.lenmb] = '\0'; + newsym->next = defined; + defined = newsym; + + lr_ignore_rest (ldfile, 1); + } + } + } + + if (nowtok == tok_copy) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_string) + { + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + + skip_category: + do + now = lr_token (ldfile, charmap, result, NULL, verbose); + while (now->tok != tok_eof && now->tok != tok_end); + + if (now->tok != tok_eof + || (now = lr_token (ldfile, charmap, result, NULL, verbose), + now->tok == tok_eof)) + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); + else if (now->tok != tok_lc_collate) + { + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, 1); + + return; + } + + if (! ignore_content) + { + /* Get the locale definition. */ + copy_locale = load_locale (LC_COLLATE, now->val.str.startmb, + repertoire_name, charmap, NULL); + if ((copy_locale->avail & COLLATE_LOCALE) == 0) + { + /* Not yet loaded. So do it now. */ + if (locfile_read (copy_locale, charmap) != 0) + goto skip_category; + } + + if (copy_locale->categories[LC_COLLATE].collate == NULL) + return; + } + + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* Prepare the data structures. */ + collate_startup (ldfile, result, copy_locale, ignore_content); + collate = result->categories[LC_COLLATE].collate; + + while (1) + { + char ucs4buf[10]; + char *symstr; + size_t symlen; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_copy: + /* Allow copying other locales. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_string) + goto err_label; + + if (! ignore_content) + load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name, + charmap, result); + + lr_ignore_rest (ldfile, 1); + break; + + case tok_coll_weight_max: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_number) + goto err_label; + if (collate->col_weight_max != -1) + lr_error (ldfile, _("%s: duplicate definition of `%s'"), + "LC_COLLATE", "col_weight_max"); + else + collate->col_weight_max = arg->val.num; + lr_ignore_rest (ldfile, 1); + break; + + case tok_section_symbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else if (!ignore_content) + { + /* Check whether this section is already known. 
*/ + struct section_list *known = collate->sections; + while (known != NULL) + { + if (strcmp (known->name, arg->val.str.startmb) == 0) + break; + known = known->next; + } + + if (known != NULL) + { + lr_error (ldfile, + _("%s: duplicate declaration of section `%s'"), + "LC_COLLATE", arg->val.str.startmb); + free (arg->val.str.startmb); + } + else + collate->sections = make_seclist_elem (collate, + arg->val.str.startmb, + collate->sections); + + lr_ignore_rest (ldfile, known == NULL); + } + else + { + free (arg->val.str.startmb); + lr_ignore_rest (ldfile, 0); + } + break; + + case tok_collating_element: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 2) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + + /* Next the `from' keyword. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_from) + { + free ((char *) symbol); + goto err_label; + } + + ldfile->return_widestr = 1; + ldfile->translate_strings = 1; + + /* Finally the string with the replacement. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + + ldfile->return_widestr = 0; + ldfile->translate_strings = 0; + + if (arg->tok != tok_string) + goto err_label; + + if (!ignore_content && symbol != NULL) + { + /* The name is already defined. 
*/ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + goto col_elem_free; + + if (arg->val.str.startmb != NULL) + insert_entry (&collate->elem_table, symbol, symbol_len, + new_element (collate, + arg->val.str.startmb, + arg->val.str.lenmb - 1, + arg->val.str.startwc, + symbol, symbol_len, 0)); + } + else + { + col_elem_free: + free ((char *) symbol); + free (arg->val.str.startmb); + free (arg->val.str.startwc); + } + lr_ignore_rest (ldfile, 1); + } + break; + + case tok_collating_symbol: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 2) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + char *symbol = arg->val.str.startmb; + size_t symbol_len = arg->val.str.lenmb; + char *endsymbol = NULL; + size_t endsymbol_len = 0; + enum token_t ellipsis = tok_none; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4) + { + ellipsis = arg->tok; + + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok != tok_bsymbol) + { + free (symbol); + goto err_label; + } + + endsymbol = arg->val.str.startmb; + endsymbol_len = arg->val.str.lenmb; + + lr_ignore_rest (ldfile, 1); + } + else if (arg->tok != tok_eol) + { + free (symbol); + goto err_label; + } + + if (!ignore_content) + { + if (symbol == NULL + || (ellipsis != tok_none && endsymbol == NULL)) + { + lr_error (ldfile, _("\ +%s: unknown character in collating symbol name"), + "LC_COLLATE"); + goto col_sym_free; + } + else if (ellipsis == tok_none) + { + /* A single symbol, no ellipsis. */ + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbol, symbol_len)) + /* The name is already defined. 
*/ + goto col_sym_free; + + insert_entry (&collate->sym_table, symbol, symbol_len, + new_symbol (collate, symbol, symbol_len)); + } + else if (symbol_len != endsymbol_len) + { + col_sym_inv_range: + lr_error (ldfile, + _("invalid names for character range")); + goto col_sym_free; + } + else + { + /* Oh my, we have to handle an ellipsis. First, as + usual, determine the common prefix and then + convert the rest into a range. */ + size_t prefixlen; + unsigned long int from; + unsigned long int to; + char *endp; + + for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen) + if (symbol[prefixlen] != endsymbol[prefixlen]) + break; + + /* Convert the rest into numbers. */ + symbol[symbol_len] = '\0'; + from = strtoul (&symbol[prefixlen], &endp, + ellipsis == tok_ellipsis2 ? 16 : 10); + if (*endp != '\0') + goto col_sym_inv_range; + + endsymbol[symbol_len] = '\0'; + to = strtoul (&endsymbol[prefixlen], &endp, + ellipsis == tok_ellipsis2 ? 16 : 10); + if (*endp != '\0') + goto col_sym_inv_range; + + if (from > to) + goto col_sym_inv_range; + + /* Now loop over all entries. */ + while (from <= to) + { + char *symbuf; + + symbuf = (char *) obstack_alloc (&collate->mempool, + symbol_len + 1); + + /* Create the name. */ + sprintf (symbuf, + ellipsis == tok_ellipsis2 + ? "%.*s%.*lX" : "%.*s%.*lu", + (int) prefixlen, symbol, + (int) (symbol_len - prefixlen), from); + + if (check_duplicate (ldfile, collate, charmap, + repertoire, symbuf, symbol_len)) + /* The name is already defined. */ + goto col_sym_free; + + insert_entry (&collate->sym_table, symbuf, + symbol_len, + new_symbol (collate, symbuf, + symbol_len)); + + /* Increment the counter. */ + ++from; + } + + goto col_sym_free; + } + } + else + { + col_sym_free: + free (symbol); + free (endsymbol); + } + } + break; + + case tok_symbol_equivalence: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0) + goto err_label; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + const char *newname = arg->val.str.startmb; + size_t newname_len = arg->val.str.lenmb; + const char *symname; + size_t symname_len; + void *symval; /* Actually struct symbol_t* */ + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + { + free ((char *) newname); + goto err_label; + } + + symname = arg->val.str.startmb; + symname_len = arg->val.str.lenmb; + + if (newname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition name"), + "LC_COLLATE"); + + sym_equiv_free: + free ((char *) newname); + free ((char *) symname); + break; + } + if (symname == NULL) + { + lr_error (ldfile, _("\ +%s: unknown character in equivalent definition value"), + "LC_COLLATE"); + goto sym_equiv_free; + } + + /* See whether the symbol name is already defined. */ + if (find_entry (&collate->sym_table, symname, symname_len, + &symval) != 0) + { + lr_error (ldfile, _("\ +%s: unknown symbol `%s' in equivalent definition"), + "LC_COLLATE", symname); + goto sym_equiv_free; + } + + if (insert_entry (&collate->sym_table, + newname, newname_len, symval) < 0) + { + lr_error (ldfile, _("\ +error while adding equivalent collating symbol")); + goto sym_equiv_free; + } + + free ((char *) symname); + } + lr_ignore_rest (ldfile, 1); + break; + + case tok_script: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* We get told about the scripts we know. 
*/ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok != tok_bsymbol) + goto err_label; + else + { + struct section_list *runp = collate->known_sections; + char *name; + + while (runp != NULL) + if (strncmp (runp->name, arg->val.str.startmb, + arg->val.str.lenmb) == 0 + && runp->name[arg->val.str.lenmb] == '\0') + break; + else + runp = runp->def_next; + + if (runp != NULL) + { + lr_error (ldfile, _("duplicate definition of script `%s'"), + runp->name); + lr_ignore_rest (ldfile, 0); + break; + } + + runp = (struct section_list *) xcalloc (1, sizeof (*runp)); + name = (char *) xmalloc (arg->val.str.lenmb + 1); + memcpy (name, arg->val.str.startmb, arg->val.str.lenmb); + name[arg->val.str.lenmb] = '\0'; + runp->name = name; + + runp->def_next = collate->known_sections; + collate->known_sections = runp; + } + lr_ignore_rest (ldfile, 1); + break; + + case tok_order_start: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 1 && state != 2) + goto err_label; + state = 1; + + /* The 14652 draft does not specify whether all `order_start' lines + must contain the same number of sort-rules, but 14651 does. So + we require this here as well. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol) + { + /* This better should be a section name. */ + struct section_list *sp = collate->known_sections; + while (sp != NULL + && (sp->name == NULL + || strncmp (sp->name, arg->val.str.startmb, + arg->val.str.lenmb) != 0 + || sp->name[arg->val.str.lenmb] != '\0')) + sp = sp->def_next; + + if (sp == NULL) + { + lr_error (ldfile, _("\ +%s: unknown section name `%.*s'"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); + /* We use the error section. 
*/ + collate->current_section = &collate->error_section; + + if (collate->error_section.first == NULL) + { + /* Insert &collate->error_section at the end of + the collate->sections list. */ + if (collate->sections == NULL) + collate->sections = &collate->error_section; + else + { + sp = collate->sections; + while (sp->next != NULL) + sp = sp->next; + + sp->next = &collate->error_section; + } + collate->error_section.next = NULL; + } + } + else + { + /* One should not be allowed to open the same + section twice. */ + if (sp->first != NULL) + lr_error (ldfile, _("\ +%s: multiple order definitions for section `%s'"), + "LC_COLLATE", sp->name); + else + { + /* Insert sp in the collate->sections list, + right after collate->current_section. */ + if (collate->current_section != NULL) + { + sp->next = collate->current_section->next; + collate->current_section->next = sp; + } + else if (collate->sections == NULL) + /* This is the first section to be defined. */ + collate->sections = sp; + + collate->current_section = sp; + } + + /* Next should come the end of the line or a semicolon. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok == tok_eol) + { + uint32_t cnt; + + /* This means we have exactly one rule: `forward'. */ + if (nrules > 1) + lr_error (ldfile, _("\ +%s: invalid number of sorting rules"), + "LC_COLLATE"); + else + nrules = 1; + sp->rules = obstack_alloc (&collate->mempool, + (sizeof (enum coll_sort_rule) + * nrules)); + for (cnt = 0; cnt < nrules; ++cnt) + sp->rules[cnt] = sort_forward; + + /* Next line. */ + break; + } + + /* Get the next token. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + } + } + else + { + /* There is no section symbol. Therefore we use the unnamed + section. 
*/ + collate->current_section = &collate->unnamed_section; + + if (collate->unnamed_section_defined) + lr_error (ldfile, _("\ +%s: multiple order definitions for unnamed section"), + "LC_COLLATE"); + else + { + /* Insert &collate->unnamed_section at the beginning of + the collate->sections list. */ + collate->unnamed_section.next = collate->sections; + collate->sections = &collate->unnamed_section; + collate->unnamed_section_defined = true; + } + } + + /* Now read the direction names. */ + read_directions (ldfile, arg, charmap, repertoire, result); + + /* From now we need the strings untranslated. */ + ldfile->translate_strings = 0; + break; + + case tok_order_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 1) + goto err_label; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + + state = 2; + lr_ignore_rest (ldfile, 1); + break; + + case tok_reorder_after: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, arg->val.str.startmb, + arg->val.str.lenmb, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 0 && copy_locale == NULL) + goto err_label; + else if (state != 0 && state != 2 && state != 3) + goto err_label; + state = 3; + + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4) + { + /* Find this symbol in the sequence table. 
*/ + char ucsbuf[10]; + char *startmb; + size_t lenmb; + struct element_t *insp; + int no_error = 1; + void *ptr; + + if (arg->tok == tok_bsymbol) + { + startmb = arg->val.str.startmb; + lenmb = arg->val.str.lenmb; + } + else + { + sprintf (ucsbuf, "U%08X", arg->val.ucs4); + startmb = ucsbuf; + lenmb = 9; + } + + if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0) + /* Yes, the symbol exists. Simply point the cursor + to it. */ + collate->cursor = (struct element_t *) ptr; + else + { + struct symbol_t *symbp; + void *ptr; + + if (find_entry (&collate->sym_table, startmb, lenmb, + &ptr) == 0) + { + symbp = ptr; + + if (symbp->order->last != NULL + || symbp->order->next != NULL) + collate->cursor = symbp->order; + else + { + /* This is a collating symbol but its position + is not yet defined. */ + lr_error (ldfile, _("\ +%s: order for collating symbol %.*s not yet defined"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + else if (find_entry (&collate->elem_table, startmb, lenmb, + &ptr) == 0) + { + insp = (struct element_t *) ptr; + + if (insp->last != NULL || insp->next != NULL) + collate->cursor = insp; + else + { + /* This is a collating element but its position + is not yet defined. */ + lr_error (ldfile, _("\ +%s: order for collating element %.*s not yet defined"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + else + { + /* This is bad. The symbol after which we have to + insert does not exist. */ + lr_error (ldfile, _("\ +%s: cannot reorder after %.*s: symbol not known"), + "LC_COLLATE", (int) lenmb, startmb); + collate->cursor = NULL; + no_error = 0; + } + } + + lr_ignore_rest (ldfile, no_error); + } + else + /* This must not happen. */ + goto err_label; + break; + + case tok_reorder_end: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + break; + + if (state != 3) + goto err_label; + state = 4; + lr_ignore_rest (ldfile, 1); + break; + + case tok_reorder_sections_after: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + state = 2; + + /* Handle ellipsis at end of list. */ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 3) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-end' keyword"), "LC_COLLATE")); + state = 4; + } + else if (state != 2 && state != 4) + goto err_label; + state = 5; + + /* Get the name of the sections we are adding after. */ + arg = lr_token (ldfile, charmap, result, repertoire, verbose); + if (arg->tok == tok_bsymbol) + { + /* Now find a section with this name. */ + struct section_list *runp = collate->sections; + + while (runp != NULL) + { + if (runp->name != NULL + && strlen (runp->name) == arg->val.str.lenmb + && memcmp (runp->name, arg->val.str.startmb, + arg->val.str.lenmb) == 0) + break; + + runp = runp->next; + } + + if (runp != NULL) + collate->current_section = runp; + else + { + /* This is bad. The section after which we have to + reorder does not exist. Therefore we cannot + process the whole rest of this reorder + specification. */ + lr_error (ldfile, _("%s: section `%.*s' not known"), + "LC_COLLATE", (int) arg->val.str.lenmb, + arg->val.str.startmb); + + do + { + lr_ignore_rest (ldfile, 0); + + now = lr_token (ldfile, charmap, result, NULL, verbose); + } + while (now->tok == tok_reorder_sections_after + || now->tok == tok_reorder_sections_end + || now->tok == tok_end); + + /* Process the token we just saw. */ + nowtok = now->tok; + continue; + } + } + else + /* This must not happen. 
*/ + goto err_label; + break; + + case tok_reorder_sections_end: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + break; + + if (state != 5) + goto err_label; + state = 6; + lr_ignore_rest (ldfile, 1); + break; + + case tok_bsymbol: + case tok_ucs4: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 0 && state != 1 && state != 3 && state != 5) + goto err_label; + + if ((state == 0 || state == 5) && nowtok == tok_ucs4) + goto err_label; + + if (nowtok == tok_ucs4) + { + snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4); + symstr = ucs4buf; + symlen = 9; + } + else if (arg != NULL) + { + symstr = arg->val.str.startmb; + symlen = arg->val.str.lenmb; + } + else + { + lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE", + (int) ldfile->token.val.str.lenmb, + ldfile->token.val.str.startmb); + break; + } + + struct element_t *seqp; + if (state == 0) + { + /* We are outside an `order_start' region. This means + we must only accept definitions of values for + collation symbols since these are purely abstract + values and don't need directions associated. */ + void *ptr; + + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0) + { + seqp = ptr; + + /* It's already defined. First check whether this + is really a collating symbol. */ + if (seqp->is_character) + goto err_label; + + goto move_entry; + } + else + { + void *result; + + if (find_entry (&collate->sym_table, symstr, symlen, + &result) != 0) + /* No collating symbol, it's an error. */ + goto err_label; + + /* Maybe this is the first time we define a symbol + value and it is before the first actual section. 
*/ + if (collate->sections == NULL) + collate->sections = collate->current_section = + &collate->symbol_section; + } + + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, + charmap, repertoire, result); + + /* Remember that we processed the ellipsis. */ + was_ellipsis = tok_none; + + /* And don't add the value a second time. */ + break; + } + } + else if (state == 3) + { + /* It is possible that we already have this collation sequence. + In this case we move the entry. */ + void *sym; + void *ptr; + + /* If the symbol after which we have to insert was not found + ignore all entries. */ + if (collate->cursor == NULL) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0) + { + seqp = (struct element_t *) ptr; + goto move_entry; + } + + if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0 + && (seqp = ((struct symbol_t *) sym)->order) != NULL) + goto move_entry; + + if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0 + && (seqp = (struct element_t *) ptr, + seqp->last != NULL || seqp->next != NULL + || (collate->start != NULL && seqp == collate->start))) + { + move_entry: + /* Remove the entry from the old position. */ + if (seqp->last == NULL) + collate->start = seqp->next; + else + seqp->last->next = seqp->next; + if (seqp->next != NULL) + seqp->next->last = seqp->last; + + /* We also have to check whether this entry is the + first or last of a section. */ + if (seqp->section->first == seqp) + { + if (seqp->section->first == seqp->section->last) + /* This section has no content anymore. */ + seqp->section->first = seqp->section->last = NULL; + else + seqp->section->first = seqp->next; + } + else if (seqp->section->last == seqp) + seqp->section->last = seqp->last; + + /* Now insert it in the new place. */ + insert_weights (ldfile, seqp, charmap, repertoire, result, + tok_none); + break; + } + + /* Otherwise we just add a new entry. 
*/ + } + else if (state == 5) + { + /* We are reordering sections. Find the named section. */ + struct section_list *runp = collate->sections; + struct section_list *prevp = NULL; + + while (runp != NULL) + { + if (runp->name != NULL + && strlen (runp->name) == symlen + && memcmp (runp->name, symstr, symlen) == 0) + break; + + prevp = runp; + runp = runp->next; + } + + if (runp == NULL) + { + lr_error (ldfile, _("%s: section `%.*s' not known"), + "LC_COLLATE", (int) symlen, symstr); + lr_ignore_rest (ldfile, 0); + } + else + { + if (runp != collate->current_section) + { + /* Remove the named section from the old place and + insert it in the new one. */ + prevp->next = runp->next; + + runp->next = collate->current_section->next; + collate->current_section->next = runp; + collate->current_section = runp; + } + + /* Process the rest of the line which might change + the collation rules. */ + arg = lr_token (ldfile, charmap, result, repertoire, + verbose); + if (arg->tok != tok_eof && arg->tok != tok_eol) + read_directions (ldfile, arg, charmap, repertoire, + result); + } + break; + } + else if (was_ellipsis != tok_none) + { + /* Using the information in the `ellipsis_weight' + element and this and the last value we have to handle + the ellipsis now. */ + assert (state == 1); + + handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap, + repertoire, result); + + /* Remember that we processed the ellipsis. */ + was_ellipsis = tok_none; + + /* And don't add the value a second time. */ + break; + } + + /* Now insert in the new place. */ + insert_value (ldfile, symstr, symlen, charmap, repertoire, result); + break; + + case tok_undefined: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (state != 1) + goto err_label; + + if (was_ellipsis != tok_none) + { + lr_error (ldfile, + _("%s: cannot have `%s' as end of ellipsis range"), + "LC_COLLATE", "UNDEFINED"); + + unlink_element (collate); + was_ellipsis = tok_none; + } + + /* See whether UNDEFINED already appeared somewhere. */ + if (collate->undefined.next != NULL + || &collate->undefined == collate->cursor) + { + lr_error (ldfile, + _("%s: order for `%.*s' already defined at %s:%Zu"), + "LC_COLLATE", 9, "UNDEFINED", + collate->undefined.file, + collate->undefined.line); + lr_ignore_rest (ldfile, 0); + } + else + /* Parse the weights. */ + insert_weights (ldfile, &collate->undefined, charmap, + repertoire, result, tok_none); + break; + + case tok_ellipsis2: /* symbolic hexadecimal ellipsis */ + case tok_ellipsis3: /* absolute ellipsis */ + case tok_ellipsis4: /* symbolic decimal ellipsis */ + /* This is the symbolic (decimal or hexadecimal) or absolute + ellipsis. */ + if (was_ellipsis != tok_none) + goto err_label; + + if (state != 0 && state != 1 && state != 3) + goto err_label; + + was_ellipsis = nowtok; + + insert_weights (ldfile, &collate->ellipsis_weight, charmap, + repertoire, result, nowtok); + break; + + case tok_end: + seen_end: + /* Next we assume `LC_COLLATE'. */ + if (!ignore_content) + { + if (state == 0 && copy_locale == NULL) + /* We must either see a copy statement or have + ordering values. */ + lr_error (ldfile, + _("%s: empty category description not allowed"), + "LC_COLLATE"); + else if (state == 1) + { + lr_error (ldfile, _("%s: missing `order_end' keyword"), + "LC_COLLATE"); + + /* Handle ellipsis at end of list. 
*/ + if (was_ellipsis != tok_none) + { + handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap, + repertoire, result); + was_ellipsis = tok_none; + } + } + else if (state == 3) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-end' keyword"), "LC_COLLATE")); + else if (state == 5) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing `reorder-sections-end' keyword"), "LC_COLLATE")); + } + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE"); + else if (arg->tok != tok_lc_collate) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_COLLATE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_collate); + return; + + case tok_define: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + + /* Simply add the new symbol. */ + struct name_list *newsym = xmalloc (sizeof (*newsym) + + arg->val.str.lenmb + 1); + memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb); + newsym->str[arg->val.str.lenmb] = '\0'; + newsym->next = defined; + defined = newsym; + + lr_ignore_rest (ldfile, 1); + break; + + case tok_undef: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + + /* Remove _all_ occurrences of the symbol from the list. 
*/ + struct name_list *prevdef = NULL; + struct name_list *curdef = defined; + while (curdef != NULL) + if (strncmp (arg->val.str.startmb, curdef->str, + arg->val.str.lenmb) == 0 + && curdef->str[arg->val.str.lenmb] == '\0') + { + if (prevdef == NULL) + defined = curdef->next; + else + prevdef->next = curdef->next; + + struct name_list *olddef = curdef; + curdef = curdef->next; + + free (olddef); + } + else + { + prevdef = curdef; + curdef = curdef->next; + } + + lr_ignore_rest (ldfile, 1); + break; + + case tok_ifdef: + case tok_ifndef: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + found_ifdef: + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok != tok_ident) + goto err_label; + lr_ignore_rest (ldfile, 1); + + if (collate->else_action == else_none) + { + curdef = defined; + while (curdef != NULL) + if (strncmp (arg->val.str.startmb, curdef->str, + arg->val.str.lenmb) == 0 + && curdef->str[arg->val.str.lenmb] == '\0') + break; + else + curdef = curdef->next; + + if ((nowtok == tok_ifdef && curdef != NULL) + || (nowtok == tok_ifndef && curdef == NULL)) + { + /* We have to use the if-branch. */ + collate->else_action = else_ignore; + } + else + { + /* We have to use the else-branch, if there is one. */ + nowtok = skip_to (ldfile, collate, charmap, 0); + if (nowtok == tok_else) + collate->else_action = else_seen; + else if (nowtok == tok_elifdef) + { + nowtok = tok_ifdef; + goto found_ifdef; + } + else if (nowtok == tok_elifndef) + { + nowtok = tok_ifndef; + goto found_ifdef; + } + else if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + } + else + { + /* XXX Should it really become necessary to support nested + preprocessor handling we will push the state here. 
*/ + lr_error (ldfile, _("%s: nested conditionals not supported"), + "LC_COLLATE"); + nowtok = skip_to (ldfile, collate, charmap, 1); + if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + break; + + case tok_elifdef: + case tok_elifndef: + case tok_else: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + lr_ignore_rest (ldfile, 1); + + if (collate->else_action == else_ignore) + { + /* Ignore everything until the endif. */ + nowtok = skip_to (ldfile, collate, charmap, 1); + if (nowtok == tok_eof) + goto seen_eof; + else if (nowtok == tok_end) + goto seen_end; + } + else + { + assert (collate->else_action == else_none); + lr_error (ldfile, _("\ +%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE", + nowtok == tok_else ? "else" + : nowtok == tok_elifdef ? "elifdef" : "elifndef"); + } + break; + + case tok_endif: + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + lr_ignore_rest (ldfile, 1); + + if (collate->else_action != else_ignore + && collate->else_action != else_seen) + lr_error (ldfile, _("\ +%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE"); + + /* XXX If we support nested preprocessor directives we pop + the state here. */ + collate->else_action = else_none; + break; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + seen_eof: + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE"); +} diff --git a/REORG.TODO/locale/programs/ld-ctype.c b/REORG.TODO/locale/programs/ld-ctype.c new file mode 100644 index 0000000000..df266c20d6 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-ctype.c @@ -0,0 +1,4030 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <alloca.h> +#include <byteswap.h> +#include <endian.h> +#include <errno.h> +#include <limits.h> +#include <obstack.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> +#include <stdint.h> +#include <sys/uio.h> + +#include "localedef.h" +#include "charmap.h" +#include "localeinfo.h" +#include "langinfo.h" +#include "linereader.h" +#include "locfile-token.h" +#include "locfile.h" + +#include <assert.h> + + +/* The bit used for representing a special class. */ +#define BITPOS(class) ((class) - tok_upper) +#define BIT(class) (_ISbit (BITPOS (class))) +#define BITw(class) (_ISwbit (BITPOS (class))) + +#define ELEM(ctype, collection, idx, value) \ + *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \ + &ctype->collection##_act idx, value) + + +/* To be compatible with former implementations we for now restrict + the number of bits for character classes to 16. When compatibility + is not necessary anymore increase the number to 32. */ +#define char_class_t uint16_t +#define char_class32_t uint32_t + + +/* Type to describe a transliteration action. We have a possibly + multiple character from-string and a set of multiple character + to-strings. 
All are 32bit values since this is what is used in
   the gconv functions.  */

/* One to-string of a transliteration rule.  The to-strings of a rule
   are chained through NEXT.  */
struct translit_to_t
{
  /* The replacement text as 32bit values (presumably zero-terminated,
     cf. `no_str' below -- confirm at the users).  */
  uint32_t *str;

  struct translit_to_t *next;
};

/* One transliteration rule: a from-string and the list of its
   to-strings.  Rules are chained through NEXT.  */
struct translit_t
{
  uint32_t *from;

  /* Presumably the file and line where the rule was read; confirm at
     the place where the rule is created.  */
  const char *fname;
  size_t lineno;

  /* Head of the list of to-strings.  */
  struct translit_to_t *to;

  struct translit_t *next;
};

/* One entry of the transliteration ignore list.  */
struct translit_ignore_t
{
  /* NOTE(review): looks like a range FROM..TO walked with stride STEP;
     verify against the code filling this structure.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  const char *fname;
  size_t lineno;

  struct translit_ignore_t *next;
};


/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  const char *copy_locale;
  const char *copy_repertoire;

  struct translit_include_t *next;
};

/* Provide some dummy pointer for empty string.  */
static uint32_t no_str[] = { 0 };


/* Each inclusion of "3level.h" below instantiates one sparse table type
   named by TABLE, with entries of type ELEMENT and DEFAULT as the value
   of empty entries.  */

/* Sparse table of uint32_t.  NO_ADD_LOCALE suppresses the definition
   of add_locale_idx_table.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of uint8_t, used for the `width' member below.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Sparse table of int32_t.  While "3level.h" is processed the name
   wctrans_table_add is redirected to wctrans_table_add_internal so that
   the wrapper below can take over the public name.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
/* The wctrans_table must actually store the difference between the
   desired result and the argument.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  /* The difference is computed in uint32_t arithmetic and then passed
     as the table's int32_t element.  */
  wctrans_table_add_internal (t, wc, mapped_wc - wc);
}

/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

/* This table type is written out by hand instead of being generated
   from "3level.h".  */
struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  size_t result_size;
};

static void add_locale_wctype_table (struct locale_file *file,
                                     struct wctype_table *t);

/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* Wide character values known so far.  ctype_startup seeds the first
     256 entries with the values 0..255; charnames_max is the allocated
     and charnames_act the used number of entries.  */
  uint32_t *charnames;
  size_t charnames_max;
  size_t charnames_act;
  /* An index lookup table, to speed up find_idx.  */
  struct idx_table charnames_idx;

  struct repertoire_t *repertoire;

  /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  uint32_t last_class_char;
  /* Class bits of the single-byte characters, indexed by byte value.  */
  uint32_t class256_collection[256];
  /* Class bits of the wide characters; ctype_finish reports entry I
     using charnames[I], i.e. the two arrays run in parallel.  */
  uint32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  uint32_t class_done;
  uint32_t class_offset;

  /* Input digits as multibyte sequences and as wide characters.
     ctype_finish keeps only complete groups of ten.  */
  struct charseq **mbdigits;
  size_t mbdigits_act;
  size_t mbdigits_max;
  uint32_t *wcdigits;
  size_t wcdigits_act;
  size_t wcdigits_max;

  /* The ten `outdigit' characters in both representations.  */
  struct charseq *mboutdigits[10];
  uint32_t wcoutdigits[10];
  size_t outdigits_act;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  const char *mapnames[MAX_NR_CHARMAP];
  uint32_t *map_collection[MAX_NR_CHARMAP];
  /* Single-byte mappings; ctype_finish reads index 0 as the uppercase
     and index 1 as the lowercase mapping.  */
  uint32_t map256_collection[2][256];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;
  size_t last_map_idx;
  int tomap_done[MAX_NR_CHARMAP];
  uint32_t map_offset;

  /* Transliteration information.  */
  struct translit_include_t *translit_include;
  struct translit_t *translit;
  struct translit_ignore_t *translit_ignore;
  uint32_t ntranslit_ignore;

  uint32_t *default_missing;
  const char *default_missing_file;
  size_t default_missing_lineno;

  /* Set to 1 by ctype_startup when enc_not_ascii_compatible is set.  */
  uint32_t to_nonascii;
  /* Set to 1 by ctype_finish when the single-byte case mappings do not
     follow the ASCII rules.  */
  uint32_t nonascii_case;

  /* The arrays for the binary representation.  */
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  uint32_t **map_b;
  uint32_t **map32_b;
  uint32_t **class_b;
  struct wctype_table *class_3level;
  struct wctrans_table *map_3level;
  uint32_t *class_name_ptr;
  uint32_t *map_name_ptr;
  struct wcwidth_table width;
  uint32_t mb_cur_max;
  /* Taken from the charmap's code_set_name in ctype_finish, or
     "//UNKNOWN//" when the charmap does not name one.  */
  const char *codeset_name;
  uint32_t *translit_from_idx;
  uint32_t *translit_from_tbl;
  uint32_t *translit_to_idx;
  uint32_t *translit_to_tbl;
  uint32_t translit_idx_size;
  size_t translit_from_tbl_size;
  size_t translit_to_tbl_size;

  /* Obstack initialized by ctype_startup.  */
  struct obstack mempool;
};


/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free


/* Prototypes for local functions.
*/ +static void ctype_startup (struct linereader *lr, struct localedef_t *locale, + const struct charmap_t *charmap, + struct localedef_t *copy_locale, + int ignore_content); +static void ctype_class_new (struct linereader *lr, + struct locale_ctype_t *ctype, const char *name); +static void ctype_map_new (struct linereader *lr, + struct locale_ctype_t *ctype, + const char *name, const struct charmap_t *charmap); +static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table, + size_t *max, size_t *act, uint32_t idx); +static void set_class_defaults (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire); +static void allocate_arrays (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire); + + +static const char *longnames[] = +{ + "zero", "one", "two", "three", "four", + "five", "six", "seven", "eight", "nine" +}; +static const char *uninames[] = +{ + "U00000030", "U00000031", "U00000032", "U00000033", "U00000034", + "U00000035", "U00000036", "U00000037", "U00000038", "U00000039" +}; +static const unsigned char digits[] = "0123456789"; + + +static void +ctype_startup (struct linereader *lr, struct localedef_t *locale, + const struct charmap_t *charmap, + struct localedef_t *copy_locale, int ignore_content) +{ + unsigned int cnt; + struct locale_ctype_t *ctype; + + if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL) + { + if (copy_locale == NULL) + { + /* Allocate the needed room. */ + locale->categories[LC_CTYPE].ctype = ctype = + (struct locale_ctype_t *) xcalloc (1, + sizeof (struct locale_ctype_t)); + + /* We have seen no names yet. */ + ctype->charnames_max = charmap->mb_cur_max == 1 ? 
256 : 512; + ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max + * sizeof (uint32_t)); + for (cnt = 0; cnt < 256; ++cnt) + ctype->charnames[cnt] = cnt; + ctype->charnames_act = 256; + idx_table_init (&ctype->charnames_idx); + + /* Fill character class information. */ + ctype->last_class_char = ILLEGAL_CHAR_VALUE; + /* The order of the following instructions determines the bit + positions! */ + ctype_class_new (lr, ctype, "upper"); + ctype_class_new (lr, ctype, "lower"); + ctype_class_new (lr, ctype, "alpha"); + ctype_class_new (lr, ctype, "digit"); + ctype_class_new (lr, ctype, "xdigit"); + ctype_class_new (lr, ctype, "space"); + ctype_class_new (lr, ctype, "print"); + ctype_class_new (lr, ctype, "graph"); + ctype_class_new (lr, ctype, "blank"); + ctype_class_new (lr, ctype, "cntrl"); + ctype_class_new (lr, ctype, "punct"); + ctype_class_new (lr, ctype, "alnum"); + + ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512; + ctype->class_collection + = (uint32_t *) xcalloc (sizeof (unsigned long int), + ctype->class_collection_max); + ctype->class_collection_act = 256; + + /* Fill character map information. */ + ctype->last_map_idx = MAX_NR_CHARMAP; + ctype_map_new (lr, ctype, "toupper", charmap); + ctype_map_new (lr, ctype, "tolower", charmap); + + /* Fill first 256 entries in `toXXX' arrays. */ + for (cnt = 0; cnt < 256; ++cnt) + { + ctype->map_collection[0][cnt] = cnt; + ctype->map_collection[1][cnt] = cnt; + + ctype->map256_collection[0][cnt] = cnt; + ctype->map256_collection[1][cnt] = cnt; + } + + if (enc_not_ascii_compatible) + ctype->to_nonascii = 1; + + obstack_init (&ctype->mempool); + } + else + ctype = locale->categories[LC_CTYPE].ctype = + copy_locale->categories[LC_CTYPE].ctype; + } +} + + +void +ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + /* See POSIX.2, table 2-6 for the meaning of the following table. 
*/ +#define NCLASS 12 + static const struct + { + const char *name; + const char allow[NCLASS]; + } + valid_table[NCLASS] = + { + /* The order is important. See token.h for more information. + M = Always, D = Default, - = Permitted, X = Mutually exclusive */ + { "upper", "--MX-XDDXXX-" }, + { "lower", "--MX-XDDXXX-" }, + { "alpha", "---X-XDDXXX-" }, + { "digit", "XXX--XDDXXX-" }, + { "xdigit", "-----XDDXXX-" }, + { "space", "XXXXX------X" }, + { "print", "---------X--" }, + { "graph", "---------X--" }, + { "blank", "XXXXXM-----X" }, + { "cntrl", "XXXXX-XX--XX" }, + { "punct", "XXXXX-DD-X-X" }, + { "alnum", "-----XDDXXX-" } + }; + size_t cnt; + int cls1, cls2; + uint32_t space_value; + struct charseq *space_seq; + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + int warned; + const void *key; + size_t len; + void *vdata; + void *curs; + + /* Now resolve copying and also handle completely missing definitions. */ + if (ctype == NULL) + { + const char *repertoire_name; + + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_CTYPE] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE], + from->repertoire_name, charmap); + while (from->categories[LC_CTYPE].ctype == NULL + && from->copy_name[LC_CTYPE] != NULL); + + ctype = locale->categories[LC_CTYPE].ctype + = from->categories[LC_CTYPE].ctype; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (ctype == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_CTYPE")); + ctype_startup (NULL, locale, charmap, NULL, 0); + ctype = locale->categories[LC_CTYPE].ctype; + } + + /* Get the repertoire we have to use. 
*/ + repertoire_name = locale->repertoire_name ?: repertoire_global; + if (repertoire_name != NULL) + ctype->repertoire = repertoire_read (repertoire_name); + } + + /* We need the name of the currently used 8-bit character set to + make correct conversion between this 8-bit representation and the + ISO 10646 character set used internally for wide characters. */ + ctype->codeset_name = charmap->code_set_name; + if (ctype->codeset_name == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No character set name specified in charmap"))); + ctype->codeset_name = "//UNKNOWN//"; + } + + /* Set default value for classes not specified. */ + set_class_defaults (ctype, charmap, ctype->repertoire); + + /* Check according to table. */ + for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) + { + uint32_t tmp = ctype->class_collection[cnt]; + + if (tmp != 0) + { + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & _ISwbit (cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') + { + int eq = (tmp & _ISwbit (cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) + { + case 'M': + if (!eq) + { + uint32_t value = ctype->charnames[cnt]; + + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character L'\\u%0*x' in class `%s' must be in class `%s'"), + value > 0xffff ? 8 : 4, + value, + valid_table[cls1].name, + valid_table[cls2].name)); + } + break; + + case 'X': + if (eq) + { + uint32_t value = ctype->charnames[cnt]; + + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character L'\\u%0*x' in class `%s' must not be in class `%s'"), + value > 0xffff ? 
8 : 4, + value, + valid_table[cls1].name, + valid_table[cls2].name)); + } + break; + + case 'D': + ctype->class_collection[cnt] |= _ISwbit (cls2); + break; + + default: + WITH_CUR_LOCALE (error (5, 0, _("\ +internal error in %s, line %u"), __FUNCTION__, __LINE__)); + } + } + } + } + + for (cnt = 0; cnt < 256; ++cnt) + { + uint32_t tmp = ctype->class256_collection[cnt]; + + if (tmp != 0) + { + for (cls1 = 0; cls1 < NCLASS; ++cls1) + if ((tmp & _ISbit (cls1)) != 0) + for (cls2 = 0; cls2 < NCLASS; ++cls2) + if (valid_table[cls1].allow[cls2] != '-') + { + int eq = (tmp & _ISbit (cls2)) != 0; + switch (valid_table[cls1].allow[cls2]) + { + case 'M': + if (!eq) + { + char buf[17]; + + snprintf (buf, sizeof buf, "\\%Zo", cnt); + + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character '%s' in class `%s' must be in class `%s'"), + buf, + valid_table[cls1].name, + valid_table[cls2].name)); + } + break; + + case 'X': + if (eq) + { + char buf[17]; + + snprintf (buf, sizeof buf, "\\%Zo", cnt); + + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character '%s' in class `%s' must not be in class `%s'"), + buf, + valid_table[cls1].name, + valid_table[cls2].name)); + } + break; + + case 'D': + ctype->class256_collection[cnt] |= _ISbit (cls2); + break; + + default: + WITH_CUR_LOCALE (error (5, 0, _("\ +internal error in %s, line %u"), __FUNCTION__, __LINE__)); + } + } + } + } + + /* ... and now test <SP> as a special case. 
*/ + space_value = 32; + if (((cnt = BITPOS (tok_space), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_space)) == 0) + || (cnt = BITPOS (tok_blank), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_blank)) == 0))) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"), + valid_table[cnt].name)); + } + else if (((cnt = BITPOS (tok_punct), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_punct)) != 0) + || (cnt = BITPOS (tok_graph), + (ELEM (ctype, class_collection, , space_value) + & BITw (tok_graph)) + != 0))) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +<SP> character must not be in class `%s'"), + valid_table[cnt].name)); + } + else + ELEM (ctype, class_collection, , space_value) |= BITw (tok_print); + + space_seq = charmap_find_value (charmap, "SP", 2); + if (space_seq == NULL) + space_seq = charmap_find_value (charmap, "space", 5); + if (space_seq == NULL) + space_seq = charmap_find_value (charmap, "U00000020", 9); + if (space_seq == NULL || space_seq->nbytes != 1) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +character <SP> not defined in character map"))); + } + else if (((cnt = BITPOS (tok_space), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_space)) == 0) + || (cnt = BITPOS (tok_blank), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_blank)) == 0))) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"), + valid_table[cnt].name)); + } + else if (((cnt = BITPOS (tok_punct), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_punct)) != 0) + || (cnt = BITPOS (tok_graph), + (ctype->class256_collection[space_seq->bytes[0]] + & BIT (tok_graph)) != 0))) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +<SP> character must not be in class `%s'"), + valid_table[cnt].name)); + } + else + ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print); + + /* Check 
whether all single-byte characters make to their upper/lowercase + equivalent according to the ASCII rules. */ + for (cnt = 'A'; cnt <= 'Z'; ++cnt) + { + uint32_t uppval = ctype->map256_collection[0][cnt]; + uint32_t lowval = ctype->map256_collection[1][cnt]; + uint32_t lowuppval = ctype->map256_collection[0][lowval]; + uint32_t lowlowval = ctype->map256_collection[1][lowval]; + + if (uppval != cnt + || lowval != cnt + 0x20 + || lowuppval != cnt + || lowlowval != cnt + 0x20) + ctype->nonascii_case = 1; + } + for (cnt = 0; cnt < 256; ++cnt) + if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z') + if (ctype->map256_collection[0][cnt] != cnt + || ctype->map256_collection[1][cnt] != cnt) + ctype->nonascii_case = 1; + + /* Now that the tests are done make sure the name array contains all + characters which are handled in the WIDTH section of the + character set definition file. */ + if (charmap->width_rules != NULL) + for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) + { + unsigned char bytes[charmap->mb_cur_max]; + int nbytes = charmap->width_rules[cnt].from->nbytes; + + /* We have the range of character for which the width is + specified described using byte sequences of the multibyte + charset. We have to convert this to UCS4 now. And we + cannot simply convert the beginning and the end of the + sequence, we have to iterate over the byte sequence and + convert it for every single character. */ + memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes); + + while (nbytes < charmap->width_rules[cnt].to->nbytes + || memcmp (bytes, charmap->width_rules[cnt].to->bytes, + nbytes) <= 0) + { + /* Find the UCS value for `bytes'. 
*/ + int inner; + uint32_t wch; + struct charseq *seq + = charmap_find_symbol (charmap, (char *) bytes, nbytes); + + if (seq == NULL) + wch = ILLEGAL_CHAR_VALUE; + else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE) + wch = seq->ucs4; + else + wch = repertoire_find_value (ctype->repertoire, seq->name, + strlen (seq->name)); + + if (wch != ILLEGAL_CHAR_VALUE) + /* We are only interested in the side-effects of the + `find_idx' call. It will add appropriate entries in + the name array if this is necessary. */ + (void) find_idx (ctype, NULL, NULL, NULL, wch); + + /* "Increment" the bytes sequence. */ + inner = nbytes - 1; + while (inner >= 0 && bytes[inner] == 0xff) + --inner; + + if (inner < 0) + { + /* We have to extend the byte sequence. */ + if (nbytes >= charmap->width_rules[cnt].to->nbytes) + break; + + bytes[0] = 1; + memset (&bytes[1], 0, nbytes); + ++nbytes; + } + else + { + ++bytes[inner]; + while (++inner < nbytes) + bytes[inner] = 0; + } + } + } + + /* Now set all the other characters of the character set to the + default width. */ + curs = NULL; + while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0) + { + struct charseq *data = (struct charseq *) vdata; + + if (data->ucs4 == UNINITIALIZED_CHAR_VALUE) + data->ucs4 = repertoire_find_value (ctype->repertoire, + data->name, len); + + if (data->ucs4 != ILLEGAL_CHAR_VALUE) + (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4); + } + + /* There must be a multiple of 10 digits. */ + if (ctype->mbdigits_act % 10 != 0) + { + assert (ctype->mbdigits_act == ctype->wcdigits_act); + ctype->wcdigits_act -= ctype->mbdigits_act % 10; + ctype->mbdigits_act -= ctype->mbdigits_act % 10; + WITH_CUR_LOCALE (error (0, 0, _("\ +`digit' category has not entries in groups of ten"))); + } + + /* Check the input digits. There must be a multiple of ten available. + In each group it could be that one or the other character is missing. + In this case the whole group must be removed. 
*/ + cnt = 0; + while (cnt < ctype->mbdigits_act) + { + size_t inner; + for (inner = 0; inner < 10; ++inner) + if (ctype->mbdigits[cnt + inner] == NULL) + break; + + if (inner == 10) + cnt += 10; + else + { + /* Remove the group. */ + memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10], + ((ctype->wcdigits_act - cnt - 10) + * sizeof (ctype->mbdigits[0]))); + ctype->mbdigits_act -= 10; + } + } + + /* If no input digits are given use the default. */ + if (ctype->mbdigits_act == 0) + { + if (ctype->mbdigits_max == 0) + { + ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, + 10 * sizeof (struct charseq *)); + ctype->mbdigits_max = 10; + } + + for (cnt = 0; cnt < 10; ++cnt) + { + ctype->mbdigits[cnt] = charmap_find_symbol (charmap, + (char *) digits + cnt, 1); + if (ctype->mbdigits[cnt] == NULL) + { + ctype->mbdigits[cnt] = charmap_find_symbol (charmap, + longnames[cnt], + strlen (longnames[cnt])); + if (ctype->mbdigits[cnt] == NULL) + { + /* Hum, this ain't good. */ + WITH_CUR_LOCALE (error (0, 0, _("\ +no input digits defined and none of the standard names in the charmap"))); + + ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, + sizeof (struct charseq) + 1); + + /* This is better than nothing. */ + ctype->mbdigits[cnt]->bytes[0] = digits[cnt]; + ctype->mbdigits[cnt]->nbytes = 1; + } + } + } + + ctype->mbdigits_act = 10; + } + + /* Check the wide character input digits. There must be a multiple + of ten available. In each group it could be that one or the other + character is missing. In this case the whole group must be + removed. */ + cnt = 0; + while (cnt < ctype->wcdigits_act) + { + size_t inner; + for (inner = 0; inner < 10; ++inner) + if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE) + break; + + if (inner == 10) + cnt += 10; + else + { + /* Remove the group. 
*/ + memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10], + ((ctype->wcdigits_act - cnt - 10) + * sizeof (ctype->wcdigits[0]))); + ctype->wcdigits_act -= 10; + } + } + + /* If no input digits are given use the default. */ + if (ctype->wcdigits_act == 0) + { + if (ctype->wcdigits_max == 0) + { + ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, + 10 * sizeof (uint32_t)); + ctype->wcdigits_max = 10; + } + + for (cnt = 0; cnt < 10; ++cnt) + ctype->wcdigits[cnt] = L'0' + cnt; + + ctype->mbdigits_act = 10; + } + + /* Check the outdigits. */ + warned = 0; + for (cnt = 0; cnt < 10; ++cnt) + if (ctype->mboutdigits[cnt] == NULL) + { + static struct charseq replace[2]; + + if (!warned) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +not all characters used in `outdigit' are available in the charmap"))); + warned = 1; + } + + replace[0].nbytes = 1; + replace[0].bytes[0] = '?'; + replace[0].bytes[1] = '\0'; + ctype->mboutdigits[cnt] = &replace[0]; + } + + warned = 0; + for (cnt = 0; cnt < 10; ++cnt) + if (ctype->wcoutdigits[cnt] == 0) + { + if (!warned) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +not all characters used in `outdigit' are available in the repertoire"))); + warned = 1; + } + + ctype->wcoutdigits[cnt] = L'?'; + } + + /* Sort the entries in the translit_ignore list. 
*/ + if (ctype->translit_ignore != NULL) + { + struct translit_ignore_t *firstp = ctype->translit_ignore; + struct translit_ignore_t *runp; + + ctype->ntranslit_ignore = 1; + + for (runp = firstp->next; runp != NULL; runp = runp->next) + { + struct translit_ignore_t *lastp = NULL; + struct translit_ignore_t *cmpp; + + ++ctype->ntranslit_ignore; + + for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next) + if (runp->from < cmpp->from) + break; + + runp->next = lastp; + if (lastp == NULL) + firstp = runp; + } + + ctype->translit_ignore = firstp; + } +} + + +void +ctype_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; + const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1) + + ctype->nr_charclass + ctype->map_collection_nr); + struct locale_file file; + uint32_t default_missing_len; + size_t elem, cnt; + + /* Now prepare the output: Find the sizes of the table we can use. */ + allocate_arrays (ctype, charmap, ctype->repertoire); + + default_missing_len = (ctype->default_missing + ? 
wcslen ((wchar_t *) ctype->default_missing) + : 0); + + init_locale_data (&file, nelems); + for (elem = 0; elem < nelems; ++elem) + { + if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)) + switch (elem) + { +#define CTYPE_EMPTY(name) \ + case name: \ + add_locale_empty (&file); \ + break + + CTYPE_EMPTY(_NL_CTYPE_GAP1); + CTYPE_EMPTY(_NL_CTYPE_GAP2); + CTYPE_EMPTY(_NL_CTYPE_GAP3); + CTYPE_EMPTY(_NL_CTYPE_GAP4); + CTYPE_EMPTY(_NL_CTYPE_GAP5); + CTYPE_EMPTY(_NL_CTYPE_GAP6); + +#define CTYPE_RAW_DATA(name, base, size) \ + case _NL_ITEM_INDEX (name): \ + add_locale_raw_data (&file, base, size); \ + break + + CTYPE_RAW_DATA (_NL_CTYPE_CLASS, + ctype->ctype_b, + (256 + 128) * sizeof (char_class_t)); + +#define CTYPE_UINT32_ARRAY(name, base, n_elems) \ + case _NL_ITEM_INDEX (name): \ + add_locale_uint32_array (&file, base, n_elems); \ + break + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128); + CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128); + CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256); + CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256); + CTYPE_RAW_DATA (_NL_CTYPE_CLASS32, + ctype->ctype32_b, + 256 * sizeof (char_class32_t)); + +#define CTYPE_UINT32(name, value) \ + case _NL_ITEM_INDEX (name): \ + add_locale_uint32 (&file, value); \ + break + + CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset); + CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset); + CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size); + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX, + ctype->translit_from_idx, + ctype->translit_idx_size); + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL, + ctype->translit_from_tbl, + ctype->translit_from_tbl_size + / sizeof (uint32_t)); + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX, + ctype->translit_to_idx, + ctype->translit_idx_size); + + CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL, + ctype->translit_to_tbl, + ctype->translit_to_tbl_size / sizeof 
(uint32_t)); + + case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES): + /* The class name array. */ + start_locale_structure (&file); + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + add_locale_string (&file, ctype->classnames[cnt]); + add_locale_char (&file, 0); + align_locale_data (&file, LOCFILE_ALIGN); + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES): + /* The class name array. */ + start_locale_structure (&file); + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + add_locale_string (&file, ctype->mapnames[cnt]); + add_locale_char (&file, 0); + align_locale_data (&file, LOCFILE_ALIGN); + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH): + add_locale_wcwidth_table (&file, &ctype->width); + break; + + CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max); + + case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME): + add_locale_string (&file, ctype->codeset_name); + break; + + CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii); + + CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case); + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN): + add_locale_uint32 (&file, ctype->mbdigits_act / 10); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN): + add_locale_uint32 (&file, ctype->wcdigits_act / 10); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB): + start_locale_structure (&file); + for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB); + cnt < ctype->mbdigits_act; cnt += 10) + { + add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes, + ctype->mbdigits[cnt]->nbytes); + add_locale_char (&file, 0); + } + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... 
_NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB): + start_locale_structure (&file); + cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB); + add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes, + ctype->mboutdigits[cnt]->nbytes); + add_locale_char (&file, 0); + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC): + start_locale_structure (&file); + for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC); + cnt < ctype->wcdigits_act; cnt += 10) + add_locale_uint32 (&file, ctype->wcdigits[cnt]); + end_locale_structure (&file); + break; + + case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC): + cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC); + add_locale_uint32 (&file, ctype->wcoutdigits[cnt]); + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN): + add_locale_uint32 (&file, default_missing_len); + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING): + add_locale_uint32_array (&file, ctype->default_missing, + default_missing_len); + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN): + add_locale_uint32 (&file, ctype->ntranslit_ignore); + break; + + case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE): + start_locale_structure (&file); + { + struct translit_ignore_t *runp; + for (runp = ctype->translit_ignore; runp != NULL; + runp = runp->next) + { + add_locale_uint32 (&file, runp->from); + add_locale_uint32 (&file, runp->to); + add_locale_uint32 (&file, runp->step); + } + } + end_locale_structure (&file); + break; + + default: + assert (! "unknown CTYPE element"); + } + else + { + /* Handle extra maps. 
*/ + size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1); + if (nr < ctype->nr_charclass) + { + start_locale_prelude (&file); + add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32); + end_locale_prelude (&file); + add_locale_wctype_table (&file, &ctype->class_3level[nr]); + } + else + { + nr -= ctype->nr_charclass; + assert (nr < ctype->map_collection_nr); + add_locale_wctrans_table (&file, &ctype->map_3level[nr]); + } + } + } + + write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file); +} + + +/* Local functions. */ +static void +ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name) +{ + size_t cnt; + + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], name) == 0) + break; + + if (cnt < ctype->nr_charclass) + { + lr_error (lr, _("character class `%s' already defined"), name); + return; + } + + if (ctype->nr_charclass == MAX_NR_CHARCLASS) + /* Exit code 2 is prescribed in P1003.2b. */ + WITH_CUR_LOCALE (error (2, 0, _("\ +implementation limit: no more than %Zd character classes allowed"), + MAX_NR_CHARCLASS)); + + ctype->classnames[ctype->nr_charclass++] = name; +} + + +static void +ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype, + const char *name, const struct charmap_t *charmap) +{ + size_t max_chars = 0; + size_t cnt; + + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + { + if (strcmp (ctype->mapnames[cnt], name) == 0) + break; + + if (max_chars < ctype->map_collection_max[cnt]) + max_chars = ctype->map_collection_max[cnt]; + } + + if (cnt < ctype->map_collection_nr) + { + lr_error (lr, _("character map `%s' already defined"), name); + return; + } + + if (ctype->map_collection_nr == MAX_NR_CHARMAP) + /* Exit code 2 is prescribed in P1003.2b. 
*/ + WITH_CUR_LOCALE (error (2, 0, _("\ +implementation limit: no more than %d character maps allowed"), + MAX_NR_CHARMAP)); + + ctype->mapnames[cnt] = name; + + if (max_chars == 0) + ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512; + else + ctype->map_collection_max[cnt] = max_chars; + + ctype->map_collection[cnt] = (uint32_t *) + xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]); + ctype->map_collection_act[cnt] = 256; + + ++ctype->map_collection_nr; +} + + +/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This + is possible if we only want to extend the name array. */ +static uint32_t * +find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, + size_t *act, uint32_t idx) +{ + size_t cnt; + + if (idx < 256) + return table == NULL ? NULL : &(*table)[idx]; + + /* Use the charnames_idx lookup table instead of the slow search loop. */ +#if 1 + cnt = idx_table_get (&ctype->charnames_idx, idx); + if (cnt == EMPTY) + /* Not found. */ + cnt = ctype->charnames_act; +#else + for (cnt = 256; cnt < ctype->charnames_act; ++cnt) + if (ctype->charnames[cnt] == idx) + break; +#endif + + /* We have to distinguish two cases: the name is found or not. */ + if (cnt == ctype->charnames_act) + { + /* Extend the name array. */ + if (ctype->charnames_act == ctype->charnames_max) + { + ctype->charnames_max *= 2; + ctype->charnames = (uint32_t *) + xrealloc (ctype->charnames, + sizeof (uint32_t) * ctype->charnames_max); + } + ctype->charnames[ctype->charnames_act++] = idx; + idx_table_add (&ctype->charnames_idx, idx, cnt); + } + + if (table == NULL) + /* We have done everything we are asked to do. */ + return NULL; + + if (max == NULL) + /* The caller does not want to extend the table. */ + return (cnt >= *act ? 
NULL : &(*table)[cnt]); + + if (cnt >= *act) + { + if (cnt >= *max) + { + size_t old_max = *max; + do + *max *= 2; + while (*max <= cnt); + + *table = + (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t)); + memset (&(*table)[old_max], '\0', + (*max - old_max) * sizeof (uint32_t)); + } + + *act = cnt + 1; + } + + return &(*table)[cnt]; +} + + +static int +get_character (struct token *now, const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct charseq **seqp, uint32_t *wchp) +{ + if (now->tok == tok_bsymbol) + { + /* This will hopefully be the normal case. */ + *wchp = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + *seqp = charmap_find_value (charmap, now->val.str.startmb, + now->val.str.lenmb); + } + else if (now->tok == tok_ucs4) + { + char utmp[10]; + + snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4); + *seqp = charmap_find_value (charmap, utmp, 9); + + if (*seqp == NULL) + *seqp = repertoire_find_seq (repertoire, now->val.ucs4); + + if (*seqp == NULL) + { + /* Compute the value in the charmap from the UCS value. */ + const char *symbol = repertoire_find_symbol (repertoire, + now->val.ucs4); + + if (symbol == NULL) + *seqp = NULL; + else + *seqp = charmap_find_value (charmap, symbol, strlen (symbol)); + + if (*seqp == NULL) + { + if (repertoire != NULL) + { + /* Insert a negative entry. */ + static const struct charseq negative + = { .ucs4 = ILLEGAL_CHAR_VALUE }; + uint32_t *newp = obstack_alloc (&repertoire->mem_pool, + sizeof (uint32_t)); + *newp = now->val.ucs4; + + insert_entry (&repertoire->seq_table, newp, + sizeof (uint32_t), (void *) &negative); + } + } + else + (*seqp)->ucs4 = now->val.ucs4; + } + else if ((*seqp)->ucs4 != now->val.ucs4) + *seqp = NULL; + + *wchp = now->val.ucs4; + } + else if (now->tok == tok_charcode) + { + /* We must map from the byte code to UCS4. 
*/ + *seqp = charmap_find_symbol (charmap, now->val.str.startmb, + now->val.str.lenmb); + + if (*seqp == NULL) + *wchp = ILLEGAL_CHAR_VALUE; + else + { + if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE) + (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name, + strlen ((*seqp)->name)); + *wchp = (*seqp)->ucs4; + } + } + else + return 1; + + return 0; +} + + +/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and + the .(2). counterparts. */ +static void +charclass_symbolic_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, + const char *last_str, + unsigned long int class256_bit, + unsigned long int class_bit, int base, + int ignore_content, int handle_digits, int step) +{ + const char *nowstr = now->val.str.startmb; + char tmp[now->val.str.lenmb + 1]; + const char *cp; + char *endp; + unsigned long int from; + unsigned long int to; + + /* We have to compute the ellipsis values using the symbolic names. */ + assert (last_str != NULL); + + if (strlen (last_str) != now->val.str.lenmb) + { + invalid_range: + lr_error (ldfile, + _("`%s' and `%.*s' are not valid names for symbolic range"), + last_str, (int) now->val.str.lenmb, nowstr); + return; + } + + if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0) + /* Nothing to do, the names are the same. */ + return; + + for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp) + ; + + errno = 0; + from = strtoul (cp, &endp, base); + if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0') + goto invalid_range; + + to = strtoul (nowstr + (cp - last_str), &endp, base); + if ((to == UINT_MAX && errno == ERANGE) + || (endp - nowstr) != now->val.str.lenmb || from >= to) + goto invalid_range; + + /* OK, we have a range FROM - TO. Now we can create the symbolic names. 
*/ + if (!ignore_content) + { + now->val.str.startmb = tmp; + while ((from += step) <= to) + { + struct charseq *seq; + uint32_t wch; + + sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"), + (int) (cp - last_str), last_str, + (int) (now->val.str.lenmb - (cp - last_str)), + from); + + get_character (now, charmap, repertoire, &seq, &wch); + + if (seq != NULL && seq->nbytes == 1) + /* Yep, we can store information about this byte sequence. */ + ctype->class256_collection[seq->bytes[0]] |= class256_bit; + + if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) + /* We have the UCS4 position. */ + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + } +} + + +/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. 
*/ +static void +charclass_ucs4_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, uint32_t last_wch, + unsigned long int class256_bit, + unsigned long int class_bit, int ignore_content, + int handle_digits, int step) +{ + if (last_wch > now->val.ucs4) + { + lr_error (ldfile, _("\ +to-value <U%0*X> of range is smaller than from-value <U%0*X>"), + (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4, + (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch); + return; + } + + if (!ignore_content) + while ((last_wch += step) <= now->val.ucs4) + { + /* We have to find out whether there is a byte sequence corresponding + to this UCS4 value. */ + struct charseq *seq; + char utmp[10]; + + snprintf (utmp, sizeof (utmp), "U%08X", last_wch); + seq = charmap_find_value (charmap, utmp, 9); + if (seq == NULL) + { + snprintf (utmp, sizeof (utmp), "U%04X", last_wch); + seq = charmap_find_value (charmap, utmp, 5); + } + + if (seq == NULL) + /* Try looking in the repertoire map. */ + seq = repertoire_find_seq (repertoire, last_wch); + + /* If this is the first time we look for this sequence create a new + entry. */ + if (seq == NULL) + { + static const struct charseq negative + = { .ucs4 = ILLEGAL_CHAR_VALUE }; + + /* Find the symbolic name for this UCS4 value. */ + if (repertoire != NULL) + { + const char *symbol = repertoire_find_symbol (repertoire, + last_wch); + uint32_t *newp = obstack_alloc (&repertoire->mem_pool, + sizeof (uint32_t)); + *newp = last_wch; + + if (symbol != NULL) + /* We have a name, now search the multibyte value. */ + seq = charmap_find_value (charmap, symbol, strlen (symbol)); + + if (seq == NULL) + /* We have to create a fake entry. */ + seq = (struct charseq *) &negative; + else + seq->ucs4 = last_wch; + + insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t), + seq); + } + else + /* We have to create a fake entry. 
*/ + seq = (struct charseq *) &negative; + } + + /* We have a name, now search the multibyte value. */ + if (seq->ucs4 == last_wch && seq->nbytes == 1) + /* Yep, we can store information about this byte sequence. */ + ctype->class256_collection[(size_t) seq->bytes[0]] + |= class256_bit; + + /* And of course we have the UCS4 position. */ + if (class_bit != 0) + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, last_wch) |= class_bit; + + if (handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch + ? seq : NULL); + ctype->wcdigits[ctype->wcdigits_act++] = last_wch; + } + else if (handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch + ? seq : NULL); + ctype->wcoutdigits[ctype->outdigits_act] = last_wch; + ++ctype->outdigits_act; + } + } +} + + +/* Ellipsis as in `/xea/x12.../xea/x34'. */ +static void +charclass_charcode_ellipsis (struct linereader *ldfile, + struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire, + struct token *now, char *last_charcode, + uint32_t last_charcode_len, + unsigned long int class256_bit, + unsigned long int class_bit, int ignore_content, + int handle_digits) +{ + /* First check whether the to-value is larger. 
*/ + if (now->val.charcode.nbytes != last_charcode_len) + { + lr_error (ldfile, _("\ +start and end character sequence of range must have the same length")); + return; + } + + if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0) + { + lr_error (ldfile, _("\ +to-value character sequence is smaller than from-value sequence")); + return; + } + + if (!ignore_content) + { + do + { + /* Increment the byte sequence value. */ + struct charseq *seq; + uint32_t wch; + int i; + + for (i = last_charcode_len - 1; i >= 0; --i) + if (++last_charcode[i] != 0) + break; + + if (last_charcode_len == 1) + /* Of course we have the charcode value. */ + ctype->class256_collection[(size_t) last_charcode[0]] + |= class256_bit; + + /* Find the symbolic name. */ + seq = charmap_find_symbol (charmap, last_charcode, + last_charcode_len); + if (seq != NULL) + { + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4; + + if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + } + else + wch = ILLEGAL_CHAR_VALUE; + + if (handle_digits == 1) + { + /* We must store the digit values. 
*/ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max *= 2; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max *= 2; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + seq = xmalloc (sizeof (struct charseq) + last_charcode_len); + memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); + seq->nbytes = last_charcode_len; + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (handle_digits == 2) + { + struct charseq *seq; + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + return; + } + + seq = xmalloc (sizeof (struct charseq) + last_charcode_len); + memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); + seq->nbytes = last_charcode_len; + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + while (memcmp (last_charcode, now->val.charcode.bytes, + last_charcode_len) != 0); + } +} + + +static uint32_t * +find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap, + uint32_t wch) +{ + struct translit_t *trunp = ctype->translit; + struct translit_ignore_t *tirunp = ctype->translit_ignore; + + while (trunp != NULL) + { + /* XXX We simplify things here. The transliterations we look + for are only allowed to have one character. */ + if (trunp->from[0] == wch && trunp->from[1] == 0) + { + /* Found it. Now look for a transliteration which can be + represented with the character set. 
*/ + struct translit_to_t *torunp = trunp->to; + + while (torunp != NULL) + { + int i; + + for (i = 0; torunp->str[i] != 0; ++i) + { + char utmp[10]; + + snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]); + if (charmap_find_value (charmap, utmp, 9) == NULL) + /* This character cannot be represented. */ + break; + } + + if (torunp->str[i] == 0) + return torunp->str; + + torunp = torunp->next; + } + + break; + } + + trunp = trunp->next; + } + + /* Check for ignored chars. */ + while (tirunp != NULL) + { + if (tirunp->from <= wch && tirunp->to >= wch) + { + uint32_t wi; + + for (wi = tirunp->from; wi <= wch; wi += tirunp->step) + if (wi == wch) + return no_str; + } + } + + /* Nothing found. */ + return NULL; +} + + +uint32_t * +find_translit (struct localedef_t *locale, const struct charmap_t *charmap, + uint32_t wch) +{ + struct locale_ctype_t *ctype; + uint32_t *result = NULL; + + assert (locale != NULL); + ctype = locale->categories[LC_CTYPE].ctype; + + if (ctype == NULL) + return NULL; + + if (ctype->translit != NULL) + result = find_translit2 (ctype, charmap, wch); + + if (result == NULL) + { + struct translit_include_t *irunp = ctype->translit_include; + + while (irunp != NULL && result == NULL) + { + result = find_translit (find_locale (CTYPE_LOCALE, + irunp->copy_locale, + irunp->copy_repertoire, + charmap), + charmap, wch); + irunp = irunp->next; + } + } + + return result; +} + + +/* Read one transliteration entry. */ +static uint32_t * +read_widestring (struct linereader *ldfile, struct token *now, + const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ + uint32_t *wstr; + + if (now->tok == tok_default_missing) + /* The special name "" will denote this case. */ + wstr = no_str; + else if (now->tok == tok_bsymbol) + { + /* Get the value from the repertoire. 
*/ + wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); + wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + if (wstr[0] == ILLEGAL_CHAR_VALUE) + { + /* We cannot proceed, we don't know the UCS4 value. */ + free (wstr); + return NULL; + } + + wstr[1] = 0; + } + else if (now->tok == tok_ucs4) + { + wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); + wstr[0] = now->val.ucs4; + wstr[1] = 0; + } + else if (now->tok == tok_charcode) + { + /* Argh, we have to convert to the symbol name first and then to the + UCS4 value. */ + struct charseq *seq = charmap_find_symbol (charmap, + now->val.str.startmb, + now->val.str.lenmb); + if (seq == NULL) + /* Cannot find the UCS4 value. */ + return NULL; + + if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) + seq->ucs4 = repertoire_find_value (repertoire, seq->name, + strlen (seq->name)); + if (seq->ucs4 == ILLEGAL_CHAR_VALUE) + /* We cannot proceed, we don't know the UCS4 value. */ + return NULL; + + wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); + wstr[0] = seq->ucs4; + wstr[1] = 0; + } + else if (now->tok == tok_string) + { + wstr = now->val.str.startwc; + if (wstr == NULL || wstr[0] == 0) + return NULL; + } + else + { + if (now->tok != tok_eol && now->tok != tok_eof) + lr_ignore_rest (ldfile, 0); + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); + return (uint32_t *) -1l; + } + + return wstr; +} + + +static void +read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype, + struct token *now, const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ + uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire); + struct translit_t *result; + struct translit_to_t **top; + struct obstack *ob = &ctype->mempool; + int first; + int ignore; + + if (from_wstr == NULL) + /* There is no valid from string. 
*/ + return; + + result = (struct translit_t *) obstack_alloc (ob, + sizeof (struct translit_t)); + result->from = from_wstr; + result->fname = ldfile->fname; + result->lineno = ldfile->lineno; + result->next = NULL; + result->to = NULL; + top = &result->to; + first = 1; + ignore = 0; + + while (1) + { + uint32_t *to_wstr; + + /* Next we have one or more transliterations. They are + separated by semicolons. */ + now = lr_token (ldfile, charmap, NULL, repertoire, verbose); + + if (!first && (now->tok == tok_semicolon || now->tok == tok_eol)) + { + /* One string read. */ + const uint32_t zero = 0; + + if (!ignore) + { + obstack_grow (ob, &zero, 4); + to_wstr = obstack_finish (ob); + + *top = obstack_alloc (ob, sizeof (struct translit_to_t)); + (*top)->str = to_wstr; + (*top)->next = NULL; + } + + if (now->tok == tok_eol) + { + result->next = ctype->translit; + ctype->translit = result; + return; + } + + if (!ignore) + top = &(*top)->next; + ignore = 0; + } + else + { + to_wstr = read_widestring (ldfile, now, charmap, repertoire); + if (to_wstr == (uint32_t *) -1l) + { + /* An error occurred. */ + obstack_free (ob, result); + return; + } + + if (to_wstr == NULL) + ignore = 1; + else + /* This value is usable. */ + obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4); + + first = 0; + } + } +} + + +static void +read_translit_ignore_entry (struct linereader *ldfile, + struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ + /* We expect a semicolon-separated list of characters we ignore. We are + only interested in the wide character definitions. These must be + single characters, possibly defining a range when an ellipsis is used. 
*/ + while (1) + { + struct token *now = lr_token (ldfile, charmap, NULL, repertoire, + verbose); + struct translit_ignore_t *newp; + uint32_t from; + + if (now->tok == tok_eol || now->tok == tok_eof) + { + lr_error (ldfile, + _("premature end of `translit_ignore' definition")); + return; + } + + if (now->tok != tok_bsymbol && now->tok != tok_ucs4) + { + lr_error (ldfile, _("syntax error")); + lr_ignore_rest (ldfile, 0); + return; + } + + if (now->tok == tok_ucs4) + from = now->val.ucs4; + else + /* Try to get the value. */ + from = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + + if (from == ILLEGAL_CHAR_VALUE) + { + lr_error (ldfile, "invalid character name"); + newp = NULL; + } + else + { + newp = (struct translit_ignore_t *) + obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t)); + newp->from = from; + newp->to = from; + newp->step = 1; + + newp->next = ctype->translit_ignore; + ctype->translit_ignore = newp; + } + + /* Now we expect either a semicolon, an ellipsis, or the end of the + line. */ + now = lr_token (ldfile, charmap, NULL, repertoire, verbose); + + if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2) + { + /* XXX Should we bother implementing `....'? `...' certainly + will not be implemented. */ + uint32_t to; + int step = now->tok == tok_ellipsis2_2 ? 2 : 1; + + now = lr_token (ldfile, charmap, NULL, repertoire, verbose); + + if (now->tok == tok_eol || now->tok == tok_eof) + { + lr_error (ldfile, + _("premature end of `translit_ignore' definition")); + return; + } + + if (now->tok != tok_bsymbol && now->tok != tok_ucs4) + { + lr_error (ldfile, _("syntax error")); + lr_ignore_rest (ldfile, 0); + return; + } + + if (now->tok == tok_ucs4) + to = now->val.ucs4; + else + /* Try to get the value. 
*/ + to = repertoire_find_value (repertoire, now->val.str.startmb, + now->val.str.lenmb); + + if (to == ILLEGAL_CHAR_VALUE) + lr_error (ldfile, "invalid character name"); + else + { + /* Make sure the `to'-value is larger. */ + if (to >= from) + { + newp->to = to; + newp->step = step; + } + else + lr_error (ldfile, _("\ +to-value <U%0*X> of range is smaller than from-value <U%0*X>"), + (to | from) < 65536 ? 4 : 8, to, + (to | from) < 65536 ? 4 : 8, from); + } + + /* And the next token. */ + now = lr_token (ldfile, charmap, NULL, repertoire, verbose); + } + + if (now->tok == tok_eol || now->tok == tok_eof) + /* We are done. */ + return; + + if (now->tok == tok_semicolon) + /* Next round. */ + continue; + + /* If we come here something is wrong. */ + lr_error (ldfile, _("syntax error")); + lr_ignore_rest (ldfile, 0); + return; + } +} + + +/* The parser for the LC_CTYPE section of the locale definition. */ +void +ctype_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_ctype_t *ctype; + struct token *now; + enum token_t nowtok; + size_t cnt; + uint32_t last_wch = 0; + enum token_t last_token; + enum token_t ellipsis_token; + int step; + char last_charcode[16]; + size_t last_charcode_len = 0; + const char *last_str = NULL; + int mapidx; + struct localedef_t *copy_locale = NULL; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_CTYPE' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. 
*/ + if (nowtok == tok_copy) + { + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_string) + { + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); + + skip_category: + do + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok != tok_eof && now->tok != tok_end); + + if (now->tok != tok_eof + || (now = lr_token (ldfile, charmap, NULL, NULL, verbose), + now->tok == tok_eof)) + lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); + else if (now->tok != tok_lc_ctype) + { + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); + lr_ignore_rest (ldfile, 0); + } + else + lr_ignore_rest (ldfile, 1); + + return; + } + + if (! ignore_content) + { + /* Get the locale definition. */ + copy_locale = load_locale (LC_CTYPE, now->val.str.startmb, + repertoire_name, charmap, NULL); + if ((copy_locale->avail & CTYPE_LOCALE) == 0) + { + /* Not yet loaded. So do it now. */ + if (locfile_read (copy_locale, charmap) != 0) + goto skip_category; + } + + if (copy_locale->categories[LC_CTYPE].ctype == NULL) + return; + } + + lr_ignore_rest (ldfile, 1); + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + nowtok = now->tok; + } + + /* Prepare the data structures. */ + ctype_startup (ldfile, result, charmap, copy_locale, ignore_content); + ctype = result->categories[LC_CTYPE].ctype; + + /* Remember the repertoire we use. */ + if (!ignore_content) + ctype->repertoire = repertoire; + + while (1) + { + unsigned long int class_bit = 0; + unsigned long int class256_bit = 0; + int handle_digits = 0; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { + case tok_charclass: + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok == tok_ident || now->tok == tok_string) + { + ctype_class_new (ldfile, ctype, now->val.str.startmb); + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_semicolon) + break; + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + } + if (now->tok != tok_eol) + SYNTAX_ERROR (_("\ +%s: syntax error in definition of new character class"), "LC_CTYPE"); + break; + + case tok_charconv: + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok == tok_ident || now->tok == tok_string) + { + ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap); + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_semicolon) + break; + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + } + if (now->tok != tok_eol) + SYNTAX_ERROR (_("\ +%s: syntax error in definition of new character map"), "LC_CTYPE"); + break; + + case tok_class: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* We simply forget the `class' keyword and use the following + operand to determine the bit. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_ident || now->tok == tok_string) + { + /* Must can be one of the predefined class names. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0) + break; + if (cnt >= ctype->nr_charclass) + { + /* OK, it's a new class. 
*/ + ctype_class_new (ldfile, ctype, now->val.str.startmb); + + class_bit = _ISwbit (ctype->nr_charclass - 1); + } + else + { + class_bit = _ISwbit (cnt); + + free (now->val.str.startmb); + } + } + else if (now->tok == tok_digit) + goto handle_tok_digit; + else if (now->tok < tok_upper || now->tok > tok_blank) + goto err_label; + else + { + class_bit = BITw (now->tok); + class256_bit = BIT (now->tok); + } + + /* The next character must be a semicolon. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_semicolon) + goto err_label; + goto read_charclass; + + case tok_upper: + case tok_lower: + case tok_alpha: + case tok_alnum: + case tok_space: + case tok_cntrl: + case tok_punct: + case tok_graph: + case tok_print: + case tok_xdigit: + case tok_blank: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + class_bit = BITw (now->tok); + class256_bit = BIT (now->tok); + handle_digits = 0; + read_charclass: + ctype->class_done |= class_bit; + last_token = tok_none; + ellipsis_token = tok_none; + step = 1; + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok != tok_eol && now->tok != tok_eof) + { + uint32_t wch; + struct charseq *seq; + + if (ellipsis_token == tok_none) + { + if (get_character (now, charmap, repertoire, &seq, &wch)) + goto err_label; + + if (!ignore_content && seq != NULL && seq->nbytes == 1) + /* Yep, we can store information about this byte + sequence. */ + ctype->class256_collection[seq->bytes[0]] |= class256_bit; + + if (!ignore_content && wch != ILLEGAL_CHAR_VALUE + && class_bit != 0) + /* We have the UCS4 position. */ + *find_idx (ctype, &ctype->class_collection, + &ctype->class_collection_max, + &ctype->class_collection_act, wch) |= class_bit; + + last_token = now->tok; + /* Terminate the string. 
*/ + if (last_token == tok_bsymbol) + { + now->val.str.startmb[now->val.str.lenmb] = '\0'; + last_str = now->val.str.startmb; + } + else + last_str = NULL; + last_wch = wch; + memcpy (last_charcode, now->val.charcode.bytes, 16); + last_charcode_len = now->val.charcode.nbytes; + + if (!ignore_content && handle_digits == 1) + { + /* We must store the digit values. */ + if (ctype->mbdigits_act == ctype->mbdigits_max) + { + ctype->mbdigits_max += 10; + ctype->mbdigits = xrealloc (ctype->mbdigits, + (ctype->mbdigits_max + * sizeof (char *))); + ctype->wcdigits_max += 10; + ctype->wcdigits = xrealloc (ctype->wcdigits, + (ctype->wcdigits_max + * sizeof (uint32_t))); + } + + ctype->mbdigits[ctype->mbdigits_act++] = seq; + ctype->wcdigits[ctype->wcdigits_act++] = wch; + } + else if (!ignore_content && handle_digits == 2) + { + /* We must store the digit values. */ + if (ctype->outdigits_act >= 10) + { + lr_error (ldfile, _("\ +%s: field `%s' does not contain exactly ten entries"), + "LC_CTYPE", "outdigit"); + lr_ignore_rest (ldfile, 0); + break; + } + + ctype->mboutdigits[ctype->outdigits_act] = seq; + ctype->wcoutdigits[ctype->outdigits_act] = wch; + ++ctype->outdigits_act; + } + } + else + { + /* Now it gets complicated. We have to resolve the + ellipsis problem. First we must distinguish between + the different kind of ellipsis and this must match the + tokens we have seen. */ + assert (last_token != tok_none); + + if (last_token != now->tok) + { + lr_error (ldfile, _("\ +ellipsis range must be marked by two operands of same type")); + lr_ignore_rest (ldfile, 0); + break; + } + + if (last_token == tok_bsymbol) + { + if (ellipsis_token == tok_ellipsis3) + lr_error (ldfile, _("with symbolic name range values \ +the absolute ellipsis `...' must not be used")); + + charclass_symbolic_ellipsis (ldfile, ctype, charmap, + repertoire, now, last_str, + class256_bit, class_bit, + (ellipsis_token + == tok_ellipsis4 + ? 
10 : 16), + ignore_content, + handle_digits, step); + } + else if (last_token == tok_ucs4) + { + if (ellipsis_token != tok_ellipsis2) + lr_error (ldfile, _("\ +with UCS range values one must use the hexadecimal symbolic ellipsis `..'")); + + charclass_ucs4_ellipsis (ldfile, ctype, charmap, + repertoire, now, last_wch, + class256_bit, class_bit, + ignore_content, handle_digits, + step); + } + else + { + assert (last_token == tok_charcode); + + if (ellipsis_token != tok_ellipsis3) + lr_error (ldfile, _("\ +with character code range values one must use the absolute ellipsis `...'")); + + charclass_charcode_ellipsis (ldfile, ctype, charmap, + repertoire, now, + last_charcode, + last_charcode_len, + class256_bit, class_bit, + ignore_content, + handle_digits); + } + + /* Now we have used the last value. */ + last_token = tok_none; + } + + /* Next we expect a semicolon or the end of the line. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_eol || now->tok == tok_eof) + break; + + if (last_token != tok_none + && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2) + { + if (now->tok == tok_ellipsis2_2) + { + now->tok = tok_ellipsis2; + step = 2; + } + else if (now->tok == tok_ellipsis4_2) + { + now->tok = tok_ellipsis4; + step = 2; + } + + ellipsis_token = now->tok; + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + continue; + } + + if (now->tok != tok_semicolon) + goto err_label; + + /* And get the next character. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + + ellipsis_token = tok_none; + step = 1; + } + break; + + case tok_digit: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + handle_tok_digit: + class_bit = _ISwdigit; + class256_bit = _ISdigit; + handle_digits = 1; + goto read_charclass; + + case tok_outdigit: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + if (ctype->outdigits_act != 0) + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), + "LC_CTYPE", "outdigit"); + class_bit = 0; + class256_bit = 0; + handle_digits = 2; + goto read_charclass; + + case tok_toupper: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + mapidx = 0; + goto read_mapping; + + case tok_tolower: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + mapidx = 1; + goto read_mapping; + + case tok_map: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* We simply forget the `map' keyword and use the following + operand to determine the mapping. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_ident || now->tok == tok_string) + { + size_t cnt; + + for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt) + if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) + break; + + if (cnt < ctype->map_collection_nr) + free (now->val.str.startmb); + else + /* OK, it's a new map. */ + ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap); + + mapidx = cnt; + } + else if (now->tok < tok_toupper || now->tok > tok_tolower) + goto err_label; + else + mapidx = now->tok - tok_toupper; + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + /* This better should be a semicolon. */ + if (now->tok != tok_semicolon) + goto err_label; + + read_mapping: + /* Test whether this mapping was already defined. 
*/ + if (ctype->tomap_done[mapidx]) + { + lr_error (ldfile, _("duplicated definition for mapping `%s'"), + ctype->mapnames[mapidx]); + lr_ignore_rest (ldfile, 0); + break; + } + ctype->tomap_done[mapidx] = 1; + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + while (now->tok != tok_eol && now->tok != tok_eof) + { + struct charseq *from_seq; + uint32_t from_wch; + struct charseq *to_seq; + uint32_t to_wch; + + /* Every pair starts with an opening brace. */ + if (now->tok != tok_open_brace) + goto err_label; + + /* Next comes the from-value. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (get_character (now, charmap, repertoire, &from_seq, + &from_wch) != 0) + goto err_label; + + /* The next is a comma. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_comma) + goto err_label; + + /* And the other value. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (get_character (now, charmap, repertoire, &to_seq, + &to_wch) != 0) + goto err_label; + + /* And the last thing is the closing brace. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_close_brace) + goto err_label; + + if (!ignore_content) + { + /* Check whether the mapping converts from an ASCII value + to a non-ASCII value. */ + if (from_seq != NULL && from_seq->nbytes == 1 + && isascii (from_seq->bytes[0]) + && to_seq != NULL && (to_seq->nbytes != 1 + || !isascii (to_seq->bytes[0]))) + ctype->to_nonascii = 1; + + if (mapidx < 2 && from_seq != NULL && to_seq != NULL + && from_seq->nbytes == 1 && to_seq->nbytes == 1) + /* We can use this value. */ + ctype->map256_collection[mapidx][from_seq->bytes[0]] + = to_seq->bytes[0]; + + if (from_wch != ILLEGAL_CHAR_VALUE + && to_wch != ILLEGAL_CHAR_VALUE) + /* Both correct values. 
*/ + *find_idx (ctype, &ctype->map_collection[mapidx], + &ctype->map_collection_max[mapidx], + &ctype->map_collection_act[mapidx], + from_wch) = to_wch; + } + + /* Now comes a semicolon or the end of the line/file. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_semicolon) + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + } + break; + + case tok_translit_start: + /* Ignore the entire translit section with its peculiar syntax + if we don't need the input. */ + if (ignore_content) + { + do + { + lr_ignore_rest (ldfile, 0); + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + } + while (now->tok != tok_translit_end && now->tok != tok_eof); + + if (now->tok == tok_eof) + lr_error (ldfile, _(\ +"%s: `translit_start' section does not end with `translit_end'"), + "LC_CTYPE"); + + break; + } + + /* The rest of the line better should be empty. */ + lr_ignore_rest (ldfile, 1); + + /* We count here the number of allocated entries in the `translit' + array. */ + cnt = 0; + + ldfile->translate_strings = 1; + ldfile->return_widestr = 1; + + /* We proceed until we see the `translit_end' token. */ + while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose), + now->tok != tok_translit_end && now->tok != tok_eof) + { + if (now->tok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (now->tok == tok_include) + { + /* We have to include locale. */ + const char *locale_name; + const char *repertoire_name; + struct translit_include_t *include_stmt, **include_ptr; + + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + /* This should be a string or an identifier. In any + case something to name a locale. */ + if (now->tok != tok_string && now->tok != tok_ident) + { + translit_syntax: + lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE"); + lr_ignore_rest (ldfile, 0); + continue; + } + locale_name = now->val.str.startmb; + + /* Next should be a semicolon. 
*/ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok != tok_semicolon) + goto translit_syntax; + + /* Now the repertoire name. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if ((now->tok != tok_string && now->tok != tok_ident) + || now->val.str.startmb == NULL) + goto translit_syntax; + repertoire_name = now->val.str.startmb; + if (repertoire_name[0] == '\0') + /* Ignore the empty string. */ + repertoire_name = NULL; + + /* Save the include statement for later processing. */ + include_stmt = (struct translit_include_t *) + xmalloc (sizeof (struct translit_include_t)); + include_stmt->copy_locale = locale_name; + include_stmt->copy_repertoire = repertoire_name; + include_stmt->next = NULL; + + include_ptr = &ctype->translit_include; + while (*include_ptr != NULL) + include_ptr = &(*include_ptr)->next; + *include_ptr = include_stmt; + + /* The rest of the line must be empty. */ + lr_ignore_rest (ldfile, 1); + + /* Make sure the locale is read. */ + add_to_readlist (LC_CTYPE, locale_name, repertoire_name, + 1, NULL); + continue; + } + else if (now->tok == tok_default_missing) + { + uint32_t *wstr; + + while (1) + { + /* We expect a single character or string as the + argument. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + wstr = read_widestring (ldfile, now, charmap, + repertoire); + + if (wstr != NULL) + { + if (ctype->default_missing != NULL) + { + lr_error (ldfile, _("\ +%s: duplicate `default_missing' definition"), "LC_CTYPE"); + WITH_CUR_LOCALE (error_at_line (0, 0, + ctype->default_missing_file, + ctype->default_missing_lineno, + _("\ +previous definition was here"))); + } + else + { + ctype->default_missing = wstr; + ctype->default_missing_file = ldfile->fname; + ctype->default_missing_lineno = ldfile->lineno; + } + /* We can have more entries, ignore them. */ + lr_ignore_rest (ldfile, 0); + break; + } + else if (wstr == (uint32_t *) -1l) + /* This was an syntax error. 
*/ + break; + + /* Maybe there is another replacement we can use. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_eol || now->tok == tok_eof) + { + /* Nothing found. We tell the user. */ + lr_error (ldfile, _("\ +%s: no representable `default_missing' definition found"), "LC_CTYPE"); + break; + } + if (now->tok != tok_semicolon) + goto translit_syntax; + } + + continue; + } + else if (now->tok == tok_translit_ignore) + { + read_translit_ignore_entry (ldfile, ctype, charmap, + repertoire); + continue; + } + + read_translit_entry (ldfile, ctype, now, charmap, repertoire); + } + ldfile->return_widestr = 0; + + if (now->tok == tok_eof) + lr_error (ldfile, _(\ +"%s: `translit_start' section does not end with `translit_end'"), + "LC_CTYPE"); + + break; + + case tok_ident: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + /* This could mean one of several things. First test whether + it's a character class name. */ + for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) + if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0) + break; + if (cnt < ctype->nr_charclass) + { + class_bit = _ISwbit (cnt); + class256_bit = cnt <= 11 ? _ISbit (cnt) : 0; + free (now->val.str.startmb); + goto read_charclass; + } + for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) + if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) + break; + if (cnt < ctype->map_collection_nr) + { + mapidx = cnt; + free (now->val.str.startmb); + goto read_mapping; + } + break; + + case tok_end: + /* Next we assume `LC_CTYPE'. 
*/ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), + "LC_CTYPE"); + else if (now->tok != tok_lc_ctype) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); + lr_ignore_rest (ldfile, now->tok == tok_lc_ctype); + return; + + default: + err_label: + if (now->tok != tok_eof) + SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, NULL, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); +} + + +/* Subroutine of set_class_defaults, below. */ +static void +set_one_default (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + int bitpos, int from, int to) +{ + char tmp[2]; + int ch; + int bit = _ISbit (bitpos); + int bitw = _ISwbit (bitpos); + /* Define string. */ + strcpy (tmp, "?"); + + for (ch = from; ch <= to; ++ch) + { + struct charseq *seq; + tmp[0] = ch; + + seq = charmap_find_value (charmap, tmp, 1); + if (seq == NULL) + { + char buf[10]; + sprintf (buf, "U%08X", ch); + seq = charmap_find_value (charmap, buf, 9); + } + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp)); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", tmp)); + else + ctype->class256_collection[seq->bytes[0]] |= bit; + + /* No need to search here, the ASCII value is also the Unicode + value. 
*/ + ELEM (ctype, class_collection, , ch) |= bitw; + } +} + +static void +set_class_defaults (struct locale_ctype_t *ctype, + const struct charmap_t *charmap, + struct repertoire_t *repertoire) +{ +#define set_default(bitpos, from, to) \ + set_one_default (ctype, charmap, bitpos, from, to) + + /* These function defines the default values for the classes and conversions + according to POSIX.2 2.5.2.1. + It may seem that the order of these if-blocks is arbitrary but it is NOT. + Don't move them unless you know what you do! */ + + /* Set default values if keyword was not present. */ + if ((ctype->class_done & BITw (tok_upper)) == 0) + /* "If this keyword [lower] is not specified, the lowercase letters + `A' through `Z', ..., shall automatically belong to this class, + with implementation defined character values." [P1003.2, 2.5.2.1] */ + set_default (BITPOS (tok_upper), 'A', 'Z'); + + if ((ctype->class_done & BITw (tok_lower)) == 0) + /* "If this keyword [lower] is not specified, the lowercase letters + `a' through `z', ..., shall automatically belong to this class, + with implementation defined character values." [P1003.2, 2.5.2.1] */ + set_default (BITPOS (tok_lower), 'a', 'z'); + + if ((ctype->class_done & BITw (tok_alpha)) == 0) + { + /* Table 2-6 in P1003.2 says that characters in class `upper' or + class `lower' *must* be in class `alpha'. 
*/ + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower); + unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower); + + for (size_t cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_alpha); + + for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & maskw) != 0) + ctype->class_collection[cnt] |= BITw (tok_alpha); + } + + if ((ctype->class_done & BITw (tok_digit)) == 0) + /* "If this keyword [digit] is not specified, the digits `0' through + `9', ..., shall automatically belong to this class, with + implementation-defined character values." [P1003.2, 2.5.2.1] */ + set_default (BITPOS (tok_digit), '0', '9'); + + /* "Only characters specified for the `alpha' and `digit' keyword + shall be specified. Characters specified for the keyword `alpha' + and `digit' are automatically included in this class. */ + { + unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit); + unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit); + + for (size_t cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_alnum); + + for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & maskw) != 0) + ctype->class_collection[cnt] |= BITw (tok_alnum); + } + + if ((ctype->class_done & BITw (tok_space)) == 0) + /* "If this keyword [space] is not specified, the characters <space>, + <form-feed>, <newline>, <carriage-return>, <tab>, and + <vertical-tab>, ..., shall automatically belong to this class, + with implementation-defined character values." 
[P1003.2, 2.5.2.1] */ + { + struct charseq *seq; + + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) + seq = charmap_find_value (charmap, "SP", 2); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000020", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. */ + ELEM (ctype, class_collection, , L' ') |= BITw (tok_space); + + seq = charmap_find_value (charmap, "form-feed", 9); + if (seq == NULL) + seq = charmap_find_value (charmap, "U0000000C", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<form-feed>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<form-feed>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. */ + ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space); + + + seq = charmap_find_value (charmap, "newline", 7); + if (seq == NULL) + seq = charmap_find_value (charmap, "U0000000A", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<newline>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<newline>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. 
*/ + ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space); + + + seq = charmap_find_value (charmap, "carriage-return", 15); + if (seq == NULL) + seq = charmap_find_value (charmap, "U0000000D", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<carriage-return>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<carriage-return>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. */ + ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space); + + + seq = charmap_find_value (charmap, "tab", 3); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000009", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<tab>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<tab>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. */ + ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space); + + + seq = charmap_find_value (charmap, "vertical-tab", 12); + if (seq == NULL) + seq = charmap_find_value (charmap, "U0000000B", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<vertical-tab>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<vertical-tab>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space); + + /* No need to search. 
*/ + ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space); + } + + if ((ctype->class_done & BITw (tok_xdigit)) == 0) + /* "If this keyword is not specified, the digits `0' to `9', the + uppercase letters `A' through `F', and the lowercase letters `a' + through `f', ..., shell automatically belong to this class, with + implementation defined character values." [P1003.2, 2.5.2.1] */ + { + set_default (BITPOS (tok_xdigit), '0', '9'); + set_default (BITPOS (tok_xdigit), 'A', 'F'); + set_default (BITPOS (tok_xdigit), 'a', 'f'); + } + + if ((ctype->class_done & BITw (tok_blank)) == 0) + /* "If this keyword [blank] is unspecified, the characters <space> and + <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */ + { + struct charseq *seq; + + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) + seq = charmap_find_value (charmap, "SP", 2); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000020", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); + + /* No need to search. */ + ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank); + + + seq = charmap_find_value (charmap, "tab", 3); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000009", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<tab>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<tab>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank); + + /* No need to search. 
*/ + ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank); + } + + if ((ctype->class_done & BITw (tok_graph)) == 0) + /* "If this keyword [graph] is not specified, characters specified for + the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct', + shall belong to this character class." [P1003.2, 2.5.2.1] */ + { + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | + BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); + unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) | + BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) | + BITw (tok_punct); + + for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & maskw) != 0) + ctype->class_collection[cnt] |= BITw (tok_graph); + + for (size_t cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_graph); + } + + if ((ctype->class_done & BITw (tok_print)) == 0) + /* "If this keyword [print] is not provided, characters specified for + the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct', + and the <space> character shall belong to this character class." 
+ [P1003.2, 2.5.2.1] */ + { + unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) | + BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct); + unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) | + BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) | + BITw (tok_punct); + struct charseq *seq; + + for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt) + if ((ctype->class_collection[cnt] & maskw) != 0) + ctype->class_collection[cnt] |= BITw (tok_print); + + for (size_t cnt = 0; cnt < 256; ++cnt) + if ((ctype->class256_collection[cnt] & mask) != 0) + ctype->class256_collection[cnt] |= BIT (tok_print); + + + seq = charmap_find_value (charmap, "space", 5); + if (seq == NULL) + seq = charmap_find_value (charmap, "SP", 2); + if (seq == NULL) + seq = charmap_find_value (charmap, "U00000020", 9); + if (seq == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", "<space>")); + } + else if (seq->nbytes != 1) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' in charmap not representable with one byte"), + "LC_CTYPE", "<space>")); + else + ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print); + + /* No need to search. */ + ELEM (ctype, class_collection, , L' ') |= BITw (tok_print); + } + + if (ctype->tomap_done[0] == 0) + /* "If this keyword [toupper] is not specified, the lowercase letters + `a' through `z', and their corresponding uppercase letters `A' to + `Z', ..., shall automatically be included, with implementation- + defined character values." 
[P1003.2, 2.5.2.1] */ + { + char tmp[4]; + int ch; + + strcpy (tmp, "<?>"); + + for (ch = 'a'; ch <= 'z'; ++ch) + { + struct charseq *seq_from, *seq_to; + + tmp[1] = (char) ch; + + seq_from = charmap_find_value (charmap, &tmp[1], 1); + if (seq_from == NULL) + { + char buf[10]; + sprintf (buf, "U%08X", ch); + seq_from = charmap_find_value (charmap, buf, 9); + } + if (seq_from == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp)); + } + else if (seq_from->nbytes != 1) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' needed as default value not representable with one byte"), + "LC_CTYPE", tmp)); + } + else + { + /* This conversion is implementation defined. */ + tmp[1] = (char) (ch + ('A' - 'a')); + seq_to = charmap_find_value (charmap, &tmp[1], 1); + if (seq_to == NULL) + { + char buf[10]; + sprintf (buf, "U%08X", ch + ('A' - 'a')); + seq_to = charmap_find_value (charmap, buf, 9); + } + if (seq_to == NULL) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' not defined while needed as default value"), + "LC_CTYPE", tmp)); + } + else if (seq_to->nbytes != 1) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: character `%s' needed as default value not representable with one byte"), + "LC_CTYPE", tmp)); + } + else + /* The index [0] is determined by the order of the + `ctype_map_newP' calls in `ctype_startup'. */ + ctype->map256_collection[0][seq_from->bytes[0]] + = seq_to->bytes[0]; + } + + /* No need to search. */ + ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a'); + } + } + + if (ctype->tomap_done[1] == 0) + /* "If this keyword [tolower] is not specified, the mapping shall be + the reverse mapping of the one specified to `toupper'." 
[P1003.2] */
+    {
+      for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
+        if (ctype->map_collection[0][cnt] != 0)
+          ELEM (ctype, map_collection, [1],
+                ctype->map_collection[0][cnt])
+            = ctype->charnames[cnt];
+
+      for (size_t cnt = 0; cnt < 256; ++cnt)
+        if (ctype->map256_collection[0][cnt] != 0)
+          ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
+    }
+
+  if (ctype->outdigits_act != 10)
+    {
+      if (ctype->outdigits_act != 0)
+        WITH_CUR_LOCALE (error (0, 0, _("\ +%s: field `%s' does not contain exactly ten entries"),
+                                "LC_CTYPE", "outdigit"));
+
+      for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt)
+        {
+          ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
+                                                         (char *) digits + cnt,
+                                                         1);
+
+          if (ctype->mboutdigits[cnt] == NULL)
+            ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
+                                                           longnames[cnt],
+                                                           strlen (longnames[cnt]));
+
+          if (ctype->mboutdigits[cnt] == NULL)
+            ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
+                                                           uninames[cnt], 9);
+
+          if (ctype->mboutdigits[cnt] == NULL)
+            {
+              /* Provide a replacement.  */
+              WITH_CUR_LOCALE (error (0, 0, _("\ +no output digits defined and none of the standard names in the charmap")));
+
+              ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
+                                                       sizeof (struct charseq)
+                                                       + 1);
+
+              /* This is better than nothing.  */
+              ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
+              ctype->mboutdigits[cnt]->nbytes = 1;
+            }
+
+          ctype->wcoutdigits[cnt] = L'0' + cnt;
+        }
+
+      ctype->outdigits_act = 10;
+    }
+
+#undef set_default
+}
+
+
+/* Initialize.  Assumes t->p and t->q have already been set.
+   Leaves the table empty: all three level pointers are NULL and both
+   the allocated and the used count of every level are zero.  The
+   fields p and q are not touched here.  */
+static inline void
+wctype_table_init (struct wctype_table *t)
+{
+  t->level1 = NULL;
+  t->level1_alloc = t->level1_size = 0;
+  t->level2 = NULL;
+  t->level2_alloc = t->level2_size = 0;
+  t->level3 = NULL;
+  t->level3_alloc = t->level3_size = 0;
+}
+
+/* Retrieve an entry. 
Returns the bit stored for WC.  WC is split into a level-1 index (the
+   bits above q + p + 5), a level-2 index (q bits), a level-3 index
+   (p bits) and a bit number (the low 5 bits).  The result is 0 when WC
+   lies beyond level 1 or the walk reaches an EMPTY level-1 or level-2
+   slot.  */
+static inline int
+wctype_table_get (struct wctype_table *t, uint32_t wc)
+{
+  uint32_t index1 = wc >> (t->q + t->p + 5);
+  if (index1 < t->level1_size)
+    {
+      uint32_t lookup1 = t->level1[index1];
+      if (lookup1 != EMPTY)
+        {
+          uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
+                            + (lookup1 << t->q);
+          uint32_t lookup2 = t->level2[index2];
+          if (lookup2 != EMPTY)
+            {
+              uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
+                                + (lookup2 << t->p);
+              uint32_t lookup3 = t->level3[index3];
+              uint32_t index4 = wc & 0x1f;
+
+              return (lookup3 >> index4) & 1;
+            }
+        }
+    }
+  return 0;
+}
+
+/* Add one entry.
+   Sets the bit for WC in T.  Level 1 is grown with xrealloc and padded
+   with EMPTY up to the needed index; a missing level-2 block is
+   appended filled with EMPTY and a missing level-3 block is appended
+   filled with 0.  Existing bits are never cleared.  */
+static void
+wctype_table_add (struct wctype_table *t, uint32_t wc)
+{
+  uint32_t index1 = wc >> (t->q + t->p + 5);
+  uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
+  uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
+  uint32_t index4 = wc & 0x1f;
+  size_t i, i1, i2;
+
+  if (index1 >= t->level1_size)
+    {
+      if (index1 >= t->level1_alloc)
+        {
+          size_t alloc = 2 * t->level1_alloc;  /* Double, but reach index1.  */
+          if (alloc <= index1)
+            alloc = index1 + 1;
+          t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
+                                             alloc * sizeof (uint32_t));
+          t->level1_alloc = alloc;
+        }
+      while (index1 >= t->level1_size)  /* New slots start out EMPTY.  */
+        t->level1[t->level1_size++] = EMPTY;
+    }
+
+  if (t->level1[index1] == EMPTY)
+    {
+      if (t->level2_size == t->level2_alloc)  /* Counted in blocks of 1 << q.  */
+        {
+          size_t alloc = 2 * t->level2_alloc + 1;
+          t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
+                                             (alloc << t->q) * sizeof (uint32_t));
+          t->level2_alloc = alloc;
+        }
+      i1 = t->level2_size << t->q;
+      i2 = (t->level2_size + 1) << t->q;
+      for (i = i1; i < i2; i++)
+        t->level2[i] = EMPTY;
+      t->level1[index1] = t->level2_size++;
+    }
+
+  index2 += t->level1[index1] << t->q;  /* Offset into the chosen block.  */
+
+  if (t->level2[index2] == EMPTY)
+    {
+      if (t->level3_size == t->level3_alloc)  /* Counted in blocks of 1 << p.  */
+        {
+          size_t alloc = 2 * t->level3_alloc + 1;
+          t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
+                                             (alloc << t->p) * sizeof (uint32_t));
+          t->level3_alloc = alloc;
+        }
+      i1 = t->level3_size << t->p;
+      i2 = (t->level3_size + 1) << t->p;
+      for (i = i1; i < i2; i++)
+        t->level3[i] = 0;
+      t->level2[index2] = t->level3_size++;
+    }
+
+  index3 += t->level2[index2] << t->p;
+
+  t->level3[index3] |= (uint32_t)1 << index4;
+}
+
+/* Finalize and shrink.
+   Merges identical level-3 blocks and then identical level-2 blocks,
+   renumbering the references to them, and stores the total byte size
+   in t->result_size.  The table is then written to FILE as one locale
+   structure: five header words, level 1 and level 2 as byte offsets
+   (0 for EMPTY), and the level-3 words.  Finally the three arrays are
+   freed; the pointers in T are left as they are.
+   NOTE(review): reorder3 and reorder2 are variable-length arrays sized
+   by the level counts, which are 0 for a table that never received an
+   entry -- confirm that a zero-length array is acceptable here.  */
+static void
+add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
+{
+  size_t i, j, k;
+  uint32_t reorder3[t->level3_size];
+  uint32_t reorder2[t->level2_size];
+  uint32_t level2_offset, level3_offset;
+
+  /* Uniquify level3 blocks.  */
+  k = 0;
+  for (j = 0; j < t->level3_size; j++)
+    {
+      for (i = 0; i < k; i++)
+        if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
+                    (1 << t->p) * sizeof (uint32_t)) == 0)
+          break;
+      /* Relocate block j to block i.  */
+      reorder3[j] = i;
+      if (i == k)
+        {
+          if (i != j)
+            memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
+                    (1 << t->p) * sizeof (uint32_t));
+          k++;
+        }
+    }
+  t->level3_size = k;
+
+  for (i = 0; i < (t->level2_size << t->q); i++)
+    if (t->level2[i] != EMPTY)
+      t->level2[i] = reorder3[t->level2[i]];
+
+  /* Uniquify level2 blocks.  */
+  k = 0;
+  for (j = 0; j < t->level2_size; j++)
+    {
+      for (i = 0; i < k; i++)
+        if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
+                    (1 << t->q) * sizeof (uint32_t)) == 0)
+          break;
+      /* Relocate block j to block i.  */
+      reorder2[j] = i;
+      if (i == k)
+        {
+          if (i != j)
+            memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
+                    (1 << t->q) * sizeof (uint32_t));
+          k++;
+        }
+    }
+  t->level2_size = k;
+
+  for (i = 0; i < t->level1_size; i++)
+    if (t->level1[i] != EMPTY)
+      t->level1[i] = reorder2[t->level1[i]];
+
+  t->result_size =
+    5 * sizeof (uint32_t)
+    + t->level1_size * sizeof (uint32_t)
+    + (t->level2_size << t->q) * sizeof (uint32_t)
+    + (t->level3_size << t->p) * sizeof (uint32_t);
+
+  level2_offset =
+    5 * sizeof (uint32_t)
+    + t->level1_size * sizeof (uint32_t);
+  level3_offset =
+    5 * sizeof (uint32_t)
+    + t->level1_size * sizeof (uint32_t)
+    + (t->level2_size << t->q) * sizeof (uint32_t);
+
+  start_locale_structure (file);
+  add_locale_uint32 (file, t->q + t->p + 5);
+  add_locale_uint32 (file, t->level1_size);
+  add_locale_uint32 (file, t->p + 5);
+  add_locale_uint32 (file, (1 << t->q) - 1);
+  add_locale_uint32 (file, (1 << t->p) - 1);
+
+  for (i = 0; i < t->level1_size; i++)
+    add_locale_uint32
+      (file,
+       t->level1[i] == EMPTY
+       ? 0
+       : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
+
+  for (i = 0; i < (t->level2_size << t->q); i++)
+    add_locale_uint32
+      (file,
+       t->level2[i] == EMPTY
+       ? 0
+       : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
+
+  add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
+  end_locale_structure (file);
+
+  if (t->level1_alloc > 0)
+    free (t->level1);
+  if (t->level2_alloc > 0)
+    free (t->level2);
+  if (t->level3_alloc > 0)
+    free (t->level3);
+}
+
+/* Flattens the included transliterations into a translit list.
+   Inserts them in the list at `cursor', and returns the new cursor. 
Every entry of ctype->translit_include is unchained before its locale is
+   looked up with find_locale.  The locale found is flattened
+   recursively first; its translit list is then spliced in at *cursor
+   and its own list head is cleared.  A missing locale, or one without
+   LC_CTYPE data, only produces an error message.  default_missing is
+   taken over from an included locale while CTYPE has none of its
+   own.  */
+static struct translit_t **
+translit_flatten (struct locale_ctype_t *ctype,
+                  const struct charmap_t *charmap,
+                  struct translit_t **cursor)
+{
+  while (ctype->translit_include != NULL)
+    {
+      const char *copy_locale = ctype->translit_include->copy_locale;
+      const char *copy_repertoire = ctype->translit_include->copy_repertoire;
+      struct localedef_t *other;
+
+      /* Unchain the include statement.  During the depth-first traversal
+         we don't want to visit any locale more than once.  */
+      ctype->translit_include = ctype->translit_include->next;
+
+      other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
+
+      if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
+        {
+          WITH_CUR_LOCALE (error (0, 0, _("\ +%s: transliteration data from locale `%s' not available"),
+                                  "LC_CTYPE", copy_locale));
+        }
+      else
+        {
+          struct locale_ctype_t *other_ctype =
+            other->categories[LC_CTYPE].ctype;
+
+          cursor = translit_flatten (other_ctype, charmap, cursor);
+          assert (other_ctype->translit_include == NULL);
+
+          if (other_ctype->translit != NULL)
+            {
+              /* Insert the other_ctype->translit list at *cursor.  */
+              struct translit_t *endp = other_ctype->translit;
+              while (endp->next != NULL)
+                endp = endp->next;
+
+              endp->next = *cursor;
+              *cursor = other_ctype->translit;
+
+              /* Avoid any risk of circular lists.  */
+              other_ctype->translit = NULL;
+
+              cursor = &endp->next;  /* Continue after the spliced part.  */
+            }
+
+          if (ctype->default_missing == NULL)
+            ctype->default_missing = other_ctype->default_missing;
+        }
+    }
+
+  return cursor;
+}
+/* Build the arrays of CTYPE from the data collected so far: the
+   byte-indexed class table ctype_b and the 32-bit table ctype32_b, one
+   bitmap (class_b) and one 3-level table (class_3level) per character
+   class, the byte-indexed maps map_b for the first two mappings,
+   map32_b and map_3level for every mapping, the class and map name
+   pointer arrays, the width table, mb_cur_max, and the sorted
+   transliteration tables.  The REPERTOIRE parameter is not referenced
+   in the body; repertoire lookups use ctype->repertoire.  */
+static void
+allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
+                 struct repertoire_t *repertoire)
+{
+  size_t idx, nr;
+  const void *key;
+  size_t len;
+  void *vdata;
+  void *curs;
+
+  /* You wonder about this amount of memory?  This is only because some
+     users do not manage to address the array with unsigned values or
+     data types with range >= 256.  '\200' would result in the array
+     index -128.  To help these poor people we duplicate the entries for
+     128 up to 255 below the entry for \0.  */
+  ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
+  ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
+  ctype->class_b = (uint32_t **)
+    xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
+  ctype->class_3level = (struct wctype_table *)
+    xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
+
+  /* This is the array accessed using the multibyte string elements.  */
+  for (idx = 0; idx < 256; ++idx)
+    ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
+
+  /* Mirror first 127 entries.  We must take care that entry -1 is not
+     mirrored because EOF == -1.  */
+  for (idx = 0; idx < 127; ++idx)
+    ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
+
+  /* The 32 bit array contains all characters < 0x100.  */
+  for (idx = 0; idx < ctype->class_collection_act; ++idx)
+    if (ctype->charnames[idx] < 0x100)
+      ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
+
+  for (nr = 0; nr < ctype->nr_charclass; nr++)
+    {
+      ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
+
+      /* We only set CLASS_B for the bits in the ISO C classes, not
+         the user defined classes.  The number should not change but
+         who knows.  */
+#define LAST_ISO_C_BIT 11
+      if (nr <= LAST_ISO_C_BIT)
+        for (idx = 0; idx < 256; ++idx)
+          if (ctype->class256_collection[idx] & _ISbit (nr))
+            ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
+    }
+
+  for (nr = 0; nr < ctype->nr_charclass; nr++)
+    {
+      struct wctype_table *t;
+
+      t = &ctype->class_3level[nr];
+      t->p = 4; /* or: 5 */
+      t->q = 7; /* or: 6 */
+      wctype_table_init (t);
+
+      for (idx = 0; idx < ctype->class_collection_act; ++idx)
+        if (ctype->class_collection[idx] & _ISwbit (nr))
+          wctype_table_add (t, ctype->charnames[idx]);
+
+      if (verbose)  /* NOTE(review): t->result_size is assigned only in
+                       add_locale_wctype_table, which is not called here,
+                       and class_3level comes from xmalloc, so the size
+                       printed below looks unset at this point --
+                       confirm.  */
+        WITH_CUR_LOCALE (fprintf (stderr, _("\ +%s: table for class \"%s\": %lu bytes\n"),
+                                  "LC_CTYPE", ctype->classnames[nr],
+                                  (unsigned long int) t->result_size));
+    }
+
+  /* Room for table of mappings.  */
+  ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
+  ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
+                                          * sizeof (uint32_t *));
+  ctype->map_3level = (struct wctrans_table *)
+    xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
+
+  /* Fill in all mappings.  */
+  for (idx = 0; idx < 2; ++idx)
+    {
+      unsigned int idx2;
+
+      /* Allocate table.  */
+      ctype->map_b[idx] = (uint32_t *)
+        xmalloc ((256 + 128) * sizeof (uint32_t));
+
+      /* Copy values from collection.  */
+      for (idx2 = 0; idx2 < 256; ++idx2)
+        ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
+
+      /* Mirror first 127 entries.  We must take care not to map entry
+         -1 because EOF == -1.  */
+      for (idx2 = 0; idx2 < 127; ++idx2)
+        ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
+
+      /* EOF must map to EOF.  */
+      ctype->map_b[idx][127] = EOF;
+    }
+
+  for (idx = 0; idx < ctype->map_collection_nr; ++idx)
+    {
+      unsigned int idx2;
+
+      /* Allocate table.  */
+      ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
+
+      /* Copy values from collection.  Default is identity mapping.  */
+      for (idx2 = 0; idx2 < 256; ++idx2)
+        ctype->map32_b[idx][idx2] =
+          (ctype->map_collection[idx][idx2] != 0
+           ? ctype->map_collection[idx][idx2]
+           : idx2);
+    }
+
+  for (nr = 0; nr < ctype->map_collection_nr; nr++)
+    {
+      struct wctrans_table *t;
+
+      t = &ctype->map_3level[nr];
+      t->p = 7;
+      t->q = 9;
+      wctrans_table_init (t);
+
+      for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
+        if (ctype->map_collection[nr][idx] != 0)
+          wctrans_table_add (t, ctype->charnames[idx],
+                             ctype->map_collection[nr][idx]);
+
+      if (verbose)
+        WITH_CUR_LOCALE (fprintf (stderr, _("\ +%s: table for map \"%s\": %lu bytes\n"),
+                                  "LC_CTYPE", ctype->mapnames[nr],
+                                  (unsigned long int) t->result_size));
+    }
+
+  /* Extra array for class and map names.  */
+  ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
+                                                * sizeof (uint32_t));
+  ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
+                                              * sizeof (uint32_t));
+
+  ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
+  ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
+
+  /* Array for width information.  Because the expected widths are very
+     small (never larger than 2) we use only one single byte.  This
+     saves space.
+     We put only printable characters in the table.  wcwidth is specified
+     to return -1 for non-printable characters.  Doing the check here
+     saves a run-time check.
+     But we put L'\0' in the table.  This again saves a run-time check.  */
+  {
+    struct wcwidth_table *t;
+
+    t = &ctype->width;
+    t->p = 7;
+    t->q = 9;
+    wcwidth_table_init (t);
+
+    /* First set all the printable characters of the character set to
+       the default width.  */
+    curs = NULL;
+    while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
+      {
+        struct charseq *data = (struct charseq *) vdata;
+
+        if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
+          data->ucs4 = repertoire_find_value (ctype->repertoire,
+                                              data->name, len);
+
+        if (data->ucs4 != ILLEGAL_CHAR_VALUE)
+          {
+            uint32_t *class_bits =
+              find_idx (ctype, &ctype->class_collection, NULL,
+                        &ctype->class_collection_act, data->ucs4);
+
+            if (class_bits != NULL && (*class_bits & BITw (tok_print)))
+              wcwidth_table_add (t, data->ucs4, charmap->width_default);
+          }
+      }
+
+    /* Now add the explicitly specified widths.  */
+    if (charmap->width_rules != NULL)
+      for (size_t cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
+        {
+          unsigned char bytes[charmap->mb_cur_max];
+          int nbytes = charmap->width_rules[cnt].from->nbytes;
+
+          /* We have the range of character for which the width is
+             specified described using byte sequences of the multibyte
+             charset.  We have to convert this to UCS4 now.  And we
+             cannot simply convert the beginning and the end of the
+             sequence, we have to iterate over the byte sequence and
+             convert it for every single character.  */
+          memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
+
+          while (nbytes < charmap->width_rules[cnt].to->nbytes
+                 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
+                            nbytes) <= 0)
+            {
+              /* Find the UCS value for `bytes'.  */
+              int inner;
+              uint32_t wch;
+              struct charseq *seq =
+                charmap_find_symbol (charmap, (char *) bytes, nbytes);
+
+              if (seq == NULL)
+                wch = ILLEGAL_CHAR_VALUE;
+              else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
+                wch = seq->ucs4;
+              else
+                wch = repertoire_find_value (ctype->repertoire, seq->name,
+                                             strlen (seq->name));
+
+              if (wch != ILLEGAL_CHAR_VALUE)
+                {
+                  /* Store the value.  */
+                  uint32_t *class_bits =
+                    find_idx (ctype, &ctype->class_collection, NULL,
+                              &ctype->class_collection_act, wch);
+
+                  if (class_bits != NULL && (*class_bits & BITw (tok_print)))
+                    wcwidth_table_add (t, wch,
+                                       charmap->width_rules[cnt].width);
+                }
+
+              /* "Increment" the bytes sequence.  The rightmost byte that
+                 is not 0xff is bumped and everything after it is reset
+                 to 0.  */
+              inner = nbytes - 1;
+              while (inner >= 0 && bytes[inner] == 0xff)
+                --inner;
+
+              if (inner < 0)
+                {
+                  /* We have to extend the byte sequence.  */
+                  if (nbytes >= charmap->width_rules[cnt].to->nbytes)
+                    break;
+
+                  bytes[0] = 1;
+                  memset (&bytes[1], 0, nbytes);
+                  ++nbytes;
+                }
+              else
+                {
+                  ++bytes[inner];
+                  while (++inner < nbytes)
+                    bytes[inner] = 0;
+                }
+            }
+        }
+
+    /* Set the width of L'\0' to 0.  */
+    wcwidth_table_add (t, 0, 0);
+
+    if (verbose)
+      WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
+                                "LC_CTYPE", (unsigned long int) t->result_size));
+  }
+
+  /* Set MB_CUR_MAX.  */
+  ctype->mb_cur_max = charmap->mb_cur_max;
+
+  /* Now determine the table for the transliteration information.
+
+     XXX It is not yet clear to me whether it is worth implementing a
+     complicated algorithm which uses a hash table to locate the entries.
+     For now I'll use a simple array which can be searched using binary
+     search.  */
+  if (ctype->translit_include != NULL)
+    /* Traverse the locales mentioned in the `include' statements in a
+       depth-first way and fold in their transliteration information.  */
+    translit_flatten (ctype, charmap, &ctype->translit);
+
+  if (ctype->translit != NULL)
+    {
+      /* First count how many entries we have.  This is the upper limit
+         since some entries from the included files might be overwritten.  */
+      size_t number = 0;
+      struct translit_t *runp = ctype->translit;
+      struct translit_t **sorted;
+      size_t from_len, to_len;
+
+      while (runp != NULL)
+        {
+          ++number;
+          runp = runp->next;
+        }
+
+      /* Next we allocate an array large enough and fill in the values.
+         The array lives on the stack (alloca); NUMBER is reset and then
+         counts the distinct `from' strings actually stored.  */
+      sorted = (struct translit_t **) alloca (number
+                                              * sizeof (struct translit_t **));
+      runp = ctype->translit;
+      number = 0;
+      do
+        {
+          /* Search for the place where to insert this string.
+             XXX Better use a real sorting algorithm later.  */
+          size_t idx = 0;
+          int replace = 0;
+
+          while (idx < number)
+            {
+              int res = wcscmp ((const wchar_t *) sorted[idx]->from,
+                                (const wchar_t *) runp->from);
+              if (res == 0)
+                {
+                  replace = 1;
+                  break;
+                }
+              if (res > 0)
+                break;
+              ++idx;
+            }
+
+          if (replace)  /* A later entry with the same `from' wins.  */
+            sorted[idx] = runp;
+          else
+            {
+              memmove (&sorted[idx + 1], &sorted[idx],
+                       (number - idx) * sizeof (struct translit_t *));
+              sorted[idx] = runp;
+              ++number;
+            }
+
+          runp = runp->next;
+        }
+      while (runp != NULL);
+
+      /* The next step is putting all the possible transliteration
+         strings in one memory block so that we can write it out.
+         We need several different blocks:
+         - index to the from-string array
+         - from-string array
+         - index to the to-string array
+         - to-string array.
+      */
+      from_len = to_len = 0;
+      for (size_t cnt = 0; cnt < number; ++cnt)
+        {
+          struct translit_to_t *srunp;
+          from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
+          srunp = sorted[cnt]->to;
+          while (srunp != NULL)
+            {
+              to_len += wcslen ((const wchar_t *) srunp->str) + 1;
+              srunp = srunp->next;
+            }
+          /* Plus one for the extra NUL character marking the end of
+             the list for the current entry.  */
+          ++to_len;
+        }
+
+      /* We can allocate the arrays for the results.  */
+      ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
+      ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
+      ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
+      ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
+
+      from_len = 0;
+      to_len = 0;
+      for (size_t cnt = 0; cnt < number; ++cnt)
+        {
+          size_t len;
+          struct translit_to_t *srunp;
+
+          ctype->translit_from_idx[cnt] = from_len;
+          ctype->translit_to_idx[cnt] = to_len;
+
+          len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
+          wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
+                   (const wchar_t *) sorted[cnt]->from, len);
+          from_len += len;
+
+          ctype->translit_to_idx[cnt] = to_len;  /* Same store as above; to_len is unchanged.  */
+          srunp = sorted[cnt]->to;
+          while (srunp != NULL)
+            {
+              len = wcslen ((const wchar_t *) srunp->str) + 1;
+              wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
+                       (const wchar_t *) srunp->str, len);
+              to_len += len;
+              srunp = srunp->next;
+            }
+          ctype->translit_to_tbl[to_len++] = L'\0';
+        }
+
+      /* Store the information about the length.  */
+      ctype->translit_idx_size = number;
+      ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
+      ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
+    }
+  else
+    {
+      ctype->translit_from_idx = no_str;  /* translit_to_idx is not assigned in this branch.  */
+      ctype->translit_from_tbl = no_str;
+      ctype->translit_to_tbl = no_str;
+      ctype->translit_idx_size = 0;
+      ctype->translit_from_tbl_size = 0;
+      ctype->translit_to_tbl_size = 0;
+    }
+}
diff --git a/REORG.TODO/locale/programs/ld-identification.c b/REORG.TODO/locale/programs/ld-identification.c
new file mode 100644
index 0000000000..3e3ea649d7
--- /dev/null
+++ b/REORG.TODO/locale/programs/ld-identification.c
@@ -0,0 +1,416 @@
+/* Copyright (C) 1998-2017 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published
+   by the Free Software Foundation; version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <error.h>
+#include <langinfo.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#include <assert.h>
+
+#include "localedef.h"
+#include "localeinfo.h"
+#include "locfile.h"
+
+
+/* The real definition of the struct for the LC_IDENTIFICATION locale.
+   A NULL member means the field has not been defined; after
+   identification_finish every string member is non-NULL.  */
+struct locale_identification_t
+{
+  const char *title;
+  const char *source;
+  const char *address;
+  const char *contact;
+  const char *email;
+  const char *tel;
+  const char *fax;
+  const char *language;
+  const char *territory;
+  const char *audience;
+  const char *application;
+  const char *abbreviation;
+  const char *revision;
+  const char *date;
+  const char *category[__LC_LAST];  /* Indexed by LC_* category value.  */
+};
+
+/* Category names for diagnostics, indexed by LC_* category value.  */
+static const char *category_name[__LC_LAST] =
+{
+  [LC_CTYPE] = "LC_CTYPE",
+  [LC_NUMERIC] = "LC_NUMERIC",
+  [LC_TIME] = "LC_TIME",
+  [LC_COLLATE] = "LC_COLLATE",
+  [LC_MONETARY] = "LC_MONETARY",
+  [LC_MESSAGES] = "LC_MESSAGES",
+  [LC_ALL] = "LC_ALL",
+  [LC_PAPER] = "LC_PAPER",
+  [LC_NAME] = "LC_NAME",
+  [LC_ADDRESS] = "LC_ADDRESS",
+  [LC_TELEPHONE] = "LC_TELEPHONE",
+  [LC_MEASUREMENT] = "LC_MEASUREMENT",
+  [LC_IDENTIFICATION] = "LC_IDENTIFICATION"
+};
+
+/* Unless IGNORE_CONTENT is set, give LOCALE a zero-filled
+   locale_identification_t whose category[LC_ALL] entry is "".  When LR
+   is not NULL, set lr->translate_strings and clear
+   lr->return_widestr.  */
+static void
+identification_startup (struct linereader *lr, struct localedef_t *locale,
+                        int ignore_content)
+{
+  if (!ignore_content)
+    {
+      locale->categories[LC_IDENTIFICATION].identification =
+        (struct locale_identification_t *)
+        xcalloc (1, sizeof (struct locale_identification_t));
+
+      locale->categories[LC_IDENTIFICATION].identification->category[LC_ALL] =
+        "";
+    }
+
+  if (lr != NULL)
+    {
+      lr->translate_strings = 1;
+      lr->return_widestr = 0;
+    }
+}
+
+/* Complete the LC_IDENTIFICATION data of LOCALE after parsing.  A
+   missing definition is taken from the locale named in a `copy'
+   statement (followed transitively) or else replaced by an empty one.
+   Every unset string field and category entry then becomes "", and each
+   category entry that is set is checked against the known standards.
+   CHARMAP is only passed on to find_locale.  */
+void
+identification_finish (struct localedef_t *locale,
+                       const struct charmap_t *charmap)
+{
+  struct locale_identification_t *identification
+    = locale->categories[LC_IDENTIFICATION].identification;
+  int nothing = 0;  /* Set when the whole category had to be made up.  */
+  size_t num;
+
+  /* Now resolve copying and also handle completely missing definitions.  */
+  if (identification == NULL)
+    {
+      /* First see whether we were supposed to copy.  If yes, find the
+         actual definition.  */
+      if (locale->copy_name[LC_IDENTIFICATION] != NULL)
+        {
+          /* Find the copying locale.  This has to happen transitively since
+             the locale we are copying from might also be copying another
+             one.  */
+          struct localedef_t *from = locale;
+
+          do
+            from = find_locale (LC_IDENTIFICATION,
+                                from->copy_name[LC_IDENTIFICATION],
+                                from->repertoire_name, charmap);
+          while (from->categories[LC_IDENTIFICATION].identification == NULL
+                 && from->copy_name[LC_IDENTIFICATION] != NULL);
+
+          identification = locale->categories[LC_IDENTIFICATION].identification
+            = from->categories[LC_IDENTIFICATION].identification;
+        }
+
+      /* If there is still no definition issue a warning and create an
+         empty one.  */
+      if (identification == NULL)
+        {
+          if (! be_quiet)
+            WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_IDENTIFICATION"));
+          identification_startup (NULL, locale, 0);
+          identification
+            = locale->categories[LC_IDENTIFICATION].identification;
+          nothing = 1;
+        }
+    }
+
+#define TEST_ELEM(cat) \
+  if (identification->cat == NULL)                                            \
+    {                                                                         \
+      if (verbose && ! nothing)                                               \
+        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),        \
+                                "LC_IDENTIFICATION", #cat));                  \
+      identification->cat = "";                                               \
+    }
+
+  TEST_ELEM (title);
+  TEST_ELEM (source);
+  TEST_ELEM (address);
+  TEST_ELEM (contact);
+  TEST_ELEM (email);
+  TEST_ELEM (tel);
+  TEST_ELEM (fax);
+  TEST_ELEM (language);
+  TEST_ELEM (territory);
+  TEST_ELEM (audience);
+  TEST_ELEM (application);
+  TEST_ELEM (abbreviation);
+  TEST_ELEM (revision);
+  TEST_ELEM (date);
+
+  for (num = 0; num < __LC_LAST; ++num)
+    {
+      /* We don't accept/parse this category, so skip it early.  */
+      if (num == LC_ALL)
+        continue;
+
+      if (identification->category[num] == NULL)
+        {
+          if (verbose && ! nothing)
+            WITH_CUR_LOCALE (error (0, 0, _("\ +%s: no identification for category `%s'"),
+                                    "LC_IDENTIFICATION", category_name[num]));
+          identification->category[num] = "";
+        }
+      else
+        {
+          /* Only list the standards we care about.  This is based on the
+             ISO 30112 WD10 [2014] standard which supersedes all previous
+             revisions of the ISO 14652 standard.  */
+          static const char * const standards[] =
+            {
+              "posix:1993",
+              "i18n:2004",
+              "i18n:2012",
+            };
+          size_t i;
+          bool matched = false;
+
+          for (i = 0; i < sizeof (standards) / sizeof (standards[0]); ++i)
+            if (strcmp (identification->category[num], standards[i]) == 0)
+              matched = true;
+
+          if (matched != true)  /* Only reported; the value is kept.  */
+            WITH_CUR_LOCALE (error (0, 0, _("\ +%s: unknown standard `%s' for category `%s'"),
+                                    "LC_IDENTIFICATION",
+                                    identification->category[num],
+                                    category_name[num]));
+        }
+    }
+}
+
+/* Write the LC_IDENTIFICATION data of LOCALE below OUTPUT_PATH: the
+   fourteen string fields in declaration order, one structure holding
+   the category strings of every category except LC_ALL, and finally the
+   code set name of CHARMAP.  */
+void
+identification_output (struct localedef_t *locale,
+                       const struct charmap_t *charmap,
+                       const char *output_path)
+{
+  struct locale_identification_t *identification
+    = locale->categories[LC_IDENTIFICATION].identification;
+  struct locale_file file;
+  size_t num;
+
+  init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_IDENTIFICATION));
+  add_locale_string (&file, identification->title);
+  add_locale_string (&file, identification->source);
+  add_locale_string (&file, identification->address);
+  add_locale_string (&file, identification->contact);
+  add_locale_string (&file, identification->email);
+  add_locale_string (&file, identification->tel);
+  add_locale_string (&file, identification->fax);
+  add_locale_string (&file, identification->language);
+  add_locale_string (&file, identification->territory);
+  add_locale_string (&file, identification->audience);
+  add_locale_string (&file, identification->application);
+  add_locale_string (&file, identification->abbreviation);
+  add_locale_string (&file, identification->revision);
+  add_locale_string (&file, identification->date);
+  start_locale_structure (&file);
+  for (num = 0; num < __LC_LAST; ++num)
+    if (num != LC_ALL)
+      add_locale_string (&file, identification->category[num]);
+  end_locale_structure (&file);
+  add_locale_string (&file, charmap->code_set_name);
+  write_locale_data (output_path, LC_IDENTIFICATION, "LC_IDENTIFICATION",
+                     &file);
+}
+
+
+/* The parser for the LC_IDENTIFICATION section of the locale definition. 
/* The parser for the LC_IDENTIFICATION section of the locale definition.

   LDFILE is the line reader delivering the tokens, RESULT the locale
   being built and CHARMAP the character map passed to the tokenizer.
   REPERTOIRE_NAME is only forwarded to handle_copy.  If IGNORE_CONTENT is
   nonzero every keyword line is skipped without storing anything.

   The function returns after a `copy' directive or after the `END' line
   has been processed; running into the end of the file is reported with
   lr_error.  */
void
identification_read (struct linereader *ldfile, struct localedef_t *result,
                     const struct charmap_t *charmap,
                     const char *repertoire_name, int ignore_content)
{
  struct locale_identification_t *identification;
  struct token *now;
  struct token *arg;
  struct token *cattok;
  int category;
  enum token_t nowtok;

  /* The rest of the line containing `LC_IDENTIFICATION' must be free.  */
  lr_ignore_rest (ldfile, 1);

  /* Skip empty lines until the first real token of the section.  */
  do
    {
      now = lr_token (ldfile, charmap, result, NULL, verbose);
      nowtok = now->tok;
    }
  while (nowtok == tok_eol);

  /* If we see `copy' now we are almost done.  */
  if (nowtok == tok_copy)
    {
      handle_copy (ldfile, charmap, repertoire_name, result,
                   tok_lc_identification, LC_IDENTIFICATION,
                   "LC_IDENTIFICATION", ignore_content);
      return;
    }

  /* Prepare the data structures.  */
  identification_startup (ldfile, result, ignore_content);
  identification = result->categories[LC_IDENTIFICATION].identification;

  while (1)
    {
      /* Of course we don't proceed beyond the end of file.  */
      if (nowtok == tok_eof)
        break;

      /* Ignore empty lines.  */
      if (nowtok == tok_eol)
        {
          now = lr_token (ldfile, charmap, result, NULL, verbose);
          nowtok = now->tok;
          continue;
        }

      switch (nowtok)
        {
/* Handler for a keyword that takes one string operand.  A keyword that
   was already seen is reported and its first value is kept.  A string
   whose multibyte form is missing (startmb == NULL) is reported as
   containing an unknown character and stored as the empty string.  The
   `!ignore_content' tests in the else branches are always true here
   because the ignore_content case leaves the handler earlier.  */
#define STR_ELEM(cat) \
        case tok_##cat: \
          /* Ignore the rest of the line if we don't need the input of \
             this line.  */ \
          if (ignore_content) \
            { \
              lr_ignore_rest (ldfile, 0); \
              break; \
            } \
 \
          arg = lr_token (ldfile, charmap, result, NULL, verbose); \
          if (arg->tok != tok_string) \
            goto err_label; \
          if (identification->cat != NULL) \
            lr_error (ldfile, _("\
%s: field `%s' declared more than once"), "LC_IDENTIFICATION", #cat); \
          else if (!ignore_content && arg->val.str.startmb == NULL) \
            { \
              lr_error (ldfile, _("\
%s: unknown character in field `%s'"), "LC_IDENTIFICATION", #cat); \
              identification->cat = ""; \
            } \
          else if (!ignore_content) \
            identification->cat = arg->val.str.startmb; \
          break

          STR_ELEM (title);
          STR_ELEM (source);
          STR_ELEM (address);
          STR_ELEM (contact);
          STR_ELEM (email);
          STR_ELEM (tel);
          STR_ELEM (fax);
          STR_ELEM (language);
          STR_ELEM (territory);
          STR_ELEM (audience);
          STR_ELEM (application);
          STR_ELEM (abbreviation);
          STR_ELEM (revision);
          STR_ELEM (date);

        case tok_category:
          /* Ignore the rest of the line if we don't need the input of
             this line.  */
          if (ignore_content)
            {
              lr_ignore_rest (ldfile, 0);
              break;
            }

          /* We expect two operands.  The first one, a string or an
             identifier, is kept in ARG until the category is known.  */
          arg = lr_token (ldfile, charmap, result, NULL, verbose);
          if (arg->tok != tok_string && arg->tok != tok_ident)
            goto err_label;
          /* Next is a semicolon.  */
          cattok = lr_token (ldfile, charmap, result, NULL, verbose);
          if (cattok->tok != tok_semicolon)
            goto err_label;
          /* Now a LC_xxx identifier.  */
          cattok = lr_token (ldfile, charmap, result, NULL, verbose);
          switch (cattok->tok)
            {
/* Map the token tok_lc_LNAME to the category index LC_UNAME.  */
#define CATEGORY(lname, uname) \
            case tok_lc_##lname: \
              category = LC_##uname; \
              break

              CATEGORY (identification, IDENTIFICATION);
              CATEGORY (ctype, CTYPE);
              CATEGORY (collate, COLLATE);
              CATEGORY (time, TIME);
              CATEGORY (numeric, NUMERIC);
              CATEGORY (monetary, MONETARY);
              CATEGORY (messages, MESSAGES);
              CATEGORY (paper, PAPER);
              CATEGORY (name, NAME);
              CATEGORY (address, ADDRESS);
              CATEGORY (telephone, TELEPHONE);
              CATEGORY (measurement, MEASUREMENT);

            default:
              goto err_label;
            }
          /* Only the first definition for a category is kept; the operand
             string of a duplicate is released again.  */
          if (identification->category[category] != NULL)
            {
              lr_error (ldfile, _("\
%s: duplicate category version definition"), "LC_IDENTIFICATION");
              free (arg->val.str.startmb);
            }
          else
            identification->category[category] = arg->val.str.startmb;
          break;

        case tok_end:
          /* Next we assume `LC_IDENTIFICATION'.  */
          arg = lr_token (ldfile, charmap, result, NULL, verbose);
          if (arg->tok == tok_eof)
            break;
          if (arg->tok == tok_eol)
            lr_error (ldfile, _("%s: incomplete `END' line"),
                      "LC_IDENTIFICATION");
          else if (arg->tok != tok_lc_identification)
            lr_error (ldfile, _("\
%1$s: definition does not end with `END %1$s'"), "LC_IDENTIFICATION");
          lr_ignore_rest (ldfile, arg->tok == tok_lc_identification);
          return;

        default:
        err_label:
          /* Also the target of the `goto err_label' statements above.  */
          SYNTAX_ERROR (_("%s: syntax error"), "LC_IDENTIFICATION");
        }

      /* Prepare for the next round.  */
      now = lr_token (ldfile, charmap, result, NULL, verbose);
      nowtok = now->tok;
    }

  /* When we come here we reached the end of the file.  */
  lr_error (ldfile, _("%s: premature end of file"), "LC_IDENTIFICATION");
}
+ This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_MEASUREMENT locale. */ +struct locale_measurement_t +{ + unsigned char measurement; +}; + + +static void +measurement_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_MEASUREMENT].measurement = + (struct locale_measurement_t *) + xcalloc (1, sizeof (struct locale_measurement_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +measurement_finish (struct localedef_t *locale, + const struct charmap_t *charmap) +{ + struct locale_measurement_t *measurement = + locale->categories[LC_MEASUREMENT].measurement; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (measurement == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_MEASUREMENT] != NULL) + { + /* Find the copying locale. 
This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_MEASUREMENT, + from->copy_name[LC_MEASUREMENT], + from->repertoire_name, charmap); + while (from->categories[LC_MEASUREMENT].measurement == NULL + && from->copy_name[LC_MEASUREMENT] != NULL); + + measurement = locale->categories[LC_MEASUREMENT].measurement + = from->categories[LC_MEASUREMENT].measurement; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (measurement == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_MEASUREMENT")); + measurement_startup (NULL, locale, 0); + measurement = locale->categories[LC_MEASUREMENT].measurement; + nothing = 1; + } + } + + if (measurement->measurement == 0) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_MEASUREMENT", "measurement")); + /* Use as the default value the value of the i18n locale. */ + measurement->measurement = 1; + } + else + { + if (measurement->measurement > 3) + WITH_CUR_LOCALE (error (0, 0, _("%s: invalid value for field `%s'"), + "LC_MEASUREMENT", "measurement")); + } +} + + +void +measurement_output (struct localedef_t *locale, + const struct charmap_t *charmap, const char *output_path) +{ + struct locale_measurement_t *measurement = + locale->categories[LC_MEASUREMENT].measurement; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_MEASUREMENT)); + add_locale_char (&file, measurement->measurement); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_MEASUREMENT, "LC_MEASUREMENT", &file); +} + + +/* The parser for the LC_MEASUREMENT section of the locale definition. 
/* The parser for the LC_MEASUREMENT section of the locale definition.

   LDFILE is the line reader delivering the tokens, RESULT the locale
   being built and CHARMAP the character map passed to the tokenizer.
   REPERTOIRE_NAME is only forwarded to handle_copy.  If IGNORE_CONTENT is
   nonzero every keyword line is skipped without storing anything.

   The function returns after a `copy' directive or after the `END' line
   has been processed; running into the end of the file is reported with
   lr_error.  */
void
measurement_read (struct linereader *ldfile, struct localedef_t *result,
                  const struct charmap_t *charmap, const char *repertoire_name,
                  int ignore_content)
{
  struct locale_measurement_t *measurement;
  struct token *now;
  struct token *arg;
  enum token_t nowtok;

  /* The rest of the line containing `LC_MEASUREMENT' must be free.  */
  lr_ignore_rest (ldfile, 1);

  /* Skip empty lines until the first real token of the section.  */
  do
    {
      now = lr_token (ldfile, charmap, result, NULL, verbose);
      nowtok = now->tok;
    }
  while (nowtok == tok_eol);

  /* If we see `copy' now we are almost done.  */
  if (nowtok == tok_copy)
    {
      handle_copy (ldfile, charmap, repertoire_name, result,
                   tok_lc_measurement, LC_MEASUREMENT, "LC_MEASUREMENT",
                   ignore_content);
      return;
    }

  /* Prepare the data structures.  */
  measurement_startup (ldfile, result, ignore_content);
  measurement = result->categories[LC_MEASUREMENT].measurement;

  while (1)
    {
      /* Of course we don't proceed beyond the end of file.  */
      if (nowtok == tok_eof)
        break;

      /* Ignore empty lines.  */
      if (nowtok == tok_eol)
        {
          now = lr_token (ldfile, charmap, result, NULL, verbose);
          nowtok = now->tok;
          continue;
        }

      switch (nowtok)
        {
/* Handler for a keyword that takes one numeric operand.  A field whose
   stored value is already nonzero counts as declared before, so the
   repetition is reported and the first value kept.  */
#define INT_ELEM(cat) \
        case tok_##cat: \
          /* Ignore the rest of the line if we don't need the input of \
             this line.  */ \
          if (ignore_content) \
            { \
              lr_ignore_rest (ldfile, 0); \
              break; \
            } \
 \
          arg = lr_token (ldfile, charmap, result, NULL, verbose); \
          if (arg->tok != tok_number) \
            goto err_label; \
          else if (measurement->cat != 0) \
            lr_error (ldfile, _("%s: field `%s' declared more than once"), \
                      "LC_MEASUREMENT", #cat); \
          else if (!ignore_content) \
            measurement->cat = arg->val.num; \
          break

          INT_ELEM (measurement);

        case tok_end:
          /* Next we assume `LC_MEASUREMENT'.  */
          arg = lr_token (ldfile, charmap, result, NULL, verbose);
          if (arg->tok == tok_eof)
            break;
          if (arg->tok == tok_eol)
            lr_error (ldfile, _("%s: incomplete `END' line"),
                      "LC_MEASUREMENT");
          else if (arg->tok != tok_lc_measurement)
            lr_error (ldfile, _("\
%1$s: definition does not end with `END %1$s'"), "LC_MEASUREMENT");
          lr_ignore_rest (ldfile, arg->tok == tok_lc_measurement);
          return;

        default:
        err_label:
          /* Also the target of the `goto err_label' in INT_ELEM.  */
          SYNTAX_ERROR (_("%s: syntax error"), "LC_MEASUREMENT");
        }

      /* Prepare for the next round.  */
      now = lr_token (ldfile, charmap, result, NULL, verbose);
      nowtok = now->tok;
    }

  /* When we come here we reached the end of the file.  */
  lr_error (ldfile, _("%s: premature end of file"),
            "LC_MEASUREMENT");
}
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <sys/types.h> +#include <regex.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_MESSAGES locale. */ +struct locale_messages_t +{ + const char *yesexpr; + const char *noexpr; + const char *yesstr; + const char *nostr; +}; + + +static void +messages_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_MESSAGES].messages = + (struct locale_messages_t *) xcalloc (1, + sizeof (struct locale_messages_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +messages_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_messages_t *messages + = locale->categories[LC_MESSAGES].messages; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (messages == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_MESSAGES] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_MESSAGES, from->copy_name[LC_MESSAGES], + from->repertoire_name, charmap); + while (from->categories[LC_MESSAGES].messages == NULL + && from->copy_name[LC_MESSAGES] != NULL); + + messages = locale->categories[LC_MESSAGES].messages + = from->categories[LC_MESSAGES].messages; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (messages == NULL) + { + if (! 
be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_MESSAGES")); + messages_startup (NULL, locale, 0); + messages = locale->categories[LC_MESSAGES].messages; + nothing = 1; + } + } + + /* The fields YESSTR and NOSTR are optional. */ + if (messages->yesstr == NULL) + messages->yesstr = ""; + if (messages->nostr == NULL) + messages->nostr = ""; + + if (messages->yesexpr == NULL) + { + if (! be_quiet && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' undefined"), + "LC_MESSAGES", "yesexpr")); + messages->yesexpr = "^[yY]"; + } + else if (messages->yesexpr[0] == '\0') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_MESSAGES", "yesexpr")); + } + else + { + int result; + regex_t re; + + /* Test whether it are correct regular expressions. */ + result = regcomp (&re, messages->yesexpr, REG_EXTENDED); + if (result != 0 && !be_quiet) + { + char errbuf[BUFSIZ]; + + (void) regerror (result, &re, errbuf, BUFSIZ); + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: no correct regular expression for field `%s': %s"), + "LC_MESSAGES", "yesexpr", errbuf)); + } + else if (result != 0) + regfree (&re); + } + + if (messages->noexpr == NULL) + { + if (! be_quiet && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' undefined"), + "LC_MESSAGES", "noexpr")); + messages->noexpr = "^[nN]"; + } + else if (messages->noexpr[0] == '\0') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_MESSAGES", "noexpr")); + } + else + { + int result; + regex_t re; + + /* Test whether it are correct regular expressions. 
*/ + result = regcomp (&re, messages->noexpr, REG_EXTENDED); + if (result != 0 && !be_quiet) + { + char errbuf[BUFSIZ]; + + (void) regerror (result, &re, errbuf, BUFSIZ); + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: no correct regular expression for field `%s': %s"), + "LC_MESSAGES", "noexpr", errbuf)); + } + else if (result != 0) + regfree (&re); + } +} + + +void +messages_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_messages_t *messages + = locale->categories[LC_MESSAGES].messages; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_MESSAGES)); + add_locale_string (&file, messages->yesexpr); + add_locale_string (&file, messages->noexpr); + add_locale_string (&file, messages->yesstr); + add_locale_string (&file, messages->nostr); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_MESSAGES, "LC_MESSAGES", &file); +} + + +/* The parser for the LC_MESSAGES section of the locale definition. */ +void +messages_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_messages_t *messages; + struct token *now; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_MESSAGES' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_messages, + LC_MESSAGES, "LC_MESSAGES", ignore_content); + return; + } + + /* Prepare the data structures. 
*/ + messages_startup (ldfile, result, ignore_content); + messages = result->categories[LC_MESSAGES].messages; + + while (1) + { + struct token *arg; + + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + if (messages->cat != NULL) \ + { \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_MESSAGES", #cat); \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_string) \ + goto syntax_error; \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_MESSAGES", #cat); \ + messages->cat = ""; \ + } \ + else if (!ignore_content) \ + messages->cat = now->val.str.startmb; \ + break + + STR_ELEM (yesexpr); + STR_ELEM (noexpr); + STR_ELEM (yesstr); + STR_ELEM (nostr); + + case tok_end: + /* Next we assume `LC_MESSAGES'. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_MESSAGES"); + else if (arg->tok != tok_lc_messages) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_MESSAGES"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_messages); + return; + + default: + syntax_error: + SYNTAX_ERROR (_("%s: syntax error"), "LC_MESSAGES"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. 
*/ + lr_error (ldfile, _("%s: premature end of file"), "LC_MESSAGES"); +} diff --git a/REORG.TODO/locale/programs/ld-monetary.c b/REORG.TODO/locale/programs/ld-monetary.c new file mode 100644 index 0000000000..cd50541603 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-monetary.c @@ -0,0 +1,757 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <byteswap.h> +#include <langinfo.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_MONETARY locale. 
/* The real definition of the struct for the LC_MONETARY locale.  */
struct locale_monetary_t
{
  /* String fields; NULL until set.  */
  const char *int_curr_symbol;
  const char *currency_symbol;
  const char *mon_decimal_point;
  const char *mon_thousands_sep;
  /* Wide character forms of the two separators above.  */
  uint32_t mon_decimal_point_wc;
  uint32_t mon_thousands_sep_wc;
  /* Raw grouping bytes and their count.  */
  char *mon_grouping;
  size_t mon_grouping_len;
  const char *positive_sign;
  const char *negative_sign;
  /* Numeric formatting fields.  */
  signed char int_frac_digits;
  signed char frac_digits;
  signed char p_cs_precedes;
  signed char p_sep_by_space;
  signed char n_cs_precedes;
  signed char n_sep_by_space;
  signed char p_sign_posn;
  signed char n_sign_posn;
  signed char int_p_cs_precedes;
  signed char int_p_sep_by_space;
  signed char int_n_cs_precedes;
  signed char int_n_sep_by_space;
  signed char int_p_sign_posn;
  signed char int_n_sign_posn;
  /* The `duo_' counterparts of the fields above.  */
  const char *duo_int_curr_symbol;
  const char *duo_currency_symbol;
  signed char duo_int_frac_digits;
  signed char duo_frac_digits;
  signed char duo_p_cs_precedes;
  signed char duo_p_sep_by_space;
  signed char duo_n_cs_precedes;
  signed char duo_n_sep_by_space;
  signed char duo_p_sign_posn;
  signed char duo_n_sign_posn;
  signed char duo_int_p_cs_precedes;
  signed char duo_int_p_sep_by_space;
  signed char duo_int_n_cs_precedes;
  signed char duo_int_n_sep_by_space;
  signed char duo_int_p_sign_posn;
  signed char duo_int_n_sign_posn;
  /* Validity ranges and conversion rate; zero until set.  */
  uint32_t uno_valid_from;
  uint32_t uno_valid_to;
  uint32_t duo_valid_from;
  uint32_t duo_valid_to;
  uint32_t conversion_rate[2];
  char *crncystr;
};


/* The content of the field int_curr_symbol has to be taken from
   ISO-4217.  We test for correct values.  Each DEFINE_INT_CURR entry of
   the included file becomes one string of the table.  */
#define DEFINE_INT_CURR(str) str,
static const char *const valid_int_curr[] =
  {
#   include "../iso-4217.def"
  };
/* Number of entries in valid_int_curr.  */
#define NR_VALID_INT_CURR ((sizeof (valid_int_curr) \
                            / sizeof (valid_int_curr[0])))
#undef DEFINE_INT_CURR
/* Prototypes for local functions.  */
static int curr_strcmp (const char *s1, const char **s2);


/* Install a fresh LC_MONETARY record in LOCALE unless IGNORE_CONTENT is
   set.  The record is zero-filled and every signed char field is then set
   to -2, the value monetary_finish tests for to recognize a field that
   was never assigned.  If a line reader LR is given, make it deliver
   translated, non-wide strings.  */
static void
monetary_startup (struct linereader *lr, struct localedef_t *locale,
                  int ignore_content)
{
  if (!ignore_content)
    {
      struct locale_monetary_t *monetary;

      locale->categories[LC_MONETARY].monetary = monetary =
        (struct locale_monetary_t *) xmalloc (sizeof (*monetary));

      memset (monetary, '\0', sizeof (struct locale_monetary_t));

      monetary->mon_grouping = NULL;
      monetary->mon_grouping_len = 0;

      monetary->int_frac_digits = -2;
      monetary->frac_digits = -2;
      monetary->p_cs_precedes = -2;
      monetary->p_sep_by_space = -2;
      monetary->n_cs_precedes = -2;
      monetary->n_sep_by_space = -2;
      monetary->p_sign_posn = -2;
      monetary->n_sign_posn = -2;
      monetary->int_p_cs_precedes = -2;
      monetary->int_p_sep_by_space = -2;
      monetary->int_n_cs_precedes = -2;
      monetary->int_n_sep_by_space = -2;
      monetary->int_p_sign_posn = -2;
      monetary->int_n_sign_posn = -2;
      monetary->duo_int_frac_digits = -2;
      monetary->duo_frac_digits = -2;
      monetary->duo_p_cs_precedes = -2;
      monetary->duo_p_sep_by_space = -2;
      monetary->duo_n_cs_precedes = -2;
      monetary->duo_n_sep_by_space = -2;
      monetary->duo_p_sign_posn = -2;
      monetary->duo_n_sign_posn = -2;
      monetary->duo_int_p_cs_precedes = -2;
      monetary->duo_int_p_sep_by_space = -2;
      monetary->duo_int_n_cs_precedes = -2;
      monetary->duo_int_n_sep_by_space = -2;
      monetary->duo_int_p_sign_posn = -2;
      monetary->duo_int_n_sign_posn = -2;
    }

  if (lr != NULL)
    {
      lr->translate_strings = 1;
      lr->return_widestr = 0;
    }
}


/* Complete the LC_MONETARY data of LOCALE after parsing.

   A missing record is first looked up through the chain of `copy'
   directives.  If that yields nothing either, a warning is printed
   (unless be_quiet) and an empty record is created.  Afterwards every
   unset field receives its default, values are checked and the
   `crncystr' entry is built.  Most diagnostics are suppressed when
   be_quiet is set or when the whole category was missing.  */
void
monetary_finish (struct localedef_t *locale, const struct charmap_t *charmap)
{
  struct locale_monetary_t *monetary
    = locale->categories[LC_MONETARY].monetary;
  int nothing = 0;

  /* Now resolve copying and also handle completely missing definitions.  */
  if (monetary == NULL)
    {
      /* First see whether we were supposed to copy.  If yes, find the
         actual definition.  */
      if (locale->copy_name[LC_MONETARY] != NULL)
        {
          /* Find the copying locale.  This has to happen transitively since
             the locale we are copying from might itself be copying another
             one.  */
          struct localedef_t *from = locale;

          do
            from = find_locale (LC_MONETARY, from->copy_name[LC_MONETARY],
                                from->repertoire_name, charmap);
          while (from->categories[LC_MONETARY].monetary == NULL
                 && from->copy_name[LC_MONETARY] != NULL);

          monetary = locale->categories[LC_MONETARY].monetary
            = from->categories[LC_MONETARY].monetary;
        }

      /* If there is still no definition issue a warning and create an
         empty one.  */
      if (monetary == NULL)
        {
          if (! be_quiet)
            WITH_CUR_LOCALE (error (0, 0, _("\
No definition for %s category found"), "LC_MONETARY"));
          monetary_startup (NULL, locale, 0);
          monetary = locale->categories[LC_MONETARY].monetary;
          nothing = 1;
        }
    }

/* String fields: warn about a NULL field and store INITVAL.  */
#define TEST_ELEM(cat, initval) \
  if (monetary->cat == NULL) \
    { \
      if (! be_quiet && ! nothing) \
        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
                                "LC_MONETARY", #cat)); \
      monetary->cat = initval; \
    }

  TEST_ELEM (int_curr_symbol, "");
  TEST_ELEM (currency_symbol, "");
  TEST_ELEM (mon_decimal_point, ".");
  TEST_ELEM (mon_thousands_sep, "");
  TEST_ELEM (positive_sign, "");
  TEST_ELEM (negative_sign, "");

  /* The international currency symbol must come from ISO 4217.  */
  if (monetary->int_curr_symbol != NULL)
    {
      if (strlen (monetary->int_curr_symbol) != 4)
        {
          if (! be_quiet && ! nothing)
            WITH_CUR_LOCALE (error (0, 0, _("\
%s: value of field `int_curr_symbol' has wrong length"),
                                    "LC_MONETARY"));
        }
      else
        { /* Check the first three characters against ISO 4217 */
          char symbol[4];
          /* The length is known to be 4 here, so three bytes are always
             available; the terminator is added explicitly.  */
          strncpy (symbol, monetary->int_curr_symbol, 3);
          symbol[3] = '\0';
          if (bsearch (symbol, valid_int_curr, NR_VALID_INT_CURR,
                       sizeof (const char *),
                       (comparison_fn_t) curr_strcmp) == NULL
               && !be_quiet)
            WITH_CUR_LOCALE (error (0, 0, _("\
%s: value of field `int_curr_symbol' does \
not correspond to a valid name in ISO 4217"),
                                "LC_MONETARY"));
        }
    }

  /* The decimal point must not be empty.  This is not said explicitly
     in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be
     != "".  */
  /* NOTE(review): TEST_ELEM (mon_decimal_point, ".") above already
     replaced a NULL value, so the first branch looks unreachable.  */
  if (monetary->mon_decimal_point == NULL)
    {
      if (! be_quiet && ! nothing)
        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
                                "LC_MONETARY", "mon_decimal_point"));
      monetary->mon_decimal_point = ".";
    }
  else if (monetary->mon_decimal_point[0] == '\0' && ! be_quiet && ! nothing)
    {
      WITH_CUR_LOCALE (error (0, 0, _("\
%s: value for field `%s' must not be an empty string"),
                              "LC_MONETARY", "mon_decimal_point"));
    }
  if (monetary->mon_decimal_point_wc == L'\0')
    monetary->mon_decimal_point_wc = L'.';

  /* Without a grouping definition use the single byte "\177".  */
  if (monetary->mon_grouping_len == 0)
    {
      if (! be_quiet && ! nothing)
        WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"),
                                "LC_MONETARY", "mon_grouping"));

      monetary->mon_grouping = (char *) "\177";
      monetary->mon_grouping_len = 1;
    }

/* Numeric fields: a field still holding -2 was never set; warn and store
   INITVAL.  Otherwise check MIN <= value <= MAX.  The range check is
   skipped when MIN is not below MAX, which is how the calls passing
   `1, 0' disable it.  */
#undef TEST_ELEM
#define TEST_ELEM(cat, min, max, initval) \
  if (monetary->cat == -2) \
    { \
       if (! be_quiet && ! nothing) \
         WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \
                                 "LC_MONETARY", #cat)); \
       monetary->cat = initval; \
    } \
  else if ((monetary->cat < min || monetary->cat > max) \
           && min < max \
           && !be_quiet && !nothing) \
    WITH_CUR_LOCALE (error (0, 0, _("\
%s: value for field `%s' must be in range %d...%d"), \
                            "LC_MONETARY", #cat, min, max))

  TEST_ELEM (int_frac_digits, 1, 0, -1);
  TEST_ELEM (frac_digits, 1, 0, -1);
  TEST_ELEM (p_cs_precedes, -1, 1, -1);
  TEST_ELEM (p_sep_by_space, -1, 2, -1);
  TEST_ELEM (n_cs_precedes, -1, 1, -1);
  TEST_ELEM (n_sep_by_space, -1, 2, -1);
  TEST_ELEM (p_sign_posn, -1, 4, -1);
  TEST_ELEM (n_sign_posn, -1, 4, -1);

  /* The non-POSIX.2 extensions are optional.  */
  if (monetary->duo_int_curr_symbol == NULL)
    monetary->duo_int_curr_symbol = monetary->int_curr_symbol;
  if (monetary->duo_currency_symbol == NULL)
    monetary->duo_currency_symbol = monetary->currency_symbol;

  if (monetary->duo_int_frac_digits == -2)
    monetary->duo_int_frac_digits = monetary->int_frac_digits;
  if (monetary->duo_frac_digits == -2)
    monetary->duo_frac_digits = monetary->frac_digits;

/* Extension fields: an unset field silently inherits the value of field
   ALT; a set one is range checked.  */
#undef TEST_ELEM
#define TEST_ELEM(cat, alt, min, max) \
  if (monetary->cat == -2) \
    monetary->cat = monetary->alt; \
  else if ((monetary->cat < min || monetary->cat > max) && !be_quiet \
           && ! nothing) \
    WITH_CUR_LOCALE (error (0, 0, _("\
%s: value for field `%s' must be in range %d...%d"), \
                            "LC_MONETARY", #cat, min, max))

  TEST_ELEM (int_p_cs_precedes, p_cs_precedes, -1, 1);
  TEST_ELEM (int_p_sep_by_space, p_sep_by_space, -1, 2);
  TEST_ELEM (int_n_cs_precedes, n_cs_precedes, -1, 1);
  TEST_ELEM (int_n_sep_by_space, n_sep_by_space, -1, 2);
  TEST_ELEM (int_p_sign_posn, p_sign_posn, -1, 4);
  TEST_ELEM (int_n_sign_posn, n_sign_posn, -1, 4);

  TEST_ELEM (duo_p_cs_precedes, p_cs_precedes, -1, 1);
  TEST_ELEM (duo_p_sep_by_space, p_sep_by_space, -1, 2);
  TEST_ELEM (duo_n_cs_precedes, n_cs_precedes, -1, 1);
  TEST_ELEM (duo_n_sep_by_space, n_sep_by_space, -1, 2);
  TEST_ELEM (duo_int_p_cs_precedes, int_p_cs_precedes, -1, 1);
  TEST_ELEM (duo_int_p_sep_by_space, int_p_sep_by_space, -1, 2);
  TEST_ELEM (duo_int_n_cs_precedes, int_n_cs_precedes, -1, 1);
  TEST_ELEM (duo_int_n_sep_by_space, int_n_sep_by_space, -1, 2);
  TEST_ELEM (duo_p_sign_posn, p_sign_posn, -1, 4);
  TEST_ELEM (duo_n_sign_posn, n_sign_posn, -1, 4);
  TEST_ELEM (duo_int_p_sign_posn, int_p_sign_posn, -1, 4);
  TEST_ELEM (duo_int_n_sign_posn, int_n_sign_posn, -1, 4);

  /* Defaults for validity ranges that were left at zero.  */
  if (monetary->uno_valid_from == 0)
    monetary->uno_valid_from = 10101;
  if (monetary->uno_valid_to == 0)
    monetary->uno_valid_to = 99991231;
  if (monetary->duo_valid_from == 0)
    monetary->duo_valid_from = 10101;
  if (monetary->duo_valid_to == 0)
    monetary->duo_valid_to = 99991231;

  if (monetary->conversion_rate[0] == 0)
    {
      monetary->conversion_rate[0] = 1;
      monetary->conversion_rate[1] = 1;
    }

  /* Create the crncystr entry: one position character followed by the
     currency symbol, hence strlen + 2 bytes including the terminator.
     Any nonzero p_cs_precedes, including the default -1, selects '-'.  */
  monetary->crncystr = (char *) xmalloc (strlen (monetary->currency_symbol)
                                         + 2);
  monetary->crncystr[0] = monetary->p_cs_precedes ? '-' : '+';
  strcpy (&monetary->crncystr[1], monetary->currency_symbol);
}


/* Write the compiled LC_MONETARY data of LOCALE below OUTPUT_PATH.  The
   fields are added in the fixed order of the calls below, ending with the
   code set name of CHARMAP.  */
void
monetary_output (struct localedef_t *locale, const struct charmap_t *charmap,
                 const char *output_path)
{
  struct locale_monetary_t *monetary
    = locale->categories[LC_MONETARY].monetary;
  struct locale_file file;

  init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_MONETARY));
  add_locale_string (&file, monetary->int_curr_symbol);
  add_locale_string (&file, monetary->currency_symbol);
  add_locale_string (&file, monetary->mon_decimal_point);
  add_locale_string (&file, monetary->mon_thousands_sep);
  add_locale_raw_data (&file, monetary->mon_grouping,
                       monetary->mon_grouping_len);
  add_locale_string (&file, monetary->positive_sign);
  add_locale_string (&file, monetary->negative_sign);
  add_locale_char (&file, monetary->int_frac_digits);
  add_locale_char (&file, monetary->frac_digits);
  add_locale_char (&file, monetary->p_cs_precedes);
  add_locale_char (&file, monetary->p_sep_by_space);
  add_locale_char (&file, monetary->n_cs_precedes);
  add_locale_char (&file, monetary->n_sep_by_space);
  add_locale_char (&file, monetary->p_sign_posn);
  add_locale_char (&file, monetary->n_sign_posn);
  add_locale_string (&file, monetary->crncystr);
  add_locale_char (&file, monetary->int_p_cs_precedes);
  add_locale_char (&file, monetary->int_p_sep_by_space);
  add_locale_char (&file, monetary->int_n_cs_precedes);
  add_locale_char (&file, monetary->int_n_sep_by_space);
  add_locale_char (&file, monetary->int_p_sign_posn);
  add_locale_char (&file, monetary->int_n_sign_posn);
  add_locale_string (&file, monetary->duo_int_curr_symbol);
  add_locale_string (&file, monetary->duo_currency_symbol);
  add_locale_char (&file, monetary->duo_int_frac_digits);
  add_locale_char (&file, monetary->duo_frac_digits);
  add_locale_char (&file, monetary->duo_p_cs_precedes);
  add_locale_char (&file, monetary->duo_p_sep_by_space);
  add_locale_char (&file, monetary->duo_n_cs_precedes);
  add_locale_char (&file, monetary->duo_n_sep_by_space);
  add_locale_char (&file, monetary->duo_int_p_cs_precedes);
  add_locale_char (&file, monetary->duo_int_p_sep_by_space);
  add_locale_char (&file, monetary->duo_int_n_cs_precedes);
  add_locale_char (&file, monetary->duo_int_n_sep_by_space);
  add_locale_char (&file, monetary->duo_p_sign_posn);
  add_locale_char (&file, monetary->duo_n_sign_posn);
  add_locale_char (&file, monetary->duo_int_p_sign_posn);
  add_locale_char (&file, monetary->duo_int_n_sign_posn);
  add_locale_uint32 (&file, monetary->uno_valid_from);
  add_locale_uint32 (&file, monetary->uno_valid_to);
  add_locale_uint32 (&file, monetary->duo_valid_from);
  add_locale_uint32 (&file, monetary->duo_valid_to);
  add_locale_uint32_array (&file, monetary->conversion_rate, 2);
  add_locale_uint32 (&file, monetary->mon_decimal_point_wc);
  add_locale_uint32 (&file, monetary->mon_thousands_sep_wc);
  add_locale_string (&file, charmap->code_set_name);
  write_locale_data (output_path, LC_MONETARY, "LC_MONETARY", &file);
}


/* Comparison function for the bsearch call in monetary_finish: S1 is the
   key string and S2 points to an element of a table of strings.  */
static int
curr_strcmp (const char *s1, const char **s2)
{
  return strcmp (s1, *s2);
}
*/ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_monetary, + LC_MONETARY, "LC_MONETARY", ignore_content); + return; + } + + /* Prepare the data structures. */ + monetary_startup (ldfile, result, ignore_content); + monetary = result->categories[LC_MONETARY].monetary; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + now = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (now->tok != tok_string) \ + goto err_label; \ + else if (monetary->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_MONETARY", #cat); \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_MONETARY", #cat); \ + monetary->cat = ""; \ + } \ + else if (!ignore_content) \ + monetary->cat = now->val.str.startmb; \ + lr_ignore_rest (ldfile, 1); \ + break + + STR_ELEM (int_curr_symbol); + STR_ELEM (currency_symbol); + STR_ELEM (positive_sign); + STR_ELEM (negative_sign); + STR_ELEM (duo_int_curr_symbol); + STR_ELEM (duo_currency_symbol); + +#define STR_ELEM_WC(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + ldfile->return_widestr = 1; \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_string) \ + goto err_label; \ + if (monetary->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_MONETARY", #cat); \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_MONETARY", #cat); \ + monetary->cat = ""; \ + monetary->cat##_wc = L'\0'; \ + } \ + else if (now->val.str.startwc != NULL && now->val.str.lenwc > 2) \ + { \ + lr_error (ldfile, _("\ +%s: value for field `%s' must be a single character"), "LC_MONETARY", #cat); \ + } \ + else if (!ignore_content) \ + { \ + monetary->cat = now->val.str.startmb; \ + \ + if (now->val.str.startwc != NULL) \ + monetary->cat##_wc = *now->val.str.startwc; \ + } \ + ldfile->return_widestr = 0; \ + break + + STR_ELEM_WC (mon_decimal_point); + STR_ELEM_WC (mon_thousands_sep); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + now = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (now->tok != tok_minus1 && now->tok != tok_number) \ + goto err_label; \ + else if (monetary->cat != -2) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_MONETARY", #cat); \ + else if (!ignore_content) \ + monetary->cat = now->tok == tok_minus1 ? 
-1 : now->val.num; \ + break + + INT_ELEM (int_frac_digits); + INT_ELEM (frac_digits); + INT_ELEM (p_cs_precedes); + INT_ELEM (p_sep_by_space); + INT_ELEM (n_cs_precedes); + INT_ELEM (n_sep_by_space); + INT_ELEM (p_sign_posn); + INT_ELEM (n_sign_posn); + INT_ELEM (int_p_cs_precedes); + INT_ELEM (int_p_sep_by_space); + INT_ELEM (int_n_cs_precedes); + INT_ELEM (int_n_sep_by_space); + INT_ELEM (int_p_sign_posn); + INT_ELEM (int_n_sign_posn); + INT_ELEM (duo_int_frac_digits); + INT_ELEM (duo_frac_digits); + INT_ELEM (duo_p_cs_precedes); + INT_ELEM (duo_p_sep_by_space); + INT_ELEM (duo_n_cs_precedes); + INT_ELEM (duo_n_sep_by_space); + INT_ELEM (duo_p_sign_posn); + INT_ELEM (duo_n_sign_posn); + INT_ELEM (duo_int_p_cs_precedes); + INT_ELEM (duo_int_p_sep_by_space); + INT_ELEM (duo_int_n_cs_precedes); + INT_ELEM (duo_int_n_sep_by_space); + INT_ELEM (duo_int_p_sign_posn); + INT_ELEM (duo_int_n_sign_posn); + INT_ELEM (uno_valid_from); + INT_ELEM (uno_valid_to); + INT_ELEM (duo_valid_from); + INT_ELEM (duo_valid_to); + + case tok_mon_grouping: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_minus1 && now->tok != tok_number) + goto err_label; + else + { + size_t act = 0; + size_t max = 10; + char *grouping = ignore_content ? NULL : xmalloc (max); + + do + { + if (act + 1 >= max) + { + max *= 2; + grouping = xrealloc (grouping, max); + } + + if (act > 0 && grouping[act - 1] == '\177') + { + lr_error (ldfile, _("\ +%s: `-1' must be last entry in `%s' field"), + "LC_MONETARY", "mon_grouping"); + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_minus1) + { + if (!ignore_content) + grouping[act++] = '\177'; + } + else if (now->val.num == 0) + { + /* A value of 0 disables grouping from here on but + we must not store a NUL character since this + terminates the string. 
Use something different + which must not be used otherwise. */ + if (!ignore_content) + grouping[act++] = '\377'; + } + else if (now->val.num > 126) + lr_error (ldfile, _("\ +%s: values for field `%s' must be smaller than 127"), + "LC_MONETARY", "mon_grouping"); + else if (!ignore_content) + grouping[act++] = now->val.num; + + /* Next must be semicolon. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_semicolon) + break; + + now = lr_token (ldfile, charmap, result, NULL, verbose); + } + while (now->tok == tok_minus1 || now->tok == tok_number); + + if (now->tok != tok_eol) + goto err_label; + + if (!ignore_content) + { + /* A single -1 means no grouping. */ + if (act == 1 && grouping[0] == '\177') + act--; + grouping[act++] = '\0'; + + monetary->mon_grouping = xrealloc (grouping, act); + monetary->mon_grouping_len = act; + } + } + break; + + case tok_conversion_rate: + /* Ignore the rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_number) + goto err_label; + if (now->val.num == 0) + { + invalid_conversion_rate: + lr_error (ldfile, _("conversion rate value cannot be zero")); + if (!ignore_content) + { + monetary->conversion_rate[0] = 1; + monetary->conversion_rate[1] = 1; + } + break; + } + if (!ignore_content) + monetary->conversion_rate[0] = now->val.num; + /* Next must be a semicolon. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_semicolon) + goto err_label; + /* And another number. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_number) + goto err_label; + if (now->val.num == 0) + goto invalid_conversion_rate; + if (!ignore_content) + monetary->conversion_rate[1] = now->val.num; + /* The rest of the line must be empty. 
*/ + lr_ignore_rest (ldfile, 1); + break; + + case tok_end: + /* Next we assume `LC_MONETARY'. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_MONETARY"); + else if (now->tok != tok_lc_monetary) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_MONETARY"); + lr_ignore_rest (ldfile, now->tok == tok_lc_monetary); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_MONETARY"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_MONETARY"); +} diff --git a/REORG.TODO/locale/programs/ld-name.c b/REORG.TODO/locale/programs/ld-name.c new file mode 100644 index 0000000000..ee50ae7322 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-name.c @@ -0,0 +1,281 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_NAME locale. */ +struct locale_name_t +{ + const char *name_fmt; + const char *name_gen; + const char *name_mr; + const char *name_mrs; + const char *name_miss; + const char *name_ms; +}; + + +static void +name_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_NAME].name = + (struct locale_name_t *) xcalloc (1, sizeof (struct locale_name_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +name_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_name_t *name = locale->categories[LC_NAME].name; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (name == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_NAME] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_NAME, from->copy_name[LC_NAME], + from->repertoire_name, charmap); + while (from->categories[LC_NAME].name == NULL + && from->copy_name[LC_NAME] != NULL); + + name = locale->categories[LC_NAME].name + = from->categories[LC_NAME].name; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (name == NULL) + { + if (! 
be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_NAME")); + name_startup (NULL, locale, 0); + name = locale->categories[LC_NAME].name; + nothing = 1; + } + } + + if (name->name_fmt == NULL) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_NAME", "name_fmt")); + /* Use as the default value the value of the i18n locale. */ + name->name_fmt = "%p%t%g%t%m%t%f"; + } + else + { + /* We must check whether the format string contains only the + allowed escape sequences. */ + const char *cp = name->name_fmt; + + if (*cp == '\0') + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_NAME", "name_fmt")); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (*++cp == 'R') + /* Romanize-flag. */ + ++cp; + if (strchr ("dfFgGlomMpsSt", *cp) == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid escape sequence in field `%s'"), "LC_NAME", "name_fmt")); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (name->cat == NULL) \ + { \ + if (verbose && ! 
nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_NAME", #cat)); \ + name->cat = ""; \ + } + + TEST_ELEM (name_gen); + TEST_ELEM (name_mr); + TEST_ELEM (name_mrs); + TEST_ELEM (name_miss); + TEST_ELEM (name_ms); +} + + +void +name_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_name_t *name = locale->categories[LC_NAME].name; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_NAME)); + add_locale_string (&file, name->name_fmt); + add_locale_string (&file, name->name_gen); + add_locale_string (&file, name->name_mr); + add_locale_string (&file, name->name_mrs); + add_locale_string (&file, name->name_miss); + add_locale_string (&file, name->name_ms); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_NAME, "LC_NAME", &file); +} + + +/* The parser for the LC_NAME section of the locale definition. */ +void +name_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_name_t *name; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* The rest of the line containing `LC_NAME' must be empty. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_name, + LC_NAME, "LC_NAME", ignore_content); + return; + } + + /* Prepare the data structures. */ + name_startup (ldfile, result, ignore_content); + name = result->categories[LC_NAME].name; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (name->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_NAME", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_NAME", #cat); \ + name->cat = ""; \ + } \ + else if (!ignore_content) \ + name->cat = arg->val.str.startmb; \ + break + + STR_ELEM (name_fmt); + STR_ELEM (name_gen); + STR_ELEM (name_mr); + STR_ELEM (name_mrs); + STR_ELEM (name_miss); + STR_ELEM (name_ms); + + case tok_end: + /* Next we assume `LC_NAME'. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_NAME"); + else if (arg->tok != tok_lc_name) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_NAME"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_name); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_NAME"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. 
*/ + lr_error (ldfile, _("%s: premature end of file"), "LC_NAME"); +} diff --git a/REORG.TODO/locale/programs/ld-numeric.c b/REORG.TODO/locale/programs/ld-numeric.c new file mode 100644 index 0000000000..a81ff04f93 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-numeric.c @@ -0,0 +1,343 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_NUMERIC locale. 
*/ +struct locale_numeric_t +{ + const char *decimal_point; + const char *thousands_sep; + char *grouping; + size_t grouping_len; + uint32_t decimal_point_wc; + uint32_t thousands_sep_wc; +}; + +/* Unless IGNORE_CONTENT is set, allocate the LC_NUMERIC record of LOCALE with xcalloc. If LR is non-NULL, set its translate_strings flag and clear its return_widestr flag. */ +static void +numeric_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + { + locale->categories[LC_NUMERIC].numeric = + (struct locale_numeric_t *) xcalloc (1, + sizeof (struct locale_numeric_t)); + } + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + +/* Finish the LC_NUMERIC category of LOCALE: resolve `copy' directives, create an empty record if there is still none, default decimal_point to "." and report undefined or empty fields. */ +void +numeric_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (numeric == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_NUMERIC] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also be copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_NUMERIC, from->copy_name[LC_NUMERIC], + from->repertoire_name, charmap); + while (from->categories[LC_NUMERIC].numeric == NULL + && from->copy_name[LC_NUMERIC] != NULL); + + numeric = locale->categories[LC_NUMERIC].numeric + = from->categories[LC_NUMERIC].numeric; + } + + /* If there is still no definition issue a warning and create an + empty one. */ + if (numeric == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_NUMERIC")); + numeric_startup (NULL, locale, 0); + numeric = locale->categories[LC_NUMERIC].numeric; + nothing = 1; + } + } + + /* The decimal point must not be empty. This is not said explicitly + in POSIX but ANSI C (ISO/IEC 9899) says in 4.4.2.1 it has to be + != "". */ + if (numeric->decimal_point == NULL) + { + if (! be_quiet && ! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_NUMERIC", "decimal_point")); + numeric->decimal_point = "."; + } + else if (numeric->decimal_point[0] == '\0' && ! be_quiet && ! nothing) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: value for field `%s' must not be an empty string"), + "LC_NUMERIC", "decimal_point")); + } + if (numeric->decimal_point_wc == L'\0') + numeric->decimal_point_wc = L'.'; + + if (numeric->grouping_len == 0 && ! be_quiet && ! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_NUMERIC", "grouping")); +} + +/* Write the LC_NUMERIC data of LOCALE to OUTPUT_PATH; NULL decimal_point and thousands_sep strings are written as "". */ +void +numeric_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_numeric_t *numeric = locale->categories[LC_NUMERIC].numeric; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_NUMERIC)); + add_locale_string (&file, numeric->decimal_point ?: ""); + add_locale_string (&file, numeric->thousands_sep ?: ""); + add_locale_raw_data (&file, numeric->grouping, numeric->grouping_len); + add_locale_uint32 (&file, numeric->decimal_point_wc); + add_locale_uint32 (&file, numeric->thousands_sep_wc); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_NUMERIC, "LC_NUMERIC", &file); +} + + +/* The parser for the LC_NUMERIC section of the locale definition. A leading `copy' directive is delegated to handle_copy; otherwise keyword lines are read until `END LC_NUMERIC' or end of file. If IGNORE_CONTENT is nonzero each keyword line is skipped without storing its value. */ +void +numeric_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_numeric_t *numeric; + struct token *now; + enum token_t nowtok; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_NUMERIC' must be free. 
*/ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_numeric, + LC_NUMERIC, "LC_NUMERIC", ignore_content); + return; + } + + /* Prepare the data structures. */ + numeric_startup (ldfile, result, ignore_content); + numeric = result->categories[LC_NUMERIC].numeric; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + ldfile->return_widestr = 1; \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_string) \ + goto err_label; \ + if (numeric->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_NUMERIC", #cat); \ + else if (!ignore_content && now->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("\ +%s: unknown character in field `%s'"), "LC_NUMERIC", #cat); \ + numeric->cat = ""; \ + numeric->cat##_wc = L'\0'; \ + } \ + else if (now->val.str.startwc != NULL && now->val.str.lenwc > 2) \ + { \ + lr_error (ldfile, _("\ +%s: value for field `%s' must be a single character"), "LC_NUMERIC", #cat); \ + } \ + else if (!ignore_content) \ + { \ + numeric->cat = now->val.str.startmb; \ + \ + if (now->val.str.startwc != NULL) \ + numeric->cat##_wc = *now->val.str.startwc; \ + } \ + ldfile->return_widestr = 0; \ + break + + STR_ELEM (decimal_point); + STR_ELEM (thousands_sep); + + case tok_grouping: + /* Ignore the 
rest of the line if we don't need the input of + this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_minus1 && now->tok != tok_number) + goto err_label; + else + { + size_t act = 0; + size_t max = 10; + char *grouping = xmalloc (max); + + do + { + if (act + 1 >= max) + { + max *= 2; + grouping = xrealloc (grouping, max); + } + + if (act > 0 && grouping[act - 1] == '\177') + { + lr_error (ldfile, _("\ +%s: `-1' must be last entry in `%s' field"), "LC_NUMERIC", "grouping"); + lr_ignore_rest (ldfile, 0); + break; + } + + if (now->tok == tok_minus1) + grouping[act++] = '\177'; + else if (now->val.num == 0) + { + /* A value of 0 disables grouping from here on but + we must not store a NUL character since this + terminates the string. Use something different + which must not be used otherwise. */ + grouping[act++] = '\377'; + } + else if (now->val.num > 126) + lr_error (ldfile, _("\ +%s: values for field `%s' must be smaller than 127"), + "LC_NUMERIC", "grouping"); + else + grouping[act++] = now->val.num; + + /* Next must be semicolon. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_semicolon) + break; + + now = lr_token (ldfile, charmap, result, NULL, verbose); + } + while (now->tok == tok_minus1 || now->tok == tok_number); + + if (now->tok != tok_eol) + goto err_label; + + /* A single -1 means no grouping. */ + if (act == 1 && grouping[0] == '\177') + act--; + grouping[act++] = '\0'; + + numeric->grouping = xrealloc (grouping, act); + numeric->grouping_len = act; + } + break; + + case tok_end: + /* Next we assume `LC_NUMERIC'. 
*/ + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_NUMERIC"); + else if (now->tok != tok_lc_numeric) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_NUMERIC"); + lr_ignore_rest (ldfile, now->tok == tok_lc_numeric); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_NUMERIC"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_NUMERIC"); +} diff --git a/REORG.TODO/locale/programs/ld-paper.c b/REORG.TODO/locale/programs/ld-paper.c new file mode 100644 index 0000000000..df7ce12036 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-paper.c @@ -0,0 +1,231 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_PAPER locale. A value of 0 in either field is treated as "not defined" by the code below. */ +struct locale_paper_t +{ + uint32_t height; + uint32_t width; +}; + +/* Unless IGNORE_CONTENT is set, allocate the LC_PAPER record of LOCALE with xcalloc. If LR is non-NULL, set its translate_strings flag and clear its return_widestr flag. */ +static void +paper_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_PAPER].paper = + (struct locale_paper_t *) xcalloc (1, sizeof (struct locale_paper_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + +/* Finish the LC_PAPER category of LOCALE: resolve `copy' directives, create an empty record if there is still none, and default height to 297 and width to 210 when they are 0. */ +void +paper_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_paper_t *paper = locale->categories[LC_PAPER].paper; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (paper == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_PAPER] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also be copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_PAPER, from->copy_name[LC_PAPER], + from->repertoire_name, charmap); + while (from->categories[LC_PAPER].paper == NULL + && from->copy_name[LC_PAPER] != NULL); + + paper = locale->categories[LC_PAPER].paper + = from->categories[LC_PAPER].paper; + } + + /* If there is still no definition issue a warning and create an + empty one. */ + if (paper == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_PAPER")); + paper_startup (NULL, locale, 0); + paper = locale->categories[LC_PAPER].paper; + nothing = 1; + } + } + + if (paper->height == 0) + { + if (! 
nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_PAPER", "height")); + /* Use as default values the values from the i18n locale. */ + paper->height = 297; + } + + if (paper->width == 0) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_PAPER", "width")); + /* Use as default values the values from the i18n locale. */ + paper->width = 210; + } +} + +/* Write the LC_PAPER data of LOCALE to OUTPUT_PATH: height, width and the code set name of CHARMAP. */ +void +paper_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_paper_t *paper = locale->categories[LC_PAPER].paper; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_PAPER)); + add_locale_uint32 (&file, paper->height); + add_locale_uint32 (&file, paper->width); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_PAPER, "LC_PAPER", &file); +} + + +/* The parser for the LC_PAPER section of the locale definition. A leading `copy' directive is delegated to handle_copy; otherwise keyword lines are read until `END LC_PAPER' or end of file. If IGNORE_CONTENT is nonzero each keyword line is skipped without storing its value. */ +void +paper_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_paper_t *paper; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* The rest of the line containing `LC_PAPER' must be empty. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_paper, + LC_PAPER, "LC_PAPER", ignore_content); + return; + } + + /* Prepare the data structures. */ + paper_startup (ldfile, result, ignore_content); + paper = result->categories[LC_PAPER].paper; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ignore empty lines. 
*/ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define INT_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_number) \ + goto err_label; \ + else if (paper->cat != 0) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_PAPER", #cat); \ + else if (!ignore_content) \ + paper->cat = arg->val.num; \ + break + + INT_ELEM (height); + INT_ELEM (width); + + case tok_end: + /* Next we assume `LC_PAPER'. */ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_PAPER"); + else if (arg->tok != tok_lc_paper) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_PAPER"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_paper); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_PAPER"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_PAPER"); +} diff --git a/REORG.TODO/locale/programs/ld-telephone.c b/REORG.TODO/locale/programs/ld-telephone.c new file mode 100644 index 0000000000..b62280aeec --- /dev/null +++ b/REORG.TODO/locale/programs/ld-telephone.c @@ -0,0 +1,295 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <langinfo.h> +#include <string.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* The real definition of the struct for the LC_TELEPHONE locale. */ +struct locale_telephone_t +{ + const char *tel_int_fmt; + const char *tel_dom_fmt; + const char *int_select; + const char *int_prefix; +}; + + +static void +telephone_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_TELEPHONE].telephone = (struct locale_telephone_t *) + xcalloc (1, sizeof (struct locale_telephone_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 0; + } +} + + +void +telephone_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_telephone_t *telephone = + locale->categories[LC_TELEPHONE].telephone; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (telephone == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_TELEPHONE] != NULL) + { + /* Find the copying locale. 
This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_TELEPHONE, from->copy_name[LC_TELEPHONE], + from->repertoire_name, charmap); + while (from->categories[LC_TELEPHONE].telephone == NULL + && from->copy_name[LC_TELEPHONE] != NULL); + + telephone = locale->categories[LC_TELEPHONE].telephone + = from->categories[LC_TELEPHONE].telephone; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (telephone == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_TELEPHONE")); + telephone_startup (NULL, locale, 0); + telephone = locale->categories[LC_TELEPHONE].telephone; + nothing = 1; + } + } + + if (telephone->tel_int_fmt == NULL) + { + if (! nothing) + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), + "LC_TELEPHONE", "tel_int_fmt")); + /* Use as the default value the value of the i18n locale. */ + telephone->tel_int_fmt = "+%c %a%t%l"; + } + else + { + /* We must check whether the format string contains only the + allowed escape sequences. */ + const char *cp = telephone->tel_int_fmt; + + if (*cp == '\0') + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' must not be empty"), + "LC_TELEPHONE", "tel_int_fmt")); + else + while (*cp != '\0') + { + if (*cp == '%') + { + if (strchr ("aAcCelt", *++cp) == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid escape sequence in field `%s'"), "LC_TELEPHONE", "tel_int_fmt")); + break; + } + } + ++cp; + } + } + + if (telephone->tel_dom_fmt == NULL) + telephone->tel_dom_fmt = ""; + else if (telephone->tel_dom_fmt[0] != '\0') + { + /* We must check whether the format string contains only the + allowed escape sequences. 
*/ + const char *cp = telephone->tel_dom_fmt; + + while (*cp != '\0') + { + if (*cp == '%') + { + if (strchr ("aAcCelt", *++cp) == NULL) + { + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid escape sequence in field `%s'"), "LC_TELEPHONE", "tel_dom_fmt")); + break; + } + } + ++cp; + } + } + +#define TEST_ELEM(cat) \ + if (telephone->cat == NULL) \ + { \ + if (verbose && ! nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_TELEPHONE", #cat)); \ + telephone->cat = ""; \ + } + + TEST_ELEM (int_select); + TEST_ELEM (int_prefix); +} + + +void +telephone_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_telephone_t *telephone = + locale->categories[LC_TELEPHONE].telephone; + struct locale_file file; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_TELEPHONE)); + add_locale_string (&file, telephone->tel_int_fmt); + add_locale_string (&file, telephone->tel_dom_fmt); + add_locale_string (&file, telephone->int_select); + add_locale_string (&file, telephone->int_prefix); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_TELEPHONE, "LC_TELEPHONE", &file); +} + + +/* The parser for the LC_TELEPHONE section of the locale definition. */ +void +telephone_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct locale_telephone_t *telephone; + struct token *now; + struct token *arg; + enum token_t nowtok; + + /* The rest of the line containing `LC_TELEPHONE' must be free. */ + lr_ignore_rest (ldfile, 1); + + do + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. 
*/ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_telephone, + LC_TELEPHONE, "LC_TELEPHONE", ignore_content); + return; + } + + /* Prepare the data structures. */ + telephone_startup (ldfile, result, ignore_content); + telephone = result->categories[LC_TELEPHONE].telephone; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + arg = lr_token (ldfile, charmap, result, NULL, verbose); \ + if (arg->tok != tok_string) \ + goto err_label; \ + if (telephone->cat != NULL) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_TELEPHONE", #cat); \ + else if (!ignore_content && arg->val.str.startmb == NULL) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TELEPHONE", #cat); \ + telephone->cat = ""; \ + } \ + else if (!ignore_content) \ + telephone->cat = arg->val.str.startmb; \ + break + + STR_ELEM (tel_int_fmt); + STR_ELEM (tel_dom_fmt); + STR_ELEM (int_select); + STR_ELEM (int_prefix); + + case tok_end: + /* Next we assume `LC_TELEPHONE'. 
*/ + arg = lr_token (ldfile, charmap, result, NULL, verbose); + if (arg->tok == tok_eof) + break; + if (arg->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_TELEPHONE"); + else if (arg->tok != tok_lc_telephone) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_TELEPHONE"); + lr_ignore_rest (ldfile, arg->tok == tok_lc_telephone); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_TELEPHONE"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_TELEPHONE"); +} diff --git a/REORG.TODO/locale/programs/ld-time.c b/REORG.TODO/locale/programs/ld-time.c new file mode 100644 index 0000000000..32e9c41e35 --- /dev/null +++ b/REORG.TODO/locale/programs/ld-time.c @@ -0,0 +1,964 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <byteswap.h> +#include <langinfo.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <stdint.h> +#include <sys/uio.h> + +#include <assert.h> + +#include "localedef.h" +#include "linereader.h" +#include "localeinfo.h" +#include "locfile.h" + + +/* Entry describing an entry of the era specification. */ +struct era_data +{ + int32_t direction; + int32_t offset; + int32_t start_date[3]; + int32_t stop_date[3]; + const char *name; + const char *format; + uint32_t *wname; + uint32_t *wformat; +}; + + +/* The real definition of the struct for the LC_TIME locale. */ +struct locale_time_t +{ + const char *abday[7]; + const uint32_t *wabday[7]; + int abday_defined; + const char *day[7]; + const uint32_t *wday[7]; + int day_defined; + const char *abmon[12]; + const uint32_t *wabmon[12]; + int abmon_defined; + const char *mon[12]; + const uint32_t *wmon[12]; + int mon_defined; + const char *am_pm[2]; + const uint32_t *wam_pm[2]; + int am_pm_defined; + const char *d_t_fmt; + const uint32_t *wd_t_fmt; + const char *d_fmt; + const uint32_t *wd_fmt; + const char *t_fmt; + const uint32_t *wt_fmt; + const char *t_fmt_ampm; + const uint32_t *wt_fmt_ampm; + const char **era; + const uint32_t **wera; + uint32_t num_era; + const char *era_year; + const uint32_t *wera_year; + const char *era_d_t_fmt; + const uint32_t *wera_d_t_fmt; + const char *era_t_fmt; + const uint32_t *wera_t_fmt; + const char *era_d_fmt; + const uint32_t *wera_d_fmt; + const char *alt_digits[100]; + const uint32_t *walt_digits[100]; + const char *date_fmt; + const uint32_t *wdate_fmt; + int alt_digits_defined; + unsigned char week_ndays; + uint32_t week_1stday; + unsigned char week_1stweek; + unsigned char first_weekday; + unsigned char first_workday; + unsigned char cal_direction; + const char *timezone; + const uint32_t *wtimezone; + + struct era_data *era_entries; +}; + + +/* This constant is used to represent an empty 
wide character string. */ +static const uint32_t empty_wstr[1] = { 0 }; + + +static void +time_startup (struct linereader *lr, struct localedef_t *locale, + int ignore_content) +{ + if (!ignore_content) + locale->categories[LC_TIME].time = + (struct locale_time_t *) xcalloc (1, sizeof (struct locale_time_t)); + + if (lr != NULL) + { + lr->translate_strings = 1; + lr->return_widestr = 1; + } +} + + +void +time_finish (struct localedef_t *locale, const struct charmap_t *charmap) +{ + struct locale_time_t *time = locale->categories[LC_TIME].time; + int nothing = 0; + + /* Now resolve copying and also handle completely missing definitions. */ + if (time == NULL) + { + /* First see whether we were supposed to copy. If yes, find the + actual definition. */ + if (locale->copy_name[LC_TIME] != NULL) + { + /* Find the copying locale. This has to happen transitively since + the locale we are copying from might also copying another one. */ + struct localedef_t *from = locale; + + do + from = find_locale (LC_TIME, from->copy_name[LC_TIME], + from->repertoire_name, charmap); + while (from->categories[LC_TIME].time == NULL + && from->copy_name[LC_TIME] != NULL); + + time = locale->categories[LC_TIME].time + = from->categories[LC_TIME].time; + } + + /* If there is still no definition issue an warning and create an + empty one. */ + if (time == NULL) + { + if (! be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +No definition for %s category found"), "LC_TIME")); + time_startup (NULL, locale, 0); + time = locale->categories[LC_TIME].time; + nothing = 1; + } + } + +#define noparen(arg1, argn...) arg1, ##argn +#define TESTARR_ELEM(cat, val) \ + if (!time->cat##_defined) \ + { \ + const char *initval[] = { noparen val }; \ + unsigned int i; \ + \ + if (! be_quiet && ! 
nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_TIME", #cat)); \ + \ + for (i = 0; i < sizeof (initval) / sizeof (initval[0]); ++i) \ + time->cat[i] = initval[i]; \ + } + + TESTARR_ELEM (abday, ( "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" )); + TESTARR_ELEM (day, ( "Sunday", "Monday", "Tuesday", "Wednesday", + "Thursday", "Friday", "Saturday" )); + TESTARR_ELEM (abmon, ( "Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" )); + TESTARR_ELEM (mon, ( "January", "February", "March", "April", + "May", "June", "July", "August", + "September", "October", "November", "December" )); + TESTARR_ELEM (am_pm, ( "AM", "PM" )); + +#define TEST_ELEM(cat, initval) \ + if (time->cat == NULL) \ + { \ + if (! be_quiet && ! nothing) \ + WITH_CUR_LOCALE (error (0, 0, _("%s: field `%s' not defined"), \ + "LC_TIME", #cat)); \ + \ + time->cat = initval; \ + } + + TEST_ELEM (d_t_fmt, "%a %b %e %H:%M:%S %Y"); + TEST_ELEM (d_fmt, "%m/%d/%y"); + TEST_ELEM (t_fmt, "%H:%M:%S"); + + /* According to C.Y.Alexis Cheng <alexis@vnet.ibm.com> the T_FMT_AMPM + field is optional. */ + if (time->t_fmt_ampm == NULL) + { + if (time->am_pm[0][0] == '\0' && time->am_pm[1][0] == '\0') + { + /* No AM/PM strings defined, use the 24h format as default. */ + time->t_fmt_ampm = time->t_fmt; + time->wt_fmt_ampm = time->wt_fmt; + } + else + { + time->t_fmt_ampm = "%I:%M:%S %p"; + time->wt_fmt_ampm = (const uint32_t *) L"%I:%M:%S %p"; + } + } + + /* Now process the era entries. 
*/ + if (time->num_era != 0) + { + const int days_per_month[12] = { 31, 29, 31, 30, 31, 30, + 31, 31, 30, 31 ,30, 31 }; + size_t idx; + wchar_t *wstr; + + time->era_entries = + (struct era_data *) xmalloc (time->num_era + * sizeof (struct era_data)); + + for (idx = 0; idx < time->num_era; ++idx) + { + size_t era_len = strlen (time->era[idx]); + char *str = xmalloc ((era_len + 1 + 3) & ~3); + char *endp; + + memcpy (str, time->era[idx], era_len + 1); + + /* First character must be + or - for the direction. */ + if (*str != '+' && *str != '-') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: direction flag in string %Zd in `era' field is not '+' nor '-'"), + "LC_TIME", idx + 1)); + /* Default arbitrarily to '+'. */ + time->era_entries[idx].direction = '+'; + } + else + time->era_entries[idx].direction = *str; + if (*++str != ':') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: direction flag in string %Zd in `era' field is not a single character"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else + ++str; + + /* Now the offset year. */ + time->era_entries[idx].offset = strtol (str, &endp, 10); + if (endp == str) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid number for offset in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else if (*endp != ':') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: garbage at end of offset value in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else + str = endp + 1; + + /* Next is the starting date in ISO format. 
*/ + if (strncmp (str, "-*", 2) == 0) + { + time->era_entries[idx].start_date[0] = + time->era_entries[idx].start_date[1] = + time->era_entries[idx].start_date[2] = 0x80000000; + if (str[2] != ':') + goto garbage_start_date; + str += 3; + } + else if (strncmp (str, "+*", 2) == 0) + { + time->era_entries[idx].start_date[0] = + time->era_entries[idx].start_date[1] = + time->era_entries[idx].start_date[2] = 0x7fffffff; + if (str[2] != ':') + goto garbage_start_date; + str += 3; + } + else + { + time->era_entries[idx].start_date[0] = strtol (str, &endp, 10); + if (endp == str || *endp != '/') + goto invalid_start_date; + else + str = endp + 1; + time->era_entries[idx].start_date[0] -= 1900; + /* year -1 represent 1 B.C. (not -1 A.D.) */ + if (time->era_entries[idx].start_date[0] < -1900) + ++time->era_entries[idx].start_date[0]; + + time->era_entries[idx].start_date[1] = strtol (str, &endp, 10); + if (endp == str || *endp != '/') + goto invalid_start_date; + else + str = endp + 1; + time->era_entries[idx].start_date[1] -= 1; + + time->era_entries[idx].start_date[2] = strtol (str, &endp, 10); + if (endp == str) + { + invalid_start_date: + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid starting date in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else if (*endp != ':') + { + garbage_start_date: + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: garbage at end of starting date in string %Zd in `era' field "), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else + { + str = endp + 1; + + /* Check for valid value. 
*/ + if ((time->era_entries[idx].start_date[1] < 0 + || time->era_entries[idx].start_date[1] >= 12 + || time->era_entries[idx].start_date[2] < 0 + || (time->era_entries[idx].start_date[2] + > days_per_month[time->era_entries[idx].start_date[1]]) + || (time->era_entries[idx].start_date[1] == 2 + && time->era_entries[idx].start_date[2] == 29 + && !__isleap (time->era_entries[idx].start_date[0]))) + && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: starting date is invalid in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + } + } + + /* Next is the stopping date in ISO format. */ + if (strncmp (str, "-*", 2) == 0) + { + time->era_entries[idx].stop_date[0] = + time->era_entries[idx].stop_date[1] = + time->era_entries[idx].stop_date[2] = 0x80000000; + if (str[2] != ':') + goto garbage_stop_date; + str += 3; + } + else if (strncmp (str, "+*", 2) == 0) + { + time->era_entries[idx].stop_date[0] = + time->era_entries[idx].stop_date[1] = + time->era_entries[idx].stop_date[2] = 0x7fffffff; + if (str[2] != ':') + goto garbage_stop_date; + str += 3; + } + else + { + time->era_entries[idx].stop_date[0] = strtol (str, &endp, 10); + if (endp == str || *endp != '/') + goto invalid_stop_date; + else + str = endp + 1; + time->era_entries[idx].stop_date[0] -= 1900; + /* year -1 represent 1 B.C. (not -1 A.D.) 
*/ + if (time->era_entries[idx].stop_date[0] < -1900) + ++time->era_entries[idx].stop_date[0]; + + time->era_entries[idx].stop_date[1] = strtol (str, &endp, 10); + if (endp == str || *endp != '/') + goto invalid_stop_date; + else + str = endp + 1; + time->era_entries[idx].stop_date[1] -= 1; + + time->era_entries[idx].stop_date[2] = strtol (str, &endp, 10); + if (endp == str) + { + invalid_stop_date: + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid stopping date in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else if (*endp != ':') + { + garbage_stop_date: + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: garbage at end of stopping date in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + (void) strsep (&str, ":"); + } + else + { + str = endp + 1; + + /* Check for valid value. */ + if ((time->era_entries[idx].stop_date[1] < 0 + || time->era_entries[idx].stop_date[1] >= 12 + || time->era_entries[idx].stop_date[2] < 0 + || (time->era_entries[idx].stop_date[2] + > days_per_month[time->era_entries[idx].stop_date[1]]) + || (time->era_entries[idx].stop_date[1] == 2 + && time->era_entries[idx].stop_date[2] == 29 + && !__isleap (time->era_entries[idx].stop_date[0]))) + && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: invalid stopping date in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + } + } + + if (str == NULL || *str == '\0') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing era name in string %Zd in `era' field"), "LC_TIME", idx + 1)); + time->era_entries[idx].name = + time->era_entries[idx].format = ""; + } + else + { + time->era_entries[idx].name = strsep (&str, ":"); + + if (str == NULL || *str == '\0') + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: missing era format in string %Zd in `era' field"), + "LC_TIME", idx + 1)); + time->era_entries[idx].name = + time->era_entries[idx].format = ""; + } + else + time->era_entries[idx].format = str; 
+ } + + /* Now generate the wide character name and format. */ + wstr = wcschr ((wchar_t *) time->wera[idx], L':');/* end direction */ + wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end offset */ + wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end start */ + wstr = wstr ? wcschr (wstr + 1, L':') : NULL; /* end end */ + if (wstr != NULL) + { + time->era_entries[idx].wname = (uint32_t *) wstr + 1; + wstr = wcschr (wstr + 1, L':'); /* end name */ + if (wstr != NULL) + { + *wstr = L'\0'; + time->era_entries[idx].wformat = (uint32_t *) wstr + 1; + } + else + time->era_entries[idx].wname = + time->era_entries[idx].wformat = (uint32_t *) L""; + } + else + time->era_entries[idx].wname = + time->era_entries[idx].wformat = (uint32_t *) L""; + } + } + + /* Set up defaults based on ISO 30112 WD10 [2014]. */ + if (time->week_ndays == 0) + time->week_ndays = 7; + + if (time->week_1stday == 0) + time->week_1stday = 19971130; + + if (time->week_1stweek == 0) + time->week_1stweek = 7; + + if (time->week_1stweek > time->week_ndays) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: third operand for value of field `%s' must not be larger than %d"), + "LC_TIME", "week", 7)); + + if (time->first_weekday == '\0') + /* The definition does not specify this so the default is used. */ + time->first_weekday = 1; + else if (time->first_weekday > time->week_ndays) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: values for field `%s' must not be larger than %d"), + "LC_TIME", "first_weekday", 7)); + + if (time->first_workday == '\0') + /* The definition does not specify this so the default is used. */ + time->first_workday = 2; + else if (time->first_workday > time->week_ndays) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: values for field `%s' must not be larger than %d"), + "LC_TIME", "first_workday", 7)); + + if (time->cal_direction == '\0') + /* The definition does not specify this so the default is used. 
*/ + time->cal_direction = 1; + else if (time->cal_direction > 3) + WITH_CUR_LOCALE (error (0, 0, _("\ +%s: values for field `%s' must not be larger than %d"), + "LC_TIME", "cal_direction", 3)); + + /* XXX We don't perform any tests on the timezone value since this is + simply useless, stupid $&$!@... */ + if (time->timezone == NULL) + time->timezone = ""; + + if (time->date_fmt == NULL) + time->date_fmt = "%a %b %e %H:%M:%S %Z %Y"; + if (time->wdate_fmt == NULL) + time->wdate_fmt = (const uint32_t *) L"%a %b %e %H:%M:%S %Z %Y"; +} + + +void +time_output (struct localedef_t *locale, const struct charmap_t *charmap, + const char *output_path) +{ + struct locale_time_t *time = locale->categories[LC_TIME].time; + struct locale_file file; + size_t num, n; + + init_locale_data (&file, _NL_ITEM_INDEX (_NL_NUM_LC_TIME)); + + /* The ab'days. */ + for (n = 0; n < 7; ++n) + add_locale_string (&file, time->abday[n] ?: ""); + + /* The days. */ + for (n = 0; n < 7; ++n) + add_locale_string (&file, time->day[n] ?: ""); + + /* The ab'mons. */ + for (n = 0; n < 12; ++n) + add_locale_string (&file, time->abmon[n] ?: ""); + + /* The mons. */ + for (n = 0; n < 12; ++n) + add_locale_string (&file, time->mon[n] ?: ""); + + /* AM/PM. 
*/ + for (n = 0; n < 2; ++n) + add_locale_string (&file, time->am_pm[n]); + + add_locale_string (&file, time->d_t_fmt ?: ""); + add_locale_string (&file, time->d_fmt ?: ""); + add_locale_string (&file, time->t_fmt ?: ""); + add_locale_string (&file, time->t_fmt_ampm ?: ""); + + start_locale_structure (&file); + for (num = 0; num < time->num_era; ++num) + add_locale_string (&file, time->era[num]); + end_locale_structure (&file); + + add_locale_string (&file, time->era_year ?: ""); + add_locale_string (&file, time->era_d_fmt ?: ""); + + start_locale_structure (&file); + for (num = 0; num < 100; ++num) + add_locale_string (&file, time->alt_digits[num] ?: ""); + end_locale_structure (&file); + + add_locale_string (&file, time->era_d_t_fmt ?: ""); + add_locale_string (&file, time->era_t_fmt ?: ""); + add_locale_uint32 (&file, time->num_era); + + start_locale_structure (&file); + for (num = 0; num < time->num_era; ++num) + { + add_locale_uint32 (&file, time->era_entries[num].direction); + add_locale_uint32 (&file, time->era_entries[num].offset); + add_locale_uint32 (&file, time->era_entries[num].start_date[0]); + add_locale_uint32 (&file, time->era_entries[num].start_date[1]); + add_locale_uint32 (&file, time->era_entries[num].start_date[2]); + add_locale_uint32 (&file, time->era_entries[num].stop_date[0]); + add_locale_uint32 (&file, time->era_entries[num].stop_date[1]); + add_locale_uint32 (&file, time->era_entries[num].stop_date[2]); + add_locale_string (&file, time->era_entries[num].name); + add_locale_string (&file, time->era_entries[num].format); + add_locale_wstring (&file, time->era_entries[num].wname); + add_locale_wstring (&file, time->era_entries[num].wformat); + } + end_locale_structure (&file); + + /* The wide character ab'days. */ + for (n = 0; n < 7; ++n) + add_locale_wstring (&file, time->wabday[n] ?: empty_wstr); + + /* The wide character days. 
*/ + for (n = 0; n < 7; ++n) + add_locale_wstring (&file, time->wday[n] ?: empty_wstr); + + /* The wide character ab'mons. */ + for (n = 0; n < 12; ++n) + add_locale_wstring (&file, time->wabmon[n] ?: empty_wstr); + + /* The wide character mons. */ + for (n = 0; n < 12; ++n) + add_locale_wstring (&file, time->wmon[n] ?: empty_wstr); + + /* Wide character AM/PM. */ + for (n = 0; n < 2; ++n) + add_locale_wstring (&file, time->wam_pm[n] ?: empty_wstr); + + add_locale_wstring (&file, time->wd_t_fmt ?: empty_wstr); + add_locale_wstring (&file, time->wd_fmt ?: empty_wstr); + add_locale_wstring (&file, time->wt_fmt ?: empty_wstr); + add_locale_wstring (&file, time->wt_fmt_ampm ?: empty_wstr); + add_locale_wstring (&file, time->wera_year ?: empty_wstr); + add_locale_wstring (&file, time->wera_d_fmt ?: empty_wstr); + + start_locale_structure (&file); + for (num = 0; num < 100; ++num) + add_locale_wstring (&file, time->walt_digits[num] ?: empty_wstr); + end_locale_structure (&file); + + add_locale_wstring (&file, time->wera_d_t_fmt ?: empty_wstr); + add_locale_wstring (&file, time->wera_t_fmt ?: empty_wstr); + add_locale_char (&file, time->week_ndays); + add_locale_uint32 (&file, time->week_1stday); + add_locale_char (&file, time->week_1stweek); + add_locale_char (&file, time->first_weekday); + add_locale_char (&file, time->first_workday); + add_locale_char (&file, time->cal_direction); + add_locale_string (&file, time->timezone); + add_locale_string (&file, time->date_fmt); + add_locale_wstring (&file, time->wdate_fmt); + add_locale_string (&file, charmap->code_set_name); + write_locale_data (output_path, LC_TIME, "LC_TIME", &file); +} + + +/* The parser for the LC_TIME section of the locale definition. 
*/ +void +time_read (struct linereader *ldfile, struct localedef_t *result, + const struct charmap_t *charmap, const char *repertoire_name, + int ignore_content) +{ + struct repertoire_t *repertoire = NULL; + struct locale_time_t *time; + struct token *now; + enum token_t nowtok; + size_t cnt; + + /* Get the repertoire we have to use. */ + if (repertoire_name != NULL) + repertoire = repertoire_read (repertoire_name); + + /* The rest of the line containing `LC_TIME' must be free. */ + lr_ignore_rest (ldfile, 1); + + + do + { + now = lr_token (ldfile, charmap, result, repertoire, verbose); + nowtok = now->tok; + } + while (nowtok == tok_eol); + + /* If we see `copy' now we are almost done. */ + if (nowtok == tok_copy) + { + handle_copy (ldfile, charmap, repertoire_name, result, tok_lc_time, + LC_TIME, "LC_TIME", ignore_content); + return; + } + + /* Prepare the data structures. */ + time_startup (ldfile, result, ignore_content); + time = result->categories[LC_TIME].time; + + while (1) + { + /* Of course we don't proceed beyond the end of file. */ + if (nowtok == tok_eof) + break; + + /* Ingore empty lines. */ + if (nowtok == tok_eol) + { + now = lr_token (ldfile, charmap, result, repertoire, verbose); + nowtok = now->tok; + continue; + } + + switch (nowtok) + { +#define STRARR_ELEM(cat, min, max) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + for (cnt = 0; cnt < max; ++cnt) \ + { \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok == tok_eol) \ + { \ + if (cnt < min) \ + lr_error (ldfile, _("%s: too few values for field `%s'"), \ + "LC_TIME", #cat); \ + if (!ignore_content) \ + do \ + { \ + time->cat[cnt] = ""; \ + time->w##cat[cnt] = empty_wstr; \ + } \ + while (++cnt < max); \ + break; \ + } \ + else if (now->tok != tok_string) \ + goto err_label; \ + else if (!ignore_content && (now->val.str.startmb == NULL \ + || now->val.str.startwc == NULL)) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TIME", #cat); \ + time->cat[cnt] = ""; \ + time->w##cat[cnt] = empty_wstr; \ + } \ + else if (!ignore_content) \ + { \ + time->cat[cnt] = now->val.str.startmb; \ + time->w##cat[cnt] = now->val.str.startwc; \ + } \ + \ + /* Match the semicolon. */ \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_semicolon && now->tok != tok_eol) \ + break; \ + } \ + if (now->tok != tok_eol) \ + { \ + while (!ignore_content && cnt < min) \ + { \ + time->cat[cnt] = ""; \ + time->w##cat[cnt++] = empty_wstr; \ + } \ + \ + if (now->tok == tok_semicolon) \ + { \ + now = lr_token (ldfile, charmap, result, repertoire, \ + verbose); \ + if (now->tok == tok_eol) \ + lr_error (ldfile, _("extra trailing semicolon")); \ + else if (now->tok == tok_string) \ + { \ + lr_error (ldfile, _("\ +%s: too many values for field `%s'"), \ + "LC_TIME", #cat); \ + lr_ignore_rest (ldfile, 0); \ + } \ + else \ + goto err_label; \ + } \ + else \ + goto err_label; \ + } \ + time->cat##_defined = 1; \ + break + + STRARR_ELEM (abday, 7, 7); + STRARR_ELEM (day, 7, 7); + STRARR_ELEM (abmon, 12, 12); + STRARR_ELEM (mon, 12, 12); + STRARR_ELEM (am_pm, 2, 2); + STRARR_ELEM (alt_digits, 0, 100); + + case tok_era: + /* Ignore the rest of the line if we don't need the input of + 
this line. */ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + do + { + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_string) + goto err_label; + if (!ignore_content && (now->val.str.startmb == NULL + || now->val.str.startwc == NULL)) + { + lr_error (ldfile, _("%s: unknown character in field `%s'"), + "LC_TIME", "era"); + lr_ignore_rest (ldfile, 0); + break; + } + if (!ignore_content) + { + time->era = xrealloc (time->era, + (time->num_era + 1) * sizeof (char *)); + time->era[time->num_era] = now->val.str.startmb; + + time->wera = xrealloc (time->wera, + (time->num_era + 1) + * sizeof (char *)); + time->wera[time->num_era++] = now->val.str.startwc; + } + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_eol && now->tok != tok_semicolon) + goto err_label; + } + while (now->tok == tok_semicolon); + break; + +#define STR_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. 
*/ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_string) \ + goto err_label; \ + else if (time->cat != NULL) \ + lr_error (ldfile, _("\ +%s: field `%s' declared more than once"), "LC_TIME", #cat); \ + else if (!ignore_content && (now->val.str.startmb == NULL \ + || now->val.str.startwc == NULL)) \ + { \ + lr_error (ldfile, _("%s: unknown character in field `%s'"), \ + "LC_TIME", #cat); \ + time->cat = ""; \ + time->w##cat = empty_wstr; \ + } \ + else if (!ignore_content) \ + { \ + time->cat = now->val.str.startmb; \ + time->w##cat = now->val.str.startwc; \ + } \ + break + + STR_ELEM (d_t_fmt); + STR_ELEM (d_fmt); + STR_ELEM (t_fmt); + STR_ELEM (t_fmt_ampm); + STR_ELEM (era_year); + STR_ELEM (era_d_t_fmt); + STR_ELEM (era_d_fmt); + STR_ELEM (era_t_fmt); + STR_ELEM (timezone); + STR_ELEM (date_fmt); + +#define INT_ELEM(cat) \ + case tok_##cat: \ + /* Ignore the rest of the line if we don't need the input of \ + this line. */ \ + if (ignore_content) \ + { \ + lr_ignore_rest (ldfile, 0); \ + break; \ + } \ + \ + now = lr_token (ldfile, charmap, result, repertoire, verbose); \ + if (now->tok != tok_number) \ + goto err_label; \ + else if (time->cat != 0) \ + lr_error (ldfile, _("%s: field `%s' declared more than once"), \ + "LC_TIME", #cat); \ + else if (!ignore_content) \ + time->cat = now->val.num; \ + break + + INT_ELEM (first_weekday); + INT_ELEM (first_workday); + INT_ELEM (cal_direction); + + case tok_week: + /* Ignore the rest of the line if we don't need the input of + this line. 
*/ + if (ignore_content) + { + lr_ignore_rest (ldfile, 0); + break; + } + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_number) + goto err_label; + time->week_ndays = now->val.num; + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_semicolon) + goto err_label; + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_number) + goto err_label; + time->week_1stday = now->val.num; + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_semicolon) + goto err_label; + + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok != tok_number) + goto err_label; + time->week_1stweek = now->val.num; + + lr_ignore_rest (ldfile, 1); + break; + + case tok_end: + /* Next we assume `LC_TIME'. */ + now = lr_token (ldfile, charmap, result, repertoire, verbose); + if (now->tok == tok_eof) + break; + if (now->tok == tok_eol) + lr_error (ldfile, _("%s: incomplete `END' line"), "LC_TIME"); + else if (now->tok != tok_lc_time) + lr_error (ldfile, _("\ +%1$s: definition does not end with `END %1$s'"), "LC_TIME"); + lr_ignore_rest (ldfile, now->tok == tok_lc_time); + return; + + default: + err_label: + SYNTAX_ERROR (_("%s: syntax error"), "LC_TIME"); + } + + /* Prepare for the next round. */ + now = lr_token (ldfile, charmap, result, repertoire, verbose); + nowtok = now->tok; + } + + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), "LC_TIME"); +} diff --git a/REORG.TODO/locale/programs/linereader.c b/REORG.TODO/locale/programs/linereader.c new file mode 100644 index 0000000000..52b340963a --- /dev/null +++ b/REORG.TODO/locale/programs/linereader.c @@ -0,0 +1,886 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <libintl.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +#include "localedef.h" +#include "charmap.h" +#include "error.h" +#include "linereader.h" +#include "locfile.h" + +/* Prototypes for local functions. 
*/ +static struct token *get_toplvl_escape (struct linereader *lr); +static struct token *get_symname (struct linereader *lr); +static struct token *get_ident (struct linereader *lr); +static struct token *get_string (struct linereader *lr, + const struct charmap_t *charmap, + struct localedef_t *locale, + const struct repertoire_t *repertoire, + int verbose); + + +struct linereader * +lr_open (const char *fname, kw_hash_fct_t hf) +{ + FILE *fp; + + if (fname == NULL || strcmp (fname, "-") == 0 + || strcmp (fname, "/dev/stdin") == 0) + return lr_create (stdin, "<stdin>", hf); + else + { + fp = fopen (fname, "rm"); + if (fp == NULL) + return NULL; + return lr_create (fp, fname, hf); + } +} + +struct linereader * +lr_create (FILE *fp, const char *fname, kw_hash_fct_t hf) +{ + struct linereader *result; + int n; + + result = (struct linereader *) xmalloc (sizeof (*result)); + + result->fp = fp; + result->fname = xstrdup (fname); + result->buf = NULL; + result->bufsize = 0; + result->lineno = 1; + result->idx = 0; + result->comment_char = '#'; + result->escape_char = '\\'; + result->translate_strings = 1; + result->return_widestr = 0; + + n = getdelim (&result->buf, &result->bufsize, '\n', result->fp); + if (n < 0) + { + int save = errno; + fclose (result->fp); + free ((char *) result->fname); + free (result); + errno = save; + return NULL; + } + + if (n > 1 && result->buf[n - 2] == '\\' && result->buf[n - 1] == '\n') + n -= 2; + + result->buf[n] = '\0'; + result->bufact = n; + result->hash_fct = hf; + + return result; +} + + +/* Return nonzero if LR holds no buffered input, i.e. bufact is zero. + This is a pure test: it compares and must never modify LR. */ +int +lr_eof (struct linereader *lr) +{ + return lr->bufact == 0; +} + + +void +lr_ignore_rest (struct linereader *lr, int verbose) +{ + if (verbose) + { + while (isspace (lr->buf[lr->idx]) && lr->buf[lr->idx] != '\n' + && lr->buf[lr->idx] != lr->comment_char) + if (lr->buf[lr->idx] == '\0') + { + if (lr_next (lr) < 0) + return; + } + else + ++lr->idx; + + if (lr->buf[lr->idx] != '\n' && ! 
feof (lr->fp) + && lr->buf[lr->idx] != lr->comment_char) + lr_error (lr, _("trailing garbage at end of line")); + } + + /* Ignore continued line. */ + while (lr->bufact > 0 && lr->buf[lr->bufact - 1] != '\n') + if (lr_next (lr) < 0) + break; + + lr->idx = lr->bufact; +} + + +void +lr_close (struct linereader *lr) +{ + fclose (lr->fp); + free (lr->buf); + free (lr); +} + + +int +lr_next (struct linereader *lr) +{ + int n; + + n = getdelim (&lr->buf, &lr->bufsize, '\n', lr->fp); + if (n < 0) + return -1; + + ++lr->lineno; + + if (n > 1 && lr->buf[n - 2] == lr->escape_char && lr->buf[n - 1] == '\n') + { +#if 0 + /* XXX Is this correct? */ + /* An escaped newline character is substituted with a single <SP>. */ + --n; + lr->buf[n - 1] = ' '; +#else + n -= 2; +#endif + } + + lr->buf[n] = '\0'; + lr->bufact = n; + lr->idx = 0; + + return 0; +} + + +/* Defined in error.c. */ +/* This variable is incremented each time `error' is called. */ +extern unsigned int error_message_count; + +/* The calling program should define program_name and set it to the + name of the executing program. */ +extern char *program_name; + + +struct token * +lr_token (struct linereader *lr, const struct charmap_t *charmap, + struct localedef_t *locale, const struct repertoire_t *repertoire, + int verbose) +{ + int ch; + + while (1) + { + do + { + ch = lr_getc (lr); + + if (ch == EOF) + { + lr->token.tok = tok_eof; + return &lr->token; + }; + + if (ch == '\n') + { + lr->token.tok = tok_eol; + return &lr->token; + } + } + while (isspace (ch)); + + if (ch != lr->comment_char) + break; + + /* Is there an newline at the end of the buffer? */ + if (lr->buf[lr->bufact - 1] != '\n') + { + /* No. Some people want this to mean that only the line in + the file not the logical, concatenated line is ignored. + Let's try this. */ + lr->idx = lr->bufact; + continue; + } + + /* Ignore rest of line. */ + lr_ignore_rest (lr, 0); + lr->token.tok = tok_eol; + return &lr->token; + } + + /* Match escape sequences. 
*/ + if (ch == lr->escape_char) + return get_toplvl_escape (lr); + + /* Match ellipsis. */ + if (ch == '.') + { + if (strncmp (&lr->buf[lr->idx], "...(2)....", 10) == 0) + { + int cnt; + for (cnt = 0; cnt < 10; ++cnt) + lr_getc (lr); + lr->token.tok = tok_ellipsis4_2; + return &lr->token; + } + if (strncmp (&lr->buf[lr->idx], "...", 3) == 0) + { + lr_getc (lr); + lr_getc (lr); + lr_getc (lr); + lr->token.tok = tok_ellipsis4; + return &lr->token; + } + if (strncmp (&lr->buf[lr->idx], "..", 2) == 0) + { + lr_getc (lr); + lr_getc (lr); + lr->token.tok = tok_ellipsis3; + return &lr->token; + } + if (strncmp (&lr->buf[lr->idx], ".(2)..", 6) == 0) + { + int cnt; + for (cnt = 0; cnt < 6; ++cnt) + lr_getc (lr); + lr->token.tok = tok_ellipsis2_2; + return &lr->token; + } + if (lr->buf[lr->idx] == '.') + { + lr_getc (lr); + lr->token.tok = tok_ellipsis2; + return &lr->token; + } + } + + switch (ch) + { + case '<': + return get_symname (lr); + + case '0' ... '9': + lr->token.tok = tok_number; + lr->token.val.num = ch - '0'; + + while (isdigit (ch = lr_getc (lr))) + { + lr->token.val.num *= 10; + lr->token.val.num += ch - '0'; + } + if (isalpha (ch)) + lr_error (lr, _("garbage at end of number")); + lr_ungetn (lr, 1); + + return &lr->token; + + case ';': + lr->token.tok = tok_semicolon; + return &lr->token; + + case ',': + lr->token.tok = tok_comma; + return &lr->token; + + case '(': + lr->token.tok = tok_open_brace; + return &lr->token; + + case ')': + lr->token.tok = tok_close_brace; + return &lr->token; + + case '"': + return get_string (lr, charmap, locale, repertoire, verbose); + + case '-': + ch = lr_getc (lr); + if (ch == '1') + { + lr->token.tok = tok_minus1; + return &lr->token; + } + lr_ungetn (lr, 2); + break; + } + + return get_ident (lr); +} + + +static struct token * +get_toplvl_escape (struct linereader *lr) +{ + /* This is supposed to be a numeric value. We return the + numerical value and the number of bytes. 
*/ + size_t start_idx = lr->idx - 1; + unsigned char *bytes = lr->token.val.charcode.bytes; + size_t nbytes = 0; + int ch; + + do + { + unsigned int byte = 0; + unsigned int base = 8; + + ch = lr_getc (lr); + + if (ch == 'd') + { + base = 10; + ch = lr_getc (lr); + } + else if (ch == 'x') + { + base = 16; + ch = lr_getc (lr); + } + + if ((base == 16 && !isxdigit (ch)) + || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) + { + esc_error: + lr->token.val.str.startmb = &lr->buf[start_idx]; + + while (ch != EOF && !isspace (ch)) + ch = lr_getc (lr); + lr->token.val.str.lenmb = lr->idx - start_idx; + + lr->token.tok = tok_error; + return &lr->token; + } + + if (isdigit (ch)) + byte = ch - '0'; + else + byte = tolower (ch) - 'a' + 10; + + ch = lr_getc (lr); + if ((base == 16 && !isxdigit (ch)) + || (base != 16 && (ch < '0' || ch >= (int) ('0' + base)))) + goto esc_error; + + byte *= base; + if (isdigit (ch)) + byte += ch - '0'; + else + byte += tolower (ch) - 'a' + 10; + + ch = lr_getc (lr); + if (base != 16 && isdigit (ch)) + { + byte *= base; + byte += ch - '0'; + + ch = lr_getc (lr); + } + + bytes[nbytes++] = byte; + } + while (ch == lr->escape_char + && nbytes < (int) sizeof (lr->token.val.charcode.bytes)); + + if (!isspace (ch)) + lr_error (lr, _("garbage at end of character code specification")); + + lr_ungetn (lr, 1); + + lr->token.tok = tok_charcode; + lr->token.val.charcode.nbytes = nbytes; + + return &lr->token; +} + + +#define ADDC(ch) \ + do \ + { \ + if (bufact == bufmax) \ + { \ + bufmax *= 2; \ + buf = xrealloc (buf, bufmax); \ + } \ + buf[bufact++] = (ch); \ + } \ + while (0) + + +/* Append the L bytes starting at S to buf, enlarging buf when needed. */ +#define ADDS(s, l) \ + do \ + { \ + size_t _l = (l); \ + if (bufact + _l > bufmax) \ + { \ + /* Raise the capacity to at least _l first so that doubling \ + always leaves room for bufact + _l bytes. */ \ + if (bufmax < _l) \ + bufmax = _l; \ + bufmax *= 2; \ + buf = xrealloc (buf, bufmax); \ + } \ + memcpy (&buf[bufact], s, _l); \ + bufact += _l; \ + } \ + while (0) + + +#define ADDWC(ch) \ + do \ + { \ + if (buf2act == buf2max) \ + { \ + buf2max *= 2; \ + buf2 = xrealloc (buf2, 
buf2max * 4); \ + } \ + buf2[buf2act++] = (ch); \ + } \ + while (0) + + +static struct token * +get_symname (struct linereader *lr) +{ + /* Symbol in brackets. We must distinguish three kinds: + 1. reserved words + 2. ISO 10646 position values + 3. all other. */ + char *buf; + size_t bufact = 0; + size_t bufmax = 56; + const struct keyword_t *kw; + int ch; + + buf = (char *) xmalloc (bufmax); + + /* Collect the name up to the closing '>'. A newline or EOF ends the + loop as well, so an unterminated name at the end of the input + cannot keep appending forever. */ + do + { + ch = lr_getc (lr); + if (ch == lr->escape_char) + { + int c2 = lr_getc (lr); + ADDC (c2); + + if (c2 == '\n') + ch = '\n'; + } + else + ADDC (ch); + } + while (ch != '>' && ch != '\n' && ch != EOF); + + if (ch == '\n' || ch == EOF) + lr_error (lr, _("unterminated symbolic name")); + + /* Test for ISO 10646 position value. */ + if (buf[0] == 'U' && (bufact == 6 || bufact == 10)) + { + char *cp = buf + 1; + while (cp < &buf[bufact - 1] && isxdigit (*cp)) + ++cp; + + if (cp == &buf[bufact - 1]) + { + /* Yes, it is. */ + lr->token.tok = tok_ucs4; + lr->token.val.ucs4 = strtoul (buf + 1, NULL, 16); + + /* The token only carries the numeric value; release the name. */ + free (buf); + return &lr->token; + } + } + + /* It is a symbolic name. Test for reserved words. 
*/ + kw = lr->hash_fct (buf, bufact - 1); + + if (kw != NULL && kw->symname_or_ident == 1) + { + lr->token.tok = kw->token; + free (buf); + } + else + { + lr->token.tok = tok_bsymbol; + + buf = xrealloc (buf, bufact + 1); + buf[bufact] = '\0'; + + lr->token.val.str.startmb = buf; + lr->token.val.str.lenmb = bufact - 1; + } + + return &lr->token; +} + + +static struct token * +get_ident (struct linereader *lr) +{ + char *buf; + size_t bufact; + size_t bufmax = 56; + const struct keyword_t *kw; + int ch; + + buf = xmalloc (bufmax); + bufact = 0; + + ADDC (lr->buf[lr->idx - 1]); + + while (!isspace ((ch = lr_getc (lr))) && ch != '"' && ch != ';' + && ch != '<' && ch != ',' && ch != EOF) + { + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + { + lr_error (lr, _("invalid escape sequence")); + break; + } + } + ADDC (ch); + } + + lr_ungetc (lr, ch); + + kw = lr->hash_fct (buf, bufact); + + if (kw != NULL && kw->symname_or_ident == 0) + { + lr->token.tok = kw->token; + free (buf); + } + else + { + lr->token.tok = tok_ident; + + buf = xrealloc (buf, bufact + 1); + buf[bufact] = '\0'; + + lr->token.val.str.startmb = buf; + lr->token.val.str.lenmb = bufact; + } + + return &lr->token; +} + + +static struct token * +get_string (struct linereader *lr, const struct charmap_t *charmap, + struct localedef_t *locale, const struct repertoire_t *repertoire, + int verbose) +{ + int return_widestr = lr->return_widestr; + char *buf; + wchar_t *buf2 = NULL; + size_t bufact; + size_t bufmax = 56; + + /* We must return two different strings. */ + buf = xmalloc (bufmax); + bufact = 0; + + /* We know it'll be a string. */ + lr->token.tok = tok_string; + + /* If we need not translate the strings (i.e., expand <...> parts) + we can run a simple loop. */ + if (!lr->translate_strings) + { + int ch; + + buf2 = NULL; + while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) + ADDC (ch); + + /* Catch errors with trailing escape character. 
*/ + if (bufact > 0 && buf[bufact - 1] == lr->escape_char + && (bufact == 1 || buf[bufact - 2] != lr->escape_char)) + { + lr_error (lr, _("illegal escape sequence at end of string")); + --bufact; + } + else if (ch == '\n' || ch == EOF) + lr_error (lr, _("unterminated string")); + + ADDC ('\0'); + } + else + { + int illegal_string = 0; + size_t buf2act = 0; + size_t buf2max = 56 * sizeof (uint32_t); + int ch; + int warned = 0; + + /* We have to provide the wide character result as well. */ + if (return_widestr) + buf2 = xmalloc (buf2max); + + /* Read until the end of the string (or end of the line or file). */ + while ((ch = lr_getc (lr)) != '"' && ch != '\n' && ch != EOF) + { + size_t startidx; + uint32_t wch; + struct charseq *seq; + + if (ch != '<') + { + /* The standards leave it up to the implementation to decide + what to do with character which stand for themself. We + could jump through hoops to find out the value relative to + the charmap and the repertoire map, but instead we leave + it up to the locale definition author to write a better + definition. We assume here that every character which + stands for itself is encoded using ISO 8859-1. Using the + escape character is allowed. */ + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + break; + } + + if (verbose && !warned) + { + lr_error (lr, _("\ +non-symbolic character value should not be used")); + warned = 1; + } + + ADDC (ch); + if (return_widestr) + ADDWC ((uint32_t) ch); + + continue; + } + + /* Now we have to search for the end of the symbolic name, i.e., + the closing '>'. */ + startidx = bufact; + while ((ch = lr_getc (lr)) != '>' && ch != '\n' && ch != EOF) + { + if (ch == lr->escape_char) + { + ch = lr_getc (lr); + if (ch == '\n' || ch == EOF) + break; + } + ADDC (ch); + } + if (ch == '\n' || ch == EOF) + /* Not a correct string. */ + break; + if (bufact == startidx) + { + /* <> is no correct name. Ignore it and also signal an + error. 
*/ + illegal_string = 1; + continue; + } + + /* It might be a Uxxxx symbol. */ + if (buf[startidx] == 'U' + && (bufact - startidx == 5 || bufact - startidx == 9)) + { + char *cp = buf + startidx + 1; + while (cp < &buf[bufact] && isxdigit (*cp)) + ++cp; + + if (cp == &buf[bufact]) + { + char utmp[10]; + + /* Yes, it is. */ + ADDC ('\0'); + wch = strtoul (buf + startidx + 1, NULL, 16); + + /* Now forget about the name we just added. */ + bufact = startidx; + + if (return_widestr) + ADDWC (wch); + + /* See whether the charmap contains the Uxxxxxxxx names. */ + snprintf (utmp, sizeof (utmp), "U%08X", wch); + seq = charmap_find_value (charmap, utmp, 9); + + if (seq == NULL) + { + /* No, this isn't the case. Now determine from + the repertoire the name of the character and + find it in the charmap. */ + if (repertoire != NULL) + { + const char *symbol; + + symbol = repertoire_find_symbol (repertoire, wch); + + if (symbol != NULL) + seq = charmap_find_value (charmap, symbol, + strlen (symbol)); + } + + if (seq == NULL) + { +#ifndef NO_TRANSLITERATION + /* Transliterate if possible. */ + if (locale != NULL) + { + uint32_t *translit; + + if ((locale->avail & CTYPE_LOCALE) == 0) + { + /* Load the CTYPE data now. */ + int old_needed = locale->needed; + + locale->needed = 0; + locale = load_locale (LC_CTYPE, + locale->name, + locale->repertoire_name, + charmap, locale); + locale->needed = old_needed; + } + + if ((locale->avail & CTYPE_LOCALE) != 0 + && ((translit = find_translit (locale, + charmap, wch)) + != NULL)) + /* The CTYPE data contains a matching + transliteration. */ + { + int i; + + for (i = 0; translit[i] != 0; ++i) + { + char utmp[10]; + + snprintf (utmp, sizeof (utmp), "U%08X", + translit[i]); + seq = charmap_find_value (charmap, utmp, + 9); + assert (seq != NULL); + ADDS (seq->bytes, seq->nbytes); + } + + continue; + } + } +#endif /* NO_TRANSLITERATION */ + + /* Not a known name. 
*/ + illegal_string = 1; + } + } + + if (seq != NULL) + ADDS (seq->bytes, seq->nbytes); + + continue; + } + } + + /* We now have the symbolic name in buf[startidx] to + buf[bufact-1]. Now find out the value for this character + in the charmap as well as in the repertoire map (in this + order). */ + seq = charmap_find_value (charmap, &buf[startidx], + bufact - startidx); + + if (seq == NULL) + { + /* This name is not in the charmap. */ + lr_error (lr, _("symbol `%.*s' not in charmap"), + (int) (bufact - startidx), &buf[startidx]); + illegal_string = 1; + } + + if (return_widestr) + { + /* Now the same for the multibyte representation. */ + if (seq != NULL && seq->ucs4 != UNINITIALIZED_CHAR_VALUE) + wch = seq->ucs4; + else + { + wch = repertoire_find_value (repertoire, &buf[startidx], + bufact - startidx); + if (seq != NULL) + seq->ucs4 = wch; + } + + if (wch == ILLEGAL_CHAR_VALUE) + { + /* This name is not in the repertoire map. */ + lr_error (lr, _("symbol `%.*s' not in repertoire map"), + (int) (bufact - startidx), &buf[startidx]); + illegal_string = 1; + } + else + ADDWC (wch); + } + + /* Now forget about the name we just added. */ + bufact = startidx; + + /* And copy the bytes. 
*/ + if (seq != NULL) + ADDS (seq->bytes, seq->nbytes); + } + + if (ch == '\n' || ch == EOF) + { + lr_error (lr, _("unterminated string")); + illegal_string = 1; + } + + if (illegal_string) + { + free (buf); + free (buf2); + lr->token.val.str.startmb = NULL; + lr->token.val.str.lenmb = 0; + lr->token.val.str.startwc = NULL; + lr->token.val.str.lenwc = 0; + + return &lr->token; + } + + ADDC ('\0'); + + if (return_widestr) + { + ADDWC (0); + lr->token.val.str.startwc = xrealloc (buf2, + buf2act * sizeof (uint32_t)); + lr->token.val.str.lenwc = buf2act; + } + } + + lr->token.val.str.startmb = xrealloc (buf, bufact); + lr->token.val.str.lenmb = bufact; + + return &lr->token; +} diff --git a/REORG.TODO/locale/programs/linereader.h b/REORG.TODO/locale/programs/linereader.h new file mode 100644 index 0000000000..3965db558c --- /dev/null +++ b/REORG.TODO/locale/programs/linereader.h @@ -0,0 +1,146 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.org>. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _LINEREADER_H +#define _LINEREADER_H 1 + +#include <ctype.h> +#include <libintl.h> +#include <stdint.h> +#include <stdio.h> + +#include "charmap.h" +#include "error.h" +#include "locfile-token.h" +#include "repertoire.h" + + +typedef const struct keyword_t *(*kw_hash_fct_t) (const char *, unsigned int); +struct charset_t; +struct localedef_t; + +struct token +{ + enum token_t tok; + union + { + struct + { + char *startmb; + size_t lenmb; + uint32_t *startwc; + size_t lenwc; + } str; + unsigned long int num; + struct + { + /* This element is sized on the safe expectation that no single + character in any character set uses more than 16 bytes. */ + unsigned char bytes[16]; + int nbytes; + } charcode; + uint32_t ucs4; + } val; +}; + + +struct linereader +{ + FILE *fp; + const char *fname; + char *buf; + size_t bufsize; + size_t bufact; + size_t lineno; + + size_t idx; + + char comment_char; + char escape_char; + + struct token token; + + int translate_strings; + int return_widestr; + + kw_hash_fct_t hash_fct; +}; + + +/* Functions defined in linereader.c. */ +extern struct linereader *lr_open (const char *fname, kw_hash_fct_t hf); +extern struct linereader *lr_create (FILE *fp, const char *fname, + kw_hash_fct_t hf); +extern int lr_eof (struct linereader *lr); +extern void lr_close (struct linereader *lr); +extern int lr_next (struct linereader *lr); +extern struct token *lr_token (struct linereader *lr, + const struct charmap_t *charmap, + struct localedef_t *locale, + const struct repertoire_t *repertoire, + int verbose); +extern void lr_ignore_rest (struct linereader *lr, int verbose); + + +#define lr_error(lr, fmt, args...) 
\ + WITH_CUR_LOCALE (error_at_line (0, 0, lr->fname, lr->lineno, fmt, ## args)) + + + +static inline int +__attribute ((always_inline)) +lr_getc (struct linereader *lr) +{ + if (lr->idx == lr->bufact) + { + if (lr->bufact != 0) + if (lr_next (lr) < 0) + return EOF; + + if (lr->bufact == 0) + return EOF; + } + + return lr->buf[lr->idx] == '\32' ? EOF : lr->buf[lr->idx++]; +} + + +static inline int +__attribute ((always_inline)) +lr_ungetc (struct linereader *lr, int ch) +{ + if (lr->idx == 0) + return -1; + + if (ch != EOF) + lr->buf[--lr->idx] = ch; + return 0; +} + + +static inline int +lr_ungetn (struct linereader *lr, size_t n) +{ + if (lr->idx < n) + return -1; + + lr->idx -= n; + return 0; +} + + +#endif /* linereader.h */ diff --git a/REORG.TODO/locale/programs/locale-spec.c b/REORG.TODO/locale/programs/locale-spec.c new file mode 100644 index 0000000000..4e9bf81b78 --- /dev/null +++ b/REORG.TODO/locale/programs/locale-spec.c @@ -0,0 +1,131 @@ +/* Handle special requests. + Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <error.h> +#include <libintl.h> +#include <stdio.h> +#include <string.h> +#include <wchar.h> + +#include "localeinfo.h" + + +/* We provide support for some special names. This helps debugging + and may be useful for advanced usage of the provided information + outside C. */ +void +locale_special (const char *name, int show_category_name, + int show_keyword_name) +{ +#if 0 + /* "collate-elements": print collation elements of locale. */ + if (strcmp (name, "collate-elements") == 0) + { + size_t nelem = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_ELEM_HASH_SIZE); + + if (show_category_name) + puts ("LC_COLLATE"); + if (show_keyword_name) + fputs ("collate-elements=", stdout); + + if (nelem != 0) + { + int first = 1; + size_t cnt; + + for (cnt = 0; cnt < nelem; ++cnt) + if (__collate_element_hash[2 * cnt] != (~((u_int32_t) 0))) + { + size_t idx = __collate_element_hash[2 * cnt]; + + printf ("%s<%s>", first ? "" : ";", + &__collate_element_strings[idx]); + + /* We don't print the string. This is only confusing + because only the programs have to know the + encoding. The code is left in place because it + shows how to get the information. */ + { + const wchar_t *wp; + + idx = __collate_element_hash[2 * cnt + 1]; + wp = &__collate_element_values[idx]; + while (*wp != L'\0') + { + /********************************************\ + |* XXX The element values are really wide *| + |* chars. But we are currently not able to *| + |* print these so fake here. 
*| + \********************************************/ + int ch = wctob (*wp++); + if (ch != EOF) + putchar (ch); + else + fputs ("<??\?>", stdout); + } + + putchar ('"'); + } + first = 0; + } + } + putchar ('\n'); + return; + } + + if (strcmp (name, "collate-classes") == 0) + { + size_t nelem = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZE); + size_t cnt; + int first = 1; + + if (show_category_name) + puts ("LC_COLLATE"); + if (show_keyword_name) + fputs ("collate-classes=", stdout); + + for (cnt = 0; cnt < nelem; ++cnt) + if (__collate_symbol_hash[2 * cnt] != 0xffffffff) + { + printf ("%s<%s>", first ? "" : ",", + &__collate_symbol_strings[__collate_symbol_hash[2 * cnt]]); +#if 0 + { + size_t idx = __collate_symbol_hash[2 * cnt + 1]; + size_t cls; + + putchar ('='); + for (cls = 0; cls < __collate_symbol_classes[idx]; ++cls) + printf ("%s%d", cls == 0 ? "" : ":", + __collate_symbol_classes[idx + 1 + cls]); + } +#endif + first = 0; + } + putchar ('\n'); + return; + } +#endif + + /* If nothing matches, fail. */ + error (1, 0, gettext ("unknown name \"%s\""), name); +} diff --git a/REORG.TODO/locale/programs/locale.c b/REORG.TODO/locale/programs/locale.c new file mode 100644 index 0000000000..941290089b --- /dev/null +++ b/REORG.TODO/locale/programs/locale.c @@ -0,0 +1,989 @@ +/* Implementation of the locale program according to POSIX 9945-2. + Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <argp.h> +#include <argz.h> +#include <dirent.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <langinfo.h> +#include <libintl.h> +#include <limits.h> +#include <locale.h> +#include <search.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "localeinfo.h" +#include "charmap-dir.h" +#include "../locarchive.h" +#include <programs/xmalloc.h> + +#define ARCHIVE_NAME COMPLOCALEDIR "/locale-archive" + +/* If set print the name of the category. */ +static int show_category_name; + +/* If set print the name of the item. */ +static int show_keyword_name; + +/* Print names of all available locales. */ +static int do_all; + +/* Print names of all available character maps. */ +static int do_charmaps = 0; + +/* Nonzero if verbose output is wanted. */ +static int verbose; + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +/* Definitions of arguments for argp functions. 
*/ +static const struct argp_option options[] = +{ + { NULL, 0, NULL, 0, N_("System information:") }, + { "all-locales", 'a', NULL, OPTION_NO_USAGE, + N_("Write names of available locales") }, + { "charmaps", 'm', NULL, OPTION_NO_USAGE, + N_("Write names of available charmaps") }, + { NULL, 0, NULL, 0, N_("Modify output format:") }, + { "category-name", 'c', NULL, 0, N_("Write names of selected categories") }, + { "keyword-name", 'k', NULL, 0, N_("Write names of selected keywords") }, + { "verbose", 'v', NULL, 0, N_("Print more information") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Short description of program. */ +static const char doc[] = N_("Get locale-specific information."); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("NAME\n[-a|-m]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + + +/* We don't have these constants defined because we don't use them. Give + default values. */ +#define CTYPE_MB_CUR_MIN 0 +#define CTYPE_MB_CUR_MAX 0 +#define CTYPE_HASH_SIZE 0 +#define CTYPE_HASH_LAYERS 0 +#define CTYPE_CLASS 0 +#define CTYPE_TOUPPER_EB 0 +#define CTYPE_TOLOWER_EB 0 +#define CTYPE_TOUPPER_EL 0 +#define CTYPE_TOLOWER_EL 0 + +/* Definition of the data structure which represents a category and its + items. */ +struct category +{ + int cat_id; + const char *name; + size_t number; + struct cat_item + { + int item_id; + const char *name; + enum { std, opt } status; + enum value_type value_type; + int min; + int max; + } *item_desc; +}; + +/* Simple helper macro. */ +#define NELEMS(arr) ((sizeof (arr)) / (sizeof (arr[0]))) + +/* For some tricky stuff. */ +#define NO_PAREN(Item, More...) 
Item, ## More + +/* We have all categories defined in `categories.def'. Now construct + the description and data structure used for all categories. */ +#define DEFINE_ELEMENT(Item, More...) { Item, ## More }, +#define DEFINE_CATEGORY(category, name, items, postload) \ + static struct cat_item category##_desc[] = \ + { \ + NO_PAREN items \ + }; + +#include "categories.def" +#undef DEFINE_CATEGORY + +static struct category category[] = + { +#define DEFINE_CATEGORY(category, name, items, postload) \ + [category] = { _NL_NUM_##category, name, NELEMS (category##_desc), \ + category##_desc }, +#include "categories.def" +#undef DEFINE_CATEGORY + }; +#define NCATEGORIES NELEMS (category) + + +/* Automatically set variable. */ +extern const char *__progname; + +/* helper function for extended name handling. */ +extern void locale_special (const char *name, int show_category_name, + int show_keyword_name); + +/* Prototypes for local functions. */ +static void print_LC_IDENTIFICATION (void *mapped, size_t size); +static void print_LC_CTYPE (void *mapped, size_t size); +static void write_locales (void); +static int nameentcmp (const void *a, const void *b); +static int write_archive_locales (void **all_datap, char *linebuf); +static void write_charmaps (void); +static void show_locale_vars (void); +static void show_info (const char *name); + + +int +main (int argc, char *argv[]) +{ + int remaining; + + /* Set initial values for global variables. */ + show_category_name = 0; + show_keyword_name = 0; + + /* Set locale. Do not set LC_ALL because the other categories must + not be affected (according to POSIX.2). */ + if (setlocale (LC_CTYPE, "") == NULL) + error (0, errno, gettext ("Cannot set LC_CTYPE to default locale")); + if (setlocale (LC_MESSAGES, "") == NULL) + error (0, errno, gettext ("Cannot set LC_MESSAGES to default locale")); + + /* Initialize the message catalog. */ + textdomain (PACKAGE); + + /* Parse and process arguments. 
*/ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* `-a' requests the names of all available locales. */ + if (do_all != 0) + { + if (setlocale (LC_COLLATE, "") == NULL) + error (0, errno, + gettext ("Cannot set LC_COLLATE to default locale")); + write_locales (); + exit (EXIT_SUCCESS); + } + + /* `m' requests the names of all available charmaps. The names can be + used for the -f argument to localedef(1). */ + if (do_charmaps != 0) + { + write_charmaps (); + exit (EXIT_SUCCESS); + } + + /* Specific information about the current locale are requested. + Change to this locale now. */ + if (setlocale (LC_ALL, "") == NULL) + error (0, errno, gettext ("Cannot set LC_ALL to default locale")); + + /* If no real argument is given we have to print the contents of the + current locale definition variables. These are LANG and the LC_*. */ + if (remaining == argc && show_keyword_name == 0 && show_category_name == 0) + { + show_locale_vars (); + exit (EXIT_SUCCESS); + } + + /* Process all given names. */ + while (remaining < argc) + show_info (argv[remaining++]); + + exit (EXIT_SUCCESS); +} + + +/* Handle program arguments. */ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case 'a': + do_all = 1; + break; + case 'c': + show_category_name = 1; + break; + case 'm': + do_charmaps = 1; + break; + case 'k': + show_keyword_name = 1; + break; + case 'v': + verbose = 1; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + char *tp = NULL; + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + return tp; + default: + break; + } + return (char *) text; +} + + +/* Print the version information. 
/* Print the non-empty LC_IDENTIFICATION strings contained in the
   locale data image MAPPED, which is SIZE bytes long.

   The image starts with a magic number, the number of entries and a
   table of byte offsets (relative to MAPPED), one per entry.  The data
   comes from a file, so nothing in it is trusted: the header, the
   table, every offset that is used and the termination of every
   string are checked against SIZE.  An image that fails the header
   checks is silently ignored; an individual entry that fails its
   checks is skipped.  */
static void
print_LC_IDENTIFICATION (void *mapped, size_t size)
{
  /* Layout of the beginning of the file.  */
  struct
    {
      unsigned int magic;
      unsigned int nstrings;
      unsigned int strindex[0];
    } *filedata = mapped;

  /* The fixed header must be present before any field may be read.  */
  if (size < sizeof *filedata)
    return;

  /* Wrong file type, or the offset table does not fit into the image.
     The comparison is written as a division so it cannot overflow.  */
  if (filedata->magic != LIMAGIC (LC_IDENTIFICATION)
      || (filedata->nstrings
	  > (size - sizeof *filedata) / sizeof (unsigned int)))
    return;

  /* Print one entry.  It is skipped unless its table slot exists, its
     offset lies inside the image, the string is not empty and a NUL
     terminator is found before the end of the image.  */
#define HANDLE(idx, name) \
  do \
    { \
      size_t item_ = _NL_ITEM_INDEX (_NL_IDENTIFICATION_##idx); \
      if (item_ < filedata->nstrings && filedata->strindex[item_] < size) \
	{ \
	  size_t off_ = filedata->strindex[item_]; \
	  const char *str = (const char *) mapped + off_; \
	  if (*str != '\0' && memchr (str, '\0', size - off_) != NULL) \
	    printf ("%9s | %s\n", name, str); \
	} \
    } \
  while (0)

  HANDLE (TITLE, "title");
  HANDLE (SOURCE, "source");
  HANDLE (ADDRESS, "address");
  HANDLE (CONTACT, "contact");
  HANDLE (EMAIL, "email");
  HANDLE (TEL, "telephone");
  HANDLE (FAX, "fax");
  HANDLE (LANGUAGE, "language");
  HANDLE (TERRITORY, "territory");
  HANDLE (AUDIENCE, "audience");
  HANDLE (APPLICATION, "application");
  HANDLE (ABBREVIATION, "abbreviation");
  HANDLE (REVISION, "revision");
  HANDLE (DATE, "date");
}
*/ +static void +write_locales (void) +{ + char linebuf[80]; + void *all_data = NULL; + struct dirent **dirents; + int ndirents; + int cnt; + char *alias_path; + size_t alias_path_len; + char *entry; + int first_locale = 1; + +#define PUT(name) tsearch (name, &all_data, \ + (int (*) (const void *, const void *)) strcoll) +#define GET(name) tfind (name, &all_data, \ + (int (*) (const void *, const void *)) strcoll) + + /* `POSIX' locale is always available (POSIX.2 4.34.3). */ + PUT ("POSIX"); + /* And so is the "C" locale. */ + PUT ("C"); + + memset (linebuf, '-', sizeof (linebuf) - 1); + linebuf[sizeof (linebuf) - 1] = '\0'; + + /* First scan the locale archive. */ + if (write_archive_locales (&all_data, linebuf)) + first_locale = 0; + + /* Now we can look for all files in the directory. */ + ndirents = scandir (COMPLOCALEDIR, &dirents, select_dirs, + alphasort); + for (cnt = 0; cnt < ndirents; ++cnt) + { + /* Test whether at least the LC_CTYPE data is there. Some + directories only contain translations. */ + char buf[sizeof (COMPLOCALEDIR) + + strlen (dirents[cnt]->d_name) + + sizeof "/LC_IDENTIFICATION"]; + char *enddir; + struct stat64 st; + + stpcpy (enddir = stpcpy (stpcpy (stpcpy (buf, + COMPLOCALEDIR), + "/"), + dirents[cnt]->d_name), + "/LC_IDENTIFICATION"); + + if (stat64 (buf, &st) == 0 && S_ISREG (st.st_mode)) + { + if (verbose && GET (dirents[cnt]->d_name) == NULL) + { + /* Provide some nice output of all kinds of + information. */ + int fd; + + if (! first_locale) + putchar_unlocked ('\n'); + first_locale = 0; + + printf ("locale: %-15.15s directory: %.*s\n%s\n", + dirents[cnt]->d_name, (int) (enddir - buf), buf, + linebuf); + + fd = open64 (buf, O_RDONLY); + if (fd != -1) + { + void *mapped = mmap64 (NULL, st.st_size, PROT_READ, + MAP_SHARED, fd, 0); + if (mapped != MAP_FAILED) + { + print_LC_IDENTIFICATION (mapped, st.st_size); + + munmap (mapped, st.st_size); + } + + close (fd); + + /* Now try to get the charset information. 
*/ + strcpy (enddir, "/LC_CTYPE"); + fd = open64 (buf, O_RDONLY); + if (fd != -1 && fstat64 (fd, &st) >= 0 + && ((mapped = mmap64 (NULL, st.st_size, PROT_READ, + MAP_SHARED, fd, 0)) + != MAP_FAILED)) + { + print_LC_CTYPE (mapped, st.st_size); + + munmap (mapped, st.st_size); + } + + if (fd != -1) + close (fd); + } + } + + /* If the verbose format is not selected we simply + collect the names. */ + PUT (xstrdup (dirents[cnt]->d_name)); + } + } + if (ndirents > 0) + free (dirents); + + /* Now read the locale.alias files. */ + if (argz_create_sep (LOCALE_ALIAS_PATH, ':', &alias_path, &alias_path_len)) + error (1, errno, gettext ("while preparing output")); + + entry = NULL; + while ((entry = argz_next (alias_path, alias_path_len, entry))) + { + static const char aliasfile[] = "/locale.alias"; + FILE *fp; + char full_name[strlen (entry) + sizeof aliasfile]; + + stpcpy (stpcpy (full_name, entry), aliasfile); + fp = fopen (full_name, "rm"); + if (fp == NULL) + /* Ignore non-existing files. */ + continue; + + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + while (! feof_unlocked (fp)) + { + /* It is a reasonable approach to use a fix buffer here + because + a) we are only interested in the first two fields + b) these fields must be usable as file names and so must + not be that long */ + char buf[BUFSIZ]; + char *alias; + char *value; + char *cp; + + if (fgets_unlocked (buf, BUFSIZ, fp) == NULL) + /* EOF reached. */ + break; + + cp = buf; + /* Ignore leading white space. */ + while (isspace (cp[0]) && cp[0] != '\n') + ++cp; + + /* A leading '#' signals a comment line. */ + if (cp[0] != '\0' && cp[0] != '#' && cp[0] != '\n') + { + alias = cp++; + while (cp[0] != '\0' && !isspace (cp[0])) + ++cp; + /* Terminate alias name. */ + if (cp[0] != '\0') + *cp++ = '\0'; + + /* Now look for the beginning of the value. 
*/ + while (isspace (cp[0])) + ++cp; + + if (cp[0] != '\0') + { + value = cp++; + while (cp[0] != '\0' && !isspace (cp[0])) + ++cp; + /* Terminate value. */ + if (cp[0] == '\n') + { + /* This has to be done to make the following + test for the end of line possible. We are + looking for the terminating '\n' which do not + overwrite here. */ + *cp++ = '\0'; + *cp = '\n'; + } + else if (cp[0] != '\0') + *cp++ = '\0'; + + /* Add the alias. */ + if (! verbose && GET (value) != NULL) + PUT (xstrdup (alias)); + } + } + + /* Possibly not the whole line fits into the buffer. + Ignore the rest of the line. */ + while (strchr (cp, '\n') == NULL) + { + cp = buf; + if (fgets_unlocked (buf, BUFSIZ, fp) == NULL) + /* Make sure the inner loop will be left. The outer + loop will exit at the `feof' test. */ + *cp = '\n'; + } + } + + fclose (fp); + } + + if (! verbose) + { + twalk (all_data, print_names); + } +} + + +struct nameent +{ + char *name; + uint32_t locrec_offset; +}; + + +static int +nameentcmp (const void *a, const void *b) +{ + return strcoll (((const struct nameent *) a)->name, + ((const struct nameent *) b)->name); +} + + +static int +write_archive_locales (void **all_datap, char *linebuf) +{ + struct stat64 st; + void *all_data = *all_datap; + size_t len = 0; + struct locarhead *head; + struct namehashent *namehashtab; + char *addr = MAP_FAILED; + int fd, ret = 0; + uint32_t cnt; + + fd = open64 (ARCHIVE_NAME, O_RDONLY); + if (fd < 0) + return 0; + + if (fstat64 (fd, &st) < 0 || st.st_size < sizeof (*head)) + goto error_out; + + len = st.st_size; + addr = mmap64 (NULL, len, PROT_READ, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) + goto error_out; + + head = (struct locarhead *) addr; + if (head->namehash_offset + head->namehash_size > len + || head->string_offset + head->string_size > len + || head->locrectab_offset + head->locrectab_size > len + || head->sumhash_offset + head->sumhash_size > len) + goto error_out; + + namehashtab = (struct namehashent *) (addr + 
head->namehash_offset); + if (! verbose) + { + for (cnt = 0; cnt < head->namehash_size; ++cnt) + if (namehashtab[cnt].locrec_offset != 0) + { + PUT (xstrdup (addr + namehashtab[cnt].name_offset)); + ++ret; + } + } + else + { + struct nameent *names; + uint32_t used; + + names = (struct nameent *) xmalloc (head->namehash_used + * sizeof (struct nameent)); + for (cnt = used = 0; cnt < head->namehash_size; ++cnt) + if (namehashtab[cnt].locrec_offset != 0) + { + names[used].name = addr + namehashtab[cnt].name_offset; + names[used++].locrec_offset = namehashtab[cnt].locrec_offset; + } + + /* Sort the names. */ + qsort (names, used, sizeof (struct nameent), nameentcmp); + + for (cnt = 0; cnt < used; ++cnt) + { + struct locrecent *locrec; + + PUT (xstrdup (names[cnt].name)); + + if (cnt) + putchar_unlocked ('\n'); + + printf ("locale: %-15.15s archive: " ARCHIVE_NAME "\n%s\n", + names[cnt].name, linebuf); + + locrec = (struct locrecent *) (addr + names[cnt].locrec_offset); + + print_LC_IDENTIFICATION (addr + + locrec->record[LC_IDENTIFICATION].offset, + locrec->record[LC_IDENTIFICATION].len); + + print_LC_CTYPE (addr + locrec->record[LC_CTYPE].offset, + locrec->record[LC_CTYPE].len); + } + + ret = used; + } + +error_out: + if (addr != MAP_FAILED) + munmap (addr, len); + close (fd); + *all_datap = all_data; + return ret; +} + + +/* Write the names of all available character maps to stdout. */ +static void +write_charmaps (void) +{ + void *all_data = NULL; + CHARMAP_DIR *dir; + const char *dirent; + + /* Look for all files in the charmap directory. */ + dir = charmap_opendir (CHARMAP_PATH); + if (dir == NULL) + return; + + while ((dirent = charmap_readdir (dir)) != NULL) + { + char **aliases; + char **p; + + PUT (xstrdup (dirent)); + + aliases = charmap_aliases (CHARMAP_PATH, dirent); + +#if 0 + /* Add the code_set_name and the aliases. */ + for (p = aliases; *p; p++) + PUT (xstrdup (*p)); +#else + /* Add the code_set_name only. Most aliases are obsolete. 
/* Print the assignment NAME=VAL followed by a newline to stdout, with
   VAL quoted so that a shell reads it back unchanged.

   If DQUOTE is true the value is enclosed in double quotes and only
   the characters $ ` " \ are escaped with a backslash.  Otherwise no
   quotes are written and every character of the set
   ~ | & ; < > ( ) $ ` \ " ' space tab newline is escaped.  */
static void
print_assignment (const char *name, const char *val, bool dquote)
{
  /* The characters which need a backslash in the selected mode.  */
  const char *special = dquote ? "$`\"\\" : "~|&;<>()$`\\\"' \t\n";

  printf ("%s=", name);
  if (dquote)
    putchar ('"');

  while (*val != '\0')
    {
      /* Copy the run of ordinary characters in one go.  fwrite takes
	 the length as a size_t; passing it as the `int' precision of
	 "%.*s" would truncate very long runs.  */
      size_t segment = strcspn (val, special);
      fwrite (val, 1, segment, stdout);
      val += segment;
      if (*val == '\0')
	break;

      /* VAL now points to a special character.  */
      putchar ('\\');
      putchar (*val++);
    }

  if (dquote)
    putchar ('"');
  putchar ('\n');
}
: "", stdout); + if (show_keyword_name) + putchar ('"'); + putchar ('\n'); + break; + case stringarray: + { + const char *val; + int cnt; + + if (show_keyword_name) + printf ("%s=\"", item->name); + + for (cnt = 0; cnt < item->max - 1; ++cnt) + { + val = nl_langinfo (item->item_id + cnt); + if (val != NULL) + fputs (val, stdout); + putchar (';'); + } + + val = nl_langinfo (item->item_id + cnt); + if (val != NULL) + fputs (val, stdout); + + if (show_keyword_name) + putchar ('"'); + putchar ('\n'); + } + break; + case stringlist: + { + int first = 1; + const char *val = nl_langinfo (item->item_id) ? : ""; + + if (show_keyword_name) + printf ("%s=", item->name); + + for (int cnt = 0; cnt < item->max && *val != '\0'; ++cnt) + { + printf ("%s%s%s%s", first ? "" : ";", + show_keyword_name ? "\"" : "", val, + show_keyword_name ? "\"" : ""); + val = strchr (val, '\0') + 1; + first = 0; + } + putchar ('\n'); + } + break; + case byte: + { + const char *val = nl_langinfo (item->item_id); + + if (show_keyword_name) + printf ("%s=", item->name); + + if (val != NULL) + printf ("%d", *val == '\377' ? -1 : *val); + putchar ('\n'); + } + break; + case bytearray: + { + const char *val = nl_langinfo (item->item_id); + int cnt = val ? strlen (val) : 0; + + if (show_keyword_name) + printf ("%s=", item->name); + + while (cnt > 1) + { + printf ("%d;", *val == '\177' ? -1 : *val); + --cnt; + ++val; + } + + printf ("%d\n", cnt == 0 || *val == '\177' ? -1 : *val); + } + break; + case word: + { + union { unsigned int word; char *string; } val; + val.string = nl_langinfo (item->item_id); + if (show_keyword_name) + printf ("%s=", item->name); + + printf ("%d\n", val.word); + } + break; + case wordarray: + { + int first = 1; + union { unsigned int *wordarray; char *string; } val; + + val.string = nl_langinfo (item->item_id); + if (show_keyword_name) + printf ("%s=", item->name); + + for (int cnt = 0; cnt < item->max; ++cnt) + { + printf ("%s%d", first ? 
"" : ";", val.wordarray[cnt]); + first = 0; + } + putchar ('\n'); + } + break; + case wstring: + case wstringarray: + case wstringlist: + /* We don't print wide character information since the same + information is available in a multibyte string. */ + default: + break; + } +} + +/* Show the information request for NAME. */ +static void +show_info (const char *name) +{ + for (size_t cat_no = 0; cat_no < NCATEGORIES; ++cat_no) + if (cat_no != LC_ALL) + { + if (strcmp (name, category[cat_no].name) == 0) + /* Print the whole category. */ + { + if (show_category_name != 0) + puts (category[cat_no].name); + + for (size_t item_no = 0; + item_no < category[cat_no].number; + ++item_no) + print_item (&category[cat_no].item_desc[item_no]); + + return; + } + + for (size_t item_no = 0; item_no < category[cat_no].number; ++item_no) + if (strcmp (name, category[cat_no].item_desc[item_no].name) == 0) + { + if (show_category_name != 0) + puts (category[cat_no].name); + + print_item (&category[cat_no].item_desc[item_no]); + return; + } + } + + /* The name is not a standard one. + For testing and perhaps advanced use allow some more symbols. */ + locale_special (name, show_category_name, show_keyword_name); +} diff --git a/REORG.TODO/locale/programs/localedef.c b/REORG.TODO/locale/programs/localedef.c new file mode 100644 index 0000000000..6acc1342c7 --- /dev/null +++ b/REORG.TODO/locale/programs/localedef.c @@ -0,0 +1,626 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <argp.h> +#include <errno.h> +#include <fcntl.h> +#include <libintl.h> +#include <locale.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <error.h> +#include <sys/mman.h> +#include <sys/stat.h> + +#include "localedef.h" +#include "charmap.h" +#include "locfile.h" + +/* Undefine the following line in the production version. */ +/* #define NDEBUG 1 */ +#include <assert.h> + + +/* List of copied locales. */ +struct copy_def_list_t *copy_list; + +/* If this is defined be POSIX conform. */ +int posix_conformance; + +/* If not zero give a lot more messages. */ +int verbose; + +/* If not zero suppress warnings and information messages. */ +int be_quiet; + +/* If not zero force output even if warning were issued. */ +static int force_output; + +/* Prefix for output files. */ +const char *output_prefix; + +/* Name of the character map file. */ +static const char *charmap_file; + +/* Name of the locale definition file. */ +static const char *input_file; + +/* Name of the repertoire map file. */ +const char *repertoire_global; + +/* Name of the locale.alias file. */ +const char *alias_file; + +/* List of all locales. */ +static struct localedef_t *locales; + +/* If true don't add locale data to archive. */ +bool no_archive; + +/* If true add named locales to archive. */ +static bool add_to_archive; + +/* If true delete named locales from archive. */ +static bool delete_from_archive; + +/* If true replace archive content when adding. 
*/ +static bool replace_archive; + +/* If true list archive content. */ +static bool list_archive; + +/* Maximum number of retries when opening the locale archive. */ +int max_locarchive_open_retry = 10; + + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +#define OPT_POSIX 301 +#define OPT_QUIET 302 +#define OPT_PREFIX 304 +#define OPT_NO_ARCHIVE 305 +#define OPT_ADD_TO_ARCHIVE 306 +#define OPT_REPLACE 307 +#define OPT_DELETE_FROM_ARCHIVE 308 +#define OPT_LIST_ARCHIVE 309 +#define OPT_LITTLE_ENDIAN 400 +#define OPT_BIG_ENDIAN 401 + +/* Definitions of arguments for argp functions. */ +static const struct argp_option options[] = +{ + { NULL, 0, NULL, 0, N_("Input Files:") }, + { "charmap", 'f', N_("FILE"), 0, + N_("Symbolic character names defined in FILE") }, + { "inputfile", 'i', N_("FILE"), 0, + N_("Source definitions are found in FILE") }, + { "repertoire-map", 'u', N_("FILE"), 0, + N_("FILE contains mapping from symbolic names to UCS4 values") }, + + { NULL, 0, NULL, 0, N_("Output control:") }, + { "force", 'c', NULL, 0, + N_("Create output even if warning messages were issued") }, + { "prefix", OPT_PREFIX, N_("PATH"), 0, N_("Optional output file prefix") }, + { "posix", OPT_POSIX, NULL, 0, N_("Strictly conform to POSIX") }, + { "quiet", OPT_QUIET, NULL, 0, + N_("Suppress warnings and information messages") }, + { "verbose", 'v', NULL, 0, N_("Print more messages") }, + { NULL, 0, NULL, 0, N_("Archive control:") }, + { "no-archive", OPT_NO_ARCHIVE, NULL, 0, + N_("Don't add new data to archive") }, + { "add-to-archive", OPT_ADD_TO_ARCHIVE, NULL, 0, + N_("Add locales named by parameters to archive") }, + { "replace", OPT_REPLACE, NULL, 0, N_("Replace existing archive content") }, + { "delete-from-archive", OPT_DELETE_FROM_ARCHIVE, NULL, 0, + N_("Remove locales named by parameters from archive") }, + { "list-archive", 
OPT_LIST_ARCHIVE, NULL, 0, N_("List content of archive") }, + { "alias-file", 'A', N_("FILE"), 0, + N_("locale.alias file to consult when making archive")}, + { "little-endian", OPT_LITTLE_ENDIAN, NULL, 0, + N_("Generate little-endian output") }, + { "big-endian", OPT_BIG_ENDIAN, NULL, 0, + N_("Generate big-endian output") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Short description of program. */ +static const char doc[] = N_("Compile locale specification"); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("\ +NAME\n\ +[--add-to-archive|--delete-from-archive] FILE...\n\ +--list-archive [FILE]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + + +/* Prototypes for local functions. */ +static void error_print (void); +static const char *construct_output_path (char *path); +static const char *normalize_codeset (const char *codeset, size_t name_len); + + +int +main (int argc, char *argv[]) +{ + const char *output_path; + int cannot_write_why; + struct charmap_t *charmap; + struct localedef_t global; + int remaining; + + /* Set initial values for global variables. */ + copy_list = NULL; + posix_conformance = getenv ("POSIXLY_CORRECT") != NULL; + error_print_progname = error_print; + + /* Set locale. Do not set LC_ALL because the other categories must + not be affected (according to POSIX.2). */ + setlocale (LC_MESSAGES, ""); + setlocale (LC_CTYPE, ""); + + /* Initialize the message catalog. */ + textdomain (_libc_intl_domainname); + + /* Parse and process arguments. */ + argp_err_exit_status = 4; + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* Handle a few special cases. 
*/ + if (list_archive) + show_archive_content (remaining > 1 ? argv[remaining] : NULL, verbose); + if (add_to_archive) + return add_locales_to_archive (argc - remaining, &argv[remaining], + replace_archive); + if (delete_from_archive) + return delete_locales_from_archive (argc - remaining, &argv[remaining]); + + /* POSIX.2 requires to be verbose about missing characters in the + character map. */ + verbose |= posix_conformance; + + if (argc - remaining != 1) + { + /* We need exactly one non-option parameter. */ + argp_help (&argp, stdout, ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR, + program_invocation_short_name); + exit (4); + } + + /* The parameter describes the output path of the constructed files. + If the described files cannot be written return a NULL pointer. */ + output_path = construct_output_path (argv[remaining]); + if (output_path == NULL && ! no_archive) + error (4, errno, _("cannot create directory for output files")); + cannot_write_why = errno; + + /* Now that the parameters are processed we have to reset the local + ctype locale. (P1003.2 4.35.5.2) */ + setlocale (LC_CTYPE, "POSIX"); + + /* Look whether the system really allows locale definitions. POSIX + defines error code 3 for this situation so I think it must be + a fatal error (see P1003.2 4.35.8). */ + if (sysconf (_SC_2_LOCALEDEF) < 0) + WITH_CUR_LOCALE (error (3, 0, _("\ +FATAL: system does not define `_POSIX2_LOCALEDEF'"))); + + /* Process charmap file. */ + charmap = charmap_read (charmap_file, verbose, 1, be_quiet, 1); + + /* Add the first entry in the locale list. */ + memset (&global, '\0', sizeof (struct localedef_t)); + global.name = input_file ?: "/dev/stdin"; + global.needed = ALL_LOCALES; + locales = &global; + + /* Now read the locale file. */ + if (locfile_read (&global, charmap) != 0) + WITH_CUR_LOCALE (error (4, errno, _("\ +cannot open locale definition file `%s'"), input_file)); + + /* Perhaps we saw some `copy' instructions. 
*/ + while (1) + { + struct localedef_t *runp = locales; + + while (runp != NULL && (runp->needed & runp->avail) == runp->needed) + runp = runp->next; + + if (runp == NULL) + /* Everything read. */ + break; + + if (locfile_read (runp, charmap) != 0) + WITH_CUR_LOCALE (error (4, errno, _("\ +cannot open locale definition file `%s'"), runp->name)); + } + + /* Check the categories we processed in source form. */ + check_all_categories (locales, charmap); + + /* We are now able to write the data files. If warning were given we + do it only if it is explicitly requested (--force). */ + if (error_message_count == 0 || force_output != 0) + { + if (cannot_write_why != 0) + WITH_CUR_LOCALE (error (4, cannot_write_why, _("\ +cannot write output files to `%s'"), output_path ? : argv[remaining])); + else + write_all_categories (locales, charmap, argv[remaining], output_path); + } + else + WITH_CUR_LOCALE (error (4, 0, _("\ +no output file produced because warnings were issued"))); + + /* This exit status is prescribed by POSIX.2 4.35.7. */ + exit (error_message_count != 0); +} + + +/* Handle program arguments. 
*/ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case OPT_QUIET: + be_quiet = 1; + break; + case OPT_POSIX: + posix_conformance = 1; + break; + case OPT_PREFIX: + output_prefix = arg; + break; + case OPT_NO_ARCHIVE: + no_archive = true; + break; + case OPT_ADD_TO_ARCHIVE: + add_to_archive = true; + break; + case OPT_REPLACE: + replace_archive = true; + break; + case OPT_DELETE_FROM_ARCHIVE: + delete_from_archive = true; + break; + case OPT_LIST_ARCHIVE: + list_archive = true; + break; + case OPT_LITTLE_ENDIAN: + set_big_endian (false); + break; + case OPT_BIG_ENDIAN: + set_big_endian (true); + break; + case 'c': + force_output = 1; + break; + case 'f': + charmap_file = arg; + break; + case 'A': + alias_file = arg; + break; + case 'i': + input_file = arg; + break; + case 'u': + repertoire_global = arg; + break; + case 'v': + verbose = 1; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + char *cp; + char *tp; + + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + if (asprintf (&cp, gettext ("\ +System's directory for character maps : %s\n\ + repertoire maps: %s\n\ + locale path : %s\n\ +%s"), + CHARMAP_PATH, REPERTOIREMAP_PATH, LOCALE_PATH, tp) < 0) + { + free (tp); + return NULL; + } + return cp; + default: + break; + } + return (char *) text; +} + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "localedef %s%s\n", PKGVERSION, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. 
There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2017"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +/* The address of this function will be assigned to the hook in the error + functions. */ +static void +error_print (void) +{ +} + + +/* The parameter to localedef describes the output path. If it does + contain a '/' character it is a relative path. Otherwise it names the + locale this definition is for. */ +static const char * +construct_output_path (char *path) +{ + const char *normal = NULL; + char *result; + char *endp; + + if (strchr (path, '/') == NULL) + { + /* This is a system path. First examine whether the locale name + contains a reference to the codeset. This should be + normalized. */ + char *startp; + + startp = path; + /* We must be prepared for finding a CEN name or a location of + the introducing `.' where it is not possible anymore. */ + while (*startp != '\0' && *startp != '@' && *startp != '.') + ++startp; + if (*startp == '.') + { + /* We found a codeset specification. Now find the end. */ + endp = ++startp; + while (*endp != '\0' && *endp != '@') + ++endp; + + if (endp > startp) + normal = normalize_codeset (startp, endp - startp); + } + else + /* This is to keep gcc quiet. */ + endp = NULL; + + /* We put an additional '\0' at the end of the string because at + the end of the function we need another byte for the trailing + '/'. */ + ssize_t n; + if (normal == NULL) + n = asprintf (&result, "%s%s/%s%c", output_prefix ?: "", + COMPLOCALEDIR, path, '\0'); + else + n = asprintf (&result, "%s%s/%.*s%s%s%c", + output_prefix ?: "", COMPLOCALEDIR, + (int) (startp - path), path, normal, endp, '\0'); + + if (n < 0) + return NULL; + + endp = result + n - 1; + } + else + { + /* This is a user path. Please note the additional byte in the + memory allocation. 
/* Normalize a codeset name.  There is no standard for the codeset
   names; normalization allows the user to use any of the common
   spellings.

   Only the first NAME_LEN bytes of CODESET are examined.  Letters are
   converted to lower case, digits are kept and all other bytes are
   dropped.  If the name contains no letter at all the result gets the
   prefix "iso".

   Returns a newly allocated string which the caller owns, or NULL if
   the allocation failed.  */
static const char *
normalize_codeset (const char *codeset, size_t name_len)
{
  size_t len = 0;
  int only_digit = 1;
  char *retval;
  char *wp;
  size_t cnt;

  /* First pass: count the bytes which are kept and find out whether
     there is a letter among them.  The <ctype.h> functions are only
     defined for values representable as unsigned char (and EOF), so
     every byte is converted before it is classified.  */
  for (cnt = 0; cnt < name_len; ++cnt)
    {
      unsigned char ch = (unsigned char) codeset[cnt];

      if (isalnum (ch))
	{
	  ++len;

	  if (isalpha (ch))
	    only_digit = 0;
	}
    }

  retval = malloc ((only_digit ? 3 : 0) + len + 1);
  if (retval == NULL)
    return NULL;

  wp = retval;
  if (only_digit)
    {
      memcpy (wp, "iso", 3);
      wp += 3;
    }

  /* Second pass: emit the normalized bytes.  */
  for (cnt = 0; cnt < name_len; ++cnt)
    {
      unsigned char ch = (unsigned char) codeset[cnt];

      if (isalpha (ch))
	*wp++ = (char) tolower (ch);
      else if (isdigit (ch))
	*wp++ = (char) ch;
    }

  *wp = '\0';

  return retval;
}
*/ + struct localedef_t *newp; + + assert (generate == 1); + + newp = xcalloc (1, sizeof (struct localedef_t)); + newp->name = name; + newp->repertoire_name = repertoire_name; + + if (locales == NULL) + runp = locales = newp; + else + { + runp = locales; + while (runp->next != NULL) + runp = runp->next; + runp = runp->next = newp; + } + } + + if (generate + && (runp->needed & (1 << category)) != 0 + && (runp->avail & (1 << category)) == 0) + WITH_CUR_LOCALE (error (5, 0, _("\ +circular dependencies between locale definitions"))); + + if (copy_locale != NULL) + { + if (runp->categories[category].generic != NULL) + WITH_CUR_LOCALE (error (5, 0, _("\ +cannot add already read locale `%s' a second time"), name)); + else + runp->categories[category].generic = + copy_locale->categories[category].generic; + } + + runp->needed |= 1 << category; + + return runp; +} + + +struct localedef_t * +find_locale (int category, const char *name, const char *repertoire_name, + const struct charmap_t *charmap) +{ + struct localedef_t *result; + + /* Find the locale, but do not generate it since this would be a bug. */ + result = add_to_readlist (category, name, repertoire_name, 0, NULL); + + assert (result != NULL); + + if ((result->avail & (1 << category)) == 0 + && locfile_read (result, charmap) != 0) + WITH_CUR_LOCALE (error (4, errno, _("\ +cannot open locale definition file `%s'"), result->name)); + + return result; +} + + +struct localedef_t * +load_locale (int category, const char *name, const char *repertoire_name, + const struct charmap_t *charmap, struct localedef_t *copy_locale) +{ + struct localedef_t *result; + + /* Generate the locale if it does not exist. 
*/ + result = add_to_readlist (category, name, repertoire_name, 1, copy_locale); + + assert (result != NULL); + + if ((result->avail & (1 << category)) == 0 + && locfile_read (result, charmap) != 0) + WITH_CUR_LOCALE (error (4, errno, _("\ +cannot open locale definition file `%s'"), result->name)); + + return result; +} diff --git a/REORG.TODO/locale/programs/localedef.h b/REORG.TODO/locale/programs/localedef.h new file mode 100644 index 0000000000..74a2eba74a --- /dev/null +++ b/REORG.TODO/locale/programs/localedef.h @@ -0,0 +1,177 @@ +/* General definitions for localedef(1). + Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _LOCALEDEF_H +#define _LOCALEDEF_H 1 + +/* Get the basic locale definitions. */ +#include <errno.h> +#include <locale.h> +#include <stdbool.h> +#include <stddef.h> + +#include "repertoire.h" +#include "../locarchive.h" + + +/* We need a bitmask for the locales. 
*/ +enum +{ + CTYPE_LOCALE = 1 << LC_CTYPE, + NUMERIC_LOCALE = 1 << LC_NUMERIC, + TIME_LOCALE = 1 << LC_TIME, + COLLATE_LOCALE = 1 << LC_COLLATE, + MONETARY_LOCALE = 1 << LC_MONETARY, + MESSAGES_LOCALE = 1 << LC_MESSAGES, + PAPER_LOCALE = 1 << LC_PAPER, + NAME_LOCALE = 1 << LC_NAME, + ADDRESS_LOCALE = 1 << LC_ADDRESS, + TELEPHONE_LOCALE = 1 << LC_TELEPHONE, + MEASUREMENT_LOCALE = 1 << LC_MEASUREMENT, + IDENTIFICATION_LOCALE = 1 << LC_IDENTIFICATION, + ALL_LOCALES = (1 << LC_CTYPE + | 1 << LC_NUMERIC + | 1 << LC_TIME + | 1 << LC_COLLATE + | 1 << LC_MONETARY + | 1 << LC_MESSAGES + | 1 << LC_PAPER + | 1 << LC_NAME + | 1 << LC_ADDRESS + | 1 << LC_TELEPHONE + | 1 << LC_MEASUREMENT + | 1 << LC_IDENTIFICATION) +}; + + +/* Opaque types for the different locales. */ +struct locale_ctype_t; +struct locale_collate_t; +struct locale_monetary_t; +struct locale_numeric_t; +struct locale_time_t; +struct locale_messages_t; +struct locale_paper_t; +struct locale_name_t; +struct locale_address_t; +struct locale_telephone_t; +struct locale_measurement_t; +struct locale_identification_t; + + +/* Definitions for the locale. Every locale localedef knows about is + described by one of these records; the records are chained through + NEXT and looked up by NAME (see add_to_readlist). */ +struct localedef_t +{ + struct localedef_t *next; + /* Name of the locale; add_to_readlist compares it with strcmp. */ + const char *name; + /* Bit sets indexed by category (bit 1 << CATEGORY). NEEDED records + the categories which have been requested through add_to_readlist; + AVAIL is tested by find_locale and load_locale to decide whether + locfile_read still has to be called. */ + int needed; + int avail; + /* Per-category data, indexed by category number. GENERIC shares its + storage with the typed pointers, so a category can be copied + without knowing its type. */ + union + { + void *generic; + struct locale_ctype_t *ctype; + struct locale_collate_t *collate; + struct locale_monetary_t *monetary; + struct locale_numeric_t *numeric; + struct locale_time_t *time; + struct locale_messages_t *messages; + struct locale_paper_t *paper; + struct locale_name_t *name; + struct locale_address_t *address; + struct locale_telephone_t *telephone; + struct locale_measurement_t *measurement; + struct locale_identification_t *identification; + } categories[__LC_LAST]; + + size_t len[__LC_LAST]; + + const char *copy_name[__LC_LAST]; + + const char *repertoire_name; +}; + + +/* Global variables of the localedef program. 
*/ +extern int verbose; +extern int be_quiet; +extern const char *repertoire_global; +extern int max_locarchive_open_retry; +extern bool no_archive; +extern const char *alias_file; + + +/* Prototypes for a few program-wide used functions. */ +#include <programs/xmalloc.h> + + +/* Wrapper to switch LC_CTYPE back to the locale specified in the + environment for output. STMT is executed with LC_CTYPE taken from + the environment and with errno restored to the value it had on + entry; afterwards LC_CTYPE is set back to its previous value. + NOTE(review): cur_locale_ is the pointer returned by + setlocale (LC_CTYPE, NULL); ISO C allows a later setlocale call to + overwrite that string, so confirm this is safe with the libc in + use. */ +#define WITH_CUR_LOCALE(stmt) \ + do { \ + int saved_errno = errno; \ + const char *cur_locale_ = setlocale (LC_CTYPE, NULL); \ + setlocale (LC_CTYPE, ""); \ + errno = saved_errno; \ + stmt; \ + setlocale (LC_CTYPE, cur_locale_); \ + } while (0) + + +/* Mark category LOCALE of the locale NAME as to be read. If NAME is + not yet on the list a new entry is appended, which requires GENERATE + to be 1. */ +extern struct localedef_t *add_to_readlist (int locale, const char *name, + const char *repertoire_name, + int generate, + struct localedef_t *copy_locale); + +/* Find the information for the locale NAME, which must already be on + the list maintained by add_to_readlist. */ +extern struct localedef_t *find_locale (int locale, const char *name, + const char *repertoire_name, + const struct charmap_t *charmap); + +/* Load (if necessary) the information for the locale NAME. */ +extern struct localedef_t *load_locale (int locale, const char *name, + const char *repertoire_name, + const struct charmap_t *charmap, + struct localedef_t *copy_locale); + + +/* Open the locale archive. */ +extern void open_archive (struct locarhandle *ah, bool readonly); + +/* Close the locale archive. */ +extern void close_archive (struct locarhandle *ah); + +/* Add given locale data to the archive. */ +extern int add_locale_to_archive (struct locarhandle *ah, const char *name, + locale_data_t data, bool replace); + +/* Add content of named directories to locale archive. */ +extern int add_locales_to_archive (size_t nlist, char *list[], bool replace); + +/* Remove named locales from archive. */ +extern int delete_locales_from_archive (size_t nlist, char *list[]); + +/* List content of locale archive. If FNAME is non-null use that as + the locale archive to list, otherwise the default. 
*/ +extern void show_archive_content (const char *fname, + int verbose) __attribute__ ((noreturn)); + +#endif /* localedef.h */ diff --git a/REORG.TODO/locale/programs/locarchive.c b/REORG.TODO/locale/programs/locarchive.c new file mode 100644 index 0000000000..f67b7b8d99 --- /dev/null +++ b/REORG.TODO/locale/programs/locarchive.c @@ -0,0 +1,1757 @@ +/* Copyright (C) 2002-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 2002. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <assert.h> +#include <dirent.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <inttypes.h> +#include <libintl.h> +#include <locale.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdio_ext.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/param.h> +#include <sys/shm.h> +#include <sys/stat.h> + +#include <libc-mmap.h> +#include <libc-pointer-arith.h> +#include "../../crypt/md5.h" +#include "../localeinfo.h" +#include "../locarchive.h" +#include "localedef.h" +#include "locfile.h" + +/* Define the hash function. We define the function as static inline. + We must change the name so as not to conflict with simple-hash.h. 
*/ +#define compute_hashval static archive_hashval +#define hashval_t uint32_t +#include "hashval.h" +#undef compute_hashval + +extern const char *output_prefix; + +#define ARCHIVE_NAME COMPLOCALEDIR "/locale-archive" + +static const char *locnames[] = + { +#define DEFINE_CATEGORY(category, category_name, items, a) \ + [category] = category_name, +#include "categories.def" +#undef DEFINE_CATEGORY + }; + + +/* Initial sizes of the tables of a newly created archive; create_archive + stores them (rounded up to a prime for the two hash tables) in the + archive header. */ +#define INITIAL_NUM_NAMES 900 +#define INITIAL_SIZE_STRINGS 7500 +#define INITIAL_NUM_LOCREC 420 +#define INITIAL_NUM_SUMS 2000 + + +/* Get and set values (possibly endian-swapped) in structures mapped + from or written directly to locale archives. */ +#define GET(FIELD) maybe_swap_uint32 (FIELD) +#define SET(FIELD, VALUE) ((FIELD) = maybe_swap_uint32 (VALUE)) +#define INC(FIELD, INCREMENT) SET (FIELD, GET (FIELD) + (INCREMENT)) + + +/* Size of the reserved address space area. The expansion is + parenthesized so that the macro keeps its value inside any + surrounding expression. */ +#define RESERVE_MMAP_SIZE (512 * 1024 * 1024) + +/* To prepare for enlargements of the mmaped area reserve some address + space. On some machines, being a file mapping rather than an anonymous + mapping affects the address selection. So do this mapping from the + actual file, even though it's only a dummy to reserve address space. 
*/ +static void * +prepare_address_space (int fd, size_t total, size_t *reserved, int *xflags, + void **mmap_base, size_t *mmap_len) +{ /* Only reserve address space when TOTAL is smaller than the + reservation. On success the whole reservation is recorded in + *MMAP_BASE and *MMAP_LEN, *XFLAGS is set to MAP_FIXED and the + aligned start of the reservation is returned. */ + if (total < RESERVE_MMAP_SIZE) + { + void *p = mmap64 (NULL, RESERVE_MMAP_SIZE, PROT_NONE, MAP_SHARED, fd, 0); + if (p != MAP_FAILED) + { + void *aligned_p = PTR_ALIGN_UP (p, MAP_FIXED_ALIGNMENT); + size_t align_adjust = aligned_p - p; + *mmap_base = p; + *mmap_len = RESERVE_MMAP_SIZE; + assert (align_adjust < RESERVE_MMAP_SIZE); + *reserved = RESERVE_MMAP_SIZE - align_adjust; + *xflags = MAP_FIXED; + return aligned_p; + } + } + /* Nothing was reserved (TOTAL is too large or the mapping failed): + report TOTAL as the reserved size, request no extra mmap flags + and return NULL. */ + *reserved = total; + *xflags = 0; + *mmap_base = NULL; + *mmap_len = 0; + return NULL; +} + +/* Create a new, empty archive under the name ARCHIVEFNAME and fill in + AH for it. The file is built under a temporary name first. */ +static void +create_archive (const char *archivefname, struct locarhandle *ah) +{ + int fd; + char fname[strlen (archivefname) + sizeof (".XXXXXX")]; + struct locarhead head; + size_t total; + + strcpy (stpcpy (fname, archivefname), ".XXXXXX"); + + /* Create a temporary file in the correct directory. */ + fd = mkstemp (fname); + if (fd == -1) + error (EXIT_FAILURE, errno, _("cannot create temporary file: %s"), fname); + + /* Create the initial content of the archive. 
*/ + SET (head.magic, AR_MAGIC); + SET (head.serial, 0); + SET (head.namehash_offset, sizeof (struct locarhead)); + SET (head.namehash_used, 0); + SET (head.namehash_size, next_prime (INITIAL_NUM_NAMES)); + + SET (head.string_offset, + (GET (head.namehash_offset) + + GET (head.namehash_size) * sizeof (struct namehashent))); + SET (head.string_used, 0); + SET (head.string_size, INITIAL_SIZE_STRINGS); + + SET (head.locrectab_offset, + GET (head.string_offset) + GET (head.string_size)); + SET (head.locrectab_used, 0); + SET (head.locrectab_size, INITIAL_NUM_LOCREC); + + SET (head.sumhash_offset, + (GET (head.locrectab_offset) + + GET (head.locrectab_size) * sizeof (struct locrecent))); + SET (head.sumhash_used, 0); + SET (head.sumhash_size, next_prime (INITIAL_NUM_SUMS)); + + total = (GET (head.sumhash_offset) + + GET (head.sumhash_size) * sizeof (struct sumhashent)); + + /* Write out the header and create room for the other data structures. */ + if (TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head))) != sizeof (head)) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot initialize archive file")); + } + + if (ftruncate64 (fd, total) != 0) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot resize archive file")); + } + + size_t reserved, mmap_len; + int xflags; + void *mmap_base; + void *p = prepare_address_space (fd, total, &reserved, &xflags, &mmap_base, + &mmap_len); + + /* Map the header and all the administration data structures. */ + p = mmap64 (p, total, PROT_READ | PROT_WRITE, MAP_SHARED | xflags, fd, 0); + if (p == MAP_FAILED) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot map archive header")); + } + + /* Now try to rename it. We don't use the rename function since + this would overwrite a file which has been created in + parallel. */ + if (link (fname, archivefname) == -1) + { + int errval = errno; + + /* We cannot use the just created file. 
*/ + close (fd); + unlink (fname); + + if (errval == EEXIST) + { + /* There is already an archive. Must have been a localedef run + which happened in parallel. Simply open this file then. */ + open_archive (ah, false); + return; + } + + error (EXIT_FAILURE, errval, _("failed to create new locale archive")); + } + + /* Remove the temporary name. */ + unlink (fname); + + /* Make the file globally readable. */ + if (fchmod (fd, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH) == -1) + { + int errval = errno; + unlink (archivefname); + error (EXIT_FAILURE, errval, + _("cannot change mode of new locale archive")); + } + + ah->fname = NULL; + ah->fd = fd; + ah->mmap_base = mmap_base; + ah->mmap_len = mmap_len; + ah->addr = p; + ah->mmaped = total; + ah->reserved = reserved; +} + + +/* This structure and qsort comparator function are used below to sort an + old archive's locrec table in order of data position in the file. */ +struct oldlocrecent +{ + unsigned int cnt; + struct locrecent *locrec; +}; + +static int +oldlocrecentcmp (const void *a, const void *b) +{ + struct locrecent *la = ((const struct oldlocrecent *) a)->locrec; + struct locrecent *lb = ((const struct oldlocrecent *) b)->locrec; + uint32_t start_a = -1, end_a = 0; + uint32_t start_b = -1, end_b = 0; + int cnt; + + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + if (GET (la->record[cnt].offset) < start_a) + start_a = GET (la->record[cnt].offset); + if (GET (la->record[cnt].offset) + GET (la->record[cnt].len) > end_a) + end_a = GET (la->record[cnt].offset) + GET (la->record[cnt].len); + } + assert (start_a != (uint32_t)-1); + assert (end_a != 0); + + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + if (GET (lb->record[cnt].offset) < start_b) + start_b = GET (lb->record[cnt].offset); + if (GET (lb->record[cnt].offset) + GET (lb->record[cnt].len) > end_b) + end_b = GET (lb->record[cnt].offset) + GET (lb->record[cnt].len); + } + assert (start_b != (uint32_t)-1); + assert (end_b != 0); + + if 
(start_a != start_b) + return (int)start_a - (int)start_b; + return (int)end_a - (int)end_b; +} + + +/* forward decls for below */ +static uint32_t add_locale (struct locarhandle *ah, const char *name, + locale_data_t data, bool replace); +static void add_alias (struct locarhandle *ah, const char *alias, + bool replace, const char *oldname, + uint32_t *locrec_offset_p); + + +static bool +file_data_available_p (struct locarhandle *ah, uint32_t offset, uint32_t size) +{ + if (offset < ah->mmaped && offset + size <= ah->mmaped) + return true; + + struct stat64 st; + if (fstat64 (ah->fd, &st) != 0) + return false; + + if (st.st_size > ah->reserved) + return false; + + size_t start = ALIGN_DOWN (ah->mmaped, MAP_FIXED_ALIGNMENT); + void *p = mmap64 (ah->addr + start, st.st_size - start, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, + ah->fd, start); + if (p == MAP_FAILED) + { + ah->mmaped = start; + return false; + } + + ah->mmaped = st.st_size; + return true; +} + + +static int +compare_from_file (struct locarhandle *ah, void *p1, uint32_t offset2, + uint32_t size) +{ + void *p2 = xmalloc (size); + if (pread (ah->fd, p2, size, offset2) != size) + WITH_CUR_LOCALE (error (4, errno, + _("cannot read data from locale archive"))); + + int res = memcmp (p1, p2, size); + free (p2); + return res; +} + + +static void +enlarge_archive (struct locarhandle *ah, const struct locarhead *head) +{ + struct stat64 st; + int fd; + struct locarhead newhead; + size_t total; + unsigned int cnt, loccnt; + struct namehashent *oldnamehashtab; + struct locarhandle new_ah; + size_t prefix_len = output_prefix ? strlen (output_prefix) : 0; + char archivefname[prefix_len + sizeof (ARCHIVE_NAME)]; + char fname[prefix_len + sizeof (ARCHIVE_NAME) + sizeof (".XXXXXX") - 1]; + + if (output_prefix) + memcpy (archivefname, output_prefix, prefix_len); + strcpy (archivefname + prefix_len, ARCHIVE_NAME); + strcpy (stpcpy (fname, archivefname), ".XXXXXX"); + + /* Not all of the old file has to be mapped. 
Change this now this + we will have to access the whole content. */ + if (fstat64 (ah->fd, &st) != 0) + enomap: + error (EXIT_FAILURE, errno, _("cannot map locale archive file")); + + if (st.st_size < ah->reserved) + ah->addr = mmap64 (ah->addr, st.st_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_FIXED, ah->fd, 0); + else + { + if (ah->mmap_base) + munmap (ah->mmap_base, ah->mmap_len); + else + munmap (ah->addr, ah->reserved); + ah->addr = mmap64 (NULL, st.st_size, PROT_READ | PROT_WRITE, + MAP_SHARED, ah->fd, 0); + ah->reserved = st.st_size; + ah->mmap_base = NULL; + ah->mmap_len = 0; + head = ah->addr; + } + if (ah->addr == MAP_FAILED) + goto enomap; + ah->mmaped = st.st_size; + + /* Create a temporary file in the correct directory. */ + fd = mkstemp (fname); + if (fd == -1) + error (EXIT_FAILURE, errno, _("cannot create temporary file: %s"), fname); + + /* Copy the existing head information. */ + newhead = *head; + + /* Create the new archive header. The sizes of the various tables + should be double from what is currently used. */ + SET (newhead.namehash_size, + MAX (next_prime (2 * GET (newhead.namehash_used)), + GET (newhead.namehash_size))); + if (verbose) + printf ("name: size: %u, used: %d, new: size: %u\n", + GET (head->namehash_size), + GET (head->namehash_used), GET (newhead.namehash_size)); + + SET (newhead.string_offset, (GET (newhead.namehash_offset) + + (GET (newhead.namehash_size) + * sizeof (struct namehashent)))); + /* Keep the string table size aligned to 4 bytes, so that + all the struct { uint32_t } types following are happy. 
*/ + SET (newhead.string_size, MAX ((2 * GET (newhead.string_used) + 3) & -4, + GET (newhead.string_size))); + + SET (newhead.locrectab_offset, + GET (newhead.string_offset) + GET (newhead.string_size)); + SET (newhead.locrectab_size, MAX (2 * GET (newhead.locrectab_used), + GET (newhead.locrectab_size))); + + SET (newhead.sumhash_offset, (GET (newhead.locrectab_offset) + + (GET (newhead.locrectab_size) + * sizeof (struct locrecent)))); + SET (newhead.sumhash_size, + MAX (next_prime (2 * GET (newhead.sumhash_used)), + GET (newhead.sumhash_size))); + + total = (GET (newhead.sumhash_offset) + + GET (newhead.sumhash_size) * sizeof (struct sumhashent)); + + /* The new file is empty now. */ + SET (newhead.namehash_used, 0); + SET (newhead.string_used, 0); + SET (newhead.locrectab_used, 0); + SET (newhead.sumhash_used, 0); + + /* Write out the header and create room for the other data structures. */ + if (TEMP_FAILURE_RETRY (write (fd, &newhead, sizeof (newhead))) + != sizeof (newhead)) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot initialize archive file")); + } + + if (ftruncate64 (fd, total) != 0) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot resize archive file")); + } + + size_t reserved, mmap_len; + int xflags; + void *mmap_base; + void *p = prepare_address_space (fd, total, &reserved, &xflags, &mmap_base, + &mmap_len); + + /* Map the header and all the administration data structures. */ + p = mmap64 (p, total, PROT_READ | PROT_WRITE, MAP_SHARED | xflags, fd, 0); + if (p == MAP_FAILED) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot map archive header")); + } + + /* Lock the new file. 
*/ + if (lockf64 (fd, F_LOCK, total) != 0) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot lock new archive")); + } + + new_ah.mmaped = total; + new_ah.mmap_base = mmap_base; + new_ah.mmap_len = mmap_len; + new_ah.addr = p; + new_ah.fd = fd; + new_ah.reserved = reserved; + + /* Walk through the hash name hash table to find out what data is + still referenced and transfer it into the new file. */ + oldnamehashtab = (struct namehashent *) ((char *) ah->addr + + GET (head->namehash_offset)); + + /* Sort the old locrec table in order of data position. */ + struct oldlocrecent oldlocrecarray[GET (head->namehash_size)]; + for (cnt = 0, loccnt = 0; cnt < GET (head->namehash_size); ++cnt) + if (GET (oldnamehashtab[cnt].locrec_offset) != 0) + { + oldlocrecarray[loccnt].cnt = cnt; + oldlocrecarray[loccnt++].locrec + = (struct locrecent *) ((char *) ah->addr + + GET (oldnamehashtab[cnt].locrec_offset)); + } + qsort (oldlocrecarray, loccnt, sizeof (struct oldlocrecent), + oldlocrecentcmp); + + uint32_t last_locrec_offset = 0; + for (cnt = 0; cnt < loccnt; ++cnt) + { + /* Insert this entry in the new hash table. 
*/ + locale_data_t old_data; + unsigned int idx; + struct locrecent *oldlocrec = oldlocrecarray[cnt].locrec; + + for (idx = 0; idx < __LC_LAST; ++idx) + if (idx != LC_ALL) + { + old_data[idx].size = GET (oldlocrec->record[idx].len); + old_data[idx].addr + = ((char *) ah->addr + GET (oldlocrec->record[idx].offset)); + + __md5_buffer (old_data[idx].addr, old_data[idx].size, + old_data[idx].sum); + } + + if (cnt > 0 && oldlocrecarray[cnt - 1].locrec == oldlocrec) + { + const char *oldname + = ((char *) ah->addr + + GET (oldnamehashtab[oldlocrecarray[cnt + - 1].cnt].name_offset)); + + add_alias + (&new_ah, + ((char *) ah->addr + + GET (oldnamehashtab[oldlocrecarray[cnt].cnt].name_offset)), + 0, oldname, &last_locrec_offset); + continue; + } + + last_locrec_offset = + add_locale + (&new_ah, + ((char *) ah->addr + + GET (oldnamehashtab[oldlocrecarray[cnt].cnt].name_offset)), + old_data, 0); + if (last_locrec_offset == 0) + error (EXIT_FAILURE, 0, _("cannot extend locale archive file")); + } + + /* Make the file globally readable. */ + if (fchmod (fd, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH) == -1) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, + _("cannot change mode of resized locale archive")); + } + + /* Rename the new file. */ + if (rename (fname, archivefname) != 0) + { + int errval = errno; + unlink (fname); + error (EXIT_FAILURE, errval, _("cannot rename new archive")); + } + + /* Close the old file. */ + close_archive (ah); + + /* Add the information for the new one. */ + *ah = new_ah; +} + + +void +open_archive (struct locarhandle *ah, bool readonly) +{ + struct stat64 st; + struct stat64 st2; + int fd; + struct locarhead head; + int retry = 0; + size_t prefix_len = output_prefix ? strlen (output_prefix) : 0; + char default_fname[prefix_len + sizeof (ARCHIVE_NAME)]; + const char *archivefname = ah->fname; + + /* If ah has a non-NULL fname open that otherwise open the default. 
*/ + if (archivefname == NULL) + { + archivefname = default_fname; + if (output_prefix) + memcpy (default_fname, output_prefix, prefix_len); + strcpy (default_fname + prefix_len, ARCHIVE_NAME); + } + + while (1) + { + /* Open the archive. We must have exclusive write access. */ + fd = open64 (archivefname, readonly ? O_RDONLY : O_RDWR); + if (fd == -1) + { + /* Maybe the file does not yet exist? If we are opening + the default locale archive we ignore the failure and + list an empty archive, otherwise we print an error + and exit. */ + if (errno == ENOENT && archivefname == default_fname) + { + if (readonly) + { + static const struct locarhead nullhead = + { + .namehash_used = 0, + .namehash_offset = 0, + .namehash_size = 0 + }; + + ah->addr = (void *) &nullhead; + ah->fd = -1; + } + else + create_archive (archivefname, ah); + + return; + } + else + error (EXIT_FAILURE, errno, _("cannot open locale archive \"%s\""), + archivefname); + } + + if (fstat64 (fd, &st) < 0) + error (EXIT_FAILURE, errno, _("cannot stat locale archive \"%s\""), + archivefname); + + if (!readonly && lockf64 (fd, F_LOCK, sizeof (struct locarhead)) == -1) + { + close (fd); + + if (retry++ < max_locarchive_open_retry) + { + struct timespec req; + + /* Wait for a bit. */ + req.tv_sec = 0; + req.tv_nsec = 1000000 * (random () % 500 + 1); + (void) nanosleep (&req, NULL); + + continue; + } + + error (EXIT_FAILURE, errno, _("cannot lock locale archive \"%s\""), + archivefname); + } + + /* One more check. Maybe another process replaced the archive file + with a new, larger one since we opened the file. */ + if (stat64 (archivefname, &st2) == -1 + || st.st_dev != st2.st_dev + || st.st_ino != st2.st_ino) + { + (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead)); + close (fd); + continue; + } + + /* Leave the loop. */ + break; + } + + /* Read the header. 
*/ + if (TEMP_FAILURE_RETRY (read (fd, &head, sizeof (head))) != sizeof (head)) + { + (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead)); + error (EXIT_FAILURE, errno, _("cannot read archive header")); + } + + ah->fd = fd; + ah->mmaped = st.st_size; + + size_t reserved, mmap_len; + int xflags; + void *mmap_base; + void *p = prepare_address_space (fd, st.st_size, &reserved, &xflags, + &mmap_base, &mmap_len); + + /* Map the entire file. We might need to compare the category data + in the file with the newly added data. */ + ah->addr = mmap64 (p, st.st_size, PROT_READ | (readonly ? 0 : PROT_WRITE), + MAP_SHARED | xflags, fd, 0); + if (ah->addr == MAP_FAILED) + { + (void) lockf64 (fd, F_ULOCK, sizeof (struct locarhead)); + error (EXIT_FAILURE, errno, _("cannot map archive header")); + } + ah->reserved = reserved; + ah->mmap_base = mmap_base; + ah->mmap_len = mmap_len; +} + + +void +close_archive (struct locarhandle *ah) +{ + if (ah->fd != -1) + { + if (ah->mmap_base) + munmap (ah->mmap_base, ah->mmap_len); + else + munmap (ah->addr, ah->reserved); + close (ah->fd); + } +} + +#include "../../intl/explodename.c" +#include "../../intl/l10nflist.c" + +static struct namehashent * +insert_name (struct locarhandle *ah, + const char *name, size_t name_len, bool replace) +{ + const struct locarhead *const head = ah->addr; + struct namehashent *namehashtab + = (struct namehashent *) ((char *) ah->addr + + GET (head->namehash_offset)); + unsigned int insert_idx, idx, incr; + + /* Hash value of the locale name. */ + uint32_t hval = archive_hashval (name, name_len); + + insert_idx = -1; + idx = hval % GET (head->namehash_size); + incr = 1 + hval % (GET (head->namehash_size) - 2); + + /* If the name_offset field is zero this means this is a + deleted entry and therefore no entry can be found. 
*/ + while (GET (namehashtab[idx].name_offset) != 0) + { + if (GET (namehashtab[idx].hashval) == hval + && (strcmp (name, + (char *) ah->addr + GET (namehashtab[idx].name_offset)) + == 0)) + { + /* Found the entry. */ + if (GET (namehashtab[idx].locrec_offset) != 0 && ! replace) + { + if (! be_quiet) + error (0, 0, _("locale '%s' already exists"), name); + return NULL; + } + + break; + } + + if (GET (namehashtab[idx].hashval) == hval && ! be_quiet) + { + error (0, 0, "hash collision (%u) %s, %s", + hval, name, + (char *) ah->addr + GET (namehashtab[idx].name_offset)); + } + + /* Remember the first place we can insert the new entry. */ + if (GET (namehashtab[idx].locrec_offset) == 0 && insert_idx == -1) + insert_idx = idx; + + idx += incr; + if (idx >= GET (head->namehash_size)) + idx -= GET (head->namehash_size); + } + + /* Add as early as possible. */ + if (insert_idx != -1) + idx = insert_idx; + + SET (namehashtab[idx].hashval, hval); /* no-op if replacing an old entry. */ + return &namehashtab[idx]; +} + +static void +add_alias (struct locarhandle *ah, const char *alias, bool replace, + const char *oldname, uint32_t *locrec_offset_p) +{ + uint32_t locrec_offset = *locrec_offset_p; + struct locarhead *head = ah->addr; + const size_t name_len = strlen (alias); + struct namehashent *namehashent = insert_name (ah, alias, strlen (alias), + replace); + if (namehashent == NULL && ! replace) + return; + + if (GET (namehashent->name_offset) == 0) + { + /* We are adding a new hash entry for this alias. + Determine whether we have to resize the file. */ + if (GET (head->string_used) + name_len + 1 > GET (head->string_size) + || (100 * GET (head->namehash_used) + > 75 * GET (head->namehash_size))) + { + /* The current archive is not large enough. */ + enlarge_archive (ah, head); + + /* The locrecent might have moved, so we have to look up + the old name afresh. 
*/ + namehashent = insert_name (ah, oldname, strlen (oldname), true); + assert (GET (namehashent->name_offset) != 0); + assert (GET (namehashent->locrec_offset) != 0); + *locrec_offset_p = GET (namehashent->locrec_offset); + + /* Tail call to try the whole thing again. */ + add_alias (ah, alias, replace, oldname, locrec_offset_p); + return; + } + + /* Add the name string. */ + memcpy (ah->addr + GET (head->string_offset) + GET (head->string_used), + alias, name_len + 1); + SET (namehashent->name_offset, + GET (head->string_offset) + GET (head->string_used)); + INC (head->string_used, name_len + 1); + + INC (head->namehash_used, 1); + } + + if (GET (namehashent->locrec_offset) != 0) + { + /* Replacing an existing entry. + Mark that we are no longer using the old locrecent. */ + struct locrecent *locrecent + = (struct locrecent *) ((char *) ah->addr + + GET (namehashent->locrec_offset)); + INC (locrecent->refs, -1); + } + + /* Point this entry at the locrecent installed for the main name. */ + SET (namehashent->locrec_offset, locrec_offset); +} + +/* Order pointers to struct locale_category_data by ascending SIZE. + A and B point to the array elements, i.e. to the pointers. */ +static int /* qsort comparator used below */ +cmpcategorysize (const void *a, const void *b) +{ + /* NULL entries (add_locale stores one in the LC_ALL slot) sort to + the end. */ + if (*(const void **) a == NULL) + return 1; + if (*(const void **) b == NULL) + return -1; + + /* Compare explicitly rather than returning the difference: the + difference of two sizes may not be representable as int, and + truncating it can yield the wrong sign. */ + if ((*(const struct locale_category_data **) a)->size + < (*(const struct locale_category_data **) b)->size) + return -1; + return ((*(const struct locale_category_data **) a)->size + > (*(const struct locale_category_data **) b)->size); +} + +/* Check the content of the archive for duplicates. Add the content + of the files if necessary. Returns the locrec_offset. */ +static uint32_t +add_locale (struct locarhandle *ah, + const char *name, locale_data_t data, bool replace) +{ + /* First look for the name. If it already exists and we are not + supposed to replace it don't do anything. If it does not exist + we have to allocate a new locale record. 
*/ + size_t name_len = strlen (name); + uint32_t file_offsets[__LC_LAST]; + unsigned int num_new_offsets = 0; + struct sumhashent *sumhashtab; + uint32_t hval; + unsigned int cnt, idx; + struct locarhead *head; + struct namehashent *namehashent; + unsigned int incr; + struct locrecent *locrecent; + off64_t lastoffset; + char *ptr; + struct locale_category_data *size_order[__LC_LAST]; + /* Page size alignment is a minor optimization for locality; use a + common value here rather than making the localedef output depend + on the page size of the system on which localedef is run. See + <https://sourceware.org/glibc/wiki/Development_Todo/Master#Locale_archive_alignment> + for more discussion. */ + const size_t pagesz = 4096; + int small_mask; + + head = ah->addr; + sumhashtab = (struct sumhashent *) ((char *) ah->addr + + GET (head->sumhash_offset)); + + memset (file_offsets, 0, sizeof (file_offsets)); + + size_order[LC_ALL] = NULL; + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + size_order[cnt] = &data[cnt]; + + /* Sort the array in ascending order of data size. */ + qsort (size_order, __LC_LAST, sizeof size_order[0], cmpcategorysize); + + small_mask = 0; + data[LC_ALL].size = 0; + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (size_order[cnt] != NULL) + { + const size_t rounded_size = (size_order[cnt]->size + 15) & -16; + if (data[LC_ALL].size + rounded_size > 2 * pagesz) + { + /* This category makes the small-categories block + stop being small, so this is the end of the road. */ + do + size_order[cnt++] = NULL; + while (cnt < __LC_LAST); + break; + } + data[LC_ALL].size += rounded_size; + small_mask |= 1 << (size_order[cnt] - data); + } + + /* Copy the data for all the small categories into the LC_ALL + pseudo-category. 
*/ + + data[LC_ALL].addr = alloca (data[LC_ALL].size); + memset (data[LC_ALL].addr, 0, data[LC_ALL].size); + + ptr = data[LC_ALL].addr; + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (small_mask & (1 << cnt)) + { + memcpy (ptr, data[cnt].addr, data[cnt].size); + ptr += (data[cnt].size + 15) & -16; + } + __md5_buffer (data[LC_ALL].addr, data[LC_ALL].size, data[LC_ALL].sum); + + /* For each locale category data set determine whether the same data + is already somewhere in the archive. */ + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (small_mask == 0 ? cnt != LC_ALL : !(small_mask & (1 << cnt))) + { + ++num_new_offsets; + + /* Compute the hash value of the checksum to determine a + starting point for the search in the MD5 hash value + table. */ + hval = archive_hashval (data[cnt].sum, 16); + + idx = hval % GET (head->sumhash_size); + incr = 1 + hval % (GET (head->sumhash_size) - 2); + + while (GET (sumhashtab[idx].file_offset) != 0) + { + if (memcmp (data[cnt].sum, sumhashtab[idx].sum, 16) == 0) + { + /* Check the content, there could be a collision of + the hash sum. + + Unfortunately the sumhashent record does not include + the size of the stored data. So we have to search for + it. */ + locrecent + = (struct locrecent *) ((char *) ah->addr + + GET (head->locrectab_offset)); + size_t iloc; + for (iloc = 0; iloc < GET (head->locrectab_used); ++iloc) + if (GET (locrecent[iloc].refs) != 0 + && (GET (locrecent[iloc].record[cnt].offset) + == GET (sumhashtab[idx].file_offset))) + break; + + if (iloc != GET (head->locrectab_used) + && data[cnt].size == GET (locrecent[iloc].record[cnt].len) + /* We have to compare the content. Either we can + have the data mmaped or we have to read from + the file. */ + && (file_data_available_p + (ah, GET (sumhashtab[idx].file_offset), + data[cnt].size) + ? 
memcmp (data[cnt].addr, + (char *) ah->addr + + GET (sumhashtab[idx].file_offset), + data[cnt].size) == 0 + : compare_from_file (ah, data[cnt].addr, + GET (sumhashtab[idx].file_offset), + data[cnt].size) == 0)) + { + /* Found it. */ + file_offsets[cnt] = GET (sumhashtab[idx].file_offset); + --num_new_offsets; + break; + } + } + + idx += incr; + if (idx >= GET (head->sumhash_size)) + idx -= GET (head->sumhash_size); + } + } + + /* Find a slot for the locale name in the hash table. */ + namehashent = insert_name (ah, name, name_len, replace); + if (namehashent == NULL) /* Already exists and !REPLACE. */ + return 0; + + /* Determine whether we have to resize the file. */ + if ((100 * (GET (head->sumhash_used) + num_new_offsets) + > 75 * GET (head->sumhash_size)) + || (GET (namehashent->locrec_offset) == 0 + && (GET (head->locrectab_used) == GET (head->locrectab_size) + || (GET (head->string_used) + name_len + 1 + > GET (head->string_size)) + || (100 * GET (head->namehash_used) + > 75 * GET (head->namehash_size))))) + { + /* The current archive is not large enough. */ + enlarge_archive (ah, head); + return add_locale (ah, name, data, replace); + } + + /* Add the locale data which is not yet in the archive. */ + for (cnt = 0, lastoffset = 0; cnt < __LC_LAST; ++cnt) + if ((small_mask == 0 ? cnt != LC_ALL : !(small_mask & (1 << cnt))) + && file_offsets[cnt] == 0) + { + /* The data for this section is not yet available in the + archive. Append it. */ + off64_t lastpos; + uint32_t md5hval; + + lastpos = lseek64 (ah->fd, 0, SEEK_END); + if (lastpos == (off64_t) -1) + error (EXIT_FAILURE, errno, _("cannot add to locale archive")); + + /* If block of small categories would cross page boundary, + align it unless it immediately follows a large category. 
*/ + if (cnt == LC_ALL && lastoffset != lastpos + && ((((lastpos & (pagesz - 1)) + data[cnt].size + pagesz - 1) + & -pagesz) + > ((data[cnt].size + pagesz - 1) & -pagesz))) + { + size_t sz = pagesz - (lastpos & (pagesz - 1)); + char *zeros = alloca (sz); + + memset (zeros, 0, sz); + if (TEMP_FAILURE_RETRY (write (ah->fd, zeros, sz) != sz)) + error (EXIT_FAILURE, errno, + _("cannot add to locale archive")); + + lastpos += sz; + } + + /* Align all data to a 16 byte boundary. */ + if ((lastpos & 15) != 0) + { + static const char zeros[15] = { 0, }; + + if (TEMP_FAILURE_RETRY (write (ah->fd, zeros, 16 - (lastpos & 15))) + != 16 - (lastpos & 15)) + error (EXIT_FAILURE, errno, _("cannot add to locale archive")); + + lastpos += 16 - (lastpos & 15); + } + + /* Remember the position. */ + file_offsets[cnt] = lastpos; + lastoffset = lastpos + data[cnt].size; + + /* Write the data. */ + if (TEMP_FAILURE_RETRY (write (ah->fd, data[cnt].addr, data[cnt].size)) + != data[cnt].size) + error (EXIT_FAILURE, errno, _("cannot add to locale archive")); + + /* Add the hash value to the hash table. */ + md5hval = archive_hashval (data[cnt].sum, 16); + + idx = md5hval % GET (head->sumhash_size); + incr = 1 + md5hval % (GET (head->sumhash_size) - 2); + + while (GET (sumhashtab[idx].file_offset) != 0) + { + idx += incr; + if (idx >= GET (head->sumhash_size)) + idx -= GET (head->sumhash_size); + } + + memcpy (sumhashtab[idx].sum, data[cnt].sum, 16); + SET (sumhashtab[idx].file_offset, file_offsets[cnt]); + + INC (head->sumhash_used, 1); + } + + lastoffset = file_offsets[LC_ALL]; + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (small_mask & (1 << cnt)) + { + file_offsets[cnt] = lastoffset; + lastoffset += (data[cnt].size + 15) & -16; + } + + if (GET (namehashent->name_offset) == 0) + { + /* Add the name string. 
*/ + memcpy ((char *) ah->addr + GET (head->string_offset) + + GET (head->string_used), + name, name_len + 1); + SET (namehashent->name_offset, + GET (head->string_offset) + GET (head->string_used)); + INC (head->string_used, name_len + 1); + INC (head->namehash_used, 1); + } + + /* Test the stored offset itself; the comparison must not be part of + the GET argument. */ + if (GET (namehashent->locrec_offset) == 0) + { + /* Allocate a name location record. */ + SET (namehashent->locrec_offset, (GET (head->locrectab_offset) + + (GET (head->locrectab_used) + * sizeof (struct locrecent)))); + INC (head->locrectab_used, 1); + locrecent = (struct locrecent *) ((char *) ah->addr + + GET (namehashent->locrec_offset)); + SET (locrecent->refs, 1); + } + else + { + /* If there are other aliases pointing to this locrecent, + we still need a new one. If not, reuse the old one. */ + + locrecent = (struct locrecent *) ((char *) ah->addr + + GET (namehashent->locrec_offset)); + if (GET (locrecent->refs) > 1) + { + INC (locrecent->refs, -1); + SET (namehashent->locrec_offset, (GET (head->locrectab_offset) + + (GET (head->locrectab_used) + * sizeof (struct locrecent)))); + INC (head->locrectab_used, 1); + locrecent + = (struct locrecent *) ((char *) ah->addr + + GET (namehashent->locrec_offset)); + SET (locrecent->refs, 1); + } + } + + /* Fill in the table with the locations of the locale data. */ + for (cnt = 0; cnt < __LC_LAST; ++cnt) + { + SET (locrecent->record[cnt].offset, file_offsets[cnt]); + SET (locrecent->record[cnt].len, data[cnt].size); + } + + return GET (namehashent->locrec_offset); +} + + +/* Check the content of the archive for duplicates. Add the content + of the files if necessary. Add all the names, possibly overwriting + old files. */ +int +add_locale_to_archive (struct locarhandle *ah, const char *name, + locale_data_t data, bool replace) +{ + char *normalized_name = NULL; + uint32_t locrec_offset; + + /* First analyze the name to decide how to archive it. 
*/ + const char *language; + const char *modifier; + const char *territory; + const char *codeset; + const char *normalized_codeset; + int mask = _nl_explode_name (strdupa (name), + &language, &modifier, &territory, + &codeset, &normalized_codeset); + if (mask == -1) + return -1; + + if (mask & XPG_NORM_CODESET) + { + /* This name contains a codeset in unnormalized form. + We will store it in the archive with a normalized name. + If the name cannot be allocated, give up rather than use + whatever asprintf left in NORMALIZED_NAME. */ + if (asprintf (&normalized_name, "%s%s%s.%s%s%s", + language, territory == NULL ? "" : "_", territory ?: "", + (mask & XPG_NORM_CODESET) ? normalized_codeset : codeset, + modifier == NULL ? "" : "@", modifier ?: "") < 0) + { + free ((char *) normalized_codeset); + return -1; + } + } + + /* This call does the main work. */ + locrec_offset = add_locale (ah, normalized_name ?: name, data, replace); + if (locrec_offset == 0) + { + free (normalized_name); + if (mask & XPG_NORM_CODESET) + free ((char *) normalized_codeset); + return -1; + } + + if ((mask & XPG_CODESET) == 0) + { + /* This name lacks a codeset, so determine the locale's codeset and + add an alias for its name with normalized codeset appended. */ + + const struct + { + unsigned int magic; + unsigned int nstrings; + unsigned int strindex[0]; + } *filedata = data[LC_CTYPE].addr; + codeset = (char *) filedata + + maybe_swap_uint32 (filedata->strindex[_NL_ITEM_INDEX + (_NL_CTYPE_CODESET_NAME)]); + char *normalized_codeset_name = NULL; + + normalized_codeset = _nl_normalize_codeset (codeset, strlen (codeset)); + mask |= XPG_NORM_CODESET; + + /* Report failure if either string could not be allocated; the + alias name must not be used in that case. */ + if (normalized_codeset == NULL + || asprintf (&normalized_codeset_name, "%s%s%s.%s%s%s", + language, territory == NULL ? "" : "_", territory ?: "", + normalized_codeset, + modifier == NULL ? "" : "@", modifier ?: "") < 0) + { + free (normalized_name); + free ((char *) normalized_codeset); + return -1; + } + + add_alias (ah, normalized_codeset_name, replace, + normalized_name ?: name, &locrec_offset); + free (normalized_codeset_name); + } + + /* Now read the locale.alias files looking for lines whose + right hand side matches our name after normalization. 
*/ + int result = 0; + if (alias_file != NULL) + { + FILE *fp; + fp = fopen (alias_file, "rm"); + if (fp == NULL) + error (1, errno, _("locale alias file `%s' not found"), + alias_file); + + /* No threads present. */ + __fsetlocking (fp, FSETLOCKING_BYCALLER); + + while (! feof_unlocked (fp)) + { + /* It is a reasonable approach to use a fix buffer here + because + a) we are only interested in the first two fields + b) these fields must be usable as file names and so must + not be that long */ + char buf[BUFSIZ]; + char *alias; + char *value; + char *cp; + + if (fgets_unlocked (buf, BUFSIZ, fp) == NULL) + /* EOF reached. */ + break; + + cp = buf; + /* Ignore leading white space. */ + while (isspace (cp[0]) && cp[0] != '\n') + ++cp; + + /* A leading '#' signals a comment line. */ + if (cp[0] != '\0' && cp[0] != '#' && cp[0] != '\n') + { + alias = cp++; + while (cp[0] != '\0' && !isspace (cp[0])) + ++cp; + /* Terminate alias name. */ + if (cp[0] != '\0') + *cp++ = '\0'; + + /* Now look for the beginning of the value. */ + while (isspace (cp[0])) + ++cp; + + if (cp[0] != '\0') + { + value = cp++; + while (cp[0] != '\0' && !isspace (cp[0])) + ++cp; + /* Terminate value. */ + if (cp[0] == '\n') + { + /* This has to be done to make the following + test for the end of line possible. We are + looking for the terminating '\n' which do not + overwrite here. */ + *cp++ = '\0'; + *cp = '\n'; + } + else if (cp[0] != '\0') + *cp++ = '\0'; + + /* Does this alias refer to our locale? We will + normalize the right hand side and compare the + elements of the normalized form. 
*/ + { + const char *rhs_language; + const char *rhs_modifier; + const char *rhs_territory; + const char *rhs_codeset; + const char *rhs_normalized_codeset; + int rhs_mask = _nl_explode_name (value, + &rhs_language, + &rhs_modifier, + &rhs_territory, + &rhs_codeset, + &rhs_normalized_codeset); + if (rhs_mask == -1) + { + result = -1; + goto out; + } + if (!strcmp (language, rhs_language) + && ((rhs_mask & XPG_CODESET) + /* He has a codeset, it must match normalized. */ + ? !strcmp ((mask & XPG_NORM_CODESET) + ? normalized_codeset : codeset, + (rhs_mask & XPG_NORM_CODESET) + ? rhs_normalized_codeset : rhs_codeset) + /* He has no codeset, we must also have none. */ + : (mask & XPG_CODESET) == 0) + /* Codeset (or lack thereof) matches. */ + && !strcmp (territory ?: "", rhs_territory ?: "") + && !strcmp (modifier ?: "", rhs_modifier ?: "")) + /* We have a winner. */ + add_alias (ah, alias, replace, + normalized_name ?: name, &locrec_offset); + if (rhs_mask & XPG_NORM_CODESET) + free ((char *) rhs_normalized_codeset); + } + } + } + + /* Possibly not the whole line fits into the buffer. + Ignore the rest of the line. */ + while (strchr (cp, '\n') == NULL) + { + cp = buf; + if (fgets_unlocked (buf, BUFSIZ, fp) == NULL) + /* Make sure the inner loop will be left. The outer + loop will exit at the `feof' test. */ + *cp = '\n'; + } + } + + out: + fclose (fp); + } + + free (normalized_name); + + if (mask & XPG_NORM_CODESET) + free ((char *) normalized_codeset); + + return result; +} + + +int +add_locales_to_archive (size_t nlist, char *list[], bool replace) +{ + struct locarhandle ah; + int result = 0; + + /* Open the archive. This call never returns if we cannot + successfully open the archive. */ + ah.fname = NULL; + open_archive (&ah, false); + + while (nlist-- > 0) + { + const char *fname = *list++; + size_t fnamelen = strlen (fname); + struct stat64 st; + DIR *dirp; + struct dirent64 *d; + int seen; + locale_data_t data; + int cnt; + + if (! 
be_quiet) + printf (_("Adding %s\n"), fname); + + /* First see whether this really is a directory and whether it + contains all the required locale category files. */ + if (stat64 (fname, &st) < 0) + { + error (0, 0, _("stat of \"%s\" failed: %s: ignored"), fname, + strerror (errno)); + continue; + } + if (!S_ISDIR (st.st_mode)) + { + error (0, 0, _("\"%s\" is no directory; ignored"), fname); + continue; + } + + dirp = opendir (fname); + if (dirp == NULL) + { + error (0, 0, _("cannot open directory \"%s\": %s: ignored"), + fname, strerror (errno)); + continue; + } + + seen = 0; + while ((d = readdir64 (dirp)) != NULL) + { + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + if (strcmp (d->d_name, locnames[cnt]) == 0) + { + unsigned char d_type; + + /* We have an object of the required name. If it's + a directory we have to look at a file with the + prefix "SYS_". Otherwise we have found what we + are looking for. */ +#ifdef _DIRENT_HAVE_D_TYPE + d_type = d->d_type; + + if (d_type != DT_REG) +#endif + { + char fullname[fnamelen + 2 * strlen (d->d_name) + 7]; + +#ifdef _DIRENT_HAVE_D_TYPE + if (d_type == DT_UNKNOWN) +#endif + { + strcpy (stpcpy (stpcpy (fullname, fname), "/"), + d->d_name); + + if (stat64 (fullname, &st) == -1) + /* We cannot stat the file, ignore it. */ + break; + + d_type = IFTODT (st.st_mode); + } + + if (d_type == DT_DIR) + { + /* We have to do more tests. The file is a + directory and it therefore must contain a + regular file with the same name except a + "SYS_" prefix. */ + char *t = stpcpy (stpcpy (fullname, fname), "/"); + strcpy (stpcpy (stpcpy (t, d->d_name), "/SYS_"), + d->d_name); + + if (stat64 (fullname, &st) == -1) + /* There is no SYS_* file or we cannot + access it. */ + break; + + d_type = IFTODT (st.st_mode); + } + } + + /* If we found a regular file (possibly after + following a symlink) we are successful. 
*/ + if (d_type == DT_REG) + ++seen; + break; + } + } + + closedir (dirp); + + if (seen != __LC_LAST - 1) + { + /* We don't have all locale category files. Ignore the name. */ + error (0, 0, _("incomplete set of locale files in \"%s\""), + fname); + continue; + } + + /* Add the files to the archive. To do this we first compute + sizes and the MD5 sums of all the files. */ + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + { + char fullname[fnamelen + 2 * strlen (locnames[cnt]) + 7]; + int fd; + + strcpy (stpcpy (stpcpy (fullname, fname), "/"), locnames[cnt]); + fd = open64 (fullname, O_RDONLY); + if (fd == -1 || fstat64 (fd, &st) == -1) + { + /* Cannot read the file. */ + if (fd != -1) + close (fd); + break; + } + + if (S_ISDIR (st.st_mode)) + { + char *t; + close (fd); + t = stpcpy (stpcpy (fullname, fname), "/"); + strcpy (stpcpy (stpcpy (t, locnames[cnt]), "/SYS_"), + locnames[cnt]); + + fd = open64 (fullname, O_RDONLY); + if (fd == -1 || fstat64 (fd, &st) == -1 + || !S_ISREG (st.st_mode)) + { + if (fd != -1) + close (fd); + break; + } + } + + /* Map the file. */ + data[cnt].addr = mmap64 (NULL, st.st_size, PROT_READ, MAP_SHARED, + fd, 0); + if (data[cnt].addr == MAP_FAILED) + { + /* Cannot map it. */ + close (fd); + break; + } + + data[cnt].size = st.st_size; + __md5_buffer (data[cnt].addr, st.st_size, data[cnt].sum); + + /* We don't need the file descriptor anymore. */ + close (fd); + } + + if (cnt != __LC_LAST) + { + while (cnt-- > 0) + if (cnt != LC_ALL) + munmap (data[cnt].addr, data[cnt].size); + + error (0, 0, _("cannot read all files in \"%s\": ignored"), fname); + + continue; + } + + result |= add_locale_to_archive (&ah, basename (fname), data, replace); + + for (cnt = 0; cnt < __LC_LAST; ++cnt) + if (cnt != LC_ALL) + munmap (data[cnt].addr, data[cnt].size); + } + + /* We are done. 
*/ + close_archive (&ah); + + return result; +} + + +int +delete_locales_from_archive (size_t nlist, char *list[]) +{ + struct locarhandle ah; + struct locarhead *head; + struct namehashent *namehashtab; + + /* Open the archive. This call never returns if we cannot + successfully open the archive. */ + ah.fname = NULL; + open_archive (&ah, false); + + head = ah.addr; + namehashtab = (struct namehashent *) ((char *) ah.addr + + GET (head->namehash_offset)); + + while (nlist-- > 0) + { + const char *locname = *list++; + uint32_t hval; + unsigned int idx; + unsigned int incr; + + /* Search for this locale in the archive. */ + hval = archive_hashval (locname, strlen (locname)); + + idx = hval % GET (head->namehash_size); + incr = 1 + hval % (GET (head->namehash_size) - 2); + + /* If the name_offset field is zero this means this is no + deleted entry and therefore no entry can be found. */ + while (GET (namehashtab[idx].name_offset) != 0) + { + if (GET (namehashtab[idx].hashval) == hval + && (strcmp (locname, + ((char *) ah.addr + + GET (namehashtab[idx].name_offset))) + == 0)) + { + /* Found the entry. Now mark it as removed by zero-ing + the reference to the locale record. */ + SET (namehashtab[idx].locrec_offset, 0); + break; + } + + idx += incr; + if (idx >= GET (head->namehash_size)) + idx -= GET (head->namehash_size); + } + + if (GET (namehashtab[idx].name_offset) == 0 && ! 
be_quiet) + error (0, 0, _("locale \"%s\" not in archive"), locname); + } + + close_archive (&ah); + + return 0; +} + + +struct nameent +{ + char *name; + uint32_t locrec_offset; +}; + + +struct dataent +{ + const unsigned char *sum; + uint32_t file_offset; + uint32_t nlink; +}; + + +static int +nameentcmp (const void *a, const void *b) +{ + return strcmp (((const struct nameent *) a)->name, + ((const struct nameent *) b)->name); +} + + +static int +dataentcmp (const void *a, const void *b) +{ + if (((const struct dataent *) a)->file_offset + < ((const struct dataent *) b)->file_offset) + return -1; + + if (((const struct dataent *) a)->file_offset + > ((const struct dataent *) b)->file_offset) + return 1; + + return 0; +} + + +void +show_archive_content (const char *fname, int verbose) +{ + struct locarhandle ah; + struct locarhead *head; + struct namehashent *namehashtab; + struct nameent *names; + size_t cnt, used; + + /* Open the archive. This call never returns if we cannot + successfully open the archive. */ + ah.fname = fname; + open_archive (&ah, true); + + head = ah.addr; + + names = (struct nameent *) xmalloc (GET (head->namehash_used) + * sizeof (struct nameent)); + + namehashtab = (struct namehashent *) ((char *) ah.addr + + GET (head->namehash_offset)); + for (cnt = used = 0; cnt < GET (head->namehash_size); ++cnt) + if (GET (namehashtab[cnt].locrec_offset) != 0) + { + assert (used < GET (head->namehash_used)); + names[used].name = ah.addr + GET (namehashtab[cnt].name_offset); + names[used++].locrec_offset = GET (namehashtab[cnt].locrec_offset); + } + + /* Sort the names. 
*/ + qsort (names, used, sizeof (struct nameent), nameentcmp); + + if (verbose) + { + struct dataent *files; + struct sumhashent *sumhashtab; + int sumused; + + files = (struct dataent *) xmalloc (GET (head->sumhash_used) + * sizeof (struct dataent)); + + sumhashtab = (struct sumhashent *) ((char *) ah.addr + + GET (head->sumhash_offset)); + for (cnt = sumused = 0; cnt < GET (head->sumhash_size); ++cnt) + if (GET (sumhashtab[cnt].file_offset) != 0) + { + assert (sumused < GET (head->sumhash_used)); + files[sumused].sum = (const unsigned char *) sumhashtab[cnt].sum; + files[sumused].file_offset = GET (sumhashtab[cnt].file_offset); + files[sumused++].nlink = 0; + } + + /* Sort by file locations. */ + qsort (files, sumused, sizeof (struct dataent), dataentcmp); + + /* Compute nlink fields. */ + for (cnt = 0; cnt < used; ++cnt) + { + struct locrecent *locrec; + int idx; + + locrec = (struct locrecent *) ((char *) ah.addr + + names[cnt].locrec_offset); + for (idx = 0; idx < __LC_LAST; ++idx) + if (GET (locrec->record[LC_ALL].offset) != 0 + ? (idx == LC_ALL + || (GET (locrec->record[idx].offset) + < GET (locrec->record[LC_ALL].offset)) + || ((GET (locrec->record[idx].offset) + + GET (locrec->record[idx].len)) + > (GET (locrec->record[LC_ALL].offset) + + GET (locrec->record[LC_ALL].len)))) + : idx != LC_ALL) + { + struct dataent *data, dataent; + + dataent.file_offset = GET (locrec->record[idx].offset); + data = (struct dataent *) bsearch (&dataent, files, sumused, + sizeof (struct dataent), + dataentcmp); + assert (data != NULL); + ++data->nlink; + } + } + + /* Print it. 
*/ + for (cnt = 0; cnt < used; ++cnt) + { + struct locrecent *locrec; + int idx, i; + + locrec = (struct locrecent *) ((char *) ah.addr + + names[cnt].locrec_offset); + for (idx = 0; idx < __LC_LAST; ++idx) + if (idx != LC_ALL) + { + struct dataent *data, dataent; + + dataent.file_offset = GET (locrec->record[idx].offset); + if (GET (locrec->record[LC_ALL].offset) != 0 + && (dataent.file_offset + >= GET (locrec->record[LC_ALL].offset)) + && (dataent.file_offset + GET (locrec->record[idx].len) + <= (GET (locrec->record[LC_ALL].offset) + + GET (locrec->record[LC_ALL].len)))) + dataent.file_offset = GET (locrec->record[LC_ALL].offset); + + data = (struct dataent *) bsearch (&dataent, files, sumused, + sizeof (struct dataent), + dataentcmp); + printf ("%6d %7x %3d%c ", + GET (locrec->record[idx].len), + GET (locrec->record[idx].offset), + data->nlink, + (dataent.file_offset + == GET (locrec->record[LC_ALL].offset)) + ? '+' : ' '); + for (i = 0; i < 16; i += 4) + printf ("%02x%02x%02x%02x", + data->sum[i], data->sum[i + 1], + data->sum[i + 2], data->sum[i + 3]); + printf (" %s/%s\n", names[cnt].name, + idx == LC_MESSAGES ? "LC_MESSAGES/SYS_LC_MESSAGES" + : locnames[idx]); + } + } + } + else + for (cnt = 0; cnt < used; ++cnt) + puts (names[cnt].name); + + close_archive (&ah); + + exit (EXIT_SUCCESS); +} diff --git a/REORG.TODO/locale/programs/locfile-kw.gperf b/REORG.TODO/locale/programs/locfile-kw.gperf new file mode 100644 index 0000000000..3605d15c8e --- /dev/null +++ b/REORG.TODO/locale/programs/locfile-kw.gperf @@ -0,0 +1,201 @@ +%{ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#include "locfile-token.h" +%} +struct keyword_t ; +%% +escape_char, tok_escape_char, 0 +comment_char, tok_comment_char, 0 +repertoiremap, tok_repertoiremap, 0 +include, tok_include, 0 +LC_CTYPE, tok_lc_ctype, 0 +END, tok_end, 0 +copy, tok_copy, 0 +upper, tok_upper, 0 +lower, tok_lower, 0 +alpha, tok_alpha, 0 +digit, tok_digit, 0 +outdigit, tok_outdigit, 0 +alnum, tok_alnum, 0 +space, tok_space, 0 +cntrl, tok_cntrl, 0 +punct, tok_punct, 0 +graph, tok_graph, 0 +print, tok_print, 0 +xdigit, tok_xdigit, 0 +blank, tok_blank, 0 +charclass, tok_charclass, 0 +class, tok_class, 0 +charconv, tok_charconv, 0 +toupper, tok_toupper, 0 +tolower, tok_tolower, 0 +map, tok_map, 0 +translit_start, tok_translit_start, 0 +translit_end, tok_translit_end, 0 +translit_ignore, tok_translit_ignore, 0 +default_missing, tok_default_missing, 0 +LC_COLLATE, tok_lc_collate, 0 +coll_weight_max, tok_coll_weight_max, 0 +section-symbol, tok_section_symbol, 0 +collating-element, tok_collating_element, 0 +collating-symbol, tok_collating_symbol, 0 +symbol-equivalence, tok_symbol_equivalence, 0 +script, tok_script, 0 +order_start, tok_order_start, 0 +order_end, tok_order_end, 0 +from, tok_from, 0 +forward, tok_forward, 0 +backward, tok_backward, 0 +position, tok_position, 0 +UNDEFINED, tok_undefined, 0 +IGNORE, tok_ignore, 0 +reorder-after, tok_reorder_after, 0 +reorder-end, tok_reorder_end, 0 +reorder-sections-after, tok_reorder_sections_after, 0 +reorder-sections-end, tok_reorder_sections_end, 0 +define, tok_define, 0 +undef, tok_undef, 0 +ifdef, tok_ifdef, 0 +else, tok_else, 0 +elifdef, 
tok_elifdef, 0 +elifndef, tok_elifndef, 0 +endif, tok_endif, 0 +LC_MONETARY, tok_lc_monetary, 0 +int_curr_symbol, tok_int_curr_symbol, 0 +currency_symbol, tok_currency_symbol, 0 +mon_decimal_point, tok_mon_decimal_point, 0 +mon_thousands_sep, tok_mon_thousands_sep, 0 +mon_grouping, tok_mon_grouping, 0 +positive_sign, tok_positive_sign, 0 +negative_sign, tok_negative_sign, 0 +int_frac_digits, tok_int_frac_digits, 0 +frac_digits, tok_frac_digits, 0 +p_cs_precedes, tok_p_cs_precedes, 0 +p_sep_by_space, tok_p_sep_by_space, 0 +n_cs_precedes, tok_n_cs_precedes, 0 +n_sep_by_space, tok_n_sep_by_space, 0 +p_sign_posn, tok_p_sign_posn, 0 +n_sign_posn, tok_n_sign_posn, 0 +int_p_cs_precedes, tok_int_p_cs_precedes, 0 +int_p_sep_by_space, tok_int_p_sep_by_space, 0 +int_n_cs_precedes, tok_int_n_cs_precedes, 0 +int_n_sep_by_space, tok_int_n_sep_by_space, 0 +int_p_sign_posn, tok_int_p_sign_posn, 0 +int_n_sign_posn, tok_int_n_sign_posn, 0 +duo_int_curr_symbol, tok_duo_int_curr_symbol, 0 +duo_currency_symbol, tok_duo_currency_symbol, 0 +duo_int_frac_digits, tok_duo_int_frac_digits, 0 +duo_frac_digits, tok_duo_frac_digits, 0 +duo_p_cs_precedes, tok_duo_p_cs_precedes, 0 +duo_p_sep_by_space, tok_duo_p_sep_by_space, 0 +duo_n_cs_precedes, tok_duo_n_cs_precedes, 0 +duo_n_sep_by_space, tok_duo_n_sep_by_space, 0 +duo_int_p_cs_precedes, tok_duo_int_p_cs_precedes, 0 +duo_int_p_sep_by_space, tok_duo_int_p_sep_by_space, 0 +duo_int_n_cs_precedes, tok_duo_int_n_cs_precedes, 0 +duo_int_n_sep_by_space, tok_duo_int_n_sep_by_space, 0 +duo_p_sign_posn, tok_duo_p_sign_posn, 0 +duo_n_sign_posn, tok_duo_n_sign_posn, 0 +duo_int_p_sign_posn, tok_duo_int_p_sign_posn, 0 +duo_int_n_sign_posn, tok_duo_int_n_sign_posn, 0 +uno_valid_from, tok_uno_valid_from, 0 +uno_valid_to, tok_uno_valid_to, 0 +duo_valid_from, tok_duo_valid_from, 0 +duo_valid_to, tok_duo_valid_to, 0 +conversion_rate, tok_conversion_rate, 0 +LC_NUMERIC, tok_lc_numeric, 0 +decimal_point, tok_decimal_point, 0 +thousands_sep, tok_thousands_sep, 0 
+grouping, tok_grouping, 0 +LC_TIME, tok_lc_time, 0 +abday, tok_abday, 0 +day, tok_day, 0 +week, tok_week, 0 +abmon, tok_abmon, 0 +mon, tok_mon, 0 +d_t_fmt, tok_d_t_fmt, 0 +d_fmt, tok_d_fmt, 0 +t_fmt, tok_t_fmt, 0 +am_pm, tok_am_pm, 0 +t_fmt_ampm, tok_t_fmt_ampm, 0 +era, tok_era, 0 +era_year, tok_era_year, 0 +era_d_fmt, tok_era_d_fmt, 0 +era_d_t_fmt, tok_era_d_t_fmt, 0 +era_t_fmt, tok_era_t_fmt, 0 +alt_digits, tok_alt_digits, 0 +first_weekday, tok_first_weekday, 0 +first_workday, tok_first_workday, 0 +cal_direction, tok_cal_direction, 0 +timezone, tok_timezone, 0 +date_fmt, tok_date_fmt, 0 +LC_MESSAGES, tok_lc_messages, 0 +yesexpr, tok_yesexpr, 0 +noexpr, tok_noexpr, 0 +yesstr, tok_yesstr, 0 +nostr, tok_nostr, 0 +LC_PAPER, tok_lc_paper, 0 +height, tok_height, 0 +width, tok_width, 0 +LC_NAME, tok_lc_name, 0 +name_fmt, tok_name_fmt, 0 +name_gen, tok_name_gen, 0 +name_mr, tok_name_mr, 0 +name_mrs, tok_name_mrs, 0 +name_miss, tok_name_miss, 0 +name_ms, tok_name_ms, 0 +LC_ADDRESS, tok_lc_address, 0 +postal_fmt, tok_postal_fmt, 0 +country_name, tok_country_name, 0 +country_post, tok_country_post, 0 +country_ab2, tok_country_ab2, 0 +country_ab3, tok_country_ab3, 0 +country_num, tok_country_num, 0 +country_car, tok_country_car, 0 +country_isbn, tok_country_isbn, 0 +lang_name, tok_lang_name, 0 +lang_ab, tok_lang_ab, 0 +lang_term, tok_lang_term, 0 +lang_lib, tok_lang_lib, 0 +LC_TELEPHONE, tok_lc_telephone, 0 +tel_int_fmt, tok_tel_int_fmt, 0 +tel_dom_fmt, tok_tel_dom_fmt, 0 +int_select, tok_int_select, 0 +int_prefix, tok_int_prefix, 0 +LC_MEASUREMENT, tok_lc_measurement, 0 +measurement, tok_measurement, 0 +LC_IDENTIFICATION, tok_lc_identification, 0 +title, tok_title, 0 +source, tok_source, 0 +address, tok_address, 0 +contact, tok_contact, 0 +email, tok_email, 0 +tel, tok_tel, 0 +fax, tok_fax, 0 +language, tok_language, 0 +territory, tok_territory, 0 +audience, tok_audience, 0 +application, tok_application, 0 +abbreviation, tok_abbreviation, 0 +revision, tok_revision, 0 
+date, tok_date, 0 +category, tok_category, 0 diff --git a/REORG.TODO/locale/programs/locfile-kw.h b/REORG.TODO/locale/programs/locfile-kw.h new file mode 100644 index 0000000000..1cdca1941b --- /dev/null +++ b/REORG.TODO/locale/programs/locfile-kw.h @@ -0,0 +1,621 @@ +/* ANSI-C code produced by gperf version 3.0.4 */ +/* Command-line: gperf -acCgopt -k'1,2,5,9,$' -L ANSI-C -N locfile_hash locfile-kw.gperf */ + +#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \ + && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \ + && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \ + && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \ + && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \ + && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \ + && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \ + && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \ + && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \ + && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \ + && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \ + && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \ + && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \ + && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \ + && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \ + && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \ + && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \ + && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \ + && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \ + && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \ + && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \ + && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \ + && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)) +/* The character set is not based on ISO-646. 
*/ +#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>." +#endif + +#line 1 "locfile-kw.gperf" + +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <string.h> + +#include "locfile-token.h" +#line 24 "locfile-kw.gperf" +struct keyword_t ; + +#define TOTAL_KEYWORDS 176 +#define MIN_WORD_LENGTH 3 +#define MAX_WORD_LENGTH 22 +#define MIN_HASH_VALUE 3 +#define MAX_HASH_VALUE 630 +/* maximum key range = 628, duplicates = 0 */ + +#ifdef __GNUC__ +__inline +#else +#ifdef __cplusplus +inline +#endif +#endif +static unsigned int +hash (register const char *str, register unsigned int len) +{ + static const unsigned short asso_values[] = + { + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 5, 0, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 5, 631, 0, 0, 0, + 0, 0, 10, 0, 631, 631, 0, 631, 0, 5, + 631, 631, 0, 0, 0, 10, 631, 631, 631, 0, + 631, 631, 631, 631, 631, 0, 631, 145, 80, 25, + 15, 0, 180, 105, 10, 35, 631, 50, 80, 160, + 5, 130, 
40, 45, 5, 0, 10, 35, 40, 35, + 5, 10, 0, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631, 631, 631, 631, 631, + 631, 631, 631, 631, 631, 631 + }; + register int hval = len; + + switch (hval) + { + default: + hval += asso_values[(unsigned char)str[8]]; + /*FALLTHROUGH*/ + case 8: + case 7: + case 6: + case 5: + hval += asso_values[(unsigned char)str[4]]; + /*FALLTHROUGH*/ + case 4: + case 3: + case 2: + hval += asso_values[(unsigned char)str[1]]; + /*FALLTHROUGH*/ + case 1: + hval += asso_values[(unsigned char)str[0]]; + break; + } + return hval + asso_values[(unsigned char)str[len - 1]]; +} + +#ifdef __GNUC__ +__inline +#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +__attribute__ ((__gnu_inline__)) +#endif +#endif +const struct keyword_t * +locfile_hash (register const char *str, register unsigned int len) +{ + static const struct keyword_t wordlist[] = + { + {""}, {""}, {""}, +#line 31 "locfile-kw.gperf" + {"END", tok_end, 0}, + {""}, {""}, +#line 70 "locfile-kw.gperf" + {"IGNORE", tok_ignore, 0}, +#line 129 "locfile-kw.gperf" + {"LC_TIME", tok_lc_time, 0}, +#line 30 "locfile-kw.gperf" + {"LC_CTYPE", tok_lc_ctype, 0}, + {""}, +#line 166 "locfile-kw.gperf" + {"LC_ADDRESS", tok_lc_address, 0}, +#line 151 "locfile-kw.gperf" + {"LC_MESSAGES", tok_lc_messages, 0}, +#line 159 "locfile-kw.gperf" + {"LC_NAME", tok_lc_name, 0}, +#line 156 "locfile-kw.gperf" + {"LC_PAPER", 
tok_lc_paper, 0}, +#line 184 "locfile-kw.gperf" + {"LC_MEASUREMENT", tok_lc_measurement, 0}, +#line 56 "locfile-kw.gperf" + {"LC_COLLATE", tok_lc_collate, 0}, + {""}, +#line 186 "locfile-kw.gperf" + {"LC_IDENTIFICATION", tok_lc_identification, 0}, +#line 199 "locfile-kw.gperf" + {"revision", tok_revision, 0}, +#line 69 "locfile-kw.gperf" + {"UNDEFINED", tok_undefined, 0}, +#line 125 "locfile-kw.gperf" + {"LC_NUMERIC", tok_lc_numeric, 0}, +#line 82 "locfile-kw.gperf" + {"LC_MONETARY", tok_lc_monetary, 0}, +#line 179 "locfile-kw.gperf" + {"LC_TELEPHONE", tok_lc_telephone, 0}, + {""}, {""}, {""}, +#line 75 "locfile-kw.gperf" + {"define", tok_define, 0}, +#line 152 "locfile-kw.gperf" + {"yesexpr", tok_yesexpr, 0}, +#line 141 "locfile-kw.gperf" + {"era_year", tok_era_year, 0}, + {""}, +#line 54 "locfile-kw.gperf" + {"translit_ignore", tok_translit_ignore, 0}, +#line 154 "locfile-kw.gperf" + {"yesstr", tok_yesstr, 0}, + {""}, +#line 89 "locfile-kw.gperf" + {"negative_sign", tok_negative_sign, 0}, + {""}, +#line 137 "locfile-kw.gperf" + {"t_fmt", tok_t_fmt, 0}, +#line 157 "locfile-kw.gperf" + {"height", tok_height, 0}, + {""}, {""}, +#line 52 "locfile-kw.gperf" + {"translit_start", tok_translit_start, 0}, +#line 136 "locfile-kw.gperf" + {"d_fmt", tok_d_fmt, 0}, + {""}, +#line 53 "locfile-kw.gperf" + {"translit_end", tok_translit_end, 0}, +#line 94 "locfile-kw.gperf" + {"n_cs_precedes", tok_n_cs_precedes, 0}, +#line 144 "locfile-kw.gperf" + {"era_t_fmt", tok_era_t_fmt, 0}, +#line 39 "locfile-kw.gperf" + {"space", tok_space, 0}, +#line 72 "locfile-kw.gperf" + {"reorder-end", tok_reorder_end, 0}, +#line 73 "locfile-kw.gperf" + {"reorder-sections-after", tok_reorder_sections_after, 0}, + {""}, +#line 142 "locfile-kw.gperf" + {"era_d_fmt", tok_era_d_fmt, 0}, +#line 187 "locfile-kw.gperf" + {"title", tok_title, 0}, + {""}, {""}, +#line 149 "locfile-kw.gperf" + {"timezone", tok_timezone, 0}, + {""}, +#line 74 "locfile-kw.gperf" + {"reorder-sections-end", 
tok_reorder_sections_end, 0}, + {""}, {""}, {""}, +#line 95 "locfile-kw.gperf" + {"n_sep_by_space", tok_n_sep_by_space, 0}, + {""}, {""}, +#line 100 "locfile-kw.gperf" + {"int_n_cs_precedes", tok_int_n_cs_precedes, 0}, + {""}, {""}, {""}, +#line 26 "locfile-kw.gperf" + {"escape_char", tok_escape_char, 0}, + {""}, +#line 28 "locfile-kw.gperf" + {"repertoiremap", tok_repertoiremap, 0}, +#line 46 "locfile-kw.gperf" + {"charclass", tok_charclass, 0}, +#line 43 "locfile-kw.gperf" + {"print", tok_print, 0}, +#line 44 "locfile-kw.gperf" + {"xdigit", tok_xdigit, 0}, +#line 110 "locfile-kw.gperf" + {"duo_n_cs_precedes", tok_duo_n_cs_precedes, 0}, +#line 127 "locfile-kw.gperf" + {"thousands_sep", tok_thousands_sep, 0}, +#line 195 "locfile-kw.gperf" + {"territory", tok_territory, 0}, +#line 36 "locfile-kw.gperf" + {"digit", tok_digit, 0}, + {""}, {""}, +#line 92 "locfile-kw.gperf" + {"p_cs_precedes", tok_p_cs_precedes, 0}, + {""}, {""}, +#line 62 "locfile-kw.gperf" + {"script", tok_script, 0}, +#line 29 "locfile-kw.gperf" + {"include", tok_include, 0}, + {""}, +#line 78 "locfile-kw.gperf" + {"else", tok_else, 0}, +#line 182 "locfile-kw.gperf" + {"int_select", tok_int_select, 0}, + {""}, {""}, {""}, +#line 132 "locfile-kw.gperf" + {"week", tok_week, 0}, +#line 33 "locfile-kw.gperf" + {"upper", tok_upper, 0}, + {""}, {""}, +#line 192 "locfile-kw.gperf" + {"tel", tok_tel, 0}, +#line 93 "locfile-kw.gperf" + {"p_sep_by_space", tok_p_sep_by_space, 0}, +#line 158 "locfile-kw.gperf" + {"width", tok_width, 0}, + {""}, +#line 98 "locfile-kw.gperf" + {"int_p_cs_precedes", tok_int_p_cs_precedes, 0}, + {""}, {""}, +#line 41 "locfile-kw.gperf" + {"punct", tok_punct, 0}, + {""}, {""}, +#line 101 "locfile-kw.gperf" + {"int_n_sep_by_space", tok_int_n_sep_by_space, 0}, + {""}, {""}, {""}, +#line 108 "locfile-kw.gperf" + {"duo_p_cs_precedes", tok_duo_p_cs_precedes, 0}, +#line 48 "locfile-kw.gperf" + {"charconv", tok_charconv, 0}, + {""}, +#line 47 "locfile-kw.gperf" + {"class", tok_class, 0}, 
+#line 114 "locfile-kw.gperf" + {"duo_int_n_cs_precedes", tok_duo_int_n_cs_precedes, 0}, +#line 115 "locfile-kw.gperf" + {"duo_int_n_sep_by_space", tok_duo_int_n_sep_by_space, 0}, +#line 111 "locfile-kw.gperf" + {"duo_n_sep_by_space", tok_duo_n_sep_by_space, 0}, +#line 119 "locfile-kw.gperf" + {"duo_int_n_sign_posn", tok_duo_int_n_sign_posn, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 58 "locfile-kw.gperf" + {"section-symbol", tok_section_symbol, 0}, +#line 183 "locfile-kw.gperf" + {"int_prefix", tok_int_prefix, 0}, + {""}, {""}, {""}, {""}, +#line 42 "locfile-kw.gperf" + {"graph", tok_graph, 0}, + {""}, {""}, +#line 99 "locfile-kw.gperf" + {"int_p_sep_by_space", tok_int_p_sep_by_space, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 112 "locfile-kw.gperf" + {"duo_int_p_cs_precedes", tok_duo_int_p_cs_precedes, 0}, +#line 113 "locfile-kw.gperf" + {"duo_int_p_sep_by_space", tok_duo_int_p_sep_by_space, 0}, +#line 109 "locfile-kw.gperf" + {"duo_p_sep_by_space", tok_duo_p_sep_by_space, 0}, +#line 118 "locfile-kw.gperf" + {"duo_int_p_sign_posn", tok_duo_int_p_sign_posn, 0}, +#line 155 "locfile-kw.gperf" + {"nostr", tok_nostr, 0}, + {""}, {""}, +#line 140 "locfile-kw.gperf" + {"era", tok_era, 0}, + {""}, +#line 84 "locfile-kw.gperf" + {"currency_symbol", tok_currency_symbol, 0}, + {""}, +#line 165 "locfile-kw.gperf" + {"name_ms", tok_name_ms, 0}, +#line 163 "locfile-kw.gperf" + {"name_mrs", tok_name_mrs, 0}, +#line 164 "locfile-kw.gperf" + {"name_miss", tok_name_miss, 0}, +#line 83 "locfile-kw.gperf" + {"int_curr_symbol", tok_int_curr_symbol, 0}, +#line 188 "locfile-kw.gperf" + {"source", tok_source, 0}, +#line 162 "locfile-kw.gperf" + {"name_mr", tok_name_mr, 0}, +#line 161 "locfile-kw.gperf" + {"name_gen", tok_name_gen, 0}, +#line 200 "locfile-kw.gperf" + {"date", tok_date, 0}, + {""}, {""}, +#line 189 "locfile-kw.gperf" + {"address", tok_address, 0}, +#line 160 "locfile-kw.gperf" + {"name_fmt", tok_name_fmt, 
0}, +#line 32 "locfile-kw.gperf" + {"copy", tok_copy, 0}, +#line 103 "locfile-kw.gperf" + {"int_n_sign_posn", tok_int_n_sign_posn, 0}, + {""}, {""}, +#line 131 "locfile-kw.gperf" + {"day", tok_day, 0}, +#line 105 "locfile-kw.gperf" + {"duo_currency_symbol", tok_duo_currency_symbol, 0}, + {""}, {""}, {""}, +#line 150 "locfile-kw.gperf" + {"date_fmt", tok_date_fmt, 0}, +#line 64 "locfile-kw.gperf" + {"order_end", tok_order_end, 0}, +#line 117 "locfile-kw.gperf" + {"duo_n_sign_posn", tok_duo_n_sign_posn, 0}, + {""}, +#line 168 "locfile-kw.gperf" + {"country_name", tok_country_name, 0}, +#line 71 "locfile-kw.gperf" + {"reorder-after", tok_reorder_after, 0}, + {""}, {""}, +#line 153 "locfile-kw.gperf" + {"noexpr", tok_noexpr, 0}, +#line 50 "locfile-kw.gperf" + {"tolower", tok_tolower, 0}, +#line 196 "locfile-kw.gperf" + {"audience", tok_audience, 0}, + {""}, {""}, {""}, +#line 49 "locfile-kw.gperf" + {"toupper", tok_toupper, 0}, +#line 68 "locfile-kw.gperf" + {"position", tok_position, 0}, + {""}, +#line 40 "locfile-kw.gperf" + {"cntrl", tok_cntrl, 0}, + {""}, +#line 27 "locfile-kw.gperf" + {"comment_char", tok_comment_char, 0}, +#line 88 "locfile-kw.gperf" + {"positive_sign", tok_positive_sign, 0}, + {""}, {""}, {""}, {""}, +#line 61 "locfile-kw.gperf" + {"symbol-equivalence", tok_symbol_equivalence, 0}, + {""}, +#line 102 "locfile-kw.gperf" + {"int_p_sign_posn", tok_int_p_sign_posn, 0}, +#line 173 "locfile-kw.gperf" + {"country_car", tok_country_car, 0}, + {""}, {""}, +#line 104 "locfile-kw.gperf" + {"duo_int_curr_symbol", tok_duo_int_curr_symbol, 0}, + {""}, {""}, +#line 135 "locfile-kw.gperf" + {"d_t_fmt", tok_d_t_fmt, 0}, + {""}, {""}, +#line 116 "locfile-kw.gperf" + {"duo_p_sign_posn", tok_duo_p_sign_posn, 0}, +#line 185 "locfile-kw.gperf" + {"measurement", tok_measurement, 0}, +#line 174 "locfile-kw.gperf" + {"country_isbn", tok_country_isbn, 0}, +#line 37 "locfile-kw.gperf" + {"outdigit", tok_outdigit, 0}, + {""}, {""}, +#line 143 "locfile-kw.gperf" + 
{"era_d_t_fmt", tok_era_d_t_fmt, 0}, + {""}, {""}, {""}, +#line 34 "locfile-kw.gperf" + {"lower", tok_lower, 0}, +#line 181 "locfile-kw.gperf" + {"tel_dom_fmt", tok_tel_dom_fmt, 0}, +#line 169 "locfile-kw.gperf" + {"country_post", tok_country_post, 0}, +#line 148 "locfile-kw.gperf" + {"cal_direction", tok_cal_direction, 0}, + {""}, +#line 139 "locfile-kw.gperf" + {"t_fmt_ampm", tok_t_fmt_ampm, 0}, +#line 91 "locfile-kw.gperf" + {"frac_digits", tok_frac_digits, 0}, + {""}, {""}, +#line 175 "locfile-kw.gperf" + {"lang_name", tok_lang_name, 0}, +#line 90 "locfile-kw.gperf" + {"int_frac_digits", tok_int_frac_digits, 0}, + {""}, +#line 121 "locfile-kw.gperf" + {"uno_valid_to", tok_uno_valid_to, 0}, +#line 126 "locfile-kw.gperf" + {"decimal_point", tok_decimal_point, 0}, + {""}, +#line 133 "locfile-kw.gperf" + {"abmon", tok_abmon, 0}, + {""}, {""}, {""}, {""}, +#line 107 "locfile-kw.gperf" + {"duo_frac_digits", tok_duo_frac_digits, 0}, +#line 180 "locfile-kw.gperf" + {"tel_int_fmt", tok_tel_int_fmt, 0}, +#line 123 "locfile-kw.gperf" + {"duo_valid_to", tok_duo_valid_to, 0}, +#line 146 "locfile-kw.gperf" + {"first_weekday", tok_first_weekday, 0}, + {""}, +#line 130 "locfile-kw.gperf" + {"abday", tok_abday, 0}, + {""}, +#line 198 "locfile-kw.gperf" + {"abbreviation", tok_abbreviation, 0}, +#line 147 "locfile-kw.gperf" + {"first_workday", tok_first_workday, 0}, + {""}, {""}, +#line 97 "locfile-kw.gperf" + {"n_sign_posn", tok_n_sign_posn, 0}, + {""}, {""}, {""}, +#line 145 "locfile-kw.gperf" + {"alt_digits", tok_alt_digits, 0}, + {""}, {""}, +#line 128 "locfile-kw.gperf" + {"grouping", tok_grouping, 0}, + {""}, +#line 45 "locfile-kw.gperf" + {"blank", tok_blank, 0}, + {""}, {""}, +#line 194 "locfile-kw.gperf" + {"language", tok_language, 0}, +#line 120 "locfile-kw.gperf" + {"uno_valid_from", tok_uno_valid_from, 0}, + {""}, +#line 197 "locfile-kw.gperf" + {"application", tok_application, 0}, + {""}, +#line 80 "locfile-kw.gperf" + {"elifndef", tok_elifndef, 0}, + {""}, {""}, 
{""}, {""}, {""}, +#line 122 "locfile-kw.gperf" + {"duo_valid_from", tok_duo_valid_from, 0}, +#line 57 "locfile-kw.gperf" + {"coll_weight_max", tok_coll_weight_max, 0}, + {""}, +#line 79 "locfile-kw.gperf" + {"elifdef", tok_elifdef, 0}, +#line 67 "locfile-kw.gperf" + {"backward", tok_backward, 0}, +#line 106 "locfile-kw.gperf" + {"duo_int_frac_digits", tok_duo_int_frac_digits, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 96 "locfile-kw.gperf" + {"p_sign_posn", tok_p_sign_posn, 0}, + {""}, +#line 201 "locfile-kw.gperf" + {"category", tok_category, 0}, + {""}, {""}, {""}, {""}, +#line 134 "locfile-kw.gperf" + {"mon", tok_mon, 0}, + {""}, +#line 124 "locfile-kw.gperf" + {"conversion_rate", tok_conversion_rate, 0}, + {""}, {""}, {""}, {""}, {""}, +#line 63 "locfile-kw.gperf" + {"order_start", tok_order_start, 0}, + {""}, {""}, {""}, {""}, {""}, +#line 176 "locfile-kw.gperf" + {"lang_ab", tok_lang_ab, 0}, +#line 178 "locfile-kw.gperf" + {"lang_lib", tok_lang_lib, 0}, + {""}, {""}, {""}, +#line 190 "locfile-kw.gperf" + {"contact", tok_contact, 0}, + {""}, {""}, {""}, +#line 171 "locfile-kw.gperf" + {"country_ab3", tok_country_ab3, 0}, + {""}, {""}, {""}, +#line 191 "locfile-kw.gperf" + {"email", tok_email, 0}, +#line 170 "locfile-kw.gperf" + {"country_ab2", tok_country_ab2, 0}, + {""}, {""}, {""}, +#line 55 "locfile-kw.gperf" + {"default_missing", tok_default_missing, 0}, + {""}, {""}, +#line 193 "locfile-kw.gperf" + {"fax", tok_fax, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 172 "locfile-kw.gperf" + {"country_num", tok_country_num, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, +#line 51 "locfile-kw.gperf" + {"map", tok_map, 0}, +#line 65 "locfile-kw.gperf" + {"from", tok_from, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 86 "locfile-kw.gperf" + {"mon_thousands_sep", tok_mon_thousands_sep, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, +#line 81 "locfile-kw.gperf" + {"endif", tok_endif, 0}, + {""}, {""}, {""}, {""}, 
{""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 76 "locfile-kw.gperf" + {"undef", tok_undef, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 59 "locfile-kw.gperf" + {"collating-element", tok_collating_element, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 66 "locfile-kw.gperf" + {"forward", tok_forward, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, +#line 85 "locfile-kw.gperf" + {"mon_decimal_point", tok_mon_decimal_point, 0}, + {""}, {""}, +#line 167 "locfile-kw.gperf" + {"postal_fmt", tok_postal_fmt, 0}, + {""}, {""}, {""}, {""}, {""}, +#line 60 "locfile-kw.gperf" + {"collating-symbol", tok_collating_symbol, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 35 "locfile-kw.gperf" + {"alpha", tok_alpha, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, +#line 38 "locfile-kw.gperf" + {"alnum", tok_alnum, 0}, + {""}, +#line 87 "locfile-kw.gperf" + {"mon_grouping", tok_mon_grouping, 0}, + {""}, +#line 177 "locfile-kw.gperf" + {"lang_term", tok_lang_term, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, +#line 77 "locfile-kw.gperf" + {"ifdef", tok_ifdef, 0}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, 
{""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, + {""}, {""}, {""}, {""}, +#line 138 "locfile-kw.gperf" + {"am_pm", tok_am_pm, 0} + }; + + if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) + { + register int key = hash (str, len); + + if (key <= MAX_HASH_VALUE && key >= 0) + { + register const char *s = wordlist[key].name; + + if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0') + return &wordlist[key]; + } + } + return 0; +} diff --git a/REORG.TODO/locale/programs/locfile-token.h b/REORG.TODO/locale/programs/locfile-token.h new file mode 100644 index 0000000000..0c32f2c70b --- /dev/null +++ b/REORG.TODO/locale/programs/locfile-token.h @@ -0,0 +1,258 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. 
*/ + +#ifndef _TOKEN_H +#define _TOKEN_H + +enum token_t +{ + tok_none = 0, + + tok_eof, + tok_eol, + tok_bsymbol, + tok_ident, + tok_ellipsis2, + tok_ellipsis3, + tok_ellipsis4, + tok_ellipsis2_2, + tok_ellipsis4_2, + tok_semicolon, + tok_comma, + tok_open_brace, + tok_close_brace, + tok_charcode, + tok_ucs4, + tok_number, + tok_minus1, + tok_string, + tok_include, + + tok_escape_char, + tok_comment_char, + tok_charmap, + tok_end, + tok_g0esc, + tok_g1esc, + tok_g2esc, + tok_g3esc, + tok_escseq, + tok_addset, + + tok_charids, + + tok_code_set_name, + tok_mb_cur_max, + tok_mb_cur_min, + tok_charconv, + tok_width, + tok_width_variable, + tok_width_default, + tok_repertoiremap, + + tok_lc_ctype, + tok_copy, + /* Keep the following entries up to the next comment in this order! */ + tok_upper, + tok_lower, + tok_alpha, + tok_digit, + tok_xdigit, + tok_space, + tok_print, + tok_graph, + tok_blank, + tok_cntrl, + tok_punct, + tok_alnum, + /* OK, shuffling allowed again. */ + tok_outdigit, + tok_charclass, + tok_class, + tok_toupper, + tok_tolower, + tok_map, + tok_translit_start, + tok_translit_end, + tok_translit_ignore, + tok_default_missing, + tok_lc_collate, + tok_coll_weight_max, + tok_section_symbol, + tok_collating_element, + tok_collating_symbol, + tok_symbol_equivalence, + tok_script, + tok_order_start, + tok_order_end, + tok_from, + tok_forward, + tok_backward, + tok_position, + tok_undefined, + tok_ignore, + tok_reorder_after, + tok_reorder_end, + tok_reorder_sections_after, + tok_reorder_sections_end, + tok_define, + tok_undef, + tok_ifdef, + tok_ifndef, + tok_else, + tok_elifdef, + tok_elifndef, + tok_endif, + tok_lc_monetary, + tok_int_curr_symbol, + tok_currency_symbol, + tok_mon_decimal_point, + tok_mon_thousands_sep, + tok_mon_grouping, + tok_positive_sign, + tok_negative_sign, + tok_int_frac_digits, + tok_frac_digits, + tok_p_cs_precedes, + tok_p_sep_by_space, + tok_n_cs_precedes, + tok_n_sep_by_space, + tok_p_sign_posn, + tok_n_sign_posn, + 
tok_int_p_cs_precedes, + tok_int_p_sep_by_space, + tok_int_n_cs_precedes, + tok_int_n_sep_by_space, + tok_int_p_sign_posn, + tok_int_n_sign_posn, + tok_duo_int_curr_symbol, + tok_duo_currency_symbol, + tok_duo_int_frac_digits, + tok_duo_frac_digits, + tok_duo_p_cs_precedes, + tok_duo_p_sep_by_space, + tok_duo_n_cs_precedes, + tok_duo_n_sep_by_space, + tok_duo_int_p_cs_precedes, + tok_duo_int_p_sep_by_space, + tok_duo_int_n_cs_precedes, + tok_duo_int_n_sep_by_space, + tok_duo_p_sign_posn, + tok_duo_n_sign_posn, + tok_duo_int_p_sign_posn, + tok_duo_int_n_sign_posn, + tok_uno_valid_from, + tok_uno_valid_to, + tok_duo_valid_from, + tok_duo_valid_to, + tok_conversion_rate, + tok_lc_numeric, + tok_decimal_point, + tok_thousands_sep, + tok_grouping, + tok_lc_time, + tok_abday, + tok_day, + tok_abmon, + tok_mon, + tok_d_t_fmt, + tok_d_fmt, + tok_t_fmt, + tok_am_pm, + tok_t_fmt_ampm, + tok_era, + tok_era_year, + tok_era_d_fmt, + tok_era_d_t_fmt, + tok_era_t_fmt, + tok_alt_digits, + tok_week, + tok_first_weekday, + tok_first_workday, + tok_cal_direction, + tok_timezone, + tok_date_fmt, + tok_lc_messages, + tok_yesexpr, + tok_noexpr, + tok_yesstr, + tok_nostr, + tok_lc_paper, + tok_height, + tok_lc_name, + tok_name_fmt, + tok_name_gen, + tok_name_mr, + tok_name_mrs, + tok_name_miss, + tok_name_ms, + tok_lc_address, + tok_postal_fmt, + tok_country_name, + tok_country_post, + tok_country_ab2, + tok_country_ab3, + tok_country_num, + tok_country_car, + tok_country_isbn, + tok_lang_name, + tok_lang_ab, + tok_lang_term, + tok_lang_lib, + tok_lc_telephone, + tok_tel_int_fmt, + tok_tel_dom_fmt, + tok_int_select, + tok_int_prefix, + tok_lc_measurement, + tok_measurement, + tok_lc_identification, + tok_title, + tok_source, + tok_address, + tok_contact, + tok_email, + tok_tel, + tok_fax, + tok_language, + tok_territory, + tok_audience, + tok_application, + tok_abbreviation, + tok_revision, + tok_date, + tok_category, + + tok_error +}; + + +struct keyword_t +{ + const char *name; + enum 
token_t token; + int symname_or_ident; + + /* Only for locdef file. */ + int locale; + enum token_t base; + enum token_t group; + enum token_t list; +}; + + +#endif /* token.h */ diff --git a/REORG.TODO/locale/programs/locfile.c b/REORG.TODO/locale/programs/locfile.c new file mode 100644 index 0000000000..0990ef11be --- /dev/null +++ b/REORG.TODO/locale/programs/locfile.c @@ -0,0 +1,1001 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <assert.h> +#include <wchar.h> + +#include "../../crypt/md5.h" +#include "localedef.h" +#include "localeinfo.h" +#include "locfile.h" +#include "simple-hash.h" + +#include "locfile-kw.h" + +#define obstack_chunk_alloc xmalloc +#define obstack_chunk_free free + +/* Temporary storage of the locale data before writing it to the archive. 
*/ +static locale_data_t to_archive; + + +int +locfile_read (struct localedef_t *result, const struct charmap_t *charmap) +{ + const char *filename = result->name; + const char *repertoire_name = result->repertoire_name; + int locale_mask = result->needed & ~result->avail; + struct linereader *ldfile; + int not_here = ALL_LOCALES; + + /* If no repertoire name was specified use the global one. */ + if (repertoire_name == NULL) + repertoire_name = repertoire_global; + + /* Open the locale definition file. */ + ldfile = lr_open (filename, locfile_hash); + if (ldfile == NULL) + { + if (filename != NULL && filename[0] != '/') + { + char *i18npath = getenv ("I18NPATH"); + if (i18npath != NULL && *i18npath != '\0') + { + const size_t pathlen = strlen (i18npath); + char i18npathbuf[pathlen + 1]; + char path[strlen (filename) + 1 + pathlen + + sizeof ("/locales/") - 1]; + char *next; + i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1); + + while (ldfile == NULL + && (next = strsep (&i18npath, ":")) != NULL) + { + stpcpy (stpcpy (stpcpy (path, next), "/locales/"), filename); + + ldfile = lr_open (path, locfile_hash); + + if (ldfile == NULL) + { + stpcpy (stpcpy (stpcpy (path, next), "/"), filename); + + ldfile = lr_open (path, locfile_hash); + } + } + } + + /* Test in the default directory. */ + if (ldfile == NULL) + { + char path[strlen (filename) + 1 + sizeof (LOCSRCDIR)]; + + stpcpy (stpcpy (stpcpy (path, LOCSRCDIR), "/"), filename); + ldfile = lr_open (path, locfile_hash); + } + } + + if (ldfile == NULL) + return 1; + } + + /* Parse locale definition file and store result in RESULT. */ + while (1) + { + struct token *now = lr_token (ldfile, charmap, NULL, NULL, verbose); + enum token_t nowtok = now->tok; + struct token *arg; + + if (nowtok == tok_eof) + break; + + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + switch (nowtok) + { + case tok_escape_char: + case tok_comment_char: + /* We need an argument. 
*/ + arg = lr_token (ldfile, charmap, NULL, NULL, verbose); + + if (arg->tok != tok_ident) + { + SYNTAX_ERROR (_("bad argument")); + continue; + } + + if (arg->val.str.lenmb != 1) + { + lr_error (ldfile, _("\ +argument to `%s' must be a single character"), + nowtok == tok_escape_char + ? "escape_char" : "comment_char"); + + lr_ignore_rest (ldfile, 0); + continue; + } + + if (nowtok == tok_escape_char) + ldfile->escape_char = *arg->val.str.startmb; + else + ldfile->comment_char = *arg->val.str.startmb; + break; + + case tok_repertoiremap: + /* We need an argument. */ + arg = lr_token (ldfile, charmap, NULL, NULL, verbose); + + if (arg->tok != tok_ident) + { + SYNTAX_ERROR (_("bad argument")); + continue; + } + + if (repertoire_name == NULL) + { + char *newp = alloca (arg->val.str.lenmb + 1); + + *((char *) mempcpy (newp, arg->val.str.startmb, + arg->val.str.lenmb)) = '\0'; + repertoire_name = newp; + } + break; + + case tok_lc_ctype: + ctype_read (ldfile, result, charmap, repertoire_name, + (locale_mask & CTYPE_LOCALE) == 0); + result->avail |= locale_mask & CTYPE_LOCALE; + not_here ^= CTYPE_LOCALE; + continue; + + case tok_lc_collate: + collate_read (ldfile, result, charmap, repertoire_name, + (locale_mask & COLLATE_LOCALE) == 0); + result->avail |= locale_mask & COLLATE_LOCALE; + not_here ^= COLLATE_LOCALE; + continue; + + case tok_lc_monetary: + monetary_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MONETARY_LOCALE) == 0); + result->avail |= locale_mask & MONETARY_LOCALE; + not_here ^= MONETARY_LOCALE; + continue; + + case tok_lc_numeric: + numeric_read (ldfile, result, charmap, repertoire_name, + (locale_mask & NUMERIC_LOCALE) == 0); + result->avail |= locale_mask & NUMERIC_LOCALE; + not_here ^= NUMERIC_LOCALE; + continue; + + case tok_lc_time: + time_read (ldfile, result, charmap, repertoire_name, + (locale_mask & TIME_LOCALE) == 0); + result->avail |= locale_mask & TIME_LOCALE; + not_here ^= TIME_LOCALE; + continue; + + case tok_lc_messages: 
+ messages_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MESSAGES_LOCALE) == 0); + result->avail |= locale_mask & MESSAGES_LOCALE; + not_here ^= MESSAGES_LOCALE; + continue; + + case tok_lc_paper: + paper_read (ldfile, result, charmap, repertoire_name, + (locale_mask & PAPER_LOCALE) == 0); + result->avail |= locale_mask & PAPER_LOCALE; + not_here ^= PAPER_LOCALE; + continue; + + case tok_lc_name: + name_read (ldfile, result, charmap, repertoire_name, + (locale_mask & NAME_LOCALE) == 0); + result->avail |= locale_mask & NAME_LOCALE; + not_here ^= NAME_LOCALE; + continue; + + case tok_lc_address: + address_read (ldfile, result, charmap, repertoire_name, + (locale_mask & ADDRESS_LOCALE) == 0); + result->avail |= locale_mask & ADDRESS_LOCALE; + not_here ^= ADDRESS_LOCALE; + continue; + + case tok_lc_telephone: + telephone_read (ldfile, result, charmap, repertoire_name, + (locale_mask & TELEPHONE_LOCALE) == 0); + result->avail |= locale_mask & TELEPHONE_LOCALE; + not_here ^= TELEPHONE_LOCALE; + continue; + + case tok_lc_measurement: + measurement_read (ldfile, result, charmap, repertoire_name, + (locale_mask & MEASUREMENT_LOCALE) == 0); + result->avail |= locale_mask & MEASUREMENT_LOCALE; + not_here ^= MEASUREMENT_LOCALE; + continue; + + case tok_lc_identification: + identification_read (ldfile, result, charmap, repertoire_name, + (locale_mask & IDENTIFICATION_LOCALE) == 0); + result->avail |= locale_mask & IDENTIFICATION_LOCALE; + not_here ^= IDENTIFICATION_LOCALE; + continue; + + default: + SYNTAX_ERROR (_("\ +syntax error: not inside a locale definition section")); + continue; + } + + /* The rest of the line must be empty. */ + lr_ignore_rest (ldfile, 1); + } + + /* We read all of the file. */ + lr_close (ldfile); + + /* Mark the categories which are not contained in the file. We assume + them to be available and the default data will be used. */ + result->avail |= not_here; + + return 0; +} + + +/* Semantic checking of locale specifications. 
*/ + +static void (*const check_funcs[]) (struct localedef_t *, + const struct charmap_t *) = +{ + [LC_CTYPE] = ctype_finish, + [LC_COLLATE] = collate_finish, + [LC_MESSAGES] = messages_finish, + [LC_MONETARY] = monetary_finish, + [LC_NUMERIC] = numeric_finish, + [LC_TIME] = time_finish, + [LC_PAPER] = paper_finish, + [LC_NAME] = name_finish, + [LC_ADDRESS] = address_finish, + [LC_TELEPHONE] = telephone_finish, + [LC_MEASUREMENT] = measurement_finish, + [LC_IDENTIFICATION] = identification_finish +}; + +void +check_all_categories (struct localedef_t *definitions, + const struct charmap_t *charmap) +{ + int cnt; + + for (cnt = 0; cnt < sizeof (check_funcs) / sizeof (check_funcs[0]); ++cnt) + if (check_funcs[cnt] != NULL) + check_funcs[cnt] (definitions, charmap); +} + + +/* Writing the locale data files. All files use the same output_path. */ + +static void (*const write_funcs[]) (struct localedef_t *, + const struct charmap_t *, const char *) = +{ + [LC_CTYPE] = ctype_output, + [LC_COLLATE] = collate_output, + [LC_MESSAGES] = messages_output, + [LC_MONETARY] = monetary_output, + [LC_NUMERIC] = numeric_output, + [LC_TIME] = time_output, + [LC_PAPER] = paper_output, + [LC_NAME] = name_output, + [LC_ADDRESS] = address_output, + [LC_TELEPHONE] = telephone_output, + [LC_MEASUREMENT] = measurement_output, + [LC_IDENTIFICATION] = identification_output +}; + + +void +write_all_categories (struct localedef_t *definitions, + const struct charmap_t *charmap, const char *locname, + const char *output_path) +{ + int cnt; + + for (cnt = 0; cnt < sizeof (write_funcs) / sizeof (write_funcs[0]); ++cnt) + if (write_funcs[cnt] != NULL) + write_funcs[cnt] (definitions, charmap, output_path); + + if (! no_archive) + { + /* The data has to be added to the archive. Do this now. */ + struct locarhandle ah; + + /* Open the archive. This call never returns if we cannot + successfully open the archive. 
*/ + ah.fname = NULL; + open_archive (&ah, false); + + if (add_locale_to_archive (&ah, locname, to_archive, true) != 0) + error (EXIT_FAILURE, errno, _("cannot add to locale archive")); + + /* We are done. */ + close_archive (&ah); + } +} + + +/* Return a NULL terminated list of the directories next to output_path + that have the same owner, group, permissions and device as output_path. */ +static const char ** +siblings_uncached (const char *output_path) +{ + size_t len; + char *base, *p; + struct stat64 output_stat; + DIR *dirp; + int nelems; + const char **elems; + + /* Remove trailing slashes and trailing pathname component. */ + len = strlen (output_path); + base = (char *) alloca (len); + memcpy (base, output_path, len); + p = base + len; + while (p > base && p[-1] == '/') + p--; + if (p == base) + return NULL; + do + p--; + while (p > base && p[-1] != '/'); + if (p == base) + return NULL; + *--p = '\0'; + len = p - base; + + /* Get the properties of output_path. */ + if (lstat64 (output_path, &output_stat) < 0 || !S_ISDIR (output_stat.st_mode)) + return NULL; + + /* Iterate through the directories in base directory. 
*/ + dirp = opendir (base); + if (dirp == NULL) + return NULL; + nelems = 0; + elems = NULL; + for (;;) + { + struct dirent64 *other_dentry; + const char *other_name; + char *other_path; + struct stat64 other_stat; + + other_dentry = readdir64 (dirp); + if (other_dentry == NULL) + break; + + other_name = other_dentry->d_name; + if (strcmp (other_name, ".") == 0 || strcmp (other_name, "..") == 0) + continue; + + other_path = (char *) xmalloc (len + 1 + strlen (other_name) + 2); + memcpy (other_path, base, len); + other_path[len] = '/'; + strcpy (other_path + len + 1, other_name); + + if (lstat64 (other_path, &other_stat) >= 0 + && S_ISDIR (other_stat.st_mode) + && other_stat.st_uid == output_stat.st_uid + && other_stat.st_gid == output_stat.st_gid + && other_stat.st_mode == output_stat.st_mode + && other_stat.st_dev == output_stat.st_dev) + { + /* Found a subdirectory. Add a trailing slash and store it. */ + p = other_path + len + 1 + strlen (other_name); + *p++ = '/'; + *p = '\0'; + elems = (const char **) xrealloc ((char *) elems, + (nelems + 2) * sizeof (char **)); + elems[nelems++] = other_path; + } + else + free (other_path); + } + closedir (dirp); + + if (elems != NULL) + elems[nelems] = NULL; + return elems; +} + + +/* Return a NULL terminated list of the directories next to output_path + that have the same owner, group, permissions and device as output_path. + Cache the result for future calls. */ +static const char ** +siblings (const char *output_path) +{ + static const char *last_output_path; + static const char **last_result; + + if (output_path != last_output_path) + { + if (last_result != NULL) + { + const char **p; + + for (p = last_result; *p != NULL; p++) + free ((char *) *p); + free (last_result); + } + + last_output_path = output_path; + last_result = siblings_uncached (output_path); + } + return last_result; +} + + +/* Read as many bytes from a file descriptor as possible. 
*/ +static ssize_t +full_read (int fd, void *bufarea, size_t nbyte) +{ + char *buf = (char *) bufarea; + + while (nbyte > 0) + { + ssize_t retval = read (fd, buf, nbyte); + + if (retval == 0) + break; + else if (retval > 0) + { + buf += retval; + nbyte -= retval; + } + else if (errno != EINTR) + return retval; + } + return buf - (char *) bufarea; +} + + +/* Compare the contents of two regular files of the same size. Return 0 + if they are equal, 1 if they are different, or -1 if an error occurs. */ +static int +compare_files (const char *filename1, const char *filename2, size_t size, + size_t blocksize) +{ + int fd1, fd2; + int ret = -1; + + fd1 = open (filename1, O_RDONLY); + if (fd1 >= 0) + { + fd2 = open (filename2, O_RDONLY); + if (fd2 >= 0) + { + char *buf1 = (char *) xmalloc (2 * blocksize); + char *buf2 = buf1 + blocksize; + + ret = 0; + while (size > 0) + { + size_t bytes = (size < blocksize ? size : blocksize); + + if (full_read (fd1, buf1, bytes) < (ssize_t) bytes) + { + ret = -1; + break; + } + if (full_read (fd2, buf2, bytes) < (ssize_t) bytes) + { + ret = -1; + break; + } + if (memcmp (buf1, buf2, bytes) != 0) + { + ret = 1; + break; + } + size -= bytes; + } + + free (buf1); + close (fd2); + } + close (fd1); + } + return ret; +} + +/* True if the locale files use the opposite endianness to the + machine running localedef. */ +bool swap_endianness_p; + +/* When called outside a start_locale_structure/end_locale_structure + or start_locale_prelude/end_locale_prelude block, record that the + next byte in FILE's obstack will be the first byte of a new element. + Do likewise for the first call inside a start_locale_structure/ + end_locale_structure block. 
*/ +static void +record_offset (struct locale_file *file) +{ + if (file->structure_stage < 2) + { + assert (file->next_element < file->n_elements); + file->offsets[file->next_element++] + = (obstack_object_size (&file->data) + + (file->n_elements + 2) * sizeof (uint32_t)); + if (file->structure_stage == 1) + file->structure_stage = 2; + } +} + +/* Initialize FILE for a new output file. N_ELEMENTS is the number + of elements in the file. */ +void +init_locale_data (struct locale_file *file, size_t n_elements) +{ + file->n_elements = n_elements; + file->next_element = 0; + file->offsets = xmalloc (sizeof (uint32_t) * n_elements); + obstack_init (&file->data); + file->structure_stage = 0; +} + +/* Align the size of FILE's obstack object to BOUNDARY bytes. */ +void +align_locale_data (struct locale_file *file, size_t boundary) +{ + size_t size = -obstack_object_size (&file->data) & (boundary - 1); + obstack_blank (&file->data, size); + memset (obstack_next_free (&file->data) - size, 0, size); +} + +/* Record that FILE's next element contains no data. */ +void +add_locale_empty (struct locale_file *file) +{ + record_offset (file); +} + +/* Record that FILE's next element consists of SIZE bytes starting at DATA. */ +void +add_locale_raw_data (struct locale_file *file, const void *data, size_t size) +{ + record_offset (file); + obstack_grow (&file->data, data, size); +} + +/* Finish the current object on OBSTACK and use it as the data for FILE's + next element. */ +void +add_locale_raw_obstack (struct locale_file *file, struct obstack *obstack) +{ + size_t size = obstack_object_size (obstack); + record_offset (file); + obstack_grow (&file->data, obstack_finish (obstack), size); +} + +/* Use STRING as FILE's next element. */ +void +add_locale_string (struct locale_file *file, const char *string) +{ + record_offset (file); + obstack_grow (&file->data, string, strlen (string) + 1); +} + +/* Likewise for wide strings. 
*/ +void +add_locale_wstring (struct locale_file *file, const uint32_t *string) +{ + add_locale_uint32_array (file, string, wcslen ((const wchar_t *) string) + 1); +} + +/* Record that FILE's next element is the 32-bit integer VALUE. */ +void +add_locale_uint32 (struct locale_file *file, uint32_t value) +{ + align_locale_data (file, LOCFILE_ALIGN); + record_offset (file); + value = maybe_swap_uint32 (value); + obstack_grow (&file->data, &value, sizeof (value)); +} + +/* Record that FILE's next element is an array of N_ELEMS integers + starting at DATA. */ +void +add_locale_uint32_array (struct locale_file *file, + const uint32_t *data, size_t n_elems) +{ + align_locale_data (file, LOCFILE_ALIGN); + record_offset (file); + obstack_grow (&file->data, data, n_elems * sizeof (uint32_t)); + maybe_swap_uint32_obstack (&file->data, n_elems); +} + +/* Record that FILE's next element is the single byte given by VALUE. */ +void +add_locale_char (struct locale_file *file, char value) +{ + record_offset (file); + obstack_1grow (&file->data, value); +} + +/* Start building an element that contains several different pieces of data. + Subsequent calls to add_locale_* will add data to the same element up + till the next call to end_locale_structure. The element's alignment + is dictated by the first piece of data added to it. */ +void +start_locale_structure (struct locale_file *file) +{ + assert (file->structure_stage == 0); + file->structure_stage = 1; +} + +/* Finish a structure element that was started by start_locale_structure. + Empty structures are OK and behave like add_locale_empty. */ +void +end_locale_structure (struct locale_file *file) +{ + record_offset (file); + assert (file->structure_stage == 2); + file->structure_stage = 0; +} + +/* Start building data that goes before the next element's recorded offset. + Subsequent calls to add_locale_* will add data to the file without + treating any of it as the start of a new element. 
Calling + end_locale_prelude switches back to the usual behavior. */ +void +start_locale_prelude (struct locale_file *file) +{ + assert (file->structure_stage == 0); + file->structure_stage = 3; +} + +/* End a block started by start_locale_prelude. */ +void +end_locale_prelude (struct locale_file *file) +{ + assert (file->structure_stage == 3); + file->structure_stage = 0; +} + +/* Write a locale file, with contents given by FILE. */ +void +write_locale_data (const char *output_path, int catidx, const char *category, + struct locale_file *file) +{ + size_t cnt, step, maxiov; + int fd; + char *fname; + const char **other_paths; + uint32_t header[2]; + size_t n_elem; + struct iovec vec[3]; + + assert (file->n_elements == file->next_element); + header[0] = LIMAGIC (catidx); + header[1] = file->n_elements; + vec[0].iov_len = sizeof (header); + vec[0].iov_base = header; + vec[1].iov_len = sizeof (uint32_t) * file->n_elements; + vec[1].iov_base = file->offsets; + vec[2].iov_len = obstack_object_size (&file->data); + vec[2].iov_base = obstack_finish (&file->data); + maybe_swap_uint32_array (vec[0].iov_base, 2); + maybe_swap_uint32_array (vec[1].iov_base, file->n_elements); + n_elem = 3; + if (! no_archive) + { + /* The data will be added to the archive. For now we simply + generate the image which will be written. First determine + the size. */ + int cnt; + void *endp; + + to_archive[catidx].size = 0; + for (cnt = 0; cnt < n_elem; ++cnt) + to_archive[catidx].size += vec[cnt].iov_len; + + /* Allocate the memory for it. */ + to_archive[catidx].addr = xmalloc (to_archive[catidx].size); + + /* Fill it in. */ + for (cnt = 0, endp = to_archive[catidx].addr; cnt < n_elem; ++cnt) + endp = mempcpy (endp, vec[cnt].iov_base, vec[cnt].iov_len); + + /* Compute the MD5 sum for the data. 
*/ + __md5_buffer (to_archive[catidx].addr, to_archive[catidx].size, + to_archive[catidx].sum); + + return; + } + + fname = xmalloc (strlen (output_path) + 2 * strlen (category) + 7); + + /* Normally we write to the directory pointed to by the OUTPUT_PATH. + But for LC_MESSAGES we have to take care for the translation + data. This means we need to have a directory LC_MESSAGES in + which we place the file under the name SYS_LC_MESSAGES. */ + sprintf (fname, "%s%s", output_path, category); + fd = -2; + if (strcmp (category, "LC_MESSAGES") == 0) + { + struct stat64 st; + + if (stat64 (fname, &st) < 0) + { + if (mkdir (fname, 0777) >= 0) + { + fd = -1; + errno = EISDIR; + } + } + else if (!S_ISREG (st.st_mode)) + { + fd = -1; + errno = EISDIR; + } + } + + /* Create the locale file with nlinks == 1; this avoids crashing processes + which currently use the locale and damaging files belonging to other + locales as well. */ + if (fd == -2) + { + unlink (fname); + fd = creat (fname, 0666); + } + + if (fd == -1) + { + int save_err = errno; + + if (errno == EISDIR) + { + sprintf (fname, "%1$s%2$s/SYS_%2$s", output_path, category); + unlink (fname); + fd = creat (fname, 0666); + if (fd == -1) + save_err = errno; + } + + if (fd == -1) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, save_err, _("\ +cannot open output file `%s' for category `%s'"), fname, category)); + free (fname); + return; + } + } + +#ifdef UIO_MAXIOV + maxiov = UIO_MAXIOV; +#else + maxiov = sysconf (_SC_UIO_MAXIOV); +#endif + + /* Write the data using writev. But we must take care for the + limitation of the implementation. 
*/ + for (cnt = 0; cnt < n_elem; cnt += step) + { + step = n_elem - cnt; + if (maxiov > 0) + step = MIN (maxiov, step); + + if (writev (fd, &vec[cnt], step) < 0) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, errno, _("\ +failure while writing data for category `%s'"), category)); + break; + } + } + + close (fd); + + /* Compare the file with the locale data files for the same category in + other locales, and see if we can reuse it, to save disk space. */ + other_paths = siblings (output_path); + if (other_paths != NULL) + { + struct stat64 fname_stat; + + if (lstat64 (fname, &fname_stat) >= 0 + && S_ISREG (fname_stat.st_mode)) + { + const char *fname_tail = fname + strlen (output_path); + const char **other_p; + int seen_count; + ino_t *seen_inodes; + + seen_count = 0; + for (other_p = other_paths; *other_p; other_p++) + seen_count++; + seen_inodes = (ino_t *) xmalloc (seen_count * sizeof (ino_t)); + seen_count = 0; + + for (other_p = other_paths; *other_p; other_p++) + { + const char *other_path = *other_p; + size_t other_path_len = strlen (other_path); + char *other_fname; + struct stat64 other_fname_stat; + + other_fname = + (char *) xmalloc (other_path_len + strlen (fname_tail) + 1); + memcpy (other_fname, other_path, other_path_len); + strcpy (other_fname + other_path_len, fname_tail); + + if (lstat64 (other_fname, &other_fname_stat) >= 0 + && S_ISREG (other_fname_stat.st_mode) + /* Consider only files on the same device. + Otherwise hard linking won't work anyway. */ + && other_fname_stat.st_dev == fname_stat.st_dev + /* Consider only files with the same permissions. + Otherwise there are security risks. */ + && other_fname_stat.st_uid == fname_stat.st_uid + && other_fname_stat.st_gid == fname_stat.st_gid + && other_fname_stat.st_mode == fname_stat.st_mode + /* Don't compare fname with itself. */ + && other_fname_stat.st_ino != fname_stat.st_ino + /* Files must have the same size, otherwise they + cannot be the same. 
*/ + && other_fname_stat.st_size == fname_stat.st_size) + { + /* Skip this file if we have already read it (under a + different name). */ + int i; + + for (i = seen_count - 1; i >= 0; i--) + if (seen_inodes[i] == other_fname_stat.st_ino) + break; + if (i < 0) + { + /* Now compare fname and other_fname for real. */ + blksize_t blocksize; + +#ifdef _STATBUF_ST_BLKSIZE + blocksize = MAX (fname_stat.st_blksize, + other_fname_stat.st_blksize); + if (blocksize > 8 * 1024) + blocksize = 8 * 1024; +#else + blocksize = 8 * 1024; +#endif + + if (compare_files (fname, other_fname, + fname_stat.st_size, blocksize) == 0) + { + /* Found! other_fname is identical to fname. */ + /* Link other_fname to fname. But use a temporary + file, in case hard links don't work on the + particular filesystem. */ + char * tmp_fname = + (char *) xmalloc (strlen (fname) + 4 + 1); + + strcpy (stpcpy (tmp_fname, fname), ".tmp"); + + if (link (other_fname, tmp_fname) >= 0) + { + unlink (fname); + if (rename (tmp_fname, fname) < 0) + { + if (!be_quiet) + WITH_CUR_LOCALE (error (0, errno, _("\ +cannot create output file `%s' for category `%s'"), fname, category)); + } + free (tmp_fname); + free (other_fname); + break; + } + free (tmp_fname); + } + + /* Don't compare with this file a second time. */ + seen_inodes[seen_count++] = other_fname_stat.st_ino; + } + } + free (other_fname); + } + free (seen_inodes); + } + } + + free (fname); +} + + +/* General handling of `copy'. 
*/ +void +handle_copy (struct linereader *ldfile, const struct charmap_t *charmap, + const char *repertoire_name, struct localedef_t *result, + enum token_t token, int locale, const char *locale_name, + int ignore_content) +{ + struct token *now; + int warned = 0; + + now = lr_token (ldfile, charmap, result, NULL, verbose); + if (now->tok != tok_string) + lr_error (ldfile, _("expecting string argument for `copy'")); + else if (!ignore_content) + { + if (now->val.str.startmb == NULL) + lr_error (ldfile, _("\ +locale name should consist only of portable characters")); + else + { + (void) add_to_readlist (locale, now->val.str.startmb, + repertoire_name, 1, NULL); + result->copy_name[locale] = now->val.str.startmb; + } + } + + lr_ignore_rest (ldfile, now->tok == tok_string); + + /* The rest of the line must be empty and the next keyword must be + `END xxx'. */ + while ((now = lr_token (ldfile, charmap, result, NULL, verbose))->tok + != tok_end && now->tok != tok_eof) + { + if (warned == 0) + { + lr_error (ldfile, _("\ +no other keyword shall be specified when `copy' is used")); + warned = 1; + } + + lr_ignore_rest (ldfile, 0); + } + + if (now->tok != tok_eof) + { + /* Handle `END xxx'. */ + now = lr_token (ldfile, charmap, result, NULL, verbose); + + if (now->tok != token) + lr_error (ldfile, _("\ +`%1$s' definition does not end with `END %1$s'"), locale_name); + + lr_ignore_rest (ldfile, now->tok == token); + } + else + /* When we come here we reached the end of the file. */ + lr_error (ldfile, _("%s: premature end of file"), locale_name); +} diff --git a/REORG.TODO/locale/programs/locfile.h b/REORG.TODO/locale/programs/locfile.h new file mode 100644 index 0000000000..3407e13c13 --- /dev/null +++ b/REORG.TODO/locale/programs/locfile.h @@ -0,0 +1,279 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1996. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _LOCFILE_H +#define _LOCFILE_H 1 + +#include <byteswap.h> +#include <stdbool.h> +#include <stdint.h> +#include <sys/uio.h> + +#include "obstack.h" +#include "linereader.h" +#include "localedef.h" + +/* Structure for storing the contents of a category file. */ +struct locale_file +{ + size_t n_elements, next_element; + uint32_t *offsets; + struct obstack data; + int structure_stage; +}; + + +/* Macros used in the parser. */ +#define SYNTAX_ERROR(string, args...) \ + do \ + { \ + lr_error (ldfile, string, ## args); \ + lr_ignore_rest (ldfile, 0); \ + } \ + while (0) + + +/* General handling of `copy'. */ +extern void handle_copy (struct linereader *ldfile, + const struct charmap_t *charmap, + const char *repertoire_name, + struct localedef_t *result, enum token_t token, + int locale, const char *locale_name, + int ignore_content); + +/* Found in locfile.c. */ +extern int locfile_read (struct localedef_t *result, + const struct charmap_t *charmap); + +/* Check validity of all the locale data. */ +extern void check_all_categories (struct localedef_t *definitions, + const struct charmap_t *charmap); + +/* Write out all locale categories. 
*/ +extern void write_all_categories (struct localedef_t *definitions, + const struct charmap_t *charmap, + const char *locname, + const char *output_path); + +extern bool swap_endianness_p; + +/* Change the output to be big-endian if BIG_ENDIAN is true and + little-endian otherwise. */ +static inline void +set_big_endian (bool big_endian) +{ + swap_endianness_p = (big_endian != (__BYTE_ORDER == __BIG_ENDIAN)); +} + +/* Munge VALUE so that, when stored, it has the correct byte order + for the output files. */ +static uint32_t +__attribute__ ((unused)) +maybe_swap_uint32 (uint32_t value) +{ + return swap_endianness_p ? bswap_32 (value) : value; +} + +/* Likewise, but munge an array of N uint32_ts starting at ARRAY. */ +static inline void +maybe_swap_uint32_array (uint32_t *array, size_t n) +{ + if (swap_endianness_p) + while (n-- > 0) + array[n] = bswap_32 (array[n]); +} + +/* Like maybe_swap_uint32_array, but the array of N elements is at + the end of OBSTACK's current object. */ +static inline void +maybe_swap_uint32_obstack (struct obstack *obstack, size_t n) +{ + maybe_swap_uint32_array ((uint32_t *) obstack_next_free (obstack) - n, n); +} + +/* Write out the data. 
*/ +extern void init_locale_data (struct locale_file *file, size_t n_elements); +extern void align_locale_data (struct locale_file *file, size_t boundary); +extern void add_locale_empty (struct locale_file *file); +extern void add_locale_raw_data (struct locale_file *file, const void *data, + size_t size); +extern void add_locale_raw_obstack (struct locale_file *file, + struct obstack *obstack); +extern void add_locale_string (struct locale_file *file, const char *string); +extern void add_locale_wstring (struct locale_file *file, + const uint32_t *string); +extern void add_locale_uint32 (struct locale_file *file, uint32_t value); +extern void add_locale_uint32_array (struct locale_file *file, + const uint32_t *data, size_t n_elems); +extern void add_locale_char (struct locale_file *file, char value); +extern void start_locale_structure (struct locale_file *file); +extern void end_locale_structure (struct locale_file *file); +extern void start_locale_prelude (struct locale_file *file); +extern void end_locale_prelude (struct locale_file *file); +extern void write_locale_data (const char *output_path, int catidx, + const char *category, struct locale_file *file); + + +/* Entrypoints for the parsers of the individual categories. */ + +/* Handle LC_CTYPE category. */ +extern void ctype_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void ctype_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void ctype_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); +extern uint32_t *find_translit (struct localedef_t *locale, + const struct charmap_t *charmap, uint32_t wch); + +/* Handle LC_COLLATE category. 
*/ +extern void collate_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void collate_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void collate_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_MONETARY category. */ +extern void monetary_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void monetary_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void monetary_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_NUMERIC category. */ +extern void numeric_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void numeric_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void numeric_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_MESSAGES category. */ +extern void messages_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void messages_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void messages_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_TIME category. 
*/ +extern void time_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void time_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void time_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_PAPER category. */ +extern void paper_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void paper_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void paper_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_NAME category. */ +extern void name_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void name_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void name_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_ADDRESS category. */ +extern void address_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void address_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void address_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_TELEPHONE category. 
*/ +extern void telephone_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void telephone_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void telephone_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_MEASUREMENT category. */ +extern void measurement_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void measurement_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void measurement_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +/* Handle LC_IDENTIFICATION category. */ +extern void identification_read (struct linereader *ldfile, + struct localedef_t *result, + const struct charmap_t *charmap, + const char *repertoire_name, + int ignore_content); +extern void identification_finish (struct localedef_t *locale, + const struct charmap_t *charmap); +extern void identification_output (struct localedef_t *locale, + const struct charmap_t *charmap, + const char *output_path); + +#endif /* locfile.h */ diff --git a/REORG.TODO/locale/programs/repertoire.c b/REORG.TODO/locale/programs/repertoire.c new file mode 100644 index 0000000000..61f2c055e7 --- /dev/null +++ b/REORG.TODO/locale/programs/repertoire.c @@ -0,0 +1,524 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <errno.h> +#include <error.h> +#include <limits.h> +#include <obstack.h> +#include <search.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdint.h> + +#include "localedef.h" +#include "linereader.h" +#include "charmap.h" +#include "repertoire.h" +#include "simple-hash.h" + + +/* Simple keyword hashing for the repertoiremap. */ +static const struct keyword_t *repertoiremap_hash (const char *str, + unsigned int len); +static void repertoire_new_char (struct linereader *lr, hash_table *ht, + hash_table *rt, struct obstack *ob, + uint32_t value, const char *from, + const char *to, int decimal_ellipsis); +static int repertoire_compare (const void *p1, const void *p2); + +/* Already known repertoire maps. */ +static void *known; + +/* List of repertoire maps which are not available and which have been + reported to not be. */ +static void *unavailable; + + +struct repertoire_t * +repertoire_read (const char *filename) +{ + struct linereader *repfile; + struct repertoire_t *result; + struct repertoire_t **resultp; + struct repertoire_t search; + int state; + char *from_name = NULL; + char *to_name = NULL; + enum token_t ellipsis = tok_none; + + search.name = filename; + resultp = tfind (&search, &known, &repertoire_compare); + if (resultp != NULL) + return *resultp; + + /* Determine path. 
*/ + repfile = lr_open (filename, repertoiremap_hash); + if (repfile == NULL) + { + if (strchr (filename, '/') == NULL) + { + char *i18npath = getenv ("I18NPATH"); + if (i18npath != NULL && *i18npath != '\0') + { + const size_t pathlen = strlen (i18npath); + char i18npathbuf[pathlen + 1]; + char path[strlen (filename) + 1 + pathlen + + sizeof ("/repertoiremaps/") - 1]; + char *next; + i18npath = memcpy (i18npathbuf, i18npath, pathlen + 1); + + while (repfile == NULL + && (next = strsep (&i18npath, ":")) != NULL) + { + stpcpy (stpcpy (stpcpy (path, next), "/repertoiremaps/"), + filename); + + repfile = lr_open (path, repertoiremap_hash); + + if (repfile == NULL) + { + stpcpy (stpcpy (stpcpy (path, next), "/"), filename); + + repfile = lr_open (path, repertoiremap_hash); + } + } + } + + if (repfile == NULL) + { + /* Look in the systems charmap directory. */ + char *buf = xmalloc (strlen (filename) + 1 + + sizeof (REPERTOIREMAP_PATH)); + + stpcpy (stpcpy (stpcpy (buf, REPERTOIREMAP_PATH), "/"), + filename); + repfile = lr_open (buf, repertoiremap_hash); + + free (buf); + } + } + + if (repfile == NULL) + return NULL; + } + + /* We don't want symbolic names in string to be translated. */ + repfile->translate_strings = 0; + + /* Allocate room for result. */ + result = (struct repertoire_t *) xmalloc (sizeof (struct repertoire_t)); + memset (result, '\0', sizeof (struct repertoire_t)); + + result->name = xstrdup (filename); + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + obstack_init (&result->mem_pool); + + if (init_hash (&result->char_table, 256) + || init_hash (&result->reverse_table, 256) + || init_hash (&result->seq_table, 256)) + { + free (result); + return NULL; + } + + /* We use a state machine to describe the charmap description file + format. */ + state = 1; + while (1) + { + /* What's on? 
*/ + struct token *now = lr_token (repfile, NULL, NULL, NULL, verbose); + enum token_t nowtok = now->tok; + struct token *arg; + + if (nowtok == tok_eof) + break; + + switch (state) + { + case 1: + /* We haven't yet read any character definition. This is where + we accept escape_char and comment_char definitions. */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_escape_char || nowtok == tok_comment_char) + { + /* We know that we need an argument. */ + arg = lr_token (repfile, NULL, NULL, NULL, verbose); + + if (arg->tok != tok_ident) + { + lr_error (repfile, _("syntax error in prolog: %s"), + _("bad argument")); + + lr_ignore_rest (repfile, 0); + continue; + } + + if (arg->val.str.lenmb != 1) + { + lr_error (repfile, _("\ +argument to <%s> must be a single character"), + nowtok == tok_escape_char ? "escape_char" + : "comment_char"); + + lr_ignore_rest (repfile, 0); + continue; + } + + if (nowtok == tok_escape_char) + repfile->escape_char = *arg->val.str.startmb; + else + repfile->comment_char = *arg->val.str.startmb; + + lr_ignore_rest (repfile, 1); + continue; + } + + if (nowtok == tok_charids) + { + lr_ignore_rest (repfile, 1); + + state = 2; + continue; + } + + /* Otherwise we start reading the character definitions. */ + state = 2; + /* FALLTHROUGH */ + + case 2: + /* We are now are in the body. Each line + must have the format "%s %s %s\n" or "%s...%s %s %s\n". */ + if (nowtok == tok_eol) + /* Ignore empty lines. */ + continue; + + if (nowtok == tok_end) + { + state = 90; + continue; + } + + if (nowtok != tok_bsymbol) + { + lr_error (repfile, + _("syntax error in repertoire map definition: %s"), + _("no symbolic name given")); + + lr_ignore_rest (repfile, 0); + continue; + } + + /* If the previous line was not completely correct free the + used memory. 
*/ + if (from_name != NULL) + obstack_free (&result->mem_pool, from_name); + + from_name = (char *) obstack_copy0 (&result->mem_pool, + now->val.str.startmb, + now->val.str.lenmb); + to_name = NULL; + + state = 3; + continue; + + case 3: + /* We have two possibilities: We can see an ellipsis or an + encoding value. */ + if (nowtok == tok_ellipsis3 || nowtok == tok_ellipsis4 + || nowtok == tok_ellipsis2) + { + ellipsis = nowtok; + state = 4; + continue; + } + /* FALLTHROUGH */ + + case 5: + /* We expect a value of the form <Uxxxx> or <Uxxxxxxxx> where + the xxx mean a hexadecimal value. */ + state = 2; + + errno = 0; + if (nowtok != tok_ucs4) + { + lr_error (repfile, + _("syntax error in repertoire map definition: %s"), + _("no <Uxxxx> or <Uxxxxxxxx> value given")); + + lr_ignore_rest (repfile, 0); + continue; + } + + /* We've found a new valid definition. */ + repertoire_new_char (repfile, &result->char_table, + &result->reverse_table, &result->mem_pool, + now->val.ucs4, from_name, to_name, + ellipsis != tok_ellipsis2); + + /* Ignore the rest of the line. */ + lr_ignore_rest (repfile, 0); + + from_name = NULL; + to_name = NULL; + + continue; + + case 4: + if (nowtok != tok_bsymbol) + { + lr_error (repfile, + _("syntax error in repertoire map definition: %s"), + _("no symbolic name given for end of range")); + + lr_ignore_rest (repfile, 0); + state = 2; + continue; + } + + /* Copy the to-name in a safe place. 
*/ + to_name = (char *) obstack_copy0 (&result->mem_pool, + repfile->token.val.str.startmb, + repfile->token.val.str.lenmb); + + state = 5; + continue; + + case 90: + if (nowtok != tok_charids) + lr_error (repfile, _("\ +%1$s: definition does not end with `END %1$s'"), "CHARIDS"); + + lr_ignore_rest (repfile, nowtok == tok_charids); + break; + } + + break; + } + + if (state != 2 && state != 90 && !be_quiet) + WITH_CUR_LOCALE (error (0, 0, _("%s: premature end of file"), + repfile->fname)); + + lr_close (repfile); + + if (tsearch (result, &known, &repertoire_compare) == NULL) + /* Something went wrong. */ + WITH_CUR_LOCALE (error (0, errno, _("cannot save new repertoire map"))); + + return result; +} + + +void +repertoire_complain (const char *name) +{ + if (tfind (name, &unavailable, (__compar_fn_t) strcmp) == NULL) + { + WITH_CUR_LOCALE (error (0, errno, _("\ +repertoire map file `%s' not found"), name)); + + /* Remember that we reported this map. */ + tsearch (name, &unavailable, (__compar_fn_t) strcmp); + } +} + + +static int +repertoire_compare (const void *p1, const void *p2) +{ + struct repertoire_t *r1 = (struct repertoire_t *) p1; + struct repertoire_t *r2 = (struct repertoire_t *) p2; + + return strcmp (r1->name, r2->name); +} + + +static const struct keyword_t * +repertoiremap_hash (const char *str, unsigned int len) +{ + static const struct keyword_t wordlist[] = + { + {"escape_char", tok_escape_char, 0}, + {"comment_char", tok_comment_char, 0}, + {"CHARIDS", tok_charids, 0}, + {"END", tok_end, 0}, + }; + + if (len == 11 && memcmp (wordlist[0].name, str, 11) == 0) + return &wordlist[0]; + if (len == 12 && memcmp (wordlist[1].name, str, 12) == 0) + return &wordlist[1]; + if (len == 7 && memcmp (wordlist[2].name, str, 7) == 0) + return &wordlist[2]; + if (len == 3 && memcmp (wordlist[3].name, str, 3) == 0) + return &wordlist[3]; + + return NULL; +} + + +/* Enter the symbolic name FROM with UCS4 value VALUE into the name table + HT and into the reverse (value to name) table RT. If TO is non-NULL, + FROM and TO delimit a range: both must have the same length and share + a prefix followed by digits (decimal if DECIMAL_ELLIPSIS is nonzero, + hexadecimal otherwise). Every name of the range is then generated and + entered with consecutive values starting at VALUE. Generated names and + the reverse-table keys are allocated on OB; problems are reported + through LR. */ +static void +repertoire_new_char (struct linereader *lr, hash_table *ht, hash_table *rt, + struct 
obstack *ob, uint32_t value, const char *from, + const char *to, int decimal_ellipsis) +{ + char *from_end; + char *to_end; + const char *cp; + char *buf; + int prefix_len, len1, len2; + unsigned long int from_nr, to_nr, cnt; + + if (to == NULL) + { + insert_entry (ht, from, strlen (from), + (void *) (unsigned long int) value); + /* Please note that it isn't a bug if a symbol is defined more + than once. All later definitions are simply discarded. */ + + insert_entry (rt, obstack_copy (ob, &value, sizeof (value)), + sizeof (value), (void *) from); + + return; + } + + /* We have a range: the names must have equal prefixes and an equal + number of digits, where the second number is greater than or equal + to the first. */ + len1 = strlen (from); + len2 = strlen (to); + + if (len1 != len2) + { + invalid_range: + lr_error (lr, _("invalid names for character range")); + return; + } + + cp = &from[len1 - 1]; + /* Test the bound before dereferencing so that an all-digit (or empty) + name is never read in front of its first byte. The <ctype.h> + classifiers need an unsigned char argument. */ + if (decimal_ellipsis) + while (cp >= from && isdigit ((unsigned char) *cp)) + --cp; + else + while (cp >= from && isxdigit ((unsigned char) *cp)) + { + if (!isdigit ((unsigned char) *cp) && !isupper ((unsigned char) *cp)) + lr_error (lr, _("\ +hexadecimal range format should use only capital characters")); + --cp; + } + + prefix_len = (cp - from) + 1; + + if (cp == &from[len1 - 1] || strncmp (from, to, prefix_len) != 0) + goto invalid_range; + + errno = 0; + from_nr = strtoul (&from[prefix_len], &from_end, decimal_ellipsis ? 10 : 16); + if (*from_end != '\0' || (from_nr == ULONG_MAX && errno == ERANGE) + || ((to_nr = strtoul (&to[prefix_len], &to_end, + decimal_ellipsis ? 10 : 16)) == ULONG_MAX + && errno == ERANGE) + || *to_end != '\0') + { + lr_error (lr, _("<%s> and <%s> are invalid names for range"), + from, to); + return; + } + + if (from_nr > to_nr) + { + lr_error (lr, _("upper limit in range is smaller than lower limit")); + return; + } + + for (cnt = from_nr; cnt <= to_nr; ++cnt) + { + uint32_t this_value = value + (cnt - from_nr); + + obstack_printf (ob, decimal_ellipsis ? 
"%.*s%0*lu" : "%.*s%0*lX", + prefix_len, from, len1 - prefix_len, cnt); + obstack_1grow (ob, '\0'); + /* Finish the growing object: only then does the generated name have + a stable address that can serve as key and be stored as data. */ + buf = obstack_finish (ob); + + insert_entry (ht, buf, len1, + (void *) (unsigned long int) this_value); + /* Please note we don't examine the return value since it is no error + if we have two definitions for a symbol. */ + + /* Map the value back to its own generated name, not to the first + name of the range. */ + insert_entry (rt, obstack_copy (ob, &this_value, sizeof (this_value)), + sizeof (this_value), (void *) buf); + } +} + + +uint32_t +repertoire_find_value (const struct repertoire_t *rep, const char *name, + size_t len) +{ + void *result; + + if (rep == NULL) + return ILLEGAL_CHAR_VALUE; + + if (find_entry ((hash_table *) &rep->char_table, name, len, &result) < 0) + return ILLEGAL_CHAR_VALUE; + + return (uint32_t) ((unsigned long int) result); +} + + +const char * +repertoire_find_symbol (const struct repertoire_t *rep, uint32_t ucs) +{ + void *result; + + if (rep == NULL) + return NULL; + + if (find_entry ((hash_table *) &rep->reverse_table, &ucs, sizeof (ucs), + &result) < 0) + return NULL; + + return (const char *) result; +} + + +struct charseq * +repertoire_find_seq (const struct repertoire_t *rep, uint32_t ucs) +{ + void *result; + + if (rep == NULL) + return NULL; + + if (find_entry ((hash_table *) &rep->seq_table, &ucs, sizeof (ucs), + &result) < 0) + return NULL; + + return (struct charseq *) result; +} diff --git a/REORG.TODO/locale/programs/repertoire.h b/REORG.TODO/locale/programs/repertoire.h new file mode 100644 index 0000000000..f07ffcf1f4 --- /dev/null +++ b/REORG.TODO/locale/programs/repertoire.h @@ -0,0 +1,64 @@ +/* Copyright (C) 1998-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _REPERTOIREMAP_H +#define _REPERTOIREMAP_H 1 + +#include <obstack.h> +#include <stdint.h> + +#include "charmap.h" +#include "simple-hash.h" + +struct repertoire_t +{ + const char *name; + struct obstack mem_pool; + hash_table char_table; + hash_table reverse_table; + hash_table seq_table; +}; + + +/* We need one value to mark the error case. Let's use 0xffffffff. + I.e., it is placed in the last page of ISO 10646. For now only the + first is used and we have plenty of room. */ +#define ILLEGAL_CHAR_VALUE ((uint32_t) 0xffffffffu) + +/* Another value is needed to signal that a value is not yet determined. */ +#define UNINITIALIZED_CHAR_VALUE ((uint32_t) 0xfffffffeu) + + +/* Prototypes for repertoire map handling functions. */ +extern struct repertoire_t *repertoire_read (const char *filename); + +/* Report missing repertoire map. */ +extern void repertoire_complain (const char *name); + +/* Return UCS4 value of character with given NAME. */ +extern uint32_t repertoire_find_value (const struct repertoire_t *repertoire, + const char *name, size_t len); + +/* Return symbol for given UCS4 value. */ +extern const char *repertoire_find_symbol (const struct repertoire_t *repertoire, + uint32_t ucs); + +/* Query the hash table to memoize mapping from UCS4 to byte sequences. 
*/ +extern struct charseq *repertoire_find_seq (const struct repertoire_t *rep, + uint32_t ucs); + +#endif /* repertoiremap.h */ diff --git a/REORG.TODO/locale/programs/simple-hash.c b/REORG.TODO/locale/programs/simple-hash.c new file mode 100644 index 0000000000..5e62e249a6 --- /dev/null +++ b/REORG.TODO/locale/programs/simple-hash.c @@ -0,0 +1,291 @@ +/* Implement simple hashing table with string based keys. + Copyright (C) 1994-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, October 1994. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <sys/types.h> + +#include <obstack.h> + +#ifdef HAVE_VALUES_H +# include <values.h> +#endif + +#include "simple-hash.h" + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + +#ifndef BITSPERBYTE +# define BITSPERBYTE 8 +#endif + +#define hashval_t uint32_t +#include "hashval.h" + +#include <programs/xmalloc.h> + +typedef struct hash_entry +{ + unsigned long used; + const void *key; + size_t keylen; + void *data; + struct hash_entry *next; +} +hash_entry; + +/* Prototypes for local functions. 
*/ +static void insert_entry_2 (hash_table *htab, const void *key, size_t keylen, + unsigned long hval, size_t idx, void *data); +static size_t lookup (const hash_table *htab, const void *key, size_t keylen, + unsigned long int hval); +static int is_prime (unsigned long int candidate); + + +int +init_hash (hash_table *htab, unsigned long int init_size) +{ + /* We need the size to be a prime. */ + init_size = next_prime (init_size); + + /* Initialize the data structure. */ + htab->size = init_size; + htab->filled = 0; + htab->first = NULL; + htab->table = (void *) xcalloc (init_size + 1, sizeof (hash_entry)); + if (htab->table == NULL) + return -1; + + obstack_init (&htab->mem_pool); + + return 0; +} + + +int +delete_hash (hash_table *htab) +{ + free (htab->table); + obstack_free (&htab->mem_pool, NULL); + return 0; +} + + +int +insert_entry (hash_table *htab, const void *key, size_t keylen, void *data) +{ + unsigned long int hval = compute_hashval (key, keylen); + hash_entry *table = (hash_entry *) htab->table; + size_t idx = lookup (htab, key, keylen, hval); + + if (table[idx].used) + /* We don't want to overwrite the old value. */ + return -1; + else + { + /* An empty bucket has been found. */ + insert_entry_2 (htab, obstack_copy (&htab->mem_pool, key, keylen), + keylen, hval, idx, data); + return 0; + } +} + +static void +insert_entry_2 (hash_table *htab, const void *key, size_t keylen, + unsigned long int hval, size_t idx, void *data) +{ + hash_entry *table = (hash_entry *) htab->table; + + table[idx].used = hval; + table[idx].key = key; + table[idx].keylen = keylen; + table[idx].data = data; + + /* List the new value in the list. 
*/ + if ((hash_entry *) htab->first == NULL) + { + table[idx].next = &table[idx]; + htab->first = &table[idx]; + } + else + { + table[idx].next = ((hash_entry *) htab->first)->next; + ((hash_entry *) htab->first)->next = &table[idx]; + htab->first = &table[idx]; + } + + ++htab->filled; + if (100 * htab->filled > 75 * htab->size) + { + /* Table is filled more than 75%. Resize the table. + Experiments have shown that for best performance, this threshold + must lie between 40% and 85%. */ + unsigned long int old_size = htab->size; + + htab->size = next_prime (htab->size * 2); + htab->filled = 0; + htab->first = NULL; + htab->table = (void *) xcalloc (1 + htab->size, sizeof (hash_entry)); + + for (idx = 1; idx <= old_size; ++idx) + if (table[idx].used) + insert_entry_2 (htab, table[idx].key, table[idx].keylen, + table[idx].used, + lookup (htab, table[idx].key, table[idx].keylen, + table[idx].used), + table[idx].data); + + free (table); + } +} + + +int +find_entry (const hash_table *htab, const void *key, size_t keylen, + void **result) +{ + hash_entry *table = (hash_entry *) htab->table; + size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); + + if (table[idx].used == 0) + return -1; + + *result = table[idx].data; + return 0; +} + + +int +set_entry (hash_table *htab, const void *key, size_t keylen, void *newval) +{ + hash_entry *table = (hash_entry *) htab->table; + size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); + + if (table[idx].used == 0) + return -1; + + table[idx].data = newval; + return 0; +} + + +int +iterate_table (const hash_table *htab, void **ptr, const void **key, + size_t *keylen, void **data) +{ + if (*ptr == NULL) + { + if (htab->first == NULL) + return -1; + *ptr = (void *) ((hash_entry *) htab->first)->next; + } + else + { + if (*ptr == htab->first) + return -1; + *ptr = (void *) (((hash_entry *) *ptr)->next); + } + + *key = ((hash_entry *) *ptr)->key; + *keylen = ((hash_entry *) *ptr)->keylen; + *data = 
((hash_entry *) *ptr)->data; + return 0; +} + + +/* References: + [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 + [Knuth] The Art of Computer Programming, part 3 (6.4) */ + +static size_t +lookup (const hash_table *htab, const void *key, size_t keylen, + unsigned long int hval) +{ + unsigned long int hash; + size_t idx; + hash_entry *table = (hash_entry *) htab->table; + + /* First hash function: simply take the modulus but prevent zero. */ + hash = 1 + hval % htab->size; + + idx = hash; + + if (table[idx].used) + { + if (table[idx].used == hval && table[idx].keylen == keylen + && memcmp (table[idx].key, key, keylen) == 0) + return idx; + + /* Second hash function as suggested in [Knuth]. */ + hash = 1 + hval % (htab->size - 2); + + do + { + if (idx <= hash) + idx = htab->size + idx - hash; + else + idx -= hash; + + /* If entry is found use it. */ + if (table[idx].used == hval && table[idx].keylen == keylen + && memcmp (table[idx].key, key, keylen) == 0) + return idx; + } + while (table[idx].used); + } + return idx; +} + + +unsigned long int +next_prime (unsigned long int seed) +{ + /* Make it definitely odd. */ + seed |= 1; + + while (!is_prime (seed)) + seed += 2; + + return seed; +} + + +static int +is_prime (unsigned long int candidate) +{ + /* Only odd candidates are expected here. The test is not exact for + candidates below 5: 1 is accepted and 3 is rejected. */ + unsigned long int divn = 3; + unsigned long int sq = divn * divn; + + while (sq < candidate && candidate % divn != 0) + { + ++divn; + sq += 4 * divn; + ++divn; + } + + return candidate % divn != 0; +} diff --git a/REORG.TODO/locale/programs/simple-hash.h b/REORG.TODO/locale/programs/simple-hash.h new file mode 100644 index 0000000000..92ce9508e9 --- /dev/null +++ b/REORG.TODO/locale/programs/simple-hash.h @@ -0,0 +1,53 @@ +/* Copyright (C) 1995-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifndef _SIMPLE_HASH_H +#define _SIMPLE_HASH_H + +#include <inttypes.h> +#include <obstack.h> +#include <stdint.h> + +typedef struct hash_table +{ + unsigned long int size; + unsigned long int filled; + void *first; + void *table; + struct obstack mem_pool; +} +hash_table; + + +extern int init_hash (hash_table *htab, unsigned long int init_size) __THROW; +extern int delete_hash (hash_table *htab) __THROW; +extern int insert_entry (hash_table *htab, const void *key, size_t keylen, + void *data) __THROW; +extern int find_entry (const hash_table *htab, const void *key, size_t keylen, + void **result) __THROW; +extern int set_entry (hash_table *htab, const void *key, size_t keylen, + void *newval) __THROW; + +extern int iterate_table (const hash_table *htab, void **ptr, + const void **key, size_t *keylen, void **data) + __THROW; + +extern uint32_t compute_hashval (const void *key, size_t keylen) + __THROW; +extern unsigned long int next_prime (unsigned long int seed) __THROW; + +#endif /* simple-hash.h */ diff --git a/REORG.TODO/locale/programs/xmalloc.c b/REORG.TODO/locale/programs/xmalloc.c new file mode 100644 index 0000000000..92468b8c7f --- /dev/null +++ b/REORG.TODO/locale/programs/xmalloc.c @@ -0,0 +1,106 @@ +/* xmalloc.c -- malloc with out of memory checking + Copyright (C) 1990-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#define VOID void + +#include <sys/types.h> + +#if STDC_HEADERS || _LIBC +#include <stdlib.h> +static VOID *fixup_null_alloc (size_t n) __THROW; +VOID *xmalloc (size_t n) __THROW; +VOID *xcalloc (size_t n, size_t s) __THROW; +VOID *xrealloc (VOID *p, size_t n) __THROW; +#else +VOID *calloc (); +VOID *malloc (); +VOID *realloc (); +void free (); +#endif + +#include <libintl.h> +#include "error.h" + +#ifndef _ +# define _(str) gettext (str) +#endif + +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 4 +#endif + +/* Exit value when the requested amount of memory is not available. + The caller may set it to some other value. */ +int xmalloc_exit_failure = EXIT_FAILURE; + +static VOID * +fixup_null_alloc (size_t n) +{ + VOID *p; + + p = 0; + if (n == 0) + p = malloc ((size_t) 1); + if (p == 0) + error (xmalloc_exit_failure, 0, _("memory exhausted")); + return p; +} + +/* Allocate N bytes of memory dynamically, with error checking. */ + +VOID * +xmalloc (size_t n) +{ + VOID *p; + + p = malloc (n); + if (p == 0) + p = fixup_null_alloc (n); + return p; +} + +/* Allocate memory for N elements of S bytes, with error checking. 
*/ + +VOID * +xcalloc (size_t n, size_t s) +{ + VOID *p; + + p = calloc (n, s); + if (p == 0) + /* A request for zero bytes (N or S is zero) may legitimately yield + NULL; let fixup_null_alloc retry it as a one-byte allocation + instead of reporting memory exhaustion. */ + p = fixup_null_alloc (s == 0 ? 0 : n); + return p; +} + +/* Change the size of an allocated block of memory P to N bytes, + with error checking. + If P is NULL, run xmalloc. */ + +VOID * +xrealloc (VOID *p, size_t n) +{ + if (p == 0) + return xmalloc (n); + p = realloc (p, n); + if (p == 0) + p = fixup_null_alloc (n); + return p; +} diff --git a/REORG.TODO/locale/programs/xstrdup.c b/REORG.TODO/locale/programs/xstrdup.c new file mode 100644 index 0000000000..dcd89b160f --- /dev/null +++ b/REORG.TODO/locale/programs/xstrdup.c @@ -0,0 +1,36 @@ +/* xstrdup.c -- copy a string with out of memory checking + Copyright (C) 1990-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#if defined STDC_HEADERS || defined HAVE_STRING_H || _LIBC +# include <string.h> +#else +# include <strings.h> +#endif +void *xmalloc (size_t n) __THROW; +char *xstrdup (char *string) __THROW; + +/* Return a newly allocated copy of STRING. */ + +char * +xstrdup (char *string) +{ + return strcpy (xmalloc (strlen (string) + 1), string); +} |