diff options
Diffstat (limited to 'REORG.TODO/catgets')
-rw-r--r-- | REORG.TODO/catgets/Depend | 1 | ||||
-rw-r--r-- | REORG.TODO/catgets/Makefile | 96 | ||||
-rw-r--r-- | REORG.TODO/catgets/Versions | 10 | ||||
-rw-r--r-- | REORG.TODO/catgets/catgets.c | 149 | ||||
-rw-r--r-- | REORG.TODO/catgets/catgetsinfo.h | 56 | ||||
-rw-r--r-- | REORG.TODO/catgets/config.h | 14 | ||||
-rw-r--r-- | REORG.TODO/catgets/gencat.c | 1357 | ||||
-rw-r--r-- | REORG.TODO/catgets/nl_types.h | 54 | ||||
-rw-r--r-- | REORG.TODO/catgets/open_catalog.c | 336 | ||||
-rw-r--r-- | REORG.TODO/catgets/sample.SJIS | 14 | ||||
-rw-r--r-- | REORG.TODO/catgets/test-gencat.c | 34 | ||||
-rwxr-xr-x | REORG.TODO/catgets/test-gencat.sh | 53 | ||||
-rw-r--r-- | REORG.TODO/catgets/test1.msg | 5 | ||||
-rw-r--r-- | REORG.TODO/catgets/test2.msg | 8 | ||||
-rw-r--r-- | REORG.TODO/catgets/tst-catgets.c | 100 | ||||
-rw-r--r-- | REORG.TODO/catgets/xopen-msg.awk | 72 |
16 files changed, 2359 insertions, 0 deletions
diff --git a/REORG.TODO/catgets/Depend b/REORG.TODO/catgets/Depend new file mode 100644 index 0000000000..88c6f3aa95 --- /dev/null +++ b/REORG.TODO/catgets/Depend @@ -0,0 +1 @@ +intl diff --git a/REORG.TODO/catgets/Makefile b/REORG.TODO/catgets/Makefile new file mode 100644 index 0000000000..e5f49491fb --- /dev/null +++ b/REORG.TODO/catgets/Makefile @@ -0,0 +1,96 @@ +# Copyright (C) 1996-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. + +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +# +# Makefile for catgets. 
+# +subdir := catgets + +include ../Makeconfig + +headers = nl_types.h +routines = catgets open_catalog +others = gencat +install-bin = gencat +extra-objs = $(gencat-modules:=.o) + +tests = tst-catgets +test-srcs = test-gencat + +ifeq ($(run-built-tests),yes) +tests-special += $(objpfx)de/libc.cat $(objpfx)test1.cat $(objpfx)test2.cat \ + $(objpfx)sample.SJIS.cat $(objpfx)test-gencat.out +tests-special += $(objpfx)tst-catgets-mem.out +endif + +gencat-modules = xmalloc + +# To find xmalloc.c +vpath %.c ../locale/programs + + +include ../Rules + +$(objpfx)gencat: $(gencat-modules:%=$(objpfx)%.o) + +catgets-CPPFLAGS := -DNLSPATH='"$(localedir)/%L/%N:$(localedir)/%L/LC_MESSAGES/%N:$(localedir)/%l/%N:$(localedir)/%l/LC_MESSAGES/%N:"' + +generated += de.msg test1.cat test1.h test2.cat test2.h sample.SJIS.cat \ + test-gencat.h +generated += tst-catgets.mtrace tst-catgets-mem.out + +generated-dirs += de + +tst-catgets-ENV = NLSPATH="$(objpfx)%l/%N.cat" LANG=de MALLOC_TRACE=$(objpfx)tst-catgets.mtrace + +ifeq ($(run-built-tests),yes) +# This test just checks whether the program produces any error or not. +# The result is not tested. +$(objpfx)test1.cat: test1.msg $(objpfx)gencat + $(built-program-cmd-before-env) \ + $(run-program-env) LC_ALL=hr_HR.ISO-8859-2 \ + $(built-program-cmd-after-env) -H $(objpfx)test1.h $@ $<; \ + $(evaluate-test) +$(objpfx)test2.cat: test2.msg $(objpfx)gencat + $(built-program-cmd) -H $(objpfx)test2.h $@ $<; \ + $(evaluate-test) +$(objpfx)de/libc.cat: $(objpfx)de.msg $(objpfx)gencat + $(make-target-directory) + $(built-program-cmd-before-env) \ + $(run-program-env) LC_ALL=de_DE.ISO-8859-1 \ + $(built-program-cmd-after-env) $@ $<; \ + $(evaluate-test) +$(objpfx)tst-catgets.out: $(objpfx)de/libc.cat + +# Generate a non-simple input file. 
+$(objpfx)de.msg: xopen-msg.awk $(..)po/de.po + LC_ALL=C $(AWK) -f $^ $< > $@ + +$(objpfx)test-gencat.out: test-gencat.sh $(objpfx)test-gencat \ + $(objpfx)sample.SJIS.cat + $(SHELL) $< $(common-objpfx) '$(test-program-cmd-before-env)' \ + '$(run-program-env)' '$(test-program-cmd-after-env)'; \ + $(evaluate-test) + +$(objpfx)sample.SJIS.cat: sample.SJIS $(objpfx)gencat + $(built-program-cmd) -H $(objpfx)test-gencat.h < $(word 1,$^) > $@; \ + $(evaluate-test) + +$(objpfx)tst-catgets-mem.out: $(objpfx)tst-catgets.out + $(common-objpfx)malloc/mtrace $(objpfx)tst-catgets.mtrace > $@; \ + $(evaluate-test) +endif diff --git a/REORG.TODO/catgets/Versions b/REORG.TODO/catgets/Versions new file mode 100644 index 0000000000..6434f2bd03 --- /dev/null +++ b/REORG.TODO/catgets/Versions @@ -0,0 +1,10 @@ +libc { + GLIBC_2.0 { + # c* + catclose; catgets; catopen; + } + GLIBC_PRIVATE { + # functions with required interface outside normal name space + __open_catalog; + } +} diff --git a/REORG.TODO/catgets/catgets.c b/REORG.TODO/catgets/catgets.c new file mode 100644 index 0000000000..886ed1acce --- /dev/null +++ b/REORG.TODO/catgets/catgets.c @@ -0,0 +1,149 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.org>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#include <errno.h> +#include <locale.h> +#include <nl_types.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> + +#include "catgetsinfo.h" + + +/* Open the catalog and return a descriptor for the catalog. */ +nl_catd +catopen (const char *cat_name, int flag) +{ + __nl_catd result; + const char *env_var = NULL; + const char *nlspath = NULL; + char *tmp = NULL; + + if (strchr (cat_name, '/') == NULL) + { + if (flag == NL_CAT_LOCALE) + /* Use the current locale setting for LC_MESSAGES. */ + env_var = setlocale (LC_MESSAGES, NULL); + else + /* Use the LANG environment variable. */ + env_var = getenv ("LANG"); + + if (env_var == NULL || *env_var == '\0' + || (__libc_enable_secure && strchr (env_var, '/') != NULL)) + env_var = "C"; + + nlspath = getenv ("NLSPATH"); + if (nlspath != NULL && *nlspath != '\0') + { + /* Append the system dependent directory. */ + size_t len = strlen (nlspath) + 1 + sizeof NLSPATH; + tmp = malloc (len); + + if (__glibc_unlikely (tmp == NULL)) + return (nl_catd) -1; + + __stpcpy (__stpcpy (__stpcpy (tmp, nlspath), ":"), NLSPATH); + nlspath = tmp; + } + else + nlspath = NLSPATH; + } + + result = (__nl_catd) malloc (sizeof (*result)); + if (result == NULL) + { + /* We cannot get enough memory. */ + result = (nl_catd) -1; + } + else if (__open_catalog (cat_name, nlspath, env_var, result) != 0) + { + /* Couldn't open the file. */ + free ((void *) result); + result = (nl_catd) -1; + } + + free (tmp); + return (nl_catd) result; +} + + +/* Return message from message catalog. */ +char * +catgets (nl_catd catalog_desc, int set, int message, const char *string) +{ + __nl_catd catalog; + size_t idx; + size_t cnt; + + /* Be generous if catalog which failed to be open is used. 
*/ + if (catalog_desc == (nl_catd) -1 || ++set <= 0 || message < 0) + return (char *) string; + + catalog = (__nl_catd) catalog_desc; + + idx = ((set * message) % catalog->plane_size) * 3; + cnt = 0; + do + { + if (catalog->name_ptr[idx + 0] == (u_int32_t) set + && catalog->name_ptr[idx + 1] == (u_int32_t) message) + return (char *) &catalog->strings[catalog->name_ptr[idx + 2]]; + + idx += catalog->plane_size * 3; + } + while (++cnt < catalog->plane_depth); + + __set_errno (ENOMSG); + return (char *) string; +} + + +/* Return resources used for loaded message catalog. */ +int +catclose (nl_catd catalog_desc) +{ + __nl_catd catalog; + + /* Be generous if catalog which failed to be open is used. */ + if (catalog_desc == (nl_catd) -1) + { + __set_errno (EBADF); + return -1; + } + + catalog = (__nl_catd) catalog_desc; + +#ifdef _POSIX_MAPPED_FILES + if (catalog->status == mmapped) + __munmap ((void *) catalog->file_ptr, catalog->file_size); + else +#endif /* _POSIX_MAPPED_FILES */ + if (catalog->status == malloced) + free ((void *) catalog->file_ptr); + else + { + __set_errno (EBADF); + return -1; + } + + free ((void *) catalog); + + return 0; +} diff --git a/REORG.TODO/catgets/catgetsinfo.h b/REORG.TODO/catgets/catgetsinfo.h new file mode 100644 index 0000000000..e32d1933fb --- /dev/null +++ b/REORG.TODO/catgets/catgetsinfo.h @@ -0,0 +1,56 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sys/types.h> + + +struct catalog_obj +{ + u_int32_t magic; + u_int32_t plane_size; + u_int32_t plane_depth; + /* This is in fact two arrays in one: always a pair of name and + pointer into the data area. */ + u_int32_t name_ptr[0]; +}; + + +/* This structure will be filled after loading the catalog. */ +typedef struct catalog_info +{ + enum { mmapped, malloced } status; + + size_t plane_size; + size_t plane_depth; + u_int32_t *name_ptr; + const char *strings; + + struct catalog_obj *file_ptr; + size_t file_size; +} *__nl_catd; + + + +/* The magic number to signal we really have a catalog file. */ +#define CATGETS_MAGIC 0x960408deU + + +/* Prototypes for helper functions. */ +extern int __open_catalog (const char *cat_name, const char *nlspath, + const char *env_var, __nl_catd __catalog); +libc_hidden_proto (__open_catalog) diff --git a/REORG.TODO/catgets/config.h b/REORG.TODO/catgets/config.h new file mode 100644 index 0000000000..ce7887b351 --- /dev/null +++ b/REORG.TODO/catgets/config.h @@ -0,0 +1,14 @@ +#ifndef _CG_CONFIG_H +#define _CG_CONFIG_H + +/* Use the internal textdomain used for libc messages. */ +#define PACKAGE _libc_intl_domainname +#ifndef VERSION +/* Get libc version number. */ +#include "../version.h" +#endif + + +#include_next <config.h> + +#endif diff --git a/REORG.TODO/catgets/gencat.c b/REORG.TODO/catgets/gencat.c new file mode 100644 index 0000000000..6fd1de793b --- /dev/null +++ b/REORG.TODO/catgets/gencat.c @@ -0,0 +1,1357 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper <drepper@redhat.com>, 1996. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include <argp.h> +#include <assert.h> +#include <ctype.h> +#include <endian.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <iconv.h> +#include <langinfo.h> +#include <locale.h> +#include <libintl.h> +#include <limits.h> +#include <nl_types.h> +#include <obstack.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wchar.h> + +#include "version.h" + +#include "catgetsinfo.h" + + +#define SWAPU32(w) \ + (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24)) + +struct message_list +{ + int number; + const char *message; + + const char *fname; + size_t line; + const char *symbol; + + struct message_list *next; +}; + + +struct set_list +{ + int number; + int deleted; + struct message_list *messages; + int last_message; + + const char *fname; + size_t line; + const char *symbol; + + struct set_list *next; +}; + + +struct catalog +{ + struct set_list *all_sets; + struct set_list *current_set; + size_t total_messages; + wint_t quote_char; + int last_set; + + struct obstack mem_pool; +}; + + +/* If non-zero force creation of new file, not using existing one. */ +static int force_new; + +/* Name of output file. */ +static const char *output_name; + +/* Name of generated C header file. 
*/ +static const char *header_name; + +/* Name and version of program. */ +static void print_version (FILE *stream, struct argp_state *state); +void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version; + +#define OPT_NEW 1 + +/* Definitions of arguments for argp functions. */ +static const struct argp_option options[] = +{ + { "header", 'H', N_("NAME"), 0, + N_("Create C header file NAME containing symbol definitions") }, + { "new", OPT_NEW, NULL, 0, + N_("Do not use existing catalog, force new output file") }, + { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") }, + { NULL, 0, NULL, 0, NULL } +}; + +/* Short description of program. */ +static const char doc[] = N_("Generate message catalog.\ +\vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\ +is -, output is written to standard output.\n"); + +/* Strings for arguments in help texts. */ +static const char args_doc[] = N_("\ +-o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]"); + +/* Prototype for option handler. */ +static error_t parse_opt (int key, char *arg, struct argp_state *state); + +/* Function to print some extra text in the help message. */ +static char *more_help (int key, const char *text, void *input); + +/* Data structure to communicate with argp functions. */ +static struct argp argp = +{ + options, parse_opt, args_doc, doc, NULL, more_help +}; + + +/* Wrapper functions with error checking for standard functions. */ +#include <programs/xmalloc.h> + +/* Prototypes for local functions. 
*/ +static void error_print (void); +static struct catalog *read_input_file (struct catalog *current, + const char *fname); +static void write_out (struct catalog *result, const char *output_name, + const char *header_name); +static struct set_list *find_set (struct catalog *current, int number); +static void normalize_line (const char *fname, size_t line, iconv_t cd, + wchar_t *string, wchar_t quote_char, + wchar_t escape_char); +static void read_old (struct catalog *catalog, const char *file_name); +static int open_conversion (const char *codesetp, iconv_t *cd_towcp, + iconv_t *cd_tombp, wchar_t *escape_charp); + + +int +main (int argc, char *argv[]) +{ + struct catalog *result; + int remaining; + + /* Set program name for messages. */ + error_print_progname = error_print; + + /* Set locale via LC_ALL. */ + setlocale (LC_ALL, ""); + + /* Set the text message domain. */ + textdomain (PACKAGE); + + /* Initialize local variables. */ + result = NULL; + + /* Parse and process arguments. */ + argp_parse (&argp, argc, argv, 0, &remaining, NULL); + + /* Determine output file. */ + if (output_name == NULL) + output_name = remaining < argc ? argv[remaining++] : "-"; + + /* Process all input files. */ + setlocale (LC_CTYPE, "C"); + if (remaining < argc) + do + result = read_input_file (result, argv[remaining]); + while (++remaining < argc); + else + result = read_input_file (NULL, "-"); + + /* Write out the result. */ + if (result != NULL) + write_out (result, output_name, header_name); + + return error_message_count != 0; +} + + +/* Handle program arguments. 
*/ +static error_t +parse_opt (int key, char *arg, struct argp_state *state) +{ + switch (key) + { + case 'H': + header_name = arg; + break; + case OPT_NEW: + force_new = 1; + break; + case 'o': + output_name = arg; + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + + +static char * +more_help (int key, const char *text, void *input) +{ + char *tp = NULL; + switch (key) + { + case ARGP_KEY_HELP_EXTRA: + /* We print some extra information. */ + if (asprintf (&tp, gettext ("\ +For bug reporting instructions, please see:\n\ +%s.\n"), REPORT_BUGS_TO) < 0) + return NULL; + return tp; + default: + break; + } + return (char *) text; +} + +/* Print the version information. */ +static void +print_version (FILE *stream, struct argp_state *state) +{ + fprintf (stream, "gencat %s%s\n", PKGVERSION, VERSION); + fprintf (stream, gettext ("\ +Copyright (C) %s Free Software Foundation, Inc.\n\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ +"), "2017"); + fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); +} + + +/* The address of this function will be assigned to the hook in the + error functions. */ +static void +error_print (void) +{ + /* We don't want the program name to be printed in messages. Emacs' + compile.el does not like this. 
*/ +} + + +static struct catalog * +read_input_file (struct catalog *current, const char *fname) +{ + FILE *fp; + char *buf; + size_t len; + size_t line_number; + wchar_t *wbuf; + size_t wbufsize; + iconv_t cd_towc = (iconv_t) -1; + iconv_t cd_tomb = (iconv_t) -1; + wchar_t escape_char = L'\\'; + char *codeset = NULL; + + if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0) + { + fp = stdin; + fname = gettext ("*standard input*"); + } + else + fp = fopen (fname, "r"); + if (fp == NULL) + { + error (0, errno, gettext ("cannot open input file `%s'"), fname); + return current; + } + + /* If we haven't seen anything yet, allocate result structure. */ + if (current == NULL) + { + current = (struct catalog *) xcalloc (1, sizeof (*current)); + +#define obstack_chunk_alloc malloc +#define obstack_chunk_free free + obstack_init (&current->mem_pool); + + current->current_set = find_set (current, NL_SETD); + } + + buf = NULL; + len = 0; + line_number = 0; + + wbufsize = 1024; + wbuf = (wchar_t *) xmalloc (wbufsize); + + while (!feof (fp)) + { + int continued; + int used; + size_t start_line = line_number + 1; + char *this_line; + + do + { + int act_len; + + act_len = getline (&buf, &len, fp); + if (act_len <= 0) + break; + ++line_number; + + /* Is the line continued? */ + continued = 0; + if (buf[act_len - 1] == '\n') + { + --act_len; + + /* There might be more than one backslash at the end of + the line. Only if there is an odd number of them is + the line continued. */ + if (act_len > 0 && buf[act_len - 1] == '\\') + { + int temp_act_len = act_len; + + do + { + --temp_act_len; + continued = !continued; + } + while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\'); + + if (continued) + --act_len; + } + } + + /* Append to currently selected line. 
*/ + obstack_grow (&current->mem_pool, buf, act_len); + } + while (continued); + + obstack_1grow (&current->mem_pool, '\0'); + this_line = (char *) obstack_finish (&current->mem_pool); + + used = 0; + if (this_line[0] == '$') + { + if (isblank (this_line[1])) + { + int cnt = 1; + while (isblank (this_line[cnt])) + ++cnt; + if (strncmp (&this_line[cnt], "codeset=", 8) != 0) + /* This is a comment line. Do nothing. */; + else if (codeset != NULL) + /* Ignore multiple codeset. */; + else + { + int start = cnt + 8; + cnt = start; + while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) + ++cnt; + if (cnt != start) + { + int len = cnt - start; + codeset = xmalloc (len + 1); + *((char *) mempcpy (codeset, &this_line[start], len)) + = '\0'; + } + } + } + else if (strncmp (&this_line[1], "set", 3) == 0) + { + int cnt = sizeof ("set"); + int set_number; + const char *symbol = NULL; + while (isspace (this_line[cnt])) + ++cnt; + + if (isdigit (this_line[cnt])) + { + set_number = atol (&this_line[cnt]); + + /* If the given number for the character set is + higher than any we used for symbolic set names + avoid clashing by using only higher numbers for + the following symbolic definitions. */ + if (set_number > current->last_set) + current->last_set = set_number; + } + else + { + /* See whether it is a reasonable identifier. */ + int start = cnt; + while (isalnum (this_line[cnt]) || this_line[cnt] == '_') + ++cnt; + + if (cnt == start) + { + /* No correct character found. */ + error_at_line (0, 0, fname, start_line, + gettext ("illegal set number")); + set_number = 0; + } + else + { + /* We have found something that looks like a + correct identifier. */ + struct set_list *runp; + + this_line[cnt] = '\0'; + used = 1; + symbol = &this_line[start]; + + /* Test whether the identifier was already used. 
*/ + runp = current->all_sets; + while (runp != 0) + if (runp->symbol != NULL + && strcmp (runp->symbol, symbol) == 0) + break; + else + runp = runp->next; + + if (runp != NULL) + { + /* We cannot allow duplicate identifiers for + message sets. */ + error_at_line (0, 0, fname, start_line, + gettext ("duplicate set definition")); + error_at_line (0, 0, runp->fname, runp->line, + gettext ("\ +this is the first definition")); + set_number = 0; + } + else + /* Allocate next free message set for identifier. */ + set_number = ++current->last_set; + } + } + + if (set_number != 0) + { + /* We found a legal set number. */ + current->current_set = find_set (current, set_number); + if (symbol != NULL) + used = 1; + current->current_set->symbol = symbol; + current->current_set->fname = fname; + current->current_set->line = start_line; + } + } + else if (strncmp (&this_line[1], "delset", 6) == 0) + { + int cnt = sizeof ("delset"); + while (isspace (this_line[cnt])) + ++cnt; + + if (isdigit (this_line[cnt])) + { + size_t set_number = atol (&this_line[cnt]); + struct set_list *set; + + /* Mark the message set with the given number as + deleted. */ + set = find_set (current, set_number); + set->deleted = 1; + } + else + { + /* See whether it is a reasonable identifier. */ + int start = cnt; + while (isalnum (this_line[cnt]) || this_line[cnt] == '_') + ++cnt; + + if (cnt == start) + error_at_line (0, 0, fname, start_line, + gettext ("illegal set number")); + else + { + const char *symbol; + struct set_list *runp; + + this_line[cnt] = '\0'; + used = 1; + symbol = &this_line[start]; + + /* We have a symbolic set name. This name must + appear somewhere else in the catalogs read so + far. */ + for (runp = current->all_sets; runp != NULL; + runp = runp->next) + { + if (strcmp (runp->symbol, symbol) == 0) + { + runp->deleted = 1; + break; + } + } + if (runp == NULL) + /* Name does not exist before. 
*/ + error_at_line (0, 0, fname, start_line, + gettext ("unknown set `%s'"), symbol); + } + } + } + else if (strncmp (&this_line[1], "quote", 5) == 0) + { + char buf[2]; + char *bufptr; + size_t buflen; + char *wbufptr; + size_t wbuflen; + int cnt; + + cnt = sizeof ("quote"); + while (isspace (this_line[cnt])) + ++cnt; + + /* We need the conversion. */ + if (cd_towc == (iconv_t) -1 + && open_conversion (codeset, &cd_towc, &cd_tomb, + &escape_char) != 0) + /* Something is wrong. */ + goto out; + + /* Yes, the quote char can be '\0'; this means no quote + char. The function using the information works on + wide characters so we have to convert it here. */ + buf[0] = this_line[cnt]; + buf[1] = '\0'; + bufptr = buf; + buflen = 2; + + wbufptr = (char *) wbuf; + wbuflen = wbufsize; + + /* Flush the state. */ + iconv (cd_towc, NULL, NULL, NULL, NULL); + + iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen); + if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2]) + error_at_line (0, 0, fname, start_line, + gettext ("invalid quote character")); + else + /* Use the converted wide character. */ + current->quote_char = wbuf[0]; + } + else + { + int cnt; + cnt = 2; + while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) + ++cnt; + this_line[cnt] = '\0'; + error_at_line (0, 0, fname, start_line, + gettext ("unknown directive `%s': line ignored"), + &this_line[1]); + } + } + else if (isalnum (this_line[0]) || this_line[0] == '_') + { + const char *ident = this_line; + char *line = this_line; + int message_number; + + do + ++line; + while (line[0] != '\0' && !isspace (line[0])); + if (line[0] != '\0') + *line++ = '\0'; /* Terminate the identifier. */ + + /* Now we found the beginning of the message itself. */ + + if (isdigit (ident[0])) + { + struct message_list *runp; + struct message_list *lastp; + + message_number = atoi (ident); + + /* Find location to insert the new message. 
*/ + runp = current->current_set->messages; + lastp = NULL; + while (runp != NULL) + if (runp->number == message_number) + break; + else + { + lastp = runp; + runp = runp->next; + } + if (runp != NULL) + { + /* Oh, oh. There is already a message with this + number in the message set. */ + if (runp->symbol == NULL) + { + /* The existing message had its number specified + by the user. Fatal collision type uh, oh. */ + error_at_line (0, 0, fname, start_line, + gettext ("duplicated message number")); + error_at_line (0, 0, runp->fname, runp->line, + gettext ("this is the first definition")); + message_number = 0; + } + else + { + /* Collision was with number auto-assigned to a + symbolic. Change existing symbolic number + and move to end the list (if not already there). */ + runp->number = ++current->current_set->last_message; + + if (runp->next != NULL) + { + struct message_list *endp; + + if (lastp == NULL) + current->current_set->messages=runp->next; + else + lastp->next=runp->next; + + endp = runp->next; + while (endp->next != NULL) + endp = endp->next; + + endp->next = runp; + runp->next = NULL; + } + } + } + ident = NULL; /* We don't have a symbol. */ + + if (message_number != 0 + && message_number > current->current_set->last_message) + current->current_set->last_message = message_number; + } + else if (ident[0] != '\0') + { + struct message_list *runp; + + /* Test whether the symbolic name was not used for + another message in this message set. */ + runp = current->current_set->messages; + while (runp != NULL) + if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0) + break; + else + runp = runp->next; + if (runp != NULL) + { + /* The name is already used. */ + error_at_line (0, 0, fname, start_line, gettext ("\ +duplicated message identifier")); + error_at_line (0, 0, runp->fname, runp->line, + gettext ("this is the first definition")); + message_number = 0; + } + else + /* Give the message the next unused number. 
*/ + message_number = ++current->current_set->last_message; + } + else + message_number = 0; + + if (message_number != 0) + { + char *inbuf; + size_t inlen; + char *outbuf; + size_t outlen; + struct message_list *newp; + size_t line_len = strlen (line) + 1; + size_t ident_len = 0; + + /* We need the conversion. */ + if (cd_towc == (iconv_t) -1 + && open_conversion (codeset, &cd_towc, &cd_tomb, + &escape_char) != 0) + /* Something is wrong. */ + goto out; + + /* Convert to a wide character string. We have to + interpret escape sequences which will be impossible + without doing the conversion if the codeset of the + message is stateful. */ + while (1) + { + inbuf = line; + inlen = line_len; + outbuf = (char *) wbuf; + outlen = wbufsize; + + /* Flush the state. */ + iconv (cd_towc, NULL, NULL, NULL, NULL); + + iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen); + if (inlen == 0) + { + /* The string is converted. */ + assert (outlen < wbufsize); + assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1] + == L'\0'); + break; + } + + if (outlen != 0) + { + /* Something is wrong with this string, we ignore it. */ + error_at_line (0, 0, fname, start_line, gettext ("\ +invalid character: message ignored")); + goto ignore; + } + + /* The output buffer is too small. */ + wbufsize *= 2; + wbuf = (wchar_t *) xrealloc (wbuf, wbufsize); + } + + /* Strip quote characters, change escape sequences into + correct characters etc. */ + normalize_line (fname, start_line, cd_towc, wbuf, + current->quote_char, escape_char); + + if (ident) + ident_len = line - this_line; + + /* Now the string is free of escape sequences. Convert it + back into a multibyte character string. First free the + memory allocated for the original string. */ + obstack_free (&current->mem_pool, this_line); + + used = 1; /* Yes, we use the line. */ + + /* Now fill in the new string. It should never happen that + the replaced string is longer than the original. 
*/ + inbuf = (char *) wbuf; + inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t); + + outlen = obstack_room (&current->mem_pool); + obstack_blank (&current->mem_pool, outlen); + this_line = (char *) obstack_base (&current->mem_pool); + outbuf = this_line + ident_len; + outlen -= ident_len; + + /* Flush the state. */ + iconv (cd_tomb, NULL, NULL, NULL, NULL); + + iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen); + if (inlen != 0) + { + error_at_line (0, 0, fname, start_line, + gettext ("invalid line")); + goto ignore; + } + assert (outbuf[-1] == '\0'); + + /* Free the memory in the obstack we don't use. */ + obstack_blank (&current->mem_pool, -(int) outlen); + line = obstack_finish (&current->mem_pool); + + newp = (struct message_list *) xmalloc (sizeof (*newp)); + newp->number = message_number; + newp->message = line + ident_len; + /* Remember symbolic name; it is NULL if none is given. */ + newp->symbol = ident ? line : NULL; + /* Remember where we found the message. */ + newp->fname = fname; + newp->line = start_line; + + /* Find the place to insert the message. We keep them in a + sorted singly linked list. */ + if (current->current_set->messages == NULL + || current->current_set->messages->number > message_number) + { + newp->next = current->current_set->messages; + current->current_set->messages = newp; + } + else + { + struct message_list *runp; + runp = current->current_set->messages; + while (runp->next != NULL) + if (runp->next->number > message_number) + break; + else + runp = runp->next; + newp->next = runp->next; + runp->next = newp; + } + } + ++current->total_messages; + } + else + { + size_t cnt; + + cnt = 0; + /* See whether we have any non-white space character in this + line. */ + while (this_line[cnt] != '\0' && isspace (this_line[cnt])) + ++cnt; + + if (this_line[cnt] != '\0') + /* Yes, some unknown characters found. */ + error_at_line (0, 0, fname, start_line, + gettext ("malformed line ignored")); + } + + ignore: + /* We can save the memory for the line if it was not used. 
*/ + if (!used) + obstack_free (&current->mem_pool, this_line); + } + + /* Close the conversion modules. */ + iconv_close (cd_towc); + iconv_close (cd_tomb); + free (codeset); + + out: + free (wbuf); + + if (fp != stdin) + fclose (fp); + return current; +} + + +static void +write_out (struct catalog *catalog, const char *output_name, + const char *header_name) +{ + /* Computing the "optimal" size. */ + struct set_list *set_run; + size_t best_total, best_size, best_depth; + size_t act_size, act_depth; + struct catalog_obj obj; + struct obstack string_pool; + const char *strings; + size_t strings_size; + uint32_t *array1, *array2; + size_t cnt; + int fd; + + /* If not otherwise told try to read file with existing + translations. */ + if (!force_new) + read_old (catalog, output_name); + + /* Initialize best_size with a very high value. */ + best_total = best_size = best_depth = UINT_MAX; + + /* We need some start size for testing. Let's start with + TOTAL_MESSAGES / 5, which theoretically provides a mean depth of + 5. */ + act_size = 1 + catalog->total_messages / 5; + + /* We determine the size of a hash table here. Because the message + numbers can be chosen arbitrarily by the programmer we cannot use + the simple method of accessing the array using the message + number. The algorithm is based on the trivial hash function + NUMBER % TABLE_SIZE, where collisions are stored in a second + dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that + the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. 
*/ + while (act_size <= best_total) + { + size_t deep[act_size]; + + act_depth = 1; + memset (deep, '\0', act_size * sizeof (size_t)); + set_run = catalog->all_sets; + while (set_run != NULL) + { + struct message_list *message_run; + + message_run = set_run->messages; + while (message_run != NULL) + { + size_t idx = (message_run->number * set_run->number) % act_size; + + ++deep[idx]; + if (deep[idx] > act_depth) + { + act_depth = deep[idx]; + if (act_depth * act_size > best_total) + break; + } + message_run = message_run->next; + } + set_run = set_run->next; + } + + if (act_depth * act_size <= best_total) + { + /* We have found a better solution. */ + best_total = act_depth * act_size; + best_size = act_size; + best_depth = act_depth; + } + + ++act_size; + } + + /* let's be prepared for an empty message file. */ + if (best_size == UINT_MAX) + { + best_size = 1; + best_depth = 1; + } + + /* OK, now we have the size we will use. Fill in the header, build + the table and the second one with swapped byte order. */ + obj.magic = CATGETS_MAGIC; + obj.plane_size = best_size; + obj.plane_depth = best_depth; + + /* Allocate room for all needed arrays. */ + array1 = + (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3); + memset (array1, '\0', best_size * best_depth * sizeof (uint32_t) * 3); + array2 + = (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3); + obstack_init (&string_pool); + + set_run = catalog->all_sets; + while (set_run != NULL) + { + struct message_list *message_run; + + message_run = set_run->messages; + while (message_run != NULL) + { + size_t idx = (((message_run->number * set_run->number) % best_size) + * 3); + /* Determine collision depth. */ + while (array1[idx] != 0) + idx += best_size * 3; + + /* Store set number, message number and pointer into string + space, relative to the first string. 
*/ + array1[idx + 0] = set_run->number; + array1[idx + 1] = message_run->number; + array1[idx + 2] = obstack_object_size (&string_pool); + + /* Add current string to the continuous space containing all + strings. */ + obstack_grow0 (&string_pool, message_run->message, + strlen (message_run->message)); + + message_run = message_run->next; + } + + set_run = set_run->next; + } + strings_size = obstack_object_size (&string_pool); + strings = obstack_finish (&string_pool); + + /* Compute ARRAY2 by changing the byte order. */ + for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt) + array2[cnt] = SWAPU32 (array1[cnt]); + + /* Now we can write out the whole data. */ + if (strcmp (output_name, "-") == 0 + || strcmp (output_name, "/dev/stdout") == 0) + fd = STDOUT_FILENO; + else + { + fd = creat (output_name, 0666); + if (fd < 0) + error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"), + output_name); + } + + /* Write out header. */ + write (fd, &obj, sizeof (obj)); + + /* We always write out the little endian version of the index + arrays. */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3); + write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3); +#elif __BYTE_ORDER == __BIG_ENDIAN + write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3); + write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3); +#else +# error Cannot handle __BYTE_ORDER byte order +#endif + + /* Finally write the strings. */ + write (fd, strings, strings_size); + + if (fd != STDOUT_FILENO) + close (fd); + + /* If requested now write out the header file. */ + if (header_name != NULL) + { + int first = 1; + FILE *fp; + + /* Open output file. "-" or "/dev/stdout" means write to + standard output. 
*/ + if (strcmp (header_name, "-") == 0 + || strcmp (header_name, "/dev/stdout") == 0) + fp = stdout; + else + { + fp = fopen (header_name, "w"); + if (fp == NULL) + error (EXIT_FAILURE, errno, + gettext ("cannot open output file `%s'"), header_name); + } + + /* Iterate over all sets and all messages. */ + set_run = catalog->all_sets; + while (set_run != NULL) + { + struct message_list *message_run; + + /* If the current message set has a symbolic name write this + out first. */ + if (set_run->symbol != NULL) + fprintf (fp, "%s#define %sSet %#x\t/* %s:%Zu */\n", + first ? "" : "\n", set_run->symbol, set_run->number - 1, + set_run->fname, set_run->line); + first = 0; + + message_run = set_run->messages; + while (message_run != NULL) + { + /* If the current message has a symbolic name write + #define out. But we have to take care for the set + not having a symbolic name. */ + if (message_run->symbol != NULL) + { + if (set_run->symbol == NULL) + fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n", + set_run->number, message_run->symbol, + message_run->number, message_run->fname, + message_run->line); + else + fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n", + set_run->symbol, message_run->symbol, + message_run->number, message_run->fname, + message_run->line); + } + + message_run = message_run->next; + } + + set_run = set_run->next; + } + + if (fp != stdout) + fclose (fp); + } +} + + +static struct set_list * +find_set (struct catalog *current, int number) +{ + struct set_list *result = current->all_sets; + + /* We must avoid set number 0 because a set of this number signals + in the tables that the entry is not occupied. */ + ++number; + + while (result != NULL) + if (result->number == number) + return result; + else + result = result->next; + + /* Prepare new message set. 
*/ + result = (struct set_list *) xcalloc (1, sizeof (*result)); + result->number = number; + result->next = current->all_sets; + current->all_sets = result; + + return result; +} + + +/* Normalize given string *in*place* by processing escape sequences + and quote characters. */ +static void +normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string, + wchar_t quote_char, wchar_t escape_char) +{ + int is_quoted; + wchar_t *rp = string; + wchar_t *wp = string; + + if (quote_char != L'\0' && *rp == quote_char) + { + is_quoted = 1; + ++rp; + } + else + is_quoted = 0; + + while (*rp != L'\0') + if (*rp == quote_char) + /* We simply end the string when we find the first time an + not-escaped quote character. */ + break; + else if (*rp == escape_char) + { + ++rp; + if (quote_char != L'\0' && *rp == quote_char) + /* This is an extension to XPG. */ + *wp++ = *rp++; + else + /* Recognize escape sequences. */ + switch (*rp) + { + case L'n': + *wp++ = L'\n'; + ++rp; + break; + case L't': + *wp++ = L'\t'; + ++rp; + break; + case L'v': + *wp++ = L'\v'; + ++rp; + break; + case L'b': + *wp++ = L'\b'; + ++rp; + break; + case L'r': + *wp++ = L'\r'; + ++rp; + break; + case L'f': + *wp++ = L'\f'; + ++rp; + break; + case L'0' ... L'7': + { + int number; + char cbuf[2]; + char *cbufptr; + size_t cbufin; + wchar_t wcbuf[2]; + char *wcbufptr; + size_t wcbufin; + + number = *rp++ - L'0'; + while (number <= (255 / 8) && *rp >= L'0' && *rp <= L'7') + { + number *= 8; + number += *rp++ - L'0'; + } + + cbuf[0] = (char) number; + cbuf[1] = '\0'; + cbufptr = cbuf; + cbufin = 2; + + wcbufptr = (char *) wcbuf; + wcbufin = sizeof (wcbuf); + + /* Flush the state. 
*/ + iconv (cd, NULL, NULL, NULL, NULL); + + iconv (cd, &cbufptr, &cbufin, &wcbufptr, &wcbufin); + if (cbufptr != &cbuf[2] || (wchar_t *) wcbufptr != &wcbuf[2]) + error_at_line (0, 0, fname, line, + gettext ("invalid escape sequence")); + else + *wp++ = wcbuf[0]; + } + break; + default: + if (*rp == escape_char) + { + *wp++ = escape_char; + ++rp; + } + else + /* Simply ignore the backslash character. */; + break; + } + } + else + *wp++ = *rp++; + + /* If we saw a quote character at the beginning we expect another + one at the end. */ + if (is_quoted && *rp != quote_char) + error_at_line (0, 0, fname, line, gettext ("unterminated message")); + + /* Terminate string. */ + *wp = L'\0'; + return; +} + + +static void +read_old (struct catalog *catalog, const char *file_name) +{ + struct catalog_info old_cat_obj; + struct set_list *set = NULL; + int last_set = -1; + size_t cnt; + + /* Try to open catalog, but don't look through the NLSPATH. */ + if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0) + { + if (errno == ENOENT) + /* No problem, the catalog simply does not exist. */ + return; + else + error (EXIT_FAILURE, errno, + gettext ("while opening old catalog file")); + } + + /* OK, we have the catalog loaded. Now read all messages and merge + them. When set and message number clash for any message the new + one is used. If the new one is empty it indicates that the + message should be deleted. */ + for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt) + { + struct message_list *message, *last; + + if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0) + /* No message in this slot. 
*/ + continue; + + if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set) + { + last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1; + set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1); + } + + last = NULL; + message = set->messages; + while (message != NULL) + { + if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1]) + break; + last = message; + message = message->next; + } + + if (message == NULL + || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1]) + { + /* We have found a message which is not yet in the catalog. + Insert it at the right position. */ + struct message_list *newp; + + newp = (struct message_list *) xmalloc (sizeof(*newp)); + newp->number = old_cat_obj.name_ptr[cnt * 3 + 1]; + newp->message = + &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]]; + newp->fname = NULL; + newp->line = 0; + newp->symbol = NULL; + newp->next = message; + + if (last == NULL) + set->messages = newp; + else + last->next = newp; + + ++catalog->total_messages; + } + else if (*message->message == '\0') + { + /* The new empty message has overridden the old one thus + "deleting" it as required. Now remove the empty remains. */ + if (last == NULL) + set->messages = message->next; + else + last->next = message->next; + } + } +} + + +static int +open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp, + wchar_t *escape_charp) +{ + char buf[2]; + char *bufptr; + size_t bufsize; + wchar_t wbuf[2]; + char *wbufptr; + size_t wbufsize; + + /* If the input file does not specify the codeset use the locale's. */ + if (codeset == NULL) + { + setlocale (LC_ALL, ""); + codeset = nl_langinfo (CODESET); + setlocale (LC_ALL, "C"); + } + + /* Get the conversion modules. 
*/ + *cd_towcp = iconv_open ("WCHAR_T", codeset); + *cd_tombp = iconv_open (codeset, "WCHAR_T"); + if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1) + { + error (0, 0, gettext ("conversion modules not available")); + if (*cd_towcp != (iconv_t) -1) + iconv_close (*cd_towcp); + + return 1; + } + + /* One special case for historical reasons is the backslash + character. In some codesets the byte value 0x5c is not mapped to + U005c in Unicode. These charsets then don't have a backslash + character at all. Therefore we have to live with whatever the + codeset provides and recognize, instead of the U005c, the character + the byte value 0x5c is mapped to. */ + buf[0] = '\\'; + buf[1] = '\0'; + bufptr = buf; + bufsize = 2; + + wbufptr = (char *) wbuf; + wbufsize = sizeof (wbuf); + + iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize); + if (bufsize != 0 || wbufsize != 0) + { + /* Something went wrong, we couldn't convert the byte 0x5c. Go + on with using U005c. */ + error (0, 0, gettext ("cannot determine escape character")); + *escape_charp = L'\\'; + } + else + *escape_charp = wbuf[0]; + + return 0; +} diff --git a/REORG.TODO/catgets/nl_types.h b/REORG.TODO/catgets/nl_types.h new file mode 100644 index 0000000000..1d734ad31c --- /dev/null +++ b/REORG.TODO/catgets/nl_types.h @@ -0,0 +1,54 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _NL_TYPES_H +#define _NL_TYPES_H 1 + +#include <features.h> + +/* The default message set used by the gencat program. */ +#define NL_SETD 1 + +/* Value for FLAG parameter of `catgets' to say we want XPG4 compliance. */ +#define NL_CAT_LOCALE 1 + + +__BEGIN_DECLS + +/* Message catalog descriptor type. */ +typedef void *nl_catd; + +/* Type used by `nl_langinfo'. */ +typedef int nl_item; + +/* Open message catalog for later use, returning descriptor. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern nl_catd catopen (const char *__cat_name, int __flag) __nonnull ((1)); + +/* Return translation with NUMBER in SET of CATALOG; if not found + return STRING. */ +extern char *catgets (nl_catd __catalog, int __set, int __number, + const char *__string) __THROW __nonnull ((1)); + +/* Close message CATALOG. */ +extern int catclose (nl_catd __catalog) __THROW __nonnull ((1)); + +__END_DECLS + +#endif /* nl_types.h */ diff --git a/REORG.TODO/catgets/open_catalog.c b/REORG.TODO/catgets/open_catalog.c new file mode 100644 index 0000000000..cbb305a3f7 --- /dev/null +++ b/REORG.TODO/catgets/open_catalog.c @@ -0,0 +1,336 @@ +/* Copyright (C) 1996-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper, <drepper@gnu.org>. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <byteswap.h> +#include <endian.h> +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#ifdef _POSIX_MAPPED_FILES +# include <sys/mman.h> +#endif +#include <sys/stat.h> + +#include "catgetsinfo.h" +#include <not-cancel.h> + + +#define SWAPU32(w) bswap_32 (w) + + +int +__open_catalog (const char *cat_name, const char *nlspath, const char *env_var, + __nl_catd catalog) +{ + int fd = -1; + struct stat64 st; + int swapping; + size_t cnt; + size_t max_offset; + size_t tab_size; + const char *lastp; + int result = -1; + char *buf = NULL; + + if (strchr (cat_name, '/') != NULL || nlspath == NULL) + fd = open_not_cancel_2 (cat_name, O_RDONLY); + else + { + const char *run_nlspath = nlspath; +#define ENOUGH(n) \ + if (__glibc_unlikely (bufact + (n) >= bufmax)) \ + { \ + char *old_buf = buf; \ + bufmax += (bufmax < 256 + (n)) ? 256 + (n) : bufmax; \ + buf = realloc (buf, bufmax); \ + if (__glibc_unlikely (buf == NULL)) \ + { \ + free (old_buf); \ + return -1; \ + } \ + } + + /* The RUN_NLSPATH variable contains a colon separated list of + descriptions where we expect to find catalogs. We have to + recognize certain % substitutions and stop when we found the + first existing file. */ + size_t bufact; + size_t bufmax = 0; + size_t len; + + fd = -1; + while (*run_nlspath != '\0') + { + bufact = 0; + + if (*run_nlspath == ':') + { + /* Leading colon or adjacent colons - treat same as %N. */ + len = strlen (cat_name); + ENOUGH (len); + memcpy (&buf[bufact], cat_name, len); + bufact += len; + } + else + while (*run_nlspath != ':' && *run_nlspath != '\0') + if (*run_nlspath == '%') + { + const char *tmp; + + ++run_nlspath; /* We have seen the `%'. 
*/ + switch (*run_nlspath++) + { + case 'N': + /* Use the catalog name. */ + len = strlen (cat_name); + ENOUGH (len); + memcpy (&buf[bufact], cat_name, len); + bufact += len; + break; + case 'L': + /* Use the current locale category value. */ + len = strlen (env_var); + ENOUGH (len); + memcpy (&buf[bufact], env_var, len); + bufact += len; + break; + case 'l': + /* Use language element of locale category value. */ + tmp = env_var; + do + { + ENOUGH (1); + buf[bufact++] = *tmp++; + } + while (*tmp != '\0' && *tmp != '_' && *tmp != '.'); + break; + case 't': + /* Use territory element of locale category value. */ + tmp = env_var; + do + ++tmp; + while (*tmp != '\0' && *tmp != '_' && *tmp != '.'); + if (*tmp == '_') + { + ++tmp; + do + { + ENOUGH (1); + buf[bufact++] = *tmp++; + } + while (*tmp != '\0' && *tmp != '.'); + } + break; + case 'c': + /* Use code set element of locale category value. */ + tmp = env_var; + do + ++tmp; + while (*tmp != '\0' && *tmp != '.'); + if (*tmp == '.') + { + ++tmp; + do + { + ENOUGH (1); + buf[bufact++] = *tmp++; + } + while (*tmp != '\0'); + } + break; + case '%': + ENOUGH (1); + buf[bufact++] = '%'; + break; + default: + /* Unknown variable: ignore this path element. */ + bufact = 0; + while (*run_nlspath != '\0' && *run_nlspath != ':') + ++run_nlspath; + break; + } + } + else + { + ENOUGH (1); + buf[bufact++] = *run_nlspath++; + } + + ENOUGH (1); + buf[bufact] = '\0'; + + if (bufact != 0) + { + fd = open_not_cancel_2 (buf, O_RDONLY); + if (fd >= 0) + break; + } + + ++run_nlspath; + } + } + + /* Avoid dealing with directories and block devices */ + if (__builtin_expect (fd, 0) < 0) + { + free (buf); + return -1; + } + + if (__builtin_expect (__fxstat64 (_STAT_VER, fd, &st), 0) < 0) + goto close_unlock_return; + + if (__builtin_expect (!S_ISREG (st.st_mode), 0) + || (size_t) st.st_size < sizeof (struct catalog_obj)) + { + /* `errno' is not set correctly but the file is not usable. + Use an reasonable error value. 
*/ + __set_errno (EINVAL); + goto close_unlock_return; + } + + catalog->file_size = st.st_size; +#ifdef _POSIX_MAPPED_FILES +# ifndef MAP_COPY + /* Linux seems to lack read-only copy-on-write. */ +# define MAP_COPY MAP_PRIVATE +# endif +# ifndef MAP_FILE + /* Some systems do not have this flag; it is superfluous. */ +# define MAP_FILE 0 +# endif + catalog->file_ptr = + (struct catalog_obj *) __mmap (NULL, st.st_size, PROT_READ, + MAP_FILE|MAP_COPY, fd, 0); + if (__builtin_expect (catalog->file_ptr != (struct catalog_obj *) MAP_FAILED, + 1)) + /* Tell the world we managed to mmap the file. */ + catalog->status = mmapped; + else +#endif /* _POSIX_MAPPED_FILES */ + { + /* mmap failed perhaps because the system call is not + implemented. Try to load the file. */ + size_t todo; + catalog->file_ptr = malloc (st.st_size); + if (catalog->file_ptr == NULL) + goto close_unlock_return; + + todo = st.st_size; + /* Save read, handle partial reads. */ + do + { + size_t now = read_not_cancel (fd, (((char *) catalog->file_ptr) + + (st.st_size - todo)), todo); + if (now == 0 || now == (size_t) -1) + { +#ifdef EINTR + if (now == (size_t) -1 && errno == EINTR) + continue; +#endif + free ((void *) catalog->file_ptr); + goto close_unlock_return; + } + todo -= now; + } + while (todo > 0); + catalog->status = malloced; + } + + /* Determine whether the file is a catalog file and if yes whether + it is written using the correct byte order. Else we have to swap + the values. */ + if (__glibc_likely (catalog->file_ptr->magic == CATGETS_MAGIC)) + swapping = 0; + else if (catalog->file_ptr->magic == SWAPU32 (CATGETS_MAGIC)) + swapping = 1; + else + { + invalid_file: + /* Invalid file. Free the resources and mark catalog as not + usable. */ +#ifdef _POSIX_MAPPED_FILES + if (catalog->status == mmapped) + __munmap ((void *) catalog->file_ptr, catalog->file_size); + else +#endif /* _POSIX_MAPPED_FILES */ + free (catalog->file_ptr); + goto close_unlock_return; + } + +#define SWAP(x) (swapping ? 
SWAPU32 (x) : (x)) + + /* Get dimensions of the used hashing table. */ + catalog->plane_size = SWAP (catalog->file_ptr->plane_size); + catalog->plane_depth = SWAP (catalog->file_ptr->plane_depth); + + /* The file contains two versions of the pointer tables. Pick the + right one for the local byte order. */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + catalog->name_ptr = &catalog->file_ptr->name_ptr[0]; +#elif __BYTE_ORDER == __BIG_ENDIAN + catalog->name_ptr = &catalog->file_ptr->name_ptr[catalog->plane_size + * catalog->plane_depth + * 3]; +#else +# error Cannot handle __BYTE_ORDER byte order +#endif + + /* The rest of the file contains all the strings. They are + addressed relative to the position of the first string. */ + catalog->strings = + (const char *) &catalog->file_ptr->name_ptr[catalog->plane_size + * catalog->plane_depth * 3 * 2]; + + /* Determine the largest string offset mentioned in the table. */ + max_offset = 0; + tab_size = 3 * catalog->plane_size * catalog->plane_depth; + for (cnt = 2; cnt < tab_size; cnt += 3) + if (catalog->name_ptr[cnt] > max_offset) + max_offset = catalog->name_ptr[cnt]; + + /* Now we can check whether the file is large enough to contain the + tables it says it contains. */ + if ((size_t) st.st_size + <= (sizeof (struct catalog_obj) + 2 * tab_size + max_offset)) + /* The last string is not contained in the file. */ + goto invalid_file; + + lastp = catalog->strings + max_offset; + max_offset = (st.st_size + - sizeof (struct catalog_obj) + 2 * tab_size + max_offset); + while (*lastp != '\0') + { + if (--max_offset == 0) + goto invalid_file; + ++lastp; + } + + /* We succeeded. */ + result = 0; + + /* Release the lock again. 
*/ + close_unlock_return: + close_not_cancel_no_status (fd); + free (buf); + + return result; +} +libc_hidden_def (__open_catalog) diff --git a/REORG.TODO/catgets/sample.SJIS b/REORG.TODO/catgets/sample.SJIS new file mode 100644 index 0000000000..d0ac0baa55 --- /dev/null +++ b/REORG.TODO/catgets/sample.SJIS @@ -0,0 +1,14 @@ +$ test message catalog + +$ codeset=sjis +$quote " +$set 1 +1 sample1:ABCDEF: +2 sample2:日本語: +3 sample3:予定表: +4 sample4:TEST\tTAB: +5 sample5:機能\t十種類: +6 double slash\\ +7 "another line" +$set Another +FOO "message foo" diff --git a/REORG.TODO/catgets/test-gencat.c b/REORG.TODO/catgets/test-gencat.c new file mode 100644 index 0000000000..282974ca2f --- /dev/null +++ b/REORG.TODO/catgets/test-gencat.c @@ -0,0 +1,34 @@ +#include <locale.h> +#include <nl_types.h> +#include <stdio.h> +#include <stdlib.h> + +static int +do_test (void) +{ + nl_catd catalog; + setlocale (LC_ALL, ""); + + printf ("LC_MESSAGES = %s\n", setlocale (LC_MESSAGES, NULL)); + + catalog = catopen ("sample", NL_CAT_LOCALE); + if (catalog == (nl_catd) -1) + { + printf ("no catalog: %m\n"); + exit (1); + } + + printf ("%s\n", catgets(catalog, 1, 1, "sample 1")); + printf ("%s\n", catgets(catalog, 1, 2, "sample 2")); + printf ("%s\n", catgets(catalog, 1, 3, "sample 3")); + printf ("%s\n", catgets(catalog, 1, 4, "sample 4")); + printf ("%s\n", catgets(catalog, 1, 5, "sample 5")); + printf ("%s\n", catgets(catalog, 1, 6, "sample 6")); + printf ("%s\n", catgets(catalog, 1, 7, "sample 7")); + catclose (catalog); + + return 0; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/catgets/test-gencat.sh b/REORG.TODO/catgets/test-gencat.sh new file mode 100755 index 0000000000..a6e0bc4815 --- /dev/null +++ b/REORG.TODO/catgets/test-gencat.sh @@ -0,0 +1,53 @@ +#!/bin/sh +# Test escape character handling in gencat. +# Copyright (C) 2000-2017 Free Software Foundation, Inc. +# This file is part of the GNU C Library. 
+ +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +set -e + +common_objpfx=$1 +test_program_cmd_before_env=$2 +run_program_env=$3 +test_program_cmd_after_env=$4 + +# Run the test program. +${test_program_cmd_before_env} \ + ${run_program_env} \ + NLSPATH=${common_objpfx}catgets/%N.%c.cat LC_ALL=ja_JP.SJIS \ + ${test_program_cmd_after_env} \ + > ${common_objpfx}catgets/test-gencat.out + +# Compare with the expected result. +cmp - ${common_objpfx}catgets/test-gencat.out <<"EOF" +LC_MESSAGES = ja_JP.SJIS +sample1:ABCDEF: +sample2:日本語: +sample3:予定表: +sample4:TEST TAB: +sample5:機能 十種類: +double slash\ +another line +EOF +res=$? + +cat <<EOF | +#define AnotherSet 0x2 /* *standard input*:13 */ +#define AnotherFOO 0x1 /* *standard input*:14 */ +EOF +cmp ${common_objpfx}catgets/test-gencat.h - || res=1 + +exit $res diff --git a/REORG.TODO/catgets/test1.msg b/REORG.TODO/catgets/test1.msg new file mode 100644 index 0000000000..a868c53dcf --- /dev/null +++ b/REORG.TODO/catgets/test1.msg @@ -0,0 +1,5 @@ +$set 10 +$quote " +$ Poznámka +Welcome Vítejte v testovacím programu +Author Autor: Vladimir Michl diff --git a/REORG.TODO/catgets/test2.msg b/REORG.TODO/catgets/test2.msg new file mode 100644 index 0000000000..be54e75370 --- /dev/null +++ b/REORG.TODO/catgets/test2.msg @@ -0,0 +1,8 @@ +$set 42 +$ Two messages with auto-assigned numbers.
+Number1 Auto-assigned #1 +Number2 Auto-assigned #2 +$ Three messages with explicitly assigned numbers. +0 Explicitly assigned #1 +1 Explicitly assigned #2 +2 Explicitly assigned #3 diff --git a/REORG.TODO/catgets/tst-catgets.c b/REORG.TODO/catgets/tst-catgets.c new file mode 100644 index 0000000000..7169ceb841 --- /dev/null +++ b/REORG.TODO/catgets/tst-catgets.c @@ -0,0 +1,100 @@ +#include <assert.h> +#include <mcheck.h> +#include <nl_types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> + + +static const char *msgs[] = +{ +#define INPUT(str) +#define OUTPUT(str) str, +#include <intl/msgs.h> +}; +#define nmsgs (sizeof (msgs) / sizeof (msgs[0])) + + +/* Test for unbounded alloca. */ +static int +do_bz17905 (void) +{ + char *buf; + struct rlimit rl; + nl_catd result __attribute__ ((unused)); + + const int sz = 1024 * 1024; + + getrlimit (RLIMIT_STACK, &rl); + rl.rlim_cur = sz; + setrlimit (RLIMIT_STACK, &rl); + + buf = malloc (sz + 1); + memset (buf, 'A', sz); + buf[sz] = '\0'; + setenv ("NLSPATH", buf, 1); + + result = catopen (buf, NL_CAT_LOCALE); + assert (result == (nl_catd) -1); + + free (buf); + return 0; +} + +#define ROUNDS 5 + +static int +do_test (void) +{ + int rnd; + int result = 0; + + mtrace (); + + /* We do this a few times to stress the memory handling. */ + for (rnd = 0; rnd < ROUNDS; ++rnd) + { + nl_catd cd = catopen ("libc", 0); + size_t cnt; + + if (cd == (nl_catd) -1) + { + printf ("cannot load catalog: %m\n"); + result = 1; + break; + } + + /* Go through all the messages and compare the result. 
*/ + for (cnt = 0; cnt < nmsgs; ++cnt) + { + char *trans; + + trans = catgets (cd, 1, 1 + cnt, + "+#+# if this comes backs it's an error"); + + if (trans == NULL) + { + printf ("catgets return NULL for %zd\n", cnt); + result = 1; + } + else if (strcmp (trans, msgs[cnt]) != 0 && msgs[cnt][0] != '\0') + { + printf ("expected \"%s\", got \"%s\"\n", msgs[cnt], trans); + result = 1; + } + } + + if (catclose (cd) != 0) + { + printf ("catclose failed: %m\n"); + result = 1; + } + } + + result += do_bz17905 (); + return result; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/REORG.TODO/catgets/xopen-msg.awk b/REORG.TODO/catgets/xopen-msg.awk new file mode 100644 index 0000000000..547025fb81 --- /dev/null +++ b/REORG.TODO/catgets/xopen-msg.awk @@ -0,0 +1,72 @@ +# xopen-msg.awk - Convert Uniforum style .po file to X/Open style .msg file +# Copyright (C) 2012-2017 Free Software Foundation, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +# +# The first directive in the .msg should be the definition of the +# message set number. We use always set number 1. +# +BEGIN { + print "$set 1 # Automatically created by xopen-msg.awk" + num = 0 +} + +# +# The .msg file contains, other then the .po file, only the translations +# but each given a unique ID. Starting from 1 and incrementing by 1 for +# each message we assign them to the messages. 
+# It is important that the .po file used to generate the ../intl/msg.h file +# (with po2test.awk) is the same as the one used here. (At least the order +# of declarations must not be changed.) +# +function output_message() { + # Ignore messages containing <PRI.*> which would have to be replaced + # by the correct format depending on the word size + if (msg && msg !~ /<PRI.*>/) { + if (msgtype == "msgid") { + # We copy the original message as a comment into the .msg file. + gsub(/\n/, "\n$ ", msg) + printf "$ Original Message: %s\n", msg + } else { + gsub(/\n/, "\\\n", msg) + printf "%d %s\n", ++num, msg + } + } + msg = 0 +} + +$1 ~ "msg(id|str)" { + # Output collected message + output_message() + # Collect next message + msgtype = $1 + sub(/^msg(id|str)[ \t]*"/, "", $0) + sub(/"$/, "", $0) + msg = $0 + next +} + +/^".*"/ { + # Append to current message + sub(/^"/, "", $0) + sub(/"$/, "", $0) + msg = msg "\n" $0 + next +} + +END { + # Output last collected message + output_message() +} |