diff options
author | Ulrich Drepper <drepper@redhat.com> | 1999-12-25 23:41:39 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 1999-12-25 23:41:39 +0000 |
commit | 450bf66ef223ad83e7032920652445817865770b (patch) | |
tree | 1bfd6848a2453f4ad2c9cdca8e4e4c817e995798 /locale | |
parent | ce40141c6b68a40687f460450e1d07a0a78e1559 (diff) | |
download | glibc-450bf66ef223ad83e7032920652445817865770b.tar.gz |
Update.
1999-12-25 Ulrich Drepper <drepper@cygnus.com>
* locale/C-collate.c (_nl_C_LC_COLLATE): Add one more entry for the
indirect table.
* locale/langinfo.h: Likewise.
* locale/categories.def: Likewise. Remove reference to postload
functions.
* locale/lc-collate.c (_nl_postload_collate): Removed. Also remove
__collate_tablemb, __collate_weightmb, and __collate_extramb.
* locale/localeinfo.h: Remove declarations of the variables removed above.
Remove prototype for _nl_init_era_entries.
* locale/weight.h: Complete rewrite for new collate implementation.
* locale/programs/ld-collate.c: Many changes to make output file
usable in strxfrm/strcoll.
* string/strxfrm.c: Complete rewrite for new collate implementation.
* wcsmbs/wcsxfrm.c: Don't use strxfrm.c; provide a dummy implementation
locally.
1999-12-25 Shinya Hanataka <hanataka@abyss.rim.or.jp>
* locale/programs/ld-ctype.c (allocate_arrays): Correctly assign
transformation values for chars >255.
* wctype/wctrans.c: Return pointer unmodified.
Diffstat (limited to 'locale')
-rw-r--r-- | locale/C-collate.c | 3 | ||||
-rw-r--r-- | locale/categories.def | 3 | ||||
-rw-r--r-- | locale/langinfo.h | 1 | ||||
-rw-r--r-- | locale/lc-collate.c | 18 | ||||
-rw-r--r-- | locale/localeinfo.h | 9 | ||||
-rw-r--r-- | locale/programs/ld-collate.c | 121 | ||||
-rw-r--r-- | locale/programs/ld-ctype.c | 6 | ||||
-rw-r--r-- | locale/weight.h | 251 |
8 files changed, 166 insertions, 246 deletions
diff --git a/locale/C-collate.c b/locale/C-collate.c index 94f6e0f60f..7875f5de22 100644 --- a/locale/C-collate.c +++ b/locale/C-collate.c @@ -150,12 +150,13 @@ const struct locale_data _nl_C_LC_COLLATE = _nl_C_name, NULL, 0, 0, /* no file mapped */ UNDELETABLE, - 5, + 6, { { word: 0 }, { string: NULL }, { string: NULL }, { string: NULL }, + { string: NULL }, { string: NULL } } }; diff --git a/locale/categories.def b/locale/categories.def index 06d79ed202..40fc74213c 100644 --- a/locale/categories.def +++ b/locale/categories.def @@ -47,7 +47,8 @@ DEFINE_CATEGORY DEFINE_ELEMENT (_NL_COLLATE_TABLEMB, "collate-tablemb", std, string) DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB, "collate-weightmb", std, string) DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB, "collate-extramb", std, string) - ), _nl_postload_collate) + DEFINE_ELEMENT (_NL_COLLATE_INDIRECTMB, "collate-indirectmb", std, string) + ), NO_POSTLOAD) /* The actual definition of ctype is meaningless here. It is hard coded in diff --git a/locale/langinfo.h b/locale/langinfo.h index ff48fab35f..3f39298c17 100644 --- a/locale/langinfo.h +++ b/locale/langinfo.h @@ -235,6 +235,7 @@ enum _NL_COLLATE_TABLEMB, _NL_COLLATE_WEIGHTMB, _NL_COLLATE_EXTRAMB, + _NL_COLLATE_INDIRECTMB, _NL_NUM_LC_COLLATE, /* LC_CTYPE category: character classification. diff --git a/locale/lc-collate.c b/locale/lc-collate.c index 02262b5ce2..623be06e26 100644 --- a/locale/lc-collate.c +++ b/locale/lc-collate.c @@ -22,21 +22,3 @@ _NL_CURRENT_DEFINE (LC_COLLATE); - -const int32_t *__collate_tablemb; -const unsigned char *__collate_weightmb; -const unsigned char *__collate_extramb; - -/* We are called after loading LC_CTYPE data to load it into - the variables used by the collation functions and regex. 
*/ -void -_nl_postload_collate (void) -{ -#define paste(a,b) paste1(a,b) -#define paste1(a,b) a##b -#define current(x) _NL_CURRENT (LC_COLLATE, paste(_NL_COLLATE_,x)) - - __collate_tablemb = (const int32_t *) current (TABLEMB); - __collate_weightmb = (const unsigned char *) current (WEIGHTMB); - __collate_extramb = (const unsigned char *) current (EXTRAMB); -} diff --git a/locale/localeinfo.h b/locale/localeinfo.h index 078e205f4f..ced96ac4a9 100644 --- a/locale/localeinfo.h +++ b/locale/localeinfo.h @@ -165,9 +165,6 @@ extern void _nl_unload_locale (struct locale_data *locale); extern void _nl_remove_locale (int locale, struct locale_data *data); -/* initialize `era' entries */ -extern void _nl_init_era_entries (void); - /* Return `era' entry which corresponds to TP. Used in strftime. */ extern struct era_entry *_nl_get_era_entry (const struct tm *tp); @@ -180,10 +177,4 @@ extern const char *_nl_get_alt_digit (unsigned int number); /* Similar, but now for wide characters. */ extern const wchar_t *_nl_get_walt_digit (unsigned int number); - -/* Global variables for LC_COLLATE category data. */ -extern const int32_t *__collate_tablemb; -extern const unsigned char *__collate_extrweightmb; -extern const unsigned char *__collate_extramb; - #endif /* localeinfo.h */ diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c index 65229275ff..c629bd477a 100644 --- a/locale/programs/ld-collate.c +++ b/locale/programs/ld-collate.c @@ -137,9 +137,6 @@ struct locale_collate_t /* To make handling of errors easier we have another section. */ struct section_list error_section; - /* Number of sorting rules given in order_start line. */ - uint32_t nrules; - /* Start of the order list. */ struct element_t *start; @@ -176,7 +173,7 @@ struct locale_collate_t /* We have a few global variables which are used for reading all LC_COLLATE category descriptions in all files. 
*/ -static int nrules; +static uint32_t nrules; /* These are definitions used by some of the functions for handling @@ -426,7 +423,7 @@ read_directions (struct linereader *ldfile, struct token *arg, if (! warned) { lr_error (ldfile, _("\ -%s: `%s' mentioned twice in definition of weight %d in category `%s'"), +%s: `%s' mentioned twice in definition of weight %d"), "LC_COLLATE", "position", cnt + 1); } } @@ -450,7 +447,13 @@ read_directions (struct linereader *ldfile, struct token *arg, /* See whether we have to increment the counter. */ if (arg->tok != tok_comma && rules[cnt] != 0) - ++cnt; + { + /* Add the default `forward' if we have seen only `position'. */ + if (rules[cnt] == sort_position) + rules[cnt] = sort_position | sort_forward; + + ++cnt; + } if (arg->tok == tok_eof || arg->tok == tok_eol) /* End of line or file, so we exit the loop. */ @@ -876,7 +879,7 @@ insert_value (struct linereader *ldfile, struct token *arg, elem->nmbs = seq->nbytes; } - if (elem->wcs == NULL && seq != ILLEGAL_CHAR_VALUE) + if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE) { uint32_t wcs[2] = { wc, 0 }; @@ -1552,7 +1555,7 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap) } -static inline int32_t +static int32_t output_weight (struct obstack *pool, struct locale_collate_t *collate, struct element_t *elem) { @@ -1575,25 +1578,18 @@ output_weight (struct obstack *pool, struct locale_collate_t *collate, int len = 0; int i; - /* Add the direction. */ - obstack_1grow (pool, elem->section->rules[cnt]); - for (i = 0; i < elem->weights[cnt].cnt; ++i) - /* Encode the weight value. */ - if (elem->weights[cnt].w[i] == NULL) - { - /* This entry was IGNORE. */ - buf[len++] = IGNORE_CHAR; - } - else + /* Encode the weight value. We do nothing for IGNORE entries. */ + if (elem->weights[cnt].w[i] != NULL) len += utf8_encode (&buf[len], elem->weights[cnt].w[i]->mborder[cnt]); /* And add the buffer content. 
*/ + obstack_1grow (pool, len); obstack_grow (pool, buf, len); } - return retval; + return retval | ((elem->section->ruleidx & 0x7f) << 24); } @@ -1611,11 +1607,13 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, int32_t tablemb[256]; struct obstack weightpool; struct obstack extrapool; + struct obstack indirectpool; struct section_list *sect; int i; obstack_init (&weightpool); obstack_init (&extrapool); + obstack_init (&indirectpool); data.magic = LIMAGIC (LC_COLLATE); data.n = nelems; @@ -1629,7 +1627,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, cnt = 0; assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES)); - iov[2 + cnt].iov_base = &collate->nrules; + iov[2 + cnt].iov_base = &nrules; iov[2 + cnt].iov_len = sizeof (uint32_t); idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; ++cnt; @@ -1638,7 +1636,12 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next) if (sect->ruleidx == i) { - obstack_grow (&weightpool, sect->rules, nrules); + int j; + + obstack_make_room (&weightpool, nrules); + + for (j = 0; j < nrules; ++j) + obstack_1grow_fast (&weightpool, sect->rules[j]); ++i; } /* And align the output. */ @@ -1674,7 +1677,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, && collate->mbheads[ch]->nmbs == 1) { tablemb[ch] = output_weight (&weightpool, collate, - collate->mbheads[ch]); + collate->mbheads[ch]); } else { @@ -1719,38 +1722,60 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, { int i; - /* More than one consecutive entry. We mark this by having - a negative index into the weight table. */ - weightidx = -weightidx; - /* Now add first the initial byte sequence. */ added = ((sizeof (int32_t) + 1 + 1 + 2 * (runp->nmbs - 1) + __alignof__ (int32_t) - 1) & ~(__alignof__ (int32_t) - 1)); obstack_make_room (&extrapool, added); + /* More than one consecutive entry. 
We mark this by having + a negative index into the indirect table. */ if (sizeof (int32_t) == sizeof (int)) - obstack_int_grow_fast (&extrapool, weightidx); + obstack_int_grow_fast (&extrapool, + obstack_object_size (&indirectpool) + / sizeof (int32_t)); else - obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); - obstack_1grow_fast (&extrapool, runp->section->ruleidx); + { + int32_t i = (obstack_object_size (&indirectpool) + / sizeof (int32_t)); + obstack_grow (&extrapool, &i, sizeof (int32_t)); + } obstack_1grow_fast (&extrapool, runp->nmbs - 1); for (i = 1; i < runp->nmbs; ++i) obstack_1grow_fast (&extrapool, runp->mbs[i]); - /* Now find the end of the consecutive sequence. */ - do - runp = runp->next; - while (runp->mbnext != NULL - && runp->nmbs == runp->mbnext->nmbs - && memcmp (runp->mbs, runp->mbnext->mbs, - runp->nmbs - 1) == 0 - && (runp->mbs[runp->nmbs - 1] + 1 - == runp->mbnext->mbs[runp->nmbs - 1])); - - /* And add the end by sequence. Without length this time. */ + /* Now find the end of the consecutive sequence and + add all the indeces in the indirect pool. */ + while (1) + { + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, weightidx); + else + obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); + + runp = runp->next; + if (runp->mbnext == NULL + || runp->nmbs != runp->mbnext->nmbs + || memcmp (runp->mbs, runp->mbnext->mbs, + runp->nmbs - 1) != 0 + || (runp->mbs[runp->nmbs - 1] + 1 + != runp->mbnext->mbs[runp->nmbs - 1])) + break; + + /* Insert the weight. */ + weightidx = output_weight (&weightpool, collate, runp); + } + + /* And add the end byte sequence. Without length this + time. 
*/ for (i = 1; i < runp->nmbs; ++i) obstack_1grow_fast (&extrapool, runp->mbs[i]); + + weightidx = output_weight (&weightpool, collate, runp); + if (sizeof (int32_t) == sizeof (int)) + obstack_int_grow_fast (&extrapool, weightidx); + else + obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); } else { @@ -1768,7 +1793,6 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, obstack_int_grow_fast (&extrapool, weightidx); else obstack_grow (&extrapool, &weightidx, sizeof (int32_t)); - obstack_1grow_fast (&extrapool, runp->section->ruleidx); obstack_1grow_fast (&extrapool, runp->nmbs - 1); for (i = 1; i < runp->nmbs; ++i) obstack_1grow_fast (&extrapool, runp->mbs[i]); @@ -1835,6 +1859,12 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; ++cnt; + assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_INDIRECTMB)); + iov[2 + cnt].iov_len = obstack_object_size (&indirectpool); + iov[2 + cnt].iov_base = obstack_finish (&indirectpool); + idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len; + ++cnt; + assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE)); @@ -1842,6 +1872,7 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap, obstack_free (&weightpool, NULL); obstack_free (&extrapool, NULL); + obstack_free (&indirectpool, NULL); } @@ -2291,16 +2322,16 @@ error while adding equivalent collating symbol")); uint32_t cnt; /* This means we have exactly one rule: `forward'. */ - if (collate->nrules > 1) + if (nrules > 1) lr_error (ldfile, _("\ %s: invalid number of sorting rules"), "LC_COLLATE"); else - collate->nrules = 1; + nrules = 1; sp->rules = obstack_alloc (&collate->mempool, (sizeof (enum coll_sort_rule) - * collate->nrules)); - for (cnt = 0; cnt < collate->nrules; ++cnt) + * nrules)); + for (cnt = 0; cnt < nrules; ++cnt) sp->rules[cnt] = sort_forward; /* Next line. 
*/ diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index 86d086021d..d98b7bdfd2 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -3073,10 +3073,8 @@ Computing table size for character classes might take a while..."), while (idx2 < ctype->map_collection_act[idx]) { if (ctype->map_collection[idx][idx2] != 0) - *find_idx (ctype, &ctype->map32[idx], - &ctype->map_collection_max[idx], - &ctype->map_collection_act[idx], - ctype->names[idx2]) = ctype->map_collection[idx][idx2]; + ctype->map32[idx][ctype->charnames[idx2]] = + ctype->map_collection[idx][idx2]; ++idx2; } } diff --git a/locale/weight.h b/locale/weight.h index 6e31e2d495..356ee57855 100644 --- a/locale/weight.h +++ b/locale/weight.h @@ -17,191 +17,106 @@ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include <alloca.h> -#include <errno.h> -#include <langinfo.h> -#include "localeinfo.h" - -#ifndef STRING_TYPE -# error STRING_TYPE not defined -#endif +/* Find index of weight. */ +static inline int32_t +findidx (const unsigned char **cpp) +{ + int_fast32_t i = table[*(*cpp)++]; + const unsigned char *cp; -#ifndef USTRING_TYPE -# error USTRING_TYPE not defined -#endif + if (i >= 0) + /* This is an index into the weight table. Cool. */ + return i; -typedef struct weight_t -{ - struct weight_t *prev; - struct weight_t *next; - struct data_pair + /* Oh well, more than one sequence starting with this byte. + Search for the correct one. */ + cp = &extra[-i]; + while (1) { - int number; - const uint32_t *value; - } data[0]; -} weight_t; - - -/* The following five macros grant access to the values in the - collate locale file that do not depend on byte order. 
*/ -#ifndef USE_IN_EXTENDED_LOCALE_MODEL -# define collate_nrules \ - (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES)) -# define collate_hash_size \ - (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE)) -# define collate_hash_layers \ - (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS)) -# define collate_undefined \ - (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_UNDEFINED_WC)) -# define collate_rules \ - ((uint32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULES)) - -static __inline void get_weight (const STRING_TYPE **str, weight_t *result); -static __inline void -get_weight (const STRING_TYPE **str, weight_t *result) -#else -# define collate_nrules \ - current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word -# define collate_hash_size \ - current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].word -# define collate_hash_layers \ - current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].word -# define collate_undefined \ - current->values[_NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED_WC)].word -# define collate_rules \ - ((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULES)].string) - -static __inline void get_weight (const STRING_TYPE **str, weight_t *result, - struct locale_data *current, - const uint32_t *__collate_tablewc, - const uint32_t *__collate_extrawc); -static __inline void -get_weight (const STRING_TYPE **str, weight_t *result, - struct locale_data *current, const uint32_t *__collate_tablewc, - const uint32_t *__collate_extrawc) -#endif -{ - unsigned int ch = *((USTRING_TYPE *) (*str))++; - size_t slot; + size_t nhere; + const unsigned char *usrc = *cpp; - if (sizeof (STRING_TYPE) == 1) - slot = ch * (collate_nrules + 1); - else - { - const size_t level_size = collate_hash_size * (collate_nrules + 1); - size_t level; + /* The first thing is the index. */ + i = *((int32_t *) cp); + cp += sizeof (int32_t); - slot = (ch % collate_hash_size) * (collate_nrules + 1); + /* Next is the length of the byte sequence. 
These are always + short byte sequences so there is no reason to call any + function (even if they are inlined). */ + nhere = *cp++; - level = 0; - while (__collate_tablewc[slot] != (uint32_t) ch) + if (i >= 0) { - if (__collate_tablewc[slot + 1] == 0 - || ++level >= collate_hash_layers) - { - size_t idx = collate_undefined; - size_t cnt; + /* It is a single character. If it matches we found our + index. Note that at the end of each list there is an + entry of length zero which represents the single byte + sequence. The first (and here only) byte was tested + already. */ + size_t cnt; - for (cnt = 0; cnt < collate_nrules; ++cnt) - { - result->data[cnt].number = __collate_extrawc[idx++]; - result->data[cnt].value = &__collate_extrawc[idx]; - idx += result->data[cnt].number; - } - /* The Unix standard requires that a character outside - the domain is signalled by setting `errno'. */ - __set_errno (EINVAL); - return; - } - slot += level_size; - } - } + for (cnt = 0; cnt < nhere; ++cnt) + if (cp[cnt] != usrc[cnt]) + break; - if (__collate_tablewc[slot + 1] != (uint32_t) FORWARD_CHAR) - { - /* We have a simple form. One value for each weight. */ - size_t cnt; + if (cnt == nhere) + { + /* Found it. */ + *cpp += nhere; + return i; + } - for (cnt = 0; cnt < collate_nrules; ++cnt) - { - result->data[cnt].number = 1; - result->data[cnt].value = &__collate_tablewc[slot + 1 + cnt]; + /* Up to the next entry. */ + cp += nhere; } - return; - } + else + { + /* This is a range of characters. First decide whether the + current byte sequence lies in the range. */ + size_t cnt; + size_t offset = 0; - /* We now look for any collation element which starts with CH. - There might none, but the last list member is a catch-all case - because it is simple the character CH. The value of this entry - might be the same as UNDEFINED. 
*/ - slot = __collate_tablewc[slot + 2]; + for (cnt = 0; cnt < nhere; ++cnt) + if (cp[cnt] != usrc[cnt]) + break; - while (1) - { - size_t idx; + if (cnt != nhere) + { + if (cp[cnt] > usrc[cnt]) + { + /* Cannot be in this range. */ + cp += 2 * nhere; + continue; + } - /* This is a comparison between a uint32_t array (aka wchar_t) and - an 8-bit string. */ - for (idx = 0; __collate_extrawc[slot + 2 + idx] != 0; ++idx) - if (__collate_extrawc[slot + 2 + idx] != (uint32_t) (*str)[idx]) - break; + /* Test against the end of the range. */ + for (cnt = 0; cnt < nhere; ++cnt) + if (cp[nhere + cnt] != usrc[cnt]) + break; - /* When the loop finished with all character of the collation - element used, we found the longest prefix. */ - if (__collate_extrawc[slot + 2 + idx] == 0) - { - size_t cnt; + if (cnt != nhere && cp[nhere + cnt] < usrc[cnt]) + { + /* Cannot be in this range. */ + cp += 2 * nhere; + continue; + } - *str += idx; - idx += slot + 3; - for (cnt = 0; cnt < collate_nrules; ++cnt) - { - result->data[cnt].number = __collate_extrawc[idx++]; - result->data[cnt].value = &__collate_extrawc[idx]; - idx += result->data[cnt].number; + /* This range matches the next characters. Now find + the offset in the indirect table. */ + for (cnt = 0; cp[cnt] == usrc[cnt]; ++cnt); + + do + { + offset <<= 8; + offset += usrc[cnt] - cp[cnt]; + } + while (++cnt < nhere); } - return; - } - /* To next entry in list. */ - slot += __collate_extrawc[slot]; + *cpp += nhere; + return offset; + } } -} - -/* To process a string efficiently we retrieve all information about - the string at once. The following macro constructs a double linked - list of this information. It is a macro because we use `alloca' - and we use a double linked list because of the backward collation - order. - - We have this strange extra macro since the functions which use the - given locale (not the global one) cannot use the global tables. 
*/ -#ifndef USE_IN_EXTENDED_LOCALE_MODEL -# define call_get_weight(strp, newp) get_weight ((strp), (newp)) -#else -# define call_get_weight(strp, newp) \ - get_weight ((strp), (newp), current, collate_table, collate_extra) -#endif - -#define get_string(str, forw, backw) \ - do \ - { \ - weight_t *newp; \ - while (*str != '\0') \ - { \ - newp = (weight_t *) alloca (sizeof (weight_t) \ - + (collate_nrules \ - * sizeof (struct data_pair))); \ - \ - newp->prev = backw; \ - if (backw == NULL) \ - forw = newp; \ - else \ - backw->next = newp; \ - newp->next = NULL; \ - backw = newp; \ - call_get_weight (&str, newp); \ - } \ - } \ - while (0) + /* NOTREACHED */ + return 0x43219876; +} |