summary | refs | log | tree | commit | diff
path: root/posix
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 2000-05-24 20:22:51 +0000
committer: Ulrich Drepper <drepper@redhat.com> 2000-05-24 20:22:51 +0000
commit: acb5ee2e561276d64c6e26ef4b82f59a4db5ae90 (patch)
tree: 1f7ebfcaf8bf2874ae5cdb6348205dccfd9499c2 /posix
parent: b7cbee1cb029f6471aa069552a69f04a3d1b4d70 (diff)
download: glibc-acb5ee2e561276d64c6e26ef4b82f59a4db5ae90.tar.gz
Update.
2000-05-24  Ulrich Drepper  <drepper@redhat.com>
  * locale/programs/ld-collate.c (struct element_t): Add mbseqorder and wcseqorder members.
  (struct locale_collate_t): Likewise.
  (collate_finish): Assign collation sequence value to each character. Create tables for output.
  (collate_output): Write out tables with collation sequence information.
  * locale/C-collate.c: Provide C locale data for collation sequence table.
  * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC.
  * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC.
  * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before including fnmatch_loop.c.
  * posix/fnmatch_loop.c: Don't use strcoll while determining whether character is matched by range expression. Use collation sequence table. Outside glibc fall back on simple character value comparison.
Diffstat (limited to 'posix')
-rw-r--r--  posix/fnmatch.c       15
-rw-r--r--  posix/fnmatch_loop.c  135
2 files changed, 132 insertions, 18 deletions
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 18abf5da27..c4b11080fe 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -48,6 +48,15 @@
# include <wctype.h>
#endif
+/* We need some of the locale data (the collation sequence information)
+ but there is no interface to get this information in general. Therefore
+ we support a correct implementation only in glibc. */
+#ifdef _LIBC
+# include "../locale/localeinfo.h"
+
+# define CONCAT(a,b) __CONCAT(a,b)
+#endif
+
/* Comment out all this code if we are using the GNU C Library, and are not
actually compiling the library itself. This code is part of the GNU C
Library, but also included in many other GNU distributions. Compiling
@@ -192,6 +201,7 @@ __wcschrnul (s, c)
# define STRCHR(S, C) strchr (S, C)
# define STRCHRNUL(S, C) __strchrnul (S, C)
# define STRCOLL(S1, S2) strcoll (S1, S2)
+# define SUFFIX MB
# include "fnmatch_loop.c"
@@ -209,7 +219,10 @@ __wcschrnul (s, c)
# define BTOWC(C) (C)
# define STRCHR(S, C) wcschr (S, C)
# define STRCHRNUL(S, C) __wcschrnul (S, C)
-# define STRCOLL(S1, S2) wcscoll (S1, S2)
+# define STRCOLL(S1, S2) wcscoll (S1, S2)
+# define SUFFIX WC
+# define WIDE_CHAR_VERSION 1
+
# undef IS_CHAR_CLASS
# ifdef _LIBC
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 5f6c05710e..831bd0631a 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -31,6 +31,16 @@ FCT (pattern, string, no_leading_period, flags)
{
register const CHAR *p = pattern, *n = string;
register UCHAR c;
+#ifdef _LIBC
+ const UCHAR *collseq = (const UCHAR *)
+ _NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX));
+# ifdef WIDE_CHAR_VERSION
+ const wint_t *names = (const wint_t *)
+ _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES);
+ size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE);
+ size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS);
+# endif
+#endif
while ((c = *p++) != L('\0'))
{
@@ -210,9 +220,9 @@ FCT (pattern, string, no_leading_period, flags)
/* Leave room for the null. */
CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
size_t c1 = 0;
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
wctype_t wt;
-# endif
+#endif
const CHAR *startp = p;
for (;;)
@@ -240,7 +250,7 @@ FCT (pattern, string, no_leading_period, flags)
}
str[c1] = L('\0');
-# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
+#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
wt = IS_CHAR_CLASS (str);
if (wt == 0)
/* Invalid character class name. */
@@ -248,7 +258,7 @@ FCT (pattern, string, no_leading_period, flags)
if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
goto matched;
-# else
+#else
if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
|| (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
|| (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
@@ -262,7 +272,7 @@ FCT (pattern, string, no_leading_period, flags)
|| (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
|| (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
goto matched;
-# endif
+#endif
}
else if (c == L('\0'))
/* [ (unterminated) loses. */
@@ -279,27 +289,117 @@ FCT (pattern, string, no_leading_period, flags)
if (c == L('-') && *p != L(']'))
{
- /* It is a range. */
- CHAR lo[2];
- CHAR fc[2];
+#if _LIBC
+ /* We have to find the collation sequence
+ value for C. Collation sequence is nothing
+ we can regularly access. The sequence
+ value is defined by the order in which the
+ definitions of the collation values for the
+ various characters appear in the source
+ file. A strange concept, nowhere
+ documented. */
+ int32_t fseqidx;
+ int32_t lseqidx;
UCHAR cend = *p++;
+# ifdef WIDE_CHAR_VERSION
+ size_t cnt;
+# endif
+
if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
cend = *p++;
if (cend == L('\0'))
return FNM_NOMATCH;
- lo[0] = cold;
- lo[1] = L('\0');
- fc[0] = fn;
- fc[1] = L('\0');
- if (STRCOLL (lo, fc) <= 0)
+# ifdef WIDE_CHAR_VERSION
+ /* Search in the `names' array for the characters. */
+ fseqidx = fn % size;
+ cnt = 0;
+ while (names[fseqidx] != fn)
{
- CHAR hi[2];
- hi[0] = FOLD (cend);
- hi[1] = L('\0');
- if (STRCOLL (fc, hi) <= 0)
+ if (++cnt == layers)
+ /* XXX We don't know anything about
+ the character we are supposed to
+ match. This means we are failing. */
+ goto range_not_matched;
+
+ fseqidx += size;
+ }
+ lseqidx = cold % size;
+ cnt = 0;
+ while (names[lseqidx] != cold)
+ {
+ if (++cnt == layers)
+ {
+ lseqidx = -1;
+ break;
+ }
+ lseqidx += size;
+ }
+# else
+ fseqidx = fn;
+ lseqidx = cold;
+# endif
+
+ /* XXX It is not entirely clear to me how to handle
+ characters which are not mentioned in the
+ collation specification. */
+ if (
+# ifdef WIDE_CHAR_VERSION
+ lseqidx == -1 ||
+# endif
+ collseq[lseqidx] <= collseq[fseqidx])
+ {
+ /* We have to look at the upper bound. */
+ int32_t hseqidx;
+
+ cend = FOLD (cend);
+# ifdef WIDE_CHAR_VERSION
+ hseqidx = cend % size;
+ cnt = 0;
+ while (names[hseqidx] != cend)
+ {
+ if (++cnt == layers)
+ {
+ /* Hum, no information about the upper
+ bound. The matching succeeds if the
+ lower bound is matched exactly. */
+ if (lseqidx == -1 || cold != fn)
+ goto range_not_matched;
+
+ goto matched;
+ }
+ }
+# else
+ hseqidx = cend;
+# endif
+
+ if (
+# ifdef WIDE_CHAR_VERSION
+ (lseqidx == -1
+ && collseq[fseqidx] == collseq[hseqidx]) ||
+# endif
+ collseq[fseqidx] <= collseq[hseqidx])
goto matched;
}
+# ifdef WIDE_CHAR_VERSION
+ range_not_matched:
+# endif
+#else
+ /* We use a boring value comparison of the character
+ values. This is better than comparing using
+ `strcoll' since the latter would have surprising
+ and sometimes fatal consequences. */
+ UCHAR cend = *p++;
+
+ if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
+ cend = *p++;
+ if (cend == L('\0'))
+ return FNM_NOMATCH;
+
+ /* It is a range. */
+ if (cold <= fc && fc <= c)
+ goto matched;
+#endif
c = *p++;
}
@@ -371,3 +471,4 @@ FCT (pattern, string, no_leading_period, flags)
#undef STRCOLL
#undef L
#undef BTOWC
+#undef SUFFIX