summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Greg Hudson <ghudson@mit.edu> 2022-12-10 01:26:36 -0500
committer Greg Hudson <ghudson@mit.edu> 2022-12-26 02:30:31 -0500
commit 50b7ae4b971d2e7b9d16230de966ec5452a367c6 (patch)
tree 395d01c23c34612287895867ea9a96338a5fd387
parent c5507dff5f5b2936937442b43f6c021b4f8a493d (diff)
download krb5-50b7ae4b971d2e7b9d16230de966ec5452a367c6.tar.gz
Remove unused Unicode functions
-rw-r--r-- src/include/k5-unicode.h 29
-rw-r--r-- src/include/k5-utf8.h 74
-rw-r--r-- src/lib/krb5/unicode/Makefile.in 11
-rw-r--r-- src/lib/krb5/unicode/ucdata/bidiapi.txt 84
-rw-r--r-- src/lib/krb5/unicode/ucdata/ucpgba.c 755
-rw-r--r-- src/lib/krb5/unicode/ucdata/ucpgba.h 166
-rw-r--r-- src/lib/krb5/unicode/ucdata/ucpgba.man 97
-rw-r--r-- src/lib/krb5/unicode/ucstr.c 248
-rw-r--r-- src/lib/krb5/unicode/ure/README 212
-rw-r--r-- src/lib/krb5/unicode/ure/ure.c 2139
-rw-r--r-- src/lib/krb5/unicode/ure/ure.h 152
-rw-r--r-- src/lib/krb5/unicode/ure/urestubs.c 125
-rw-r--r-- src/lib/krb5/unicode/utbm/README 121
-rw-r--r-- src/lib/krb5/unicode/utbm/utbm.c 475
-rw-r--r-- src/lib/krb5/unicode/utbm/utbm.h 110
-rw-r--r-- src/lib/krb5/unicode/utbm/utbmstub.c 108
-rw-r--r-- src/util/support/libkrb5support-fixed.exports 1
-rw-r--r-- src/util/support/t_utf8.c 6
-rw-r--r-- src/util/support/utf8.c 343
19 files changed, 7 insertions, 5249 deletions
diff --git a/src/include/k5-unicode.h b/src/include/k5-unicode.h
index 45c1788b2..81c495f65 100644
--- a/src/include/k5-unicode.h
+++ b/src/include/k5-unicode.h
@@ -87,41 +87,12 @@
typedef krb5_ucs4 krb5_unicode;
-int krb5int_ucstrncmp(
- const krb5_unicode *,
- const krb5_unicode *,
- size_t);
-
-int krb5int_ucstrncasecmp(
- const krb5_unicode *,
- const krb5_unicode *,
- size_t);
-
-krb5_unicode *krb5int_ucstrnchr(
- const krb5_unicode *,
- size_t,
- krb5_unicode);
-
-krb5_unicode *krb5int_ucstrncasechr(
- const krb5_unicode *,
- size_t,
- krb5_unicode);
-
-void krb5int_ucstr2upper(
- krb5_unicode *,
- size_t);
-
#define KRB5_UTF8_NOCASEFOLD 0x0U
#define KRB5_UTF8_CASEFOLD 0x1U
#define KRB5_UTF8_ARG1NFC 0x2U
#define KRB5_UTF8_ARG2NFC 0x4U
#define KRB5_UTF8_APPROX 0x8U
-krb5_error_code krb5int_utf8_normalize(
- const krb5_data *,
- krb5_data **,
- unsigned);
-
int krb5int_utf8_normcmp(
const krb5_data *,
const krb5_data *,
diff --git a/src/include/k5-utf8.h b/src/include/k5-utf8.h
index 7cc8cda47..11949f9f3 100644
--- a/src/include/k5-utf8.h
+++ b/src/include/k5-utf8.h
@@ -73,9 +73,6 @@
typedef uint16_t krb5_ucs2;
typedef uint32_t krb5_ucs4;
-int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out);
-size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf);
-
int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out);
size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf);
@@ -96,49 +93,6 @@ int k5_utf16le_to_utf8(const uint8_t *utf16bytes, size_t nbytes,
int k5_utf8_to_utf16le(const char *utf8, uint8_t **utf16_out,
size_t *nbytes_out);
-/* returns the number of bytes in the UTF-8 string */
-size_t krb5int_utf8_bytes(const char *);
-/* returns the number of UTF-8 characters in the string */
-size_t krb5int_utf8_chars(const char *);
-/* returns the number of UTF-8 characters in the counted string */
-size_t krb5int_utf8c_chars(const char *, size_t);
-/* returns the length (in bytes) of the UTF-8 character */
-int krb5int_utf8_offset(const char *);
-/* returns the length (in bytes) indicated by the UTF-8 character */
-int krb5int_utf8_charlen(const char *);
-
-/* returns the length (in bytes) indicated by the UTF-8 character
- * also checks that shortest possible encoding was used
- */
-int krb5int_utf8_charlen2(const char *);
-
-/* copies a UTF-8 character and returning number of bytes copied */
-int krb5int_utf8_copy(char *, const char *);
-
-/* returns pointer of next UTF-8 character in string */
-char *krb5int_utf8_next( const char *);
-/* returns pointer of previous UTF-8 character in string */
-char *krb5int_utf8_prev( const char *);
-
-/* primitive ctype routines -- not aware of non-ascii characters */
-int krb5int_utf8_isascii( const char *);
-int krb5int_utf8_isalpha( const char *);
-int krb5int_utf8_isalnum( const char *);
-int krb5int_utf8_isdigit( const char *);
-int krb5int_utf8_isxdigit( const char *);
-int krb5int_utf8_isspace( const char *);
-
-/* span characters not in set, return bytes spanned */
-size_t krb5int_utf8_strcspn( const char* str, const char *set);
-/* span characters in set, return bytes spanned */
-size_t krb5int_utf8_strspn( const char* str, const char *set);
-/* return first occurrence of character in string */
-char *krb5int_utf8_strchr( const char* str, const char *chr);
-/* return first character of set in string */
-char *krb5int_utf8_strpbrk( const char* str, const char *set);
-/* reentrant tokenizer */
-char *krb5int_utf8_strtok( char* sp, const char* sep, char **last);
-
/* Optimizations */
extern const char krb5int_utf8_lentab[128];
extern const char krb5int_utf8_mintab[32];
@@ -157,38 +111,10 @@ extern const char krb5int_utf8_mintab[32];
(krb5int_utf8_mintab[KRB5_UTF8_BV(p) & 0x1f] & (p)[1])) ? \
l : 0)
-#define KRB5_UTF8_OFFSET(p) (KRB5_UTF8_ISASCII(p) \
- ? 1 : krb5int_utf8_offset((p)) )
-
-#define KRB5_UTF8_COPY(d,s) (KRB5_UTF8_ISASCII(s) \
- ? (*(d) = *(s), 1) : krb5int_utf8_copy((d),(s)))
-
-#define KRB5_UTF8_NEXT(p) (KRB5_UTF8_ISASCII(p) \
- ? (char *)(p)+1 : krb5int_utf8_next((p)))
-
-#define KRB5_UTF8_INCR(p) ((p) = KRB5_UTF8_NEXT(p))
-
-/* For symmetry */
-#define KRB5_UTF8_PREV(p) (krb5int_utf8_prev((p)))
-#define KRB5_UTF8_DECR(p) ((p)=KRB5_UTF8_PREV((p)))
-
/*
* these macros assume 'x' is an ASCII x
* and assume the "C" locale
*/
-#define KRB5_ASCII(c) (!((c) & 0x80))
-#define KRB5_SPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
-#define KRB5_DIGIT(c) ((c) >= '0' && (c) <= '9')
-#define KRB5_LOWER(c) ((c) >= 'a' && (c) <= 'z')
#define KRB5_UPPER(c) ((c) >= 'A' && (c) <= 'Z')
-#define KRB5_ALPHA(c) (KRB5_LOWER(c) || KRB5_UPPER(c))
-#define KRB5_ALNUM(c) (KRB5_ALPHA(c) || KRB5_DIGIT(c))
-
-#define KRB5_LDH(c) (KRB5_ALNUM(c) || (c) == '-')
-
-#define KRB5_HEXLOWER(c) ((c) >= 'a' && (c) <= 'f')
-#define KRB5_HEXUPPER(c) ((c) >= 'A' && (c) <= 'F')
-#define KRB5_HEX(c) (KRB5_DIGIT(c) || \
- KRB5_HEXLOWER(c) || KRB5_HEXUPPER(c))
#endif /* K5_UTF8_H */
diff --git a/src/lib/krb5/unicode/Makefile.in b/src/lib/krb5/unicode/Makefile.in
index e23028df8..d7dc0f5f5 100644
--- a/src/lib/krb5/unicode/Makefile.in
+++ b/src/lib/krb5/unicode/Makefile.in
@@ -6,19 +6,15 @@ BUILDTOP=$(REL)..$(S)..$(S)..
##DOS##OBJFILE=..\$(OUTPRE)$(PREFIXDIR).lst
XXDIR = $(srcdir)/ucdata/
-XXHEADERS = ucdata.h ure.h uctable.h
-XXSRCS = ucdata.c ucgendat.c ure.c urestubs.c
+XXHEADERS = ucdata.h uctable.h
+XXSRCS = ucdata.c ucgendat.c
STLIBOBJS= \
ucdata.o \
- ure.o \
- urestubs.o \
ucstr.o
OBJS= \
$(OUTPRE)ucdata.$(OBJEXT) \
- $(OUTPRE)ure.$(OBJEXT) \
- $(OUTPRE)urestubs.$(OBJEXT) \
$(OUTPRE)ucstr.$(OBJEXT)
SRCS= \
@@ -57,9 +53,6 @@ ucgendat: ucgendat.o
##DOS## $(CP) $(srcdir)\ucdata\ucdata.c ucdata.c
##DOS## $(CP) $(srcdir)\ucdata\ucgendat.c ucgendat.c
##DOS## $(CP) $(srcdir)\ucdata\uctable.h uctable.h
-##DOS## $(CP) $(srcdir)\ure\ure.h ure.h
-##DOS## $(CP) $(srcdir)\ure\ure.c ure.c
-##DOS## $(CP) $(srcdir)\ure\urestubs.c urestubs.c
##DOS## $(CP) nul .links
$(XXSRCS) $(XXHEADERS) : .links
diff --git a/src/lib/krb5/unicode/ucdata/bidiapi.txt b/src/lib/krb5/unicode/ucdata/bidiapi.txt
deleted file mode 100644
index dffd12e5f..000000000
--- a/src/lib/krb5/unicode/ucdata/bidiapi.txt
+++ /dev/null
@@ -1,84 +0,0 @@
-#
-# $Id: bidiapi.txt,v 1.2 1999/11/19 15:24:29 mleisher Exp $
-#
-
- "Pretty Good Bidi Algorithm" API
-
-The PGBA (Pretty Good Bidi Algorithm) is an effective alternative to the
-Unicode BiDi algorithm. It currently provides only implicit reordering and
-does not yet support explicit reordering codes that the Unicode BiDi algorithm
-supports. In addition to reordering, the PGBA includes cursor movement
-support for both visual and logical navigation.
-
------------------------------------------------------------------------------
-
-#define UCPGBA_LTR 0
-#define UCPGBA_RTL 1
-
- These macros appear in the `direction' field of the data structures.
-
-#define UCPGBA_CURSOR_VISUAL 0
-#define UCPGBA_CURSOR_LOGICAL 1
-
- These macros are used to set the cursor movement for each reordered string.
-
------------------------------------------------------------------------------
-
-ucstring_t *ucstring_create(unsigned long *source, unsigned long start,
- unsigned long end, int default_direction,
- int cursor_motion)
-
- This function will create a reordered string by using the implicit
- directionality of the characters in the specified substring.
-
- The `default_direction' parameter should be one of UCPGBA_LTR or UCPGBA_RTL
- and is used only in cases where a string contains no characters with strong
- directionality.
-
- The `cursor_motion' parameter should be one of UCPGBA_CURSOR_VISUAL or
- UCPGBA_CURSOR_LOGICAL, and is used to specify the initial cursor motion
- behavior. This behavior can be switched at any time using
- ustring_set_cursor_motion().
-
------------------------------------------------------------------------------
-
-void ucstring_free(ucstring_t *string)
-
- This function will deallocate the memory used by the string, incuding the
- string itself.
-
------------------------------------------------------------------------------
-
-void ucstring_cursor_info(ustring_t *string, int *direction,
- unsigned long *position)
-
- This function will return the text position of the internal cursor and the
- directionality of the text at that position. The position returned is the
- original text position of the character.
-
------------------------------------------------------------------------------
-
-int ucstring_set_cursor_motion(ucstring_t *string, int cursor_motion)
-
- This function will change the cursor motion type and return the previous
- cursor motion type.
-
------------------------------------------------------------------------------
-
-int ucstring_cursor_right(ucstring_t *string, int count)
-
- This function will move the internal cursor to the right according to the
- type of cursor motion set for the string.
-
- If no cursor motion is performed, it returns 0. Otherwise it will return a
- 1.
-
------------------------------------------------------------------------------
-
-int ucstring_cursor_left(ucstring_t *string, int count)
-
- This function will move the internal cursor to the left according to the
- type of cursor motion set for the string.
-
- If no cursor motion is performed, it returns 0. Otherwise it will return a
- 1.
diff --git a/src/lib/krb5/unicode/ucdata/ucpgba.c b/src/lib/krb5/unicode/ucdata/ucpgba.c
deleted file mode 100644
index 51907035b..000000000
--- a/src/lib/krb5/unicode/ucdata/ucpgba.c
+++ /dev/null
@@ -1,755 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 2001 Computing Research Labs, New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucpgba.c,v 1.9 2008/01/07 23:20:05 kurt Exp $
- * $Id: ucpgba.c,v 1.5 2001/01/02 18:46:20 mleisher Exp $
- */
-
-#include "k5-int.h"
-#include "k5-utf8.h"
-#include "k5-unicode.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "ucdata.h"
-#include "ucpgba.h"
-
-/*
- * These macros are used while reordering of RTL runs of text for the
- * special case of non-spacing characters being in runs of weakly
- * directional text. They check for weak and non-spacing, and digits and
- * non-spacing.
- */
-#define ISWEAKSPECIAL(cc) ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS)
-#define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0)
-
-/*
- * These macros are used while breaking a string into runs of text in
- * different directions. Descriptions:
- *
- * ISLTR_LTR - Test for members of an LTR run in an LTR context. This looks
- * for characters with ltr, non-spacing, weak, and neutral
- * properties.
- *
- * ISRTL_RTL - Test for members of an RTL run in an RTL context. This looks
- * for characters with rtl, non-spacing, weak, and neutral
- * properties.
- *
- * ISRTL_NEUTRAL - Test for RTL or neutral characters.
- *
- * ISWEAK_NEUTRAL - Test for weak or neutral characters.
- */
-#define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\
- UC_ET|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
-
-#define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\
- UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
-
-#define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON)
-#define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \
- UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS)
-
-/*
- * This table is temporarily hard-coded here until it can be constructed
- * automatically somehow.
- */
-static unsigned long _symmetric_pairs[] = {
- 0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C,
- 0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B,
- 0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D,
- 0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008,
- 0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C,
- 0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010,
- 0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016,
- 0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A,
- 0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59,
- 0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D,
- 0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B,
- 0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62,
-};
-
-static int _symmetric_pairs_size =
-sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]);
-
-/*
- * This routine looks up the other form of a symmetric pair.
- */
-static unsigned long
-_ucsymmetric_pair(unsigned long c)
-{
- int i;
-
- for (i = 0; i < _symmetric_pairs_size; i += 2) {
- if (_symmetric_pairs[i] == c)
- return _symmetric_pairs[i+1];
- }
- return c;
-}
-
-/*
- * This routine creates a new run, copies the text into it, links it into the
- * logical text order chain and returns it to the caller to be linked into
- * the visual text order chain.
- */
-static ucrun_t *
-_add_run(ucstring_t *str, unsigned long *src,
- unsigned long start, unsigned long end, int direction)
-{
- long i, t;
- ucrun_t *run;
-
- run = (ucrun_t *) malloc(sizeof(ucrun_t));
- run->visual_next = run->visual_prev = 0;
- run->direction = direction;
-
- run->cursor = ~0;
-
- run->chars = (unsigned long *)
- malloc(sizeof(unsigned long) * ((end - start) << 1));
- run->positions = run->chars + (end - start);
-
- run->source = src;
- run->start = start;
- run->end = end;
-
- if (direction == UCPGBA_RTL) {
- /*
- * Copy the source text into the run in reverse order and select
- * replacements for the pairwise punctuation and the <> characters.
- */
- for (i = 0, t = end - 1; start < end; start++, t--, i++) {
- run->positions[i] = t;
- if (ucissymmetric(src[t]) || src[t] == '<' || src[t] == '>')
- run->chars[i] = _ucsymmetric_pair(src[t]);
- else
- run->chars[i] = src[t];
- }
- } else {
- /*
- * Copy the source text into the run directly.
- */
- for (i = start; i < end; i++) {
- run->positions[i - start] = i;
- run->chars[i - start] = src[i];
- }
- }
-
- /*
- * Add the run to the logical list for cursor traversal.
- */
- if (str->logical_first == 0)
- str->logical_first = str->logical_last = run;
- else {
- run->logical_prev = str->logical_last;
- str->logical_last->logical_next = run;
- str->logical_last = run;
- }
-
- return run;
-}
-
-static void
-_ucadd_rtl_segment(ucstring_t *str, unsigned long *source, unsigned long start,
- unsigned long end)
-{
- unsigned long s, e;
- ucrun_t *run, *lrun;
-
- /*
- * This is used to splice runs into strings with overall LTR direction.
- * The `lrun' variable will never be NULL because at least one LTR run was
- * added before this RTL run.
- */
- lrun = str->visual_last;
-
- for (e = s = start; s < end;) {
- for (; e < end && ISRTL_NEUTRAL(source[e]); e++) ;
-
- if (e > s) {
- run = _add_run(str, source, s, e, UCPGBA_RTL);
-
- /*
- * Add the run to the visual list for cursor traversal.
- */
- if (str->visual_first != 0) {
- if (str->direction == UCPGBA_LTR) {
- run->visual_prev = lrun;
- run->visual_next = lrun->visual_next;
- if (lrun->visual_next != 0)
- lrun->visual_next->visual_prev = run;
- lrun->visual_next = run;
- if (lrun == str->visual_last)
- str->visual_last = run;
- } else {
- run->visual_next = str->visual_first;
- str->visual_first->visual_prev = run;
- str->visual_first = run;
- }
- } else
- str->visual_first = str->visual_last = run;
- }
-
- /*
- * Handle digits in a special way. This makes sure the weakly
- * directional characters appear on the expected sides of a number
- * depending on whether that number is Arabic or not.
- */
- for (s = e; e < end && ISWEAKSPECIAL(source[e]); e++) {
- if (!ISDIGITSPECIAL(source[e]) &&
- (e + 1 == end || !ISDIGITSPECIAL(source[e + 1])))
- break;
- }
-
- if (e > s) {
- run = _add_run(str, source, s, e, UCPGBA_LTR);
-
- /*
- * Add the run to the visual list for cursor traversal.
- */
- if (str->visual_first != 0) {
- if (str->direction == UCPGBA_LTR) {
- run->visual_prev = lrun;
- run->visual_next = lrun->visual_next;
- if (lrun->visual_next != 0)
- lrun->visual_next->visual_prev = run;
- lrun->visual_next = run;
- if (lrun == str->visual_last)
- str->visual_last = run;
- } else {
- run->visual_next = str->visual_first;
- str->visual_first->visual_prev = run;
- str->visual_first = run;
- }
- } else
- str->visual_first = str->visual_last = run;
- }
-
- /*
- * Collect all weak non-digit sequences for an RTL segment. These
- * will appear as part of the next RTL segment or will be added as
- * an RTL segment by themselves.
- */
- for (s = e; e < end && ucisweak(source[e]) && !ucisdigit(source[e]);
- e++) ;
- }
-
- /*
- * Capture any weak non-digit sequences that occur at the end of the RTL
- * run.
- */
- if (e > s) {
- run = _add_run(str, source, s, e, UCPGBA_RTL);
-
- /*
- * Add the run to the visual list for cursor traversal.
- */
- if (str->visual_first != 0) {
- if (str->direction == UCPGBA_LTR) {
- run->visual_prev = lrun;
- run->visual_next = lrun->visual_next;
- if (lrun->visual_next != 0)
- lrun->visual_next->visual_prev = run;
- lrun->visual_next = run;
- if (lrun == str->visual_last)
- str->visual_last = run;
- } else {
- run->visual_next = str->visual_first;
- str->visual_first->visual_prev = run;
- str->visual_first = run;
- }
- } else
- str->visual_first = str->visual_last = run;
- }
-}
-
-static void
-_ucadd_ltr_segment(ucstring_t *str, unsigned long *source, unsigned long start,
- unsigned long end)
-{
- ucrun_t *run;
-
- run = _add_run(str, source, start, end, UCPGBA_LTR);
-
- /*
- * Add the run to the visual list for cursor traversal.
- */
- if (str->visual_first != 0) {
- if (str->direction == UCPGBA_LTR) {
- run->visual_prev = str->visual_last;
- str->visual_last->visual_next = run;
- str->visual_last = run;
- } else {
- run->visual_next = str->visual_first;
- str->visual_first->visual_prev = run;
- str->visual_first = run;
- }
- } else
- str->visual_first = str->visual_last = run;
-}
-
-ucstring_t *
-ucstring_create(unsigned long *source, unsigned long start, unsigned long end,
- int default_direction, int cursor_motion)
-{
- int rtl_first;
- unsigned long s, e, ld;
- ucstring_t *str;
-
- str = (ucstring_t *) malloc(sizeof(ucstring_t));
-
- /*
- * Set the initial values.
- */
- str->cursor_motion = cursor_motion;
- str->logical_first = str->logical_last = 0;
- str->visual_first = str->visual_last = str->cursor = 0;
- str->source = source;
- str->start = start;
- str->end = end;
-
- /*
- * If the length of the string is 0, then just return it at this point.
- */
- if (start == end)
- return str;
-
- /*
- * This flag indicates whether the collection loop for RTL is called
- * before the LTR loop the first time.
- */
- rtl_first = 0;
-
- /*
- * Look for the first character in the string that has strong
- * directionality.
- */
- for (s = start; s < end && !ucisstrong(source[s]); s++) ;
-
- if (s == end)
- /*
- * If the string contains no characters with strong directionality, use
- * the default direction.
- */
- str->direction = default_direction;
- else
- str->direction = ucisrtl(source[s]) ? UCPGBA_RTL : UCPGBA_LTR;
-
- if (str->direction == UCPGBA_RTL)
- /*
- * Set the flag that causes the RTL collection loop to run first.
- */
- rtl_first = 1;
-
- /*
- * This loop now separates the string into runs based on directionality.
- */
- for (s = e = 0; s < end; s = e) {
- if (!rtl_first) {
- /*
- * Determine the next run of LTR text.
- */
-
- ld = s;
- while (e < end && ISLTR_LTR(source[e])) {
- if (ucisdigit(source[e]) &&
- !(0x660 <= source[e] && source[e] <= 0x669))
- ld = e;
- e++;
- }
- if (str->direction != UCPGBA_LTR) {
- while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
- e--;
- }
-
- /*
- * Add the LTR segment to the string.
- */
- if (e > s)
- _ucadd_ltr_segment(str, source, s, e);
- }
-
- /*
- * Determine the next run of RTL text.
- */
- ld = s = e;
- while (e < end && ISRTL_RTL(source[e])) {
- if (ucisdigit(source[e]) &&
- !(0x660 <= source[e] && source[e] <= 0x669))
- ld = e;
- e++;
- }
- if (str->direction != UCPGBA_RTL) {
- while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
- e--;
- }
-
- /*
- * Add the RTL segment to the string.
- */
- if (e > s)
- _ucadd_rtl_segment(str, source, s, e);
-
- /*
- * Clear the flag that allowed the RTL collection loop to run first
- * for strings with overall RTL directionality.
- */
- rtl_first = 0;
- }
-
- /*
- * Set up the initial cursor run.
- */
- str->cursor = str->logical_first;
- if (str != 0)
- str->cursor->cursor = (str->cursor->direction == UCPGBA_RTL) ?
- str->cursor->end - str->cursor->start : 0;
-
- return str;
-}
-
-void
-ucstring_free(ucstring_t *s)
-{
- ucrun_t *l, *r;
-
- if (s == 0)
- return;
-
- for (l = 0, r = s->visual_first; r != 0; r = r->visual_next) {
- if (r->end > r->start)
- free((char *) r->chars);
- if (l)
- free((char *) l);
- l = r;
- }
- if (l)
- free((char *) l);
-
- free((char *) s);
-}
-
-int
-ucstring_set_cursor_motion(ucstring_t *str, int cursor_motion)
-{
- int n;
-
- if (str == 0)
- return -1;
-
- n = str->cursor_motion;
- str->cursor_motion = cursor_motion;
- return n;
-}
-
-static int
-_ucstring_visual_cursor_right(ucstring_t *str, int count)
-{
- int cnt = count;
- unsigned long size;
- ucrun_t *cursor;
-
- if (str == 0)
- return 0;
-
- cursor = str->cursor;
- while (cnt > 0) {
- size = cursor->end - cursor->start;
- if ((cursor->direction == UCPGBA_RTL && cursor->cursor + 1 == size) ||
- cursor->cursor + 1 > size) {
- /*
- * If the next run is NULL, then the cursor is already on the
- * far right end already.
- */
- if (cursor->visual_next == 0)
- /*
- * If movement occured, then report it.
- */
- return (cnt != count);
-
- /*
- * Move to the next run.
- */
- str->cursor = cursor = cursor->visual_next;
- cursor->cursor = (cursor->direction == UCPGBA_RTL) ? -1 : 0;
- size = cursor->end - cursor->start;
- } else
- cursor->cursor++;
- cnt--;
- }
- return 1;
-}
-
-static int
-_ucstring_logical_cursor_right(ucstring_t *str, int count)
-{
- int cnt = count;
- unsigned long size;
- ucrun_t *cursor;
-
- if (str == 0)
- return 0;
-
- cursor = str->cursor;
- while (cnt > 0) {
- size = cursor->end - cursor->start;
- if (str->direction == UCPGBA_RTL) {
- if (cursor->direction == UCPGBA_RTL) {
- if (cursor->cursor + 1 == size) {
- if (cursor == str->logical_first)
- /*
- * Already at the beginning of the string.
- */
- return (cnt != count);
-
- str->cursor = cursor = cursor->logical_prev;
- size = cursor->end - cursor->start;
- cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
- size : 0;
- } else
- cursor->cursor++;
- } else {
- if (cursor->cursor == 0) {
- if (cursor == str->logical_first)
- /*
- * At the beginning of the string already.
- */
- return (cnt != count);
-
- str->cursor = cursor = cursor->logical_prev;
- size = cursor->end - cursor->start;
- cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
- size : 0;
- } else
- cursor->cursor--;
- }
- } else {
- if (cursor->direction == UCPGBA_RTL) {
- if (cursor->cursor == 0) {
- if (cursor == str->logical_last)
- /*
- * Already at the end of the string.
- */
- return (cnt != count);
-
- str->cursor = cursor = cursor->logical_next;
- size = cursor->end - cursor->start;
- cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
- 0 : size - 1;
- } else
- cursor->cursor--;
- } else {
- if (cursor->cursor + 1 > size) {
- if (cursor == str->logical_last)
- /*
- * Already at the end of the string.
- */
- return (cnt != count);
-
- str->cursor = cursor = cursor->logical_next;
- cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
- 0 : size - 1;
- } else
- cursor->cursor++;
- }
- }
- cnt--;
- }
- return 1;
-}
-
-int
-ucstring_cursor_right(ucstring_t *str, int count)
-{
- if (str == 0)
- return 0;
- return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
- _ucstring_visual_cursor_right(str, count) :
- _ucstring_logical_cursor_right(str, count);
-}
-
-static int
-_ucstring_visual_cursor_left(ucstring_t *str, int count)
-{
- int cnt = count;
- unsigned long size;
- ucrun_t *cursor;
-
- if (str == 0)
- return 0;
-
- cursor = str->cursor;
- while (cnt > 0) {
- size = cursor->end - cursor->start;
- if ((cursor->direction == UCPGBA_LTR && cursor->cursor == 0) ||
- cursor->cursor - 1 < -1) {
- /*
- * If the preceding run is NULL, then the cursor is already on the
- * far left end already.
- */
- if (cursor->visual_prev == 0)
- /*
- * If movement occured, then report it.
- */
- return (cnt != count);
-
- /*
- * Move to the previous run.
- */
- str->cursor = cursor = cursor->visual_prev;
- size = cursor->end - cursor->start;
- cursor->cursor = (cursor->direction == UCPGBA_RTL) ?
- size : size - 1;
- } else
- cursor->cursor--;
- cnt--;
- }
- return 1;
-}
-
-static int
-_ucstring_logical_cursor_left(ucstring_t *str, int count)
-{
- int cnt = count;
- unsigned long size;
- ucrun_t *cursor;
-
- if (str == 0)
- return 0;
-
- cursor = str->cursor;
- while (cnt > 0) {
- size = cursor->end - cursor->start;
- if (str->direction == UCPGBA_RTL) {
- if (cursor->direction == UCPGBA_RTL) {
- if (cursor->cursor == -1) {
- if (cursor == str->logical_last)
- /*
- * Already at the end of the string.
- */
- return (cnt != count);
-
- str->cursor = cursor = cursor->logical_next;
- size = cursor->end - cursor->start;
- cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
- 0 : size - 1;
- } else
- cursor->cursor--;
- } else {
- if (cursor->cursor + 1 > size) {
- if (cursor == str->logical_last)
- /*
- * At the end of the string already.
- */
- return (cnt != count);
-
- str->cursor = cursor = cursor->logical_next;
- size = cursor->end - cursor->start;
- cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
- 0 : size - 1;
- } else
- cursor->cursor++;
- }
- } else {
- if (cursor->direction == UCPGBA_RTL) {
- if (cursor->cursor + 1 == size) {
- if (cursor == str->logical_first)
- /*
- * Already at the beginning of the string.
- */
- return (cnt != count);
-
- str->cursor = cursor = cursor->logical_prev;
- size = cursor->end - cursor->start;
- cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
- size : 0;
- } else
- cursor->cursor++;
- } else {
- if (cursor->cursor == 0) {
- if (cursor == str->logical_first)
- /*
- * Already at the beginning of the string.
- */
- return (cnt != count);
-
- str->cursor = cursor = cursor->logical_prev;
- cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
- size : 0;
- } else
- cursor->cursor--;
- }
- }
- cnt--;
- }
- return 1;
-}
-
-int
-ucstring_cursor_left(ucstring_t *str, int count)
-{
- if (str == 0)
- return 0;
- return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
- _ucstring_visual_cursor_left(str, count) :
- _ucstring_logical_cursor_left(str, count);
-}
-
-void
-ucstring_cursor_info(ucstring_t *str, int *direction, unsigned long *position)
-{
- long c;
- unsigned long size;
- ucrun_t *cursor;
-
- if (str == 0 || direction == 0 || position == 0)
- return;
-
- cursor = str->cursor;
-
- *direction = cursor->direction;
-
- c = cursor->cursor;
- size = cursor->end - cursor->start;
-
- if (c == size)
- *position = (cursor->direction == UCPGBA_RTL) ?
- cursor->start : cursor->positions[c - 1];
- else if (c == -1)
- *position = (cursor->direction == UCPGBA_RTL) ?
- cursor->end : cursor->start;
- else
- *position = cursor->positions[c];
-}
diff --git a/src/lib/krb5/unicode/ucdata/ucpgba.h b/src/lib/krb5/unicode/ucdata/ucpgba.h
deleted file mode 100644
index 7e1d570d4..000000000
--- a/src/lib/krb5/unicode/ucdata/ucpgba.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1999 Computing Research Labs, New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ucdata/ucpgba.h,v 1.10 2008/01/07 23:20:05 kurt Exp $
- * $Id: ucpgba.h,v 1.4 1999/11/19 15:24:30 mleisher Exp $
- */
-
-#ifndef _h_ucpgba
-#define _h_ucpgba
-
-#include "k5-int.h"
-
-/***************************************************************************
- *
- * Macros and types.
- *
- ***************************************************************************/
-
-/*
- * These are the direction values that can appear in render runs and render
- * strings.
- */
-#define UCPGBA_LTR 0
-#define UCPGBA_RTL 1
-
-/*
- * These are the flags for cursor motion.
- */
-#define UCPGBA_CURSOR_VISUAL 0
-#define UCPGBA_CURSOR_LOGICAL 1
-
-/*
- * This structure is used to contain runs of text in a particular direction.
- */
-typedef struct _ucrun_t {
- struct _ucrun_t *visual_prev; /* Pointer to the previous visual run. */
- struct _ucrun_t *visual_next; /* Pointer to the next visual run. */
-
- struct _ucrun_t *logical_prev; /* Pointer to the previous logical run. */
- struct _ucrun_t *logical_next; /* Pointer to the next logical run. */
-
- int direction; /* Direction of the run. */
-
- long cursor; /* Position of "cursor" in the string. */
-
- unsigned long *chars; /* List of characters for the run. */
- unsigned long *positions; /* List of original positions in source. */
-
- unsigned long *source; /* The source string. */
- unsigned long start; /* Beginning offset in the source string. */
- unsigned long end; /* Ending offset in the source string. */
-} ucrun_t;
-
-/*
- * This represents a string of runs rendered up to a point that is not
- * platform specific.
- */
-typedef struct _ucstring_t {
- int direction; /* Overall direction of the string. */
-
- int cursor_motion; /* Logical or visual cursor motion flag. */
-
- ucrun_t *cursor; /* The run containing the "cursor." */
-
- ucrun_t *logical_first; /* First run in the logical order. */
- ucrun_t *logical_last; /* Last run in the logical order. */
-
- ucrun_t *visual_first; /* First run in the visual order. */
- ucrun_t *visual_last; /* Last run in the visual order. */
-
- unsigned long *source; /* The source string. */
- unsigned long start; /* The beginning offset in the source. */
- unsigned long end; /* The ending offset in the source. */
-} ucstring_t;
-
-/***************************************************************************
- *
- * API
- *
- ***************************************************************************/
-
-/*
- * This creates and reorders the specified substring using the
- * "Pretty Good Bidi Algorithm." A default direction is provided for cases
- * of a string containing no strong direction characters and the default
- * cursor motion should be provided.
- */
-ucstring_t *
-ucstring_create (unsigned long *source,
- unsigned long start,
- unsigned long end,
- int default_direction,
- int cursor_motion);
-/*
- * This releases the string.
- */
-void ucstring_free (ucstring_t *string);
-
-/*
- * This changes the cursor motion flag for the string.
- */
-int
-ucstring_set_cursor_motion (ucstring_t *string,
- int cursor_motion);
-
-/*
- * This function will move the cursor to the right depending on the
- * type of cursor motion that was specified for the string.
- *
- * A 0 is returned if no cursor motion is performed, otherwise a
- * 1 is returned.
- */
-int
-ucstring_cursor_right (ucstring_t *string, int count);
-
-/*
- * This function will move the cursor to the left depending on the
- * type of cursor motion that was specified for the string.
- *
- * A 0 is returned if no cursor motion is performed, otherwise a
- * 1 is returned.
- */
-int
-ucstring_cursor_left (ucstring_t *string, int count);
-
-/*
- * This routine retrieves the direction of the run containing the cursor
- * and the actual position in the original text string.
- */
-void
-ucstring_cursor_info (ucstring_t *string, int *direction,
- unsigned long *position);
-
-#endif /* _h_ucpgba */
diff --git a/src/lib/krb5/unicode/ucdata/ucpgba.man b/src/lib/krb5/unicode/ucdata/ucpgba.man
deleted file mode 100644
index 448650972..000000000
--- a/src/lib/krb5/unicode/ucdata/ucpgba.man
+++ /dev/null
@@ -1,97 +0,0 @@
-.\"
-.\" $Id: ucpgba.man,v 1.1 1999/11/19 16:08:34 mleisher Exp $
-.\"
-.TH ucpgba 3 "19 November 1999"
-.SH NAME
-ucpgba \- functions for doing bidirectional reordering of Unicode text and
-logical and visual cursor motion
-
-.SH SYNOPSIS
-.nf
-#include <ucdata.h>
-#include <ucpgba.h>
-
-ucstring_t *ucstring_create(unsigned long *source, unsigned long start,
- unsigned long end, int default_direction,
- int cursor_motion)
-.sp
-void ucstring_free(ucstring_t *string)
-.sp
-int ucstring_set_cursor_motion(ucstring_t *string, int cursor_motion)
-.sp
-int ucstring_cursor_right(ucstring_t *string, int count)
-.sp
-int ucstring_cursor_left(ucstring_t *string, int count)
-.sp
-void ucstring_cursor_info(ucstring_t *string, int *direction,
- unsigned long *position)
-
-.SH DESCRIPTION
-.TP 4
-.BR Macros
-UCPGBA_LTR
-.br
-UCPGBA_RTL
-.br
-UCPGBA_CURSOR_VISUAL
-.br
-UCPGBA_CURSOR_LOGICAL
-
-.TP 4
-.BR ucstring_create()
-This function will create a reordered string by using the implicit
-directionality of the characters in the specified substring.
-.sp
-The `default_direction' parameter should be one of UCPGBA_LTR or UCPGBA_RTL
-and is used only in cases where a string contains no characters with strong
-directionality.
-.sp
-The `cursor_motion' parameter should be one of UCPGBA_CURSOR_VISUAL or
-UCPGBA_CURSOR_LOGICAL, and is used to specify the initial cursor motion
-behavior. This behavior can be switched at any time using
-ustring_set_cursor_motion().
-
-.TP 4
-.BR ucstring_free()
-This function will deallocate the memory used by the string, incuding the
-string itself.
-
-.TP 4
-.BR ucstring_cursor_info()
-This function will return the text position of the internal cursor and the
-directionality of the text at that position. The position returned is the
-original text position of the character.
-
-.TP 4
-.BR ucstring_set_cursor_motion()
-This function will change the cursor motion type and return the previous
-cursor motion type.
-
-.TP 4
-.BR ucstring_cursor_right()
-This function will move the internal cursor to the right according to the
-type of cursor motion set for the string.
-.sp
-If no cursor motion is performed, it returns 0. Otherwise it will return a 1.
-
-.TP 4
-.BR ucstring_cursor_left()
-This function will move the internal cursor to the left according to the
-type of cursor motion set for the string.
-.sp
-If no cursor motion is performed, it returns 0. Otherwise it will return a 1.
-
-.SH "SEE ALSO"
-ucdata(3)
-
-.SH ACKNOWLEDGMENTS
-These are people who have helped with patches or alerted me about problems.
-
-.SH AUTHOR
-Mark Leisher
-.br
-Computing Research Lab
-.br
-New Mexico State University
-.br
-Email: mleisher@crl.nmsu.edu
diff --git a/src/lib/krb5/unicode/ucstr.c b/src/lib/krb5/unicode/ucstr.c
index 0257882cd..0a2e5ab41 100644
--- a/src/lib/krb5/unicode/ucstr.c
+++ b/src/lib/krb5/unicode/ucstr.c
@@ -23,7 +23,7 @@
#include <ctype.h>
-int
+static int
krb5int_ucstrncmp(
const krb5_unicode * u1,
const krb5_unicode * u2,
@@ -40,7 +40,7 @@ krb5int_ucstrncmp(
return 0;
}
-int
+static int
krb5int_ucstrncasecmp(
const krb5_unicode * u1,
const krb5_unicode * u2,
@@ -60,47 +60,6 @@ krb5int_ucstrncasecmp(
return 0;
}
-krb5_unicode *
-krb5int_ucstrnchr(
- const krb5_unicode * u,
- size_t n,
- krb5_unicode c)
-{
- for (; 0 < n; ++u, --n) {
- if (*u == c) {
- return (krb5_unicode *) u;
- }
- }
-
- return NULL;
-}
-
-krb5_unicode *
-krb5int_ucstrncasechr(
- const krb5_unicode * u,
- size_t n,
- krb5_unicode c)
-{
- c = uctolower(c);
- for (; 0 < n; ++u, --n) {
- if ((krb5_unicode) uctolower(*u) == c) {
- return (krb5_unicode *) u;
- }
- }
-
- return NULL;
-}
-
-void
-krb5int_ucstr2upper(
- krb5_unicode * u,
- size_t n)
-{
- for (; 0 < n; ++u, --n) {
- *u = uctoupper(*u);
- }
-}
-
/* Return true if data contains valid UTF-8 sequences. */
krb5_boolean
k5_utf8_validate(const krb5_data *data)
@@ -127,211 +86,8 @@ k5_utf8_validate(const krb5_data *data)
return !in.status;
}
-#define TOUPPER(c) (islower(c) ? toupper(c) : (c))
#define TOLOWER(c) (isupper(c) ? tolower(c) : (c))
-krb5_error_code
-krb5int_utf8_normalize(
- const krb5_data * data,
- krb5_data ** newdataptr,
- unsigned flags)
-{
- int i, j, len, clen, outpos = 0, ucsoutlen, outsize;
- char *out = NULL, *outtmp, *s;
- krb5_ucs4 *ucs = NULL, *p, *ucsout = NULL;
- krb5_data *newdata;
- krb5_error_code retval = 0;
-
- static unsigned char mask[] = {
- 0, 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
-
- unsigned casefold = flags & KRB5_UTF8_CASEFOLD;
- unsigned approx = flags & KRB5_UTF8_APPROX;
-
- *newdataptr = NULL;
-
- s = data->data;
- len = data->length;
-
- newdata = malloc(sizeof(*newdata));
- if (newdata == NULL)
- return ENOMEM;
-
- /*
- * Should first check to see if string is already in proper normalized
- * form. This is almost as time consuming as the normalization though.
- */
-
- /* finish off everything up to character before first non-ascii */
- if (KRB5_UTF8_ISASCII(s)) {
- if (casefold) {
- outsize = len + 7;
- out = malloc(outsize);
- if (out == NULL) {
- retval = ENOMEM;
- goto cleanup;
- }
-
- for (i = 1; (i < len) && KRB5_UTF8_ISASCII(s + i); i++) {
- out[outpos++] = TOLOWER(s[i - 1]);
- }
- if (i == len) {
- out[outpos++] = TOLOWER(s[len - 1]);
- goto cleanup;
- }
- } else {
- for (i = 1; (i < len) && KRB5_UTF8_ISASCII(s + i); i++) {
- /* empty */
- }
-
- if (i == len) {
- newdata->length = len;
- newdata->data = k5memdup0(s, len, &retval);
- if (newdata->data == NULL)
- goto cleanup;
- *newdataptr = newdata;
- return 0;
- }
- outsize = len + 7;
- out = malloc(outsize);
- if (out == NULL) {
- retval = ENOMEM;
- goto cleanup;
- }
- outpos = i - 1;
- memcpy(out, s, outpos);
- }
- } else {
- outsize = len + 7;
- out = malloc(outsize);
- if (out == NULL) {
- retval = ENOMEM;
- goto cleanup;
- }
- i = 0;
- }
-
- p = ucs = malloc(len * sizeof(*ucs));
- if (ucs == NULL) {
- retval = ENOMEM;
- goto cleanup;
- }
- /* convert character before first non-ascii to ucs-4 */
- if (i > 0) {
- *p = casefold ? TOLOWER(s[i - 1]) : s[i - 1];
- p++;
- }
- /* s[i] is now first non-ascii character */
- for (;;) {
- /* s[i] is non-ascii */
- /* convert everything up to next ascii to ucs-4 */
- while (i < len) {
- /* KRB5_UTF8_CHARLEN only looks at the first byte; use it to guard
- * against small read overruns. */
- if (KRB5_UTF8_CHARLEN(s + i) > len - i) {
- retval = KRB5_ERR_INVALID_UTF8;
- goto cleanup;
- }
- clen = KRB5_UTF8_CHARLEN2(s + i, clen);
- if (clen == 0) {
- retval = KRB5_ERR_INVALID_UTF8;
- goto cleanup;
- }
- if (clen == 1) {
- /* ascii */
- break;
- }
- *p = s[i] & mask[clen];
- i++;
- for (j = 1; j < clen; j++) {
- if ((s[i] & 0xc0) != 0x80) {
- retval = KRB5_ERR_INVALID_UTF8;
- goto cleanup;
- }
- *p <<= 6;
- *p |= s[i] & 0x3f;
- i++;
- }
- if (casefold) {
- *p = uctolower(*p);
- }
- p++;
- }
- /* normalize ucs of length p - ucs */
- uccompatdecomp(ucs, p - ucs, &ucsout, &ucsoutlen);
- if (approx) {
- for (j = 0; j < ucsoutlen; j++) {
- if (ucsout[j] < 0x80) {
- out[outpos++] = ucsout[j];
- }
- }
- } else {
- ucsoutlen = uccanoncomp(ucsout, ucsoutlen);
- /* convert ucs to utf-8 and store in out */
- for (j = 0; j < ucsoutlen; j++) {
- /*
- * allocate more space if not enough room for 6 bytes and
- * terminator
- */
- if (outsize - outpos < 7) {
- outsize = ucsoutlen - j + outpos + 6;
- outtmp = realloc(out, outsize);
- if (outtmp == NULL) {
- retval = ENOMEM;
- goto cleanup;
- }
- out = outtmp;
- }
- outpos += krb5int_ucs4_to_utf8(ucsout[j], &out[outpos]);
- }
- }
-
- free(ucsout);
- ucsout = NULL;
-
- if (i == len) {
- break;
- }
-
- /* Allocate more space in out if necessary */
- if (len - i >= outsize - outpos) {
- outsize += 1 + ((len - i) - (outsize - outpos));
- outtmp = realloc(out, outsize);
- if (outtmp == NULL) {
- retval = ENOMEM;
- goto cleanup;
- }
- out = outtmp;
- }
- /* s[i] is ascii */
- /* finish off everything up to char before next non-ascii */
- for (i++; (i < len) && KRB5_UTF8_ISASCII(s + i); i++) {
- out[outpos++] = casefold ? TOLOWER(s[i - 1]) : s[i - 1];
- }
- if (i == len) {
- out[outpos++] = casefold ? TOLOWER(s[len - 1]) : s[len - 1];
- break;
- }
- /* convert character before next non-ascii to ucs-4 */
- *ucs = casefold ? TOLOWER(s[i - 1]) : s[i - 1];
- p = ucs + 1;
- }
-
-cleanup:
- free(ucs);
- free(ucsout);
- if (retval) {
- free(out);
- free(newdata);
- return retval;
- }
- out[outpos] = '\0';
- newdata->data = out;
- newdata->length = outpos;
- *newdataptr = newdata;
- return 0;
-}
-
/* compare UTF8-strings, optionally ignore casing */
/* slow, should be optimized */
int
diff --git a/src/lib/krb5/unicode/ure/README b/src/lib/krb5/unicode/ure/README
deleted file mode 100644
index c9918f5fd..000000000
--- a/src/lib/krb5/unicode/ure/README
+++ /dev/null
@@ -1,212 +0,0 @@
-#
-# $Id: README,v 1.3 1999/09/21 15:47:43 mleisher Exp $
-#
-# Copyright 1997, 1998, 1999 Computing Research Labs,
-# New Mexico State University
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
-# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-
-
- Unicode and Regular Expressions
- Version 0.5
-
-This is a simple regular expression package for matching against Unicode text
-in UCS2 form. The implementation of this URE package is a variation on the
-RE->DFA algorithm done by Mark Hopkins (markh@csd4.csd.uwm.edu). Mark
-Hopkins' algorithm had the virtue of being very simple, so it was used as a
-model.
-
----------------------------------------------------------------------------
-
-Assumptions:
-
- o Regular expression and text already normalized.
-
- o Conversion to lower case assumes a 1-1 mapping.
-
-Definitions:
-
- Separator - any one of U+2028, U+2029, '\n', '\r'.
-
-Operators:
- . - match any character.
- * - match zero or more of the last subexpression.
- + - match one or more of the last subexpression.
- ? - match zero or one of the last subexpression.
- () - subexpression grouping.
-
- Notes:
-
- o The "." operator normally does not match separators, but a flag is
- available for the ure_exec() function that will allow this operator to
- match a separator.
-
-Literals and Constants:
-
- c - literal UCS2 character.
- \x.... - hexadecimal number of up to 4 digits.
- \X.... - hexadecimal number of up to 4 digits.
- \u.... - hexadecimal number of up to 4 digits.
- \U.... - hexadecimal number of up to 4 digits.
-
-Character classes:
-
- [...] - Character class.
- [^...] - Negated character class.
- \pN1,N2,...,Nn - Character properties class.
- \PN1,N2,...,Nn - Negated character properties class.
-
- POSIX character classes recognized:
-
- :alnum:
- :alpha:
- :cntrl:
- :digit:
- :graph:
- :lower:
- :print:
- :punct:
- :space:
- :upper:
- :xdigit:
-
- Notes:
-
- o Character property classes are \p or \P followed by a comma separated
- list of integers between 1 and 32. These integers are references to
- the following character properties:
-
- N Character Property
- --------------------------
- 1 _URE_NONSPACING
- 2 _URE_COMBINING
- 3 _URE_NUMDIGIT
- 4 _URE_NUMOTHER
- 5 _URE_SPACESEP
- 6 _URE_LINESEP
- 7 _URE_PARASEP
- 8 _URE_CNTRL
- 9 _URE_PUA
- 10 _URE_UPPER
- 11 _URE_LOWER
- 12 _URE_TITLE
- 13 _URE_MODIFIER
- 14 _URE_OTHERLETTER
- 15 _URE_DASHPUNCT
- 16 _URE_OPENPUNCT
- 17 _URE_CLOSEPUNCT
- 18 _URE_OTHERPUNCT
- 19 _URE_MATHSYM
- 20 _URE_CURRENCYSYM
- 21 _URE_OTHERSYM
- 22 _URE_LTR
- 23 _URE_RTL
- 24 _URE_EURONUM
- 25 _URE_EURONUMSEP
- 26 _URE_EURONUMTERM
- 27 _URE_ARABNUM
- 28 _URE_COMMONSEP
- 29 _URE_BLOCKSEP
- 30 _URE_SEGMENTSEP
- 31 _URE_WHITESPACE
- 32 _URE_OTHERNEUT
-
- o Character classes can contain literals, constants, and character
- property classes. Example:
-
- [abc\U10A\p1,3,4]
-
----------------------------------------------------------------------------
-
-Before using URE
-----------------
-Before URE is used, two functions need to be created. One to check if a
-character matches a set of URE character properties, and one to convert a
-character to lower case.
-
-Stubs for these function are located in the urestubs.c file.
-
-Using URE
----------
-
-Sample pseudo-code fragment.
-
- ure_buffer_t rebuf;
- ure_dfa_t dfa;
- ucs2_t *re, *text;
- unsigned long relen, textlen;
- unsigned long match_start, match_end;
-
- /*
- * Allocate the dynamic storage needed to compile regular expressions.
- */
- rebuf = ure_buffer_create();
-
- for each regular expression in a list {
- re = next regular expression;
- relen = length(re);
-
- /*
- * Compile the regular expression with the case insensitive flag
- * turned on.
- */
- dfa = ure_compile(re, relen, 1, rebuf);
-
- /*
- * Look for the first match in some text. The matching will be done
- * in a case insensitive manner because the expression was compiled
- * with the case insensitive flag on.
- */
- if (ure_exec(dfa, 0, text, textlen, &match_start, &match_end))
- printf("MATCH: %ld %ld\n", match_start, match_end);
-
- /*
- * Look for the first match in some text, ignoring non-spacing
- * characters.
- */
- if (ure_exec(dfa, URE_IGNORE_NONSPACING, text, textlen,
- &match_start, &match_end))
- printf("MATCH: %ld %ld\n", match_start, match_end);
-
- /*
- * Free the DFA.
- */
- ure_free_dfa(dfa);
- }
-
- /*
- * Free the dynamic storage used for compiling the expressions.
- */
- ure_free_buffer(rebuf);
-
----------------------------------------------------------------------------
-
-Mark Leisher <mleisher@crl.nmsu.edu>
-29 March 1997
-
-===========================================================================
-
-CHANGES
--------
-
-Version: 0.5
-Date : 21 September 1999
-==========================
- 1. Added copyright stuff and put in CVS.
diff --git a/src/lib/krb5/unicode/ure/ure.c b/src/lib/krb5/unicode/ure/ure.c
deleted file mode 100644
index 7b3048713..000000000
--- a/src/lib/krb5/unicode/ure/ure.c
+++ /dev/null
@@ -1,2139 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ure/ure.c,v 1.19 2008/01/07 23:20:05 kurt Exp $
- * $Id: ure.c,v 1.2 1999/09/21 15:47:43 mleisher Exp $"
- */
-
-#include <k5-int.h>
-
-#include <stdlib.h>
-#include <string.h>
-#ifndef _WIN32
-#include <unistd.h>
-#endif
-
-#include "ure.h"
-
-/*
- * Flags used internally in the DFA.
- */
-#define _URE_DFA_CASEFOLD 0x01
-#define _URE_DFA_BLANKLINE 0x02
-
-static unsigned long cclass_flags[] = {
- 0,
- _URE_NONSPACING,
- _URE_COMBINING,
- _URE_NUMDIGIT,
- _URE_NUMOTHER,
- _URE_SPACESEP,
- _URE_LINESEP,
- _URE_PARASEP,
- _URE_CNTRL,
- _URE_PUA,
- _URE_UPPER,
- _URE_LOWER,
- _URE_TITLE,
- _URE_MODIFIER,
- _URE_OTHERLETTER,
- _URE_DASHPUNCT,
- _URE_OPENPUNCT,
- _URE_CLOSEPUNCT,
- _URE_OTHERPUNCT,
- _URE_MATHSYM,
- _URE_CURRENCYSYM,
- _URE_OTHERSYM,
- _URE_LTR,
- _URE_RTL,
- _URE_EURONUM,
- _URE_EURONUMSEP,
- _URE_EURONUMTERM,
- _URE_ARABNUM,
- _URE_COMMONSEP,
- _URE_BLOCKSEP,
- _URE_SEGMENTSEP,
- _URE_WHITESPACE,
- _URE_OTHERNEUT,
-};
-
-/*
- * Symbol types for the DFA.
- */
-#define _URE_ANY_CHAR 1
-#define _URE_CHAR 2
-#define _URE_CCLASS 3
-#define _URE_NCCLASS 4
-#define _URE_BOL_ANCHOR 5
-#define _URE_EOL_ANCHOR 6
-
-/*
- * Op codes for converting the NFA to a DFA.
- */
-#define _URE_SYMBOL 10
-#define _URE_PAREN 11
-#define _URE_QUEST 12
-#define _URE_STAR 13
-#define _URE_PLUS 14
-#define _URE_ONE 15
-#define _URE_AND 16
-#define _URE_OR 17
-
-#define _URE_NOOP 0xffff
-
-#define _URE_REGSTART 0x8000
-#define _URE_REGEND 0x4000
-
-/*
- * Structure used to handle a compacted range of characters.
- */
-typedef struct {
- ucs4_t min_code;
- ucs4_t max_code;
-} _ure_range_t;
-
-typedef struct {
- _ure_range_t *ranges;
- ucs2_t ranges_used;
- ucs2_t ranges_size;
-} _ure_ccl_t;
-
-typedef union {
- ucs4_t chr;
- _ure_ccl_t ccl;
-} _ure_sym_t;
-
-/*
- * This is a general element structure used for expressions and stack
- * elements.
- */
-typedef struct {
- ucs2_t reg;
- ucs2_t onstack;
- ucs2_t type;
- ucs2_t lhs;
- ucs2_t rhs;
-} _ure_elt_t;
-
-/*
- * This is a structure used to track a list or a stack of states.
- */
-typedef struct {
- ucs2_t *slist;
- ucs2_t slist_size;
- ucs2_t slist_used;
-} _ure_stlist_t;
-
-/*
- * Structure to track the list of unique states for a symbol
- * during reduction.
- */
-typedef struct {
- ucs2_t id;
- ucs2_t type;
- unsigned long mods;
- unsigned long props;
- _ure_sym_t sym;
- _ure_stlist_t states;
-} _ure_symtab_t;
-
-/*
- * Structure to hold a single state.
- */
-typedef struct {
- ucs2_t id;
- ucs2_t accepting;
- ucs2_t pad;
- _ure_stlist_t st;
- _ure_elt_t *trans;
- ucs2_t trans_size;
- ucs2_t trans_used;
-} _ure_state_t;
-
-/*
- * Structure used for keeping lists of states.
- */
-typedef struct {
- _ure_state_t *states;
- ucs2_t states_size;
- ucs2_t states_used;
-} _ure_statetable_t;
-
-/*
- * Structure to track pairs of DFA states when equivalent states are
- * merged.
- */
-typedef struct {
- ucs2_t l;
- ucs2_t r;
-} _ure_equiv_t;
-
-/*
- * Structure used for constructing the NFA and reducing to a minimal DFA.
- */
-typedef struct _ure_buffer_t {
- int reducing;
- int error;
- unsigned long flags;
-
- _ure_stlist_t stack;
-
- /*
- * Table of unique symbols encountered.
- */
- _ure_symtab_t *symtab;
- ucs2_t symtab_size;
- ucs2_t symtab_used;
-
- /*
- * Tracks the unique expressions generated for the NFA and when the NFA is
- * reduced.
- */
- _ure_elt_t *expr;
- ucs2_t expr_used;
- ucs2_t expr_size;
-
- /*
- * The reduced table of unique groups of NFA states.
- */
- _ure_statetable_t states;
-
- /*
- * Tracks states when equivalent states are merged.
- */
- _ure_equiv_t *equiv;
- ucs2_t equiv_used;
- ucs2_t equiv_size;
-} _ure_buffer_t;
-
-typedef struct {
- ucs2_t symbol;
- ucs2_t next_state;
-} _ure_trans_t;
-
-typedef struct {
- ucs2_t accepting;
- ucs2_t ntrans;
- _ure_trans_t *trans;
-} _ure_dstate_t;
-
-typedef struct _ure_dfa_t {
- unsigned long flags;
-
- _ure_symtab_t *syms;
- ucs2_t nsyms;
-
- _ure_dstate_t *states;
- ucs2_t nstates;
-
- _ure_trans_t *trans;
- ucs2_t ntrans;
-} _ure_dfa_t;
-
-/*************************************************************************
- *
- * Functions.
- *
- *************************************************************************/
-
-static void
-_ure_memmove(char *dest, char *src, unsigned long bytes)
-{
- long i, j;
-
- i = (long) bytes;
- j = i & 7;
- i = (i + 7) >> 3;
-
- /*
- * Do a memmove using Ye Olde Duff's Device for efficiency.
- */
- if (src < dest) {
- src += bytes;
- dest += bytes;
-
- switch (j) {
- case 0: do {
- *--dest = *--src;
- case 7: *--dest = *--src;
- case 6: *--dest = *--src;
- case 5: *--dest = *--src;
- case 4: *--dest = *--src;
- case 3: *--dest = *--src;
- case 2: *--dest = *--src;
- case 1: *--dest = *--src;
- } while (--i > 0);
- }
- } else if (src > dest) {
- switch (j) {
- case 0: do {
- *dest++ = *src++;
- case 7: *dest++ = *src++;
- case 6: *dest++ = *src++;
- case 5: *dest++ = *src++;
- case 4: *dest++ = *src++;
- case 3: *dest++ = *src++;
- case 2: *dest++ = *src++;
- case 1: *dest++ = *src++;
- } while (--i > 0);
- }
- }
-}
-
-static void
-_ure_push(ucs2_t v, _ure_buffer_t *b)
-{
- _ure_stlist_t *s;
-
- if (b == 0)
- return;
-
- /*
- * If the `reducing' parameter is non-zero, check to see if the value
- * passed is already on the stack.
- */
- if (b->reducing != 0 && b->expr[v].onstack != 0)
- return;
-
- s = &b->stack;
- if (s->slist_used == s->slist_size) {
- if (s->slist_size == 0)
- s->slist = (ucs2_t *) malloc(sizeof(ucs2_t) << 3);
- else
- s->slist = (ucs2_t *) realloc((char *) s->slist,
- sizeof(ucs2_t) * (s->slist_size + 8));
- s->slist_size += 8;
- }
- s->slist[s->slist_used++] = v;
-
- /*
- * If the `reducing' parameter is non-zero, flag the element as being on
- * the stack.
- */
- if (b->reducing != 0)
- b->expr[v].onstack = 1;
-}
-
-static ucs2_t
-_ure_peek(_ure_buffer_t *b)
-{
- if (b == 0 || b->stack.slist_used == 0)
- return _URE_NOOP;
-
- return b->stack.slist[b->stack.slist_used - 1];
-}
-
-static ucs2_t
-_ure_pop(_ure_buffer_t *b)
-{
- ucs2_t v;
-
- if (b == 0 || b->stack.slist_used == 0)
- return _URE_NOOP;
-
- v = b->stack.slist[--b->stack.slist_used];
- if (b->reducing)
- b->expr[v].onstack = 0;
-
- return v;
-}
-
-/*************************************************************************
- *
- * Start symbol parse functions.
- *
- *************************************************************************/
-
-/*
- * Parse a comma-separated list of integers that represent character
- * properties. Combine them into a mask that is returned in the `mask'
- * variable, and return the number of characters consumed.
- */
-static unsigned long
-_ure_prop_list(ucs2_t *pp, unsigned long limit, unsigned long *mask,
- _ure_buffer_t *b)
-{
- unsigned long n, m;
- ucs2_t *sp, *ep;
-
- sp = pp;
- ep = sp + limit;
-
- for (m = n = 0; b->error == _URE_OK && sp < ep; sp++) {
- if (*sp == ',') {
- /*
- * Encountered a comma, so select the next character property flag
- * and reset the number.
- */
- m |= cclass_flags[n];
- n = 0;
- } else if (*sp >= '0' && *sp <= '9')
- /*
- * Encountered a digit, so start or continue building the cardinal
- * that represents the character property flag.
- */
- n = (n * 10) + (*sp - '0');
- else
- /*
- * Encountered something that is not part of the property list.
- * Indicate that we are done.
- */
- break;
-
- /*
- * If a property number greater than 32 occurs, then there is a
- * problem. Most likely a missing comma separator.
- */
- if (n > 32)
- b->error = _URE_INVALID_PROPERTY;
- }
-
- if (b->error == _URE_OK && n != 0)
- m |= cclass_flags[n];
-
- /*
- * Set the mask that represents the group of character properties.
- */
- *mask = m;
-
- /*
- * Return the number of characters consumed.
- */
- return sp - pp;
-}
-
-/*
- * Collect a hex number with 1 to 4 digits and return the number
- * of characters used.
- */
-static unsigned long
-_ure_hex(ucs2_t *np, unsigned long limit, ucs4_t *n)
-{
- ucs2_t i;
- ucs2_t *sp, *ep;
- ucs4_t nn;
-
- sp = np;
- ep = sp + limit;
-
- for (nn = 0, i = 0; i < 4 && sp < ep; i++, sp++) {
- if (*sp >= '0' && *sp <= '9')
- nn = (nn << 4) + (*sp - '0');
- else if (*sp >= 'A' && *sp <= 'F')
- nn = (nn << 4) + ((*sp - 'A') + 10);
- else if (*sp >= 'a' && *sp <= 'f')
- nn = (nn << 4) + ((*sp - 'a') + 10);
- else
- /*
- * Encountered something that is not a hex digit.
- */
- break;
- }
-
- /*
- * Assign the character code collected and return the number of
- * characters used.
- */
- *n = nn;
-
- return sp - np;
-}
-
-/*
- * Insert a range into a character class, removing duplicates and ordering
- * them in increasing range-start order.
- */
-static void
-_ure_add_range(_ure_ccl_t *ccl, _ure_range_t *r, _ure_buffer_t *b)
-{
- ucs2_t i;
- ucs4_t tmp;
- _ure_range_t *rp;
-
- /*
- * If the `casefold' flag is set, then make sure both endpoints of the
- * range are converted to lower case.
- */
- if (b->flags & _URE_DFA_CASEFOLD) {
- r->min_code = _ure_tolower(r->min_code);
- r->max_code = _ure_tolower(r->max_code);
- }
-
- /*
- * Swap the range endpoints if they are not in increasing order.
- */
- if (r->min_code > r->max_code) {
- tmp = r->min_code;
- r->min_code = r->max_code;
- r->max_code = tmp;
- }
-
- for (i = 0, rp = ccl->ranges;
- i < ccl->ranges_used && r->min_code < rp->min_code; i++, rp++) ;
-
- /*
- * Check for a duplicate.
- */
- if (i < ccl->ranges_used &&
- r->min_code == rp->min_code && r->max_code == rp->max_code)
- return;
-
- if (ccl->ranges_used == ccl->ranges_size) {
- if (ccl->ranges_size == 0)
- ccl->ranges = (_ure_range_t *) malloc(sizeof(_ure_range_t) << 3);
- else
- ccl->ranges = (_ure_range_t *)
- realloc((char *) ccl->ranges,
- sizeof(_ure_range_t) * (ccl->ranges_size + 8));
- ccl->ranges_size += 8;
- }
-
- rp = ccl->ranges + ccl->ranges_used;
-
- if (i < ccl->ranges_used)
- _ure_memmove((char *) (rp + 1), (char *) rp,
- sizeof(_ure_range_t) * (ccl->ranges_used - i));
-
- ccl->ranges_used++;
- rp->min_code = r->min_code;
- rp->max_code = r->max_code;
-}
-
-#define _URE_ALPHA_MASK (_URE_UPPER|_URE_LOWER|_URE_OTHERLETTER|\
-_URE_MODIFIER|_URE_TITLE|_URE_NONSPACING|_URE_COMBINING)
-#define _URE_ALNUM_MASK (_URE_ALPHA_MASK|_URE_NUMDIGIT)
-#define _URE_PUNCT_MASK (_URE_DASHPUNCT|_URE_OPENPUNCT|_URE_CLOSEPUNCT|\
-_URE_OTHERPUNCT)
-#define _URE_GRAPH_MASK (_URE_NUMDIGIT|_URE_NUMOTHER|_URE_ALPHA_MASK|\
-_URE_MATHSYM|_URE_CURRENCYSYM|_URE_OTHERSYM)
-#define _URE_PRINT_MASK (_URE_GRAPH_MASK|_URE_SPACESEP)
-#define _URE_SPACE_MASK (_URE_SPACESEP|_URE_LINESEP|_URE_PARASEP)
-
-typedef void (*_ure_cclsetup_t)(
- _ure_symtab_t *sym,
- unsigned long mask,
- _ure_buffer_t *b
-);
-
-typedef struct {
- ucs2_t key;
- unsigned int len : 8;
- unsigned int next : 8;
- _ure_cclsetup_t func;
- unsigned long mask;
-} _ure_trie_t;
-
-static void
-_ure_ccl_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b)
-{
- sym->props |= mask;
-}
-
-static void
-_ure_space_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b)
-{
- _ure_range_t range;
-
- sym->props |= mask;
-
- /*
- * Add the additional characters needed for handling isspace().
- */
- range.min_code = range.max_code = '\t';
- _ure_add_range(&sym->sym.ccl, &range, b);
- range.min_code = range.max_code = '\r';
- _ure_add_range(&sym->sym.ccl, &range, b);
- range.min_code = range.max_code = '\n';
- _ure_add_range(&sym->sym.ccl, &range, b);
- range.min_code = range.max_code = '\f';
- _ure_add_range(&sym->sym.ccl, &range, b);
- range.min_code = range.max_code = 0xfeff;
- _ure_add_range(&sym->sym.ccl, &range, b);
-}
-
-static void
-_ure_xdigit_setup(_ure_symtab_t *sym, unsigned long mask, _ure_buffer_t *b)
-{
- _ure_range_t range;
-
- /*
- * Add the additional characters needed for handling isxdigit().
- */
- range.min_code = '0';
- range.max_code = '9';
- _ure_add_range(&sym->sym.ccl, &range, b);
- range.min_code = 'A';
- range.max_code = 'F';
- _ure_add_range(&sym->sym.ccl, &range, b);
- range.min_code = 'a';
- range.max_code = 'f';
- _ure_add_range(&sym->sym.ccl, &range, b);
-}
-
-static const _ure_trie_t cclass_trie[] = {
- {0x003a, 1, 1, 0, 0},
- {0x0061, 9, 10, 0, 0},
- {0x0063, 8, 19, 0, 0},
- {0x0064, 7, 24, 0, 0},
- {0x0067, 6, 29, 0, 0},
- {0x006c, 5, 34, 0, 0},
- {0x0070, 4, 39, 0, 0},
- {0x0073, 3, 49, 0, 0},
- {0x0075, 2, 54, 0, 0},
- {0x0078, 1, 59, 0, 0},
- {0x006c, 1, 11, 0, 0},
- {0x006e, 2, 13, 0, 0},
- {0x0070, 1, 16, 0, 0},
- {0x0075, 1, 14, 0, 0},
- {0x006d, 1, 15, 0, 0},
- {0x003a, 1, 16, _ure_ccl_setup, _URE_ALNUM_MASK},
- {0x0068, 1, 17, 0, 0},
- {0x0061, 1, 18, 0, 0},
- {0x003a, 1, 19, _ure_ccl_setup, _URE_ALPHA_MASK},
- {0x006e, 1, 20, 0, 0},
- {0x0074, 1, 21, 0, 0},
- {0x0072, 1, 22, 0, 0},
- {0x006c, 1, 23, 0, 0},
- {0x003a, 1, 24, _ure_ccl_setup, _URE_CNTRL},
- {0x0069, 1, 25, 0, 0},
- {0x0067, 1, 26, 0, 0},
- {0x0069, 1, 27, 0, 0},
- {0x0074, 1, 28, 0, 0},
- {0x003a, 1, 29, _ure_ccl_setup, _URE_NUMDIGIT},
- {0x0072, 1, 30, 0, 0},
- {0x0061, 1, 31, 0, 0},
- {0x0070, 1, 32, 0, 0},
- {0x0068, 1, 33, 0, 0},
- {0x003a, 1, 34, _ure_ccl_setup, _URE_GRAPH_MASK},
- {0x006f, 1, 35, 0, 0},
- {0x0077, 1, 36, 0, 0},
- {0x0065, 1, 37, 0, 0},
- {0x0072, 1, 38, 0, 0},
- {0x003a, 1, 39, _ure_ccl_setup, _URE_LOWER},
- {0x0072, 2, 41, 0, 0},
- {0x0075, 1, 45, 0, 0},
- {0x0069, 1, 42, 0, 0},
- {0x006e, 1, 43, 0, 0},
- {0x0074, 1, 44, 0, 0},
- {0x003a, 1, 45, _ure_ccl_setup, _URE_PRINT_MASK},
- {0x006e, 1, 46, 0, 0},
- {0x0063, 1, 47, 0, 0},
- {0x0074, 1, 48, 0, 0},
- {0x003a, 1, 49, _ure_ccl_setup, _URE_PUNCT_MASK},
- {0x0070, 1, 50, 0, 0},
- {0x0061, 1, 51, 0, 0},
- {0x0063, 1, 52, 0, 0},
- {0x0065, 1, 53, 0, 0},
- {0x003a, 1, 54, _ure_space_setup, _URE_SPACE_MASK},
- {0x0070, 1, 55, 0, 0},
- {0x0070, 1, 56, 0, 0},
- {0x0065, 1, 57, 0, 0},
- {0x0072, 1, 58, 0, 0},
- {0x003a, 1, 59, _ure_ccl_setup, _URE_UPPER},
- {0x0064, 1, 60, 0, 0},
- {0x0069, 1, 61, 0, 0},
- {0x0067, 1, 62, 0, 0},
- {0x0069, 1, 63, 0, 0},
- {0x0074, 1, 64, 0, 0},
- {0x003a, 1, 65, _ure_xdigit_setup, 0},
-};
-
-/*
- * Probe for one of the POSIX colon delimited character classes in the static
- * trie.
- */
-static unsigned long
-_ure_posix_ccl(ucs2_t *cp, unsigned long limit, _ure_symtab_t *sym,
- _ure_buffer_t *b)
-{
- int i;
- unsigned long n;
- const _ure_trie_t *tp;
- ucs2_t *sp, *ep;
-
- /*
- * If the number of characters left is less than 7, then this cannot be
- * interpreted as one of the colon delimited classes.
- */
- if (limit < 7)
- return 0;
-
- sp = cp;
- ep = sp + limit;
- tp = cclass_trie;
- for (i = 0; sp < ep && i < 8; i++, sp++) {
- n = tp->len;
-
- for (; n > 0 && tp->key != *sp; tp++, n--) ;
-
- if (n == 0)
- return 0;
-
- if (*sp == ':' && (i == 6 || i == 7)) {
- sp++;
- break;
- }
- if (sp + 1 < ep)
- tp = cclass_trie + tp->next;
- }
- if (tp->func == 0)
- return 0;
-
- (*tp->func)(sym, tp->mask, b);
-
- return sp - cp;
-}
-
-/*
- * Construct a list of ranges and return the number of characters consumed.
- */
-static unsigned long
-_ure_cclass(ucs2_t *cp, unsigned long limit, _ure_symtab_t *symp,
- _ure_buffer_t *b)
-{
- int range_end;
- unsigned long n;
- ucs2_t *sp, *ep;
- ucs4_t c, last;
- _ure_ccl_t *cclp;
- _ure_range_t range;
-
- sp = cp;
- ep = sp + limit;
-
- if (*sp == '^') {
- symp->type = _URE_NCCLASS;
- sp++;
- } else
- symp->type = _URE_CCLASS;
-
- for (last = 0, range_end = 0;
- b->error == _URE_OK && sp < ep && *sp != ']'; ) {
- c = *sp++;
- if (c == '\\') {
- if (sp == ep) {
- /*
- * The EOS was encountered when expecting the reverse solidus
- * to be followed by the character it is escaping. Set an
- * error code and return the number of characters consumed up
- * to this point.
- */
- b->error = _URE_UNEXPECTED_EOS;
- return sp - cp;
- }
-
- c = *sp++;
- switch (c) {
- case 'a':
- c = 0x07;
- break;
- case 'b':
- c = 0x08;
- break;
- case 'f':
- c = 0x0c;
- break;
- case 'n':
- c = 0x0a;
- break;
- case 'r':
- c = 0x0d;
- break;
- case 't':
- c = 0x09;
- break;
- case 'v':
- c = 0x0b;
- break;
- case 'p':
- case 'P':
- sp += _ure_prop_list(sp, ep - sp, &symp->props, b);
- /*
- * Invert the bit mask of the properties if this is a negated
- * character class or if 'P' is used to specify a list of
- * character properties that should *not* match in a
- * character class.
- */
- if (c == 'P')
- symp->props = ~symp->props;
- continue;
- break;
- case 'x':
- case 'X':
- case 'u':
- case 'U':
- if (sp < ep &&
- ((*sp >= '0' && *sp <= '9') ||
- (*sp >= 'A' && *sp <= 'F') ||
- (*sp >= 'a' && *sp <= 'f')))
- sp += _ure_hex(sp, ep - sp, &c);
- }
- } else if (c == ':') {
- /*
- * Probe for a POSIX colon delimited character class.
- */
- sp--;
- if ((n = _ure_posix_ccl(sp, ep - sp, symp, b)) == 0)
- sp++;
- else {
- sp += n;
- continue;
- }
- }
-
- cclp = &symp->sym.ccl;
-
- /*
- * Check to see if the current character is a low surrogate that needs
- * to be combined with a preceding high surrogate.
- */
- if (last != 0) {
- if (c >= 0xdc00 && c <= 0xdfff)
- /*
- * Construct the UTF16 character code.
- */
- c = 0x10000 + (((last & 0x03ff) << 10) | (c & 0x03ff));
- else {
- /*
- * Add the isolated high surrogate to the range.
- */
- if (range_end == 1)
- range.max_code = last & 0xffff;
- else
- range.min_code = range.max_code = last & 0xffff;
-
- _ure_add_range(cclp, &range, b);
- range_end = 0;
- }
- }
-
- /*
- * Clear the last character code.
- */
- last = 0;
-
- /*
- * This slightly awkward code handles the different cases needed to
- * construct a range.
- */
- if (c >= 0xd800 && c <= 0xdbff) {
- /*
- * If the high surrogate is followed by a range indicator, simply
- * add it as the range start. Otherwise, save it in case the next
- * character is a low surrogate.
- */
- if (*sp == '-') {
- sp++;
- range.min_code = c;
- range_end = 1;
- } else
- last = c;
- } else if (range_end == 1) {
- range.max_code = c;
- _ure_add_range(cclp, &range, b);
- range_end = 0;
- } else {
- range.min_code = range.max_code = c;
- if (*sp == '-') {
- sp++;
- range_end = 1;
- } else
- _ure_add_range(cclp, &range, b);
- }
- }
-
- if (sp < ep && *sp == ']')
- sp++;
- else
- /*
- * The parse was not terminated by the character class close symbol
- * (']'), so set an error code.
- */
- b->error = _URE_CCLASS_OPEN;
-
- return sp - cp;
-}
-
-/*
- * Probe for a low surrogate hex code.
- */
-static unsigned long
-_ure_probe_ls(ucs2_t *ls, unsigned long limit, ucs4_t *c)
-{
- ucs4_t i, code;
- ucs2_t *sp, *ep;
-
- for (i = code = 0, sp = ls, ep = sp + limit; i < 4 && sp < ep; sp++) {
- if (*sp >= '0' && *sp <= '9')
- code = (code << 4) + (*sp - '0');
- else if (*sp >= 'A' && *sp <= 'F')
- code = (code << 4) + ((*sp - 'A') + 10);
- else if (*sp >= 'a' && *sp <= 'f')
- code = (code << 4) + ((*sp - 'a') + 10);
- else
- break;
- }
-
- *c = code;
- return (0xdc00 <= code && code <= 0xdfff) ? sp - ls : 0;
-}
-
-static unsigned long
-_ure_compile_symbol(ucs2_t *sym, unsigned long limit, _ure_symtab_t *symp,
- _ure_buffer_t *b)
-{
- ucs4_t c;
- ucs2_t *sp, *ep;
-
- sp = sym;
- ep = sym + limit;
-
- if ((c = *sp++) == '\\') {
-
- if (sp == ep) {
- /*
- * The EOS was encountered when expecting the reverse solidus to
- * be followed by the character it is escaping. Set an error code
- * and return the number of characters consumed up to this point.
- */
- b->error = _URE_UNEXPECTED_EOS;
- return sp - sym;
- }
-
- c = *sp++;
- switch (c) {
- case 'p':
- case 'P':
- symp->type = (c == 'p') ? _URE_CCLASS : _URE_NCCLASS;
- sp += _ure_prop_list(sp, ep - sp, &symp->props, b);
- break;
- case 'a':
- symp->type = _URE_CHAR;
- symp->sym.chr = 0x07;
- break;
- case 'b':
- symp->type = _URE_CHAR;
- symp->sym.chr = 0x08;
- break;
- case 'f':
- symp->type = _URE_CHAR;
- symp->sym.chr = 0x0c;
- break;
- case 'n':
- symp->type = _URE_CHAR;
- symp->sym.chr = 0x0a;
- break;
- case 'r':
- symp->type = _URE_CHAR;
- symp->sym.chr = 0x0d;
- break;
- case 't':
- symp->type = _URE_CHAR;
- symp->sym.chr = 0x09;
- break;
- case 'v':
- symp->type = _URE_CHAR;
- symp->sym.chr = 0x0b;
- break;
- case 'x':
- case 'X':
- case 'u':
- case 'U':
- /*
- * Collect between 1 and 4 digits representing a UCS2 code. Fall
- * through to the next case.
- */
- if (sp < ep &&
- ((*sp >= '0' && *sp <= '9') ||
- (*sp >= 'A' && *sp <= 'F') ||
- (*sp >= 'a' && *sp <= 'f')))
- sp += _ure_hex(sp, ep - sp, &c);
- /* FALLTHROUGH */
- default:
- /*
- * Simply add an escaped character here.
- */
- symp->type = _URE_CHAR;
- symp->sym.chr = c;
- }
- } else if (c == '^' || c == '$')
- /*
- * Handle the BOL and EOL anchors. This actually consists simply of
- * setting a flag that indicates that the user supplied anchor match
- * function should be called. This needs to be done instead of simply
- * matching line/paragraph separators because beginning-of-text and
- * end-of-text tests are needed as well.
- */
- symp->type = (c == '^') ? _URE_BOL_ANCHOR : _URE_EOL_ANCHOR;
- else if (c == '[')
- /*
- * Construct a character class.
- */
- sp += _ure_cclass(sp, ep - sp, symp, b);
- else if (c == '.')
- symp->type = _URE_ANY_CHAR;
- else {
- symp->type = _URE_CHAR;
- symp->sym.chr = c;
- }
-
- /*
- * If the symbol type happens to be a character and is a high surrogate,
- * then probe forward to see if it is followed by a low surrogate that
- * needs to be added.
- */
- if (sp < ep && symp->type == _URE_CHAR &&
- 0xd800 <= symp->sym.chr && symp->sym.chr <= 0xdbff) {
-
- if (0xdc00 <= *sp && *sp <= 0xdfff) {
- symp->sym.chr = 0x10000 + (((symp->sym.chr & 0x03ff) << 10) |
- (*sp & 0x03ff));
- sp++;
- } else if (*sp == '\\' && (*(sp + 1) == 'x' || *(sp + 1) == 'X' ||
- *(sp + 1) == 'u' || *(sp + 1) == 'U')) {
- sp += _ure_probe_ls(sp + 2, ep - (sp + 2), &c);
- if (0xdc00 <= c && c <= 0xdfff) {
- /*
- * Take into account the \[xu] in front of the hex code.
- */
- sp += 2;
- symp->sym.chr = 0x10000 + (((symp->sym.chr & 0x03ff) << 10) |
- (c & 0x03ff));
- }
- }
- }
-
- /*
- * Last, make sure any _URE_CHAR type symbols are changed to lower case if
- * the `casefold' flag is set.
- */
- if ((b->flags & _URE_DFA_CASEFOLD) && symp->type == _URE_CHAR)
- symp->sym.chr = _ure_tolower(symp->sym.chr);
-
- /*
- * If the symbol constructed is anything other than one of the anchors,
- * make sure the _URE_DFA_BLANKLINE flag is removed.
- */
- if (symp->type != _URE_BOL_ANCHOR && symp->type != _URE_EOL_ANCHOR)
- b->flags &= ~_URE_DFA_BLANKLINE;
-
- /*
- * Return the number of characters consumed.
- */
- return sp - sym;
-}
-
-static int
-_ure_sym_neq(_ure_symtab_t *a, _ure_symtab_t *b)
-{
- if (a->type != b->type || a->mods != b->mods || a->props != b->props)
- return 1;
-
- if (a->type == _URE_CCLASS || a->type == _URE_NCCLASS) {
- if (a->sym.ccl.ranges_used != b->sym.ccl.ranges_used)
- return 1;
- if (a->sym.ccl.ranges_used > 0 &&
- memcmp((char *) a->sym.ccl.ranges, (char *) b->sym.ccl.ranges,
- sizeof(_ure_range_t) * a->sym.ccl.ranges_used) != 0)
- return 1;
- } else if (a->type == _URE_CHAR && a->sym.chr != b->sym.chr)
- return 1;
- return 0;
-}
-
-/*
- * Construct a symbol, but only keep unique symbols.
- */
-static ucs2_t
-_ure_make_symbol(ucs2_t *sym, unsigned long limit, unsigned long *consumed,
- _ure_buffer_t *b)
-{
- ucs2_t i;
- _ure_symtab_t *sp, symbol;
-
- /*
- * Build the next symbol so we can test to see if it is already in the
- * symbol table.
- */
- (void) memset((char *) &symbol, '\0', sizeof(_ure_symtab_t));
- *consumed = _ure_compile_symbol(sym, limit, &symbol, b);
-
- /*
- * Check to see if the symbol exists.
- */
- for (i = 0, sp = b->symtab;
- i < b->symtab_used && _ure_sym_neq(&symbol, sp); i++, sp++) ;
-
- if (i < b->symtab_used) {
- /*
- * Free up any ranges used for the symbol.
- */
- if ((symbol.type == _URE_CCLASS || symbol.type == _URE_NCCLASS) &&
- symbol.sym.ccl.ranges_size > 0)
- free((char *) symbol.sym.ccl.ranges);
-
- return b->symtab[i].id;
- }
-
- /*
- * Need to add the new symbol.
- */
- if (b->symtab_used == b->symtab_size) {
- if (b->symtab_size == 0)
- b->symtab = (_ure_symtab_t *) malloc(sizeof(_ure_symtab_t) << 3);
- else
- b->symtab = (_ure_symtab_t *)
- realloc((char *) b->symtab,
- sizeof(_ure_symtab_t) * (b->symtab_size + 8));
- sp = b->symtab + b->symtab_size;
- (void) memset((char *) sp, '\0', sizeof(_ure_symtab_t) << 3);
- b->symtab_size += 8;
- }
-
- symbol.id = b->symtab_used++;
- (void) memmove((char *) &b->symtab[symbol.id], (char *) &symbol,
- sizeof(_ure_symtab_t));
-
- return symbol.id;
-}
-
-/*************************************************************************
- *
- * End symbol parse functions.
- *
- *************************************************************************/
-
-static ucs2_t
-_ure_make_expr(ucs2_t type, ucs2_t lhs, ucs2_t rhs, _ure_buffer_t *b)
-{
- ucs2_t i;
-
- if (b == 0)
- return _URE_NOOP;
-
- /*
- * Determine if the expression already exists or not.
- */
- for (i = 0; i < b->expr_used; i++) {
- if (b->expr[i].type == type && b->expr[i].lhs == lhs &&
- b->expr[i].rhs == rhs)
- break;
- }
- if (i < b->expr_used)
- return i;
-
- /*
- * Need to add a new expression.
- */
- if (b->expr_used == b->expr_size) {
- if (b->expr_size == 0)
- b->expr = (_ure_elt_t *) malloc(sizeof(_ure_elt_t) << 3);
- else
- b->expr = (_ure_elt_t *)
- realloc((char *) b->expr,
- sizeof(_ure_elt_t) * (b->expr_size + 8));
- b->expr_size += 8;
- }
-
- b->expr[b->expr_used].onstack = 0;
- b->expr[b->expr_used].type = type;
- b->expr[b->expr_used].lhs = lhs;
- b->expr[b->expr_used].rhs = rhs;
-
- return b->expr_used++;
-}
-
-static unsigned char spmap[] = {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
-
-#define _ure_isspecial(cc) ((cc) > 0x20 && (cc) < 0x7f && \
- (spmap[(cc) >> 3] & (1 << ((cc) & 7))))
-
-/*
- * Convert the regular expression into an NFA in a form that will be easy to
- * reduce to a DFA. The starting state for the reduction will be returned.
- */
-static ucs2_t
-_ure_re2nfa(ucs2_t *re, unsigned long relen, _ure_buffer_t *b)
-{
- ucs2_t c, state, top, sym, *sp, *ep;
- unsigned long used;
-
- state = _URE_NOOP;
-
- sp = re;
- ep = sp + relen;
- while (b->error == _URE_OK && sp < ep) {
- c = *sp++;
- switch (c) {
- case '(':
- _ure_push(_URE_PAREN, b);
- break;
- case ')':
- /*
- * Check for the case of too many close parentheses.
- */
- if (_ure_peek(b) == _URE_NOOP) {
- b->error = _URE_UNBALANCED_GROUP;
- break;
- }
-
- while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)
- /*
- * Make an expression with the AND or OR operator and its right
- * hand side.
- */
- state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);
-
- /*
- * Remove the _URE_PAREN off the stack.
- */
- (void) _ure_pop(b);
- break;
- case '*':
- state = _ure_make_expr(_URE_STAR, state, _URE_NOOP, b);
- break;
- case '+':
- state = _ure_make_expr(_URE_PLUS, state, _URE_NOOP, b);
- break;
- case '?':
- state = _ure_make_expr(_URE_QUEST, state, _URE_NOOP, b);
- break;
- case '|':
- while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)
- /*
- * Make an expression with the AND or OR operator and its right
- * hand side.
- */
- state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);
-
- _ure_push(state, b);
- _ure_push(_URE_OR, b);
- break;
- default:
- sp--;
- sym = _ure_make_symbol(sp, ep - sp, &used, b);
- sp += used;
- state = _ure_make_expr(_URE_SYMBOL, sym, _URE_NOOP, b);
- break;
- }
-
- if (c != '(' && c != '|' && sp < ep &&
- (!_ure_isspecial(*sp) || *sp == '(')) {
- _ure_push(state, b);
- _ure_push(_URE_AND, b);
- }
- }
- while ((top = _ure_peek(b)) == _URE_AND || top == _URE_OR)
- /*
- * Make an expression with the AND or OR operator and its right
- * hand side.
- */
- state = _ure_make_expr(_ure_pop(b), _ure_pop(b), state, b);
-
- if (b->stack.slist_used > 0)
- b->error = _URE_UNBALANCED_GROUP;
-
- return (b->error == _URE_OK) ? state : _URE_NOOP;
-}
-
-static void
-_ure_add_symstate(ucs2_t sym, ucs2_t state, _ure_buffer_t *b)
-{
- ucs2_t i, *stp;
- _ure_symtab_t *sp;
-
- /*
- * Locate the symbol in the symbol table so the state can be added.
- * If the symbol doesn't exist, then a real problem exists.
- */
- for (i = 0, sp = b->symtab; i < b->symtab_used && sym != sp->id;
- i++, sp++) ;
-
- /*
- * Now find out if the state exists in the symbol's state list.
- */
- for (i = 0, stp = sp->states.slist;
- i < sp->states.slist_used && state > *stp; i++, stp++) ;
-
- if (i == sp->states.slist_used || state < *stp) {
- /*
- * Need to add the state in order.
- */
- if (sp->states.slist_used == sp->states.slist_size) {
- if (sp->states.slist_size == 0)
- sp->states.slist = (ucs2_t *) malloc(sizeof(ucs2_t) << 3);
- else
- sp->states.slist = (ucs2_t *)
- realloc((char *) sp->states.slist,
- sizeof(ucs2_t) * (sp->states.slist_size + 8));
- sp->states.slist_size += 8;
- }
- if (i < sp->states.slist_used)
- (void) _ure_memmove((char *) (sp->states.slist + i + 1),
- (char *) (sp->states.slist + i),
- sizeof(ucs2_t) * (sp->states.slist_used - i));
- sp->states.slist[i] = state;
- sp->states.slist_used++;
- }
-}
-
-static ucs2_t
-_ure_add_state(ucs2_t nstates, ucs2_t *states, _ure_buffer_t *b)
-{
- ucs2_t i;
- _ure_state_t *sp;
-
- for (i = 0, sp = b->states.states; i < b->states.states_used; i++, sp++) {
- if (sp->st.slist_used == nstates &&
- memcmp((char *) states, (char *) sp->st.slist,
- sizeof(ucs2_t) * nstates) == 0)
- break;
- }
-
- if (i == b->states.states_used) {
- /*
- * Need to add a new DFA state (set of NFA states).
- */
- if (b->states.states_used == b->states.states_size) {
- if (b->states.states_size == 0)
- b->states.states = (_ure_state_t *)
- malloc(sizeof(_ure_state_t) << 3);
- else
- b->states.states = (_ure_state_t *)
- realloc((char *) b->states.states,
- sizeof(_ure_state_t) * (b->states.states_size + 8));
- sp = b->states.states + b->states.states_size;
- (void) memset((char *) sp, '\0', sizeof(_ure_state_t) << 3);
- b->states.states_size += 8;
- }
-
- sp = b->states.states + b->states.states_used++;
- sp->id = i;
-
- if (sp->st.slist_used + nstates > sp->st.slist_size) {
- if (sp->st.slist_size == 0)
- sp->st.slist = (ucs2_t *)
- malloc(sizeof(ucs2_t) * (sp->st.slist_used + nstates));
- else
- sp->st.slist = (ucs2_t *)
- realloc((char *) sp->st.slist,
- sizeof(ucs2_t) * (sp->st.slist_used + nstates));
- sp->st.slist_size = sp->st.slist_used + nstates;
- }
- sp->st.slist_used = nstates;
- (void) memmove((char *) sp->st.slist, (char *) states,
- sizeof(ucs2_t) * nstates);
- }
-
- /*
- * Return the ID of the DFA state representing a group of NFA states.
- */
- return i;
-}
-
-static void
-_ure_reduce(ucs2_t start, _ure_buffer_t *b)
-{
- ucs2_t i, j, state, eval, syms, rhs;
- ucs2_t s1, s2, ns1, ns2;
- _ure_state_t *sp;
- _ure_symtab_t *smp;
-
- b->reducing = 1;
-
- /*
- * Add the starting state for the reduction.
- */
- _ure_add_state(1, &start, b);
-
- /*
- * Process each set of NFA states that get created.
- */
- for (i = 0; i < b->states.states_used; i++) {
- sp = b->states.states + i;
-
- /*
- * Push the current states on the stack.
- */
- for (j = 0; j < sp->st.slist_used; j++)
- _ure_push(sp->st.slist[j], b);
-
- /*
- * Reduce the NFA states.
- */
- for (j = sp->accepting = syms = 0; j < b->stack.slist_used; j++) {
- state = b->stack.slist[j];
- eval = 1;
-
- /*
- * This inner loop is the iterative equivalent of recursively
- * reducing subexpressions generated as a result of a reduction.
- */
- while (eval) {
- switch (b->expr[state].type) {
- case _URE_SYMBOL:
- ns1 = _ure_make_expr(_URE_ONE, _URE_NOOP, _URE_NOOP, b);
- _ure_add_symstate(b->expr[state].lhs, ns1, b);
- syms++;
- eval = 0;
- break;
- case _URE_ONE:
- sp->accepting = 1;
- eval = 0;
- break;
- case _URE_QUEST:
- s1 = b->expr[state].lhs;
- ns1 = _ure_make_expr(_URE_ONE, _URE_NOOP, _URE_NOOP, b);
- state = _ure_make_expr(_URE_OR, ns1, s1, b);
- break;
- case _URE_PLUS:
- s1 = b->expr[state].lhs;
- ns1 = _ure_make_expr(_URE_STAR, s1, _URE_NOOP, b);
- state = _ure_make_expr(_URE_AND, s1, ns1, b);
- break;
- case _URE_STAR:
- s1 = b->expr[state].lhs;
- ns1 = _ure_make_expr(_URE_ONE, _URE_NOOP, _URE_NOOP, b);
- ns2 = _ure_make_expr(_URE_PLUS, s1, _URE_NOOP, b);
- state = _ure_make_expr(_URE_OR, ns1, ns2, b);
- break;
- case _URE_OR:
- s1 = b->expr[state].lhs;
- s2 = b->expr[state].rhs;
- _ure_push(s1, b);
- _ure_push(s2, b);
- eval = 0;
- break;
- case _URE_AND:
- s1 = b->expr[state].lhs;
- s2 = b->expr[state].rhs;
- switch (b->expr[s1].type) {
- case _URE_SYMBOL:
- _ure_add_symstate(b->expr[s1].lhs, s2, b);
- syms++;
- eval = 0;
- break;
- case _URE_ONE:
- state = s2;
- break;
- case _URE_QUEST:
- ns1 = b->expr[s1].lhs;
- ns2 = _ure_make_expr(_URE_AND, ns1, s2, b);
- state = _ure_make_expr(_URE_OR, s2, ns2, b);
- break;
- case _URE_PLUS:
- ns1 = b->expr[s1].lhs;
- ns2 = _ure_make_expr(_URE_OR, s2, state, b);
- state = _ure_make_expr(_URE_AND, ns1, ns2, b);
- break;
- case _URE_STAR:
- ns1 = b->expr[s1].lhs;
- ns2 = _ure_make_expr(_URE_AND, ns1, state, b);
- state = _ure_make_expr(_URE_OR, s2, ns2, b);
- break;
- case _URE_OR:
- ns1 = b->expr[s1].lhs;
- ns2 = b->expr[s1].rhs;
- ns1 = _ure_make_expr(_URE_AND, ns1, s2, b);
- ns2 = _ure_make_expr(_URE_AND, ns2, s2, b);
- state = _ure_make_expr(_URE_OR, ns1, ns2, b);
- break;
- case _URE_AND:
- ns1 = b->expr[s1].lhs;
- ns2 = b->expr[s1].rhs;
- ns2 = _ure_make_expr(_URE_AND, ns2, s2, b);
- state = _ure_make_expr(_URE_AND, ns1, ns2, b);
- break;
- }
- }
- }
- }
-
- /*
- * Clear the state stack.
- */
- while (_ure_pop(b) != _URE_NOOP) ;
-
- /*
- * Reset the state pointer because the reduction may have moved it
- * during a reallocation.
- */
- sp = b->states.states + i;
-
- /*
- * Generate the DFA states for the symbols collected during the
- * current reduction.
- */
- if (sp->trans_used + syms > sp->trans_size) {
- if (sp->trans_size == 0)
- sp->trans = (_ure_elt_t *)
- malloc(sizeof(_ure_elt_t) * (sp->trans_used + syms));
- else
- sp->trans = (_ure_elt_t *)
- realloc((char *) sp->trans,
- sizeof(_ure_elt_t) * (sp->trans_used + syms));
- sp->trans_size = sp->trans_used + syms;
- }
-
- /*
- * Go through the symbol table and generate the DFA state transitions
- * for each symbol that has collected NFA states.
- */
- for (j = syms = 0, smp = b->symtab; j < b->symtab_used; j++, smp++) {
- sp = b->states.states + i;
-
- if (smp->states.slist_used > 0) {
- sp->trans[syms].lhs = smp->id;
- rhs = _ure_add_state(smp->states.slist_used,
- smp->states.slist, b);
- /*
- * Reset the state pointer in case the reallocation moves it
- * in memory.
- */
- sp = b->states.states + i;
- sp->trans[syms].rhs = rhs;
-
- smp->states.slist_used = 0;
- syms++;
- }
- }
-
- /*
- * Set the number of transitions actually used.
- */
- sp->trans_used = syms;
- }
- b->reducing = 0;
-}
-
-static void
-_ure_add_equiv(ucs2_t l, ucs2_t r, _ure_buffer_t *b)
-{
- ucs2_t tmp;
-
- l = b->states.states[l].id;
- r = b->states.states[r].id;
-
- if (l == r)
- return;
-
- if (l > r) {
- tmp = l;
- l = r;
- r = tmp;
- }
-
- /*
- * Check to see if the equivalence pair already exists.
- */
- for (tmp = 0; tmp < b->equiv_used &&
- (b->equiv[tmp].l != l || b->equiv[tmp].r != r);
- tmp++) ;
-
- if (tmp < b->equiv_used)
- return;
-
- if (b->equiv_used == b->equiv_size) {
- if (b->equiv_size == 0)
- b->equiv = (_ure_equiv_t *) malloc(sizeof(_ure_equiv_t) << 3);
- else
- b->equiv = (_ure_equiv_t *) realloc((char *) b->equiv,
- sizeof(_ure_equiv_t) *
- (b->equiv_size + 8));
- b->equiv_size += 8;
- }
- b->equiv[b->equiv_used].l = l;
- b->equiv[b->equiv_used].r = r;
- b->equiv_used++;
-}
-
-/*
- * Merge the DFA states that are equivalent.
- */
-static void
-_ure_merge_equiv(_ure_buffer_t *b)
-{
- ucs2_t i, j, k, eq, done;
- _ure_state_t *sp1, *sp2, *ls, *rs;
-
- for (i = 0; i < b->states.states_used; i++) {
- sp1 = b->states.states + i;
- if (sp1->id != i)
- continue;
- for (j = 0; j < i; j++) {
- sp2 = b->states.states + j;
- if (sp2->id != j)
- continue;
- b->equiv_used = 0;
- _ure_add_equiv(i, j, b);
- for (eq = 0, done = 0; eq < b->equiv_used; eq++) {
- ls = b->states.states + b->equiv[eq].l;
- rs = b->states.states + b->equiv[eq].r;
- if (ls->accepting != rs->accepting ||
- ls->trans_used != rs->trans_used) {
- done = 1;
- break;
- }
- for (k = 0; k < ls->trans_used &&
- ls->trans[k].lhs == rs->trans[k].lhs; k++) ;
- if (k < ls->trans_used) {
- done = 1;
- break;
- }
-
- for (k = 0; k < ls->trans_used; k++)
- _ure_add_equiv(ls->trans[k].rhs, rs->trans[k].rhs, b);
- }
- if (done == 0)
- break;
- }
- for (eq = 0; j < i && eq < b->equiv_used; eq++)
- b->states.states[b->equiv[eq].r].id =
- b->states.states[b->equiv[eq].l].id;
- }
-
- /*
- * Renumber the states appropriately.
- */
- for (i = eq = 0, sp1 = b->states.states; i < b->states.states_used;
- sp1++, i++)
- sp1->id = (sp1->id == i) ? eq++ : b->states.states[sp1->id].id;
-}
-
-/*************************************************************************
- *
- * API.
- *
- *************************************************************************/
-
-ure_buffer_t
-ure_buffer_create(void)
-{
- ure_buffer_t b;
-
- b = (ure_buffer_t) calloc(1, sizeof(_ure_buffer_t));
-
- return b;
-}
-
-void
-ure_buffer_free(ure_buffer_t buf)
-{
- unsigned long i;
-
- if (buf == 0)
- return;
-
- if (buf->stack.slist_size > 0)
- free((char *) buf->stack.slist);
-
- if (buf->expr_size > 0)
- free((char *) buf->expr);
-
- for (i = 0; i < buf->symtab_size; i++) {
- if (buf->symtab[i].states.slist_size > 0)
- free((char *) buf->symtab[i].states.slist);
- }
-
- if (buf->symtab_size > 0)
- free((char *) buf->symtab);
-
- for (i = 0; i < buf->states.states_size; i++) {
- if (buf->states.states[i].trans_size > 0)
- free((char *) buf->states.states[i].trans);
- if (buf->states.states[i].st.slist_size > 0)
- free((char *) buf->states.states[i].st.slist);
- }
-
- if (buf->states.states_size > 0)
- free((char *) buf->states.states);
-
- if (buf->equiv_size > 0)
- free((char *) buf->equiv);
-
- free((char *) buf);
-}
-
-ure_dfa_t
-ure_compile(ucs2_t *re, unsigned long relen, int casefold, ure_buffer_t buf)
-{
- ucs2_t i, j, state;
- _ure_state_t *sp;
- _ure_dstate_t *dsp;
- _ure_trans_t *tp;
- ure_dfa_t dfa;
-
- if (re == 0 || *re == 0 || relen == 0 || buf == 0)
- return 0;
-
- /*
- * Reset the various fields of the compilation buffer. Default the flags
- * to indicate the presense of the "^$" pattern. If any other pattern
- * occurs, then this flag will be removed. This is done to catch this
- * special pattern and handle it specially when matching.
- */
- buf->flags = _URE_DFA_BLANKLINE | ((casefold) ? _URE_DFA_CASEFOLD : 0);
- buf->reducing = 0;
- buf->stack.slist_used = 0;
- buf->expr_used = 0;
-
- for (i = 0; i < buf->symtab_used; i++)
- buf->symtab[i].states.slist_used = 0;
- buf->symtab_used = 0;
-
- for (i = 0; i < buf->states.states_used; i++) {
- buf->states.states[i].st.slist_used = 0;
- buf->states.states[i].trans_used = 0;
- }
- buf->states.states_used = 0;
-
- /*
- * Construct the NFA. If this stage returns a 0, then an error occurred or
- * an empty expression was passed.
- */
- if ((state = _ure_re2nfa(re, relen, buf)) == _URE_NOOP)
- return 0;
-
- /*
- * Do the expression reduction to get the initial DFA.
- */
- _ure_reduce(state, buf);
-
- /*
- * Merge all the equivalent DFA states.
- */
- _ure_merge_equiv(buf);
-
- /*
- * Construct the minimal DFA.
- */
- dfa = (ure_dfa_t) malloc(sizeof(_ure_dfa_t));
- (void) memset((char *) dfa, '\0', sizeof(_ure_dfa_t));
-
- dfa->flags = buf->flags & (_URE_DFA_CASEFOLD|_URE_DFA_BLANKLINE);
-
- /*
- * Free up the NFA state groups and transfer the symbols from the buffer
- * to the DFA.
- */
- for (i = 0; i < buf->symtab_size; i++) {
- if (buf->symtab[i].states.slist_size > 0)
- free((char *) buf->symtab[i].states.slist);
- }
- dfa->syms = buf->symtab;
- dfa->nsyms = buf->symtab_used;
-
- buf->symtab_used = buf->symtab_size = 0;
-
- /*
- * Collect the total number of states and transitions needed for the DFA.
- */
- for (i = state = 0, sp = buf->states.states; i < buf->states.states_used;
- i++, sp++) {
- if (sp->id == state) {
- dfa->nstates++;
- dfa->ntrans += sp->trans_used;
- state++;
- }
- }
-
- /*
- * Allocate enough space for the states and transitions.
- */
- dfa->states = (_ure_dstate_t *) malloc(sizeof(_ure_dstate_t) *
- dfa->nstates);
- dfa->trans = (_ure_trans_t *) malloc(sizeof(_ure_trans_t) * dfa->ntrans);
-
- /*
- * Actually transfer the DFA states from the buffer.
- */
- dsp = dfa->states;
- tp = dfa->trans;
- for (i = state = 0, sp = buf->states.states; i < buf->states.states_used;
- i++, sp++) {
- if (sp->id == state) {
- dsp->trans = tp;
- dsp->ntrans = sp->trans_used;
- dsp->accepting = sp->accepting;
-
- /*
- * Add the transitions for the state.
- */
- for (j = 0; j < dsp->ntrans; j++, tp++) {
- tp->symbol = sp->trans[j].lhs;
- tp->next_state = buf->states.states[sp->trans[j].rhs].id;
- }
-
- dsp++;
- state++;
- }
- }
-
- return dfa;
-}
-
-void
-ure_dfa_free(ure_dfa_t dfa)
-{
- ucs2_t i;
-
- if (dfa == 0)
- return;
-
- for (i = 0; i < dfa->nsyms; i++) {
- if ((dfa->syms[i].type == _URE_CCLASS ||
- dfa->syms[i].type == _URE_NCCLASS) &&
- dfa->syms[i].sym.ccl.ranges_size > 0)
- free((char *) dfa->syms[i].sym.ccl.ranges);
- }
- if (dfa->nsyms > 0)
- free((char *) dfa->syms);
-
- if (dfa->nstates > 0)
- free((char *) dfa->states);
- if (dfa->ntrans > 0)
- free((char *) dfa->trans);
- free((char *) dfa);
-}
-
-void
-ure_write_dfa(ure_dfa_t dfa, FILE *out)
-{
- ucs2_t i, j, k, h, l;
- _ure_dstate_t *sp;
- _ure_symtab_t *sym;
- _ure_range_t *rp;
-
- if (dfa == 0 || out == 0)
- return;
-
- /*
- * Write all the different character classes.
- */
- for (i = 0, sym = dfa->syms; i < dfa->nsyms; i++, sym++) {
- if (sym->type == _URE_CCLASS || sym->type == _URE_NCCLASS) {
- fprintf(out, "C%hd = ", sym->id);
- if (sym->sym.ccl.ranges_used > 0) {
- putc('[', out);
- if (sym->type == _URE_NCCLASS)
- putc('^', out);
- }
- if (sym->props != 0) {
- if (sym->type == _URE_NCCLASS)
- fprintf(out, "\\P");
- else
- fprintf(out, "\\p");
- for (k = h = 0; k < 32; k++) {
- if (sym->props & (1 << k)) {
- if (h != 0)
- putc(',', out);
- fprintf(out, "%d", k + 1);
- h = 1;
- }
- }
- }
- /*
- * Dump the ranges.
- */
- for (k = 0, rp = sym->sym.ccl.ranges;
- k < sym->sym.ccl.ranges_used; k++, rp++) {
- /*
- * Check for UTF16 characters.
- */
- if (0x10000 <= rp->min_code &&
- rp->min_code <= 0x10ffff) {
- h = (ucs2_t) (((rp->min_code - 0x10000) >> 10) + 0xd800);
- l = (ucs2_t) (((rp->min_code - 0x10000) & 1023) + 0xdc00);
- fprintf(out, "\\x%04hX\\x%04hX", h, l);
- } else
- fprintf(out, "\\x%04lX",
- (unsigned long)(rp->min_code & 0xffff));
- if (rp->max_code != rp->min_code) {
- putc('-', out);
- if (rp->max_code >= 0x10000 &&
- rp->max_code <= 0x10ffff) {
- h = (ucs2_t) (((rp->max_code - 0x10000) >> 10) + 0xd800);
- l = (ucs2_t) (((rp->max_code - 0x10000) & 1023) + 0xdc00);
- fprintf(out, "\\x%04hX\\x%04hX", h, l);
- } else
- fprintf(out, "\\x%04lX",
- (unsigned long)(rp->max_code & 0xffff));
- }
- }
- if (sym->sym.ccl.ranges_used > 0)
- putc(']', out);
- putc('\n', out);
- }
- }
-
- for (i = 0, sp = dfa->states; i < dfa->nstates; i++, sp++) {
- fprintf(out, "S%hd = ", i);
- if (sp->accepting) {
- fprintf(out, "1 ");
- if (sp->ntrans)
- fprintf(out, "| ");
- }
- for (j = 0; j < sp->ntrans; j++) {
- if (j > 0)
- fprintf(out, "| ");
-
- sym = dfa->syms + sp->trans[j].symbol;
- switch (sym->type) {
- case _URE_CHAR:
- if (0x10000 <= sym->sym.chr && sym->sym.chr <= 0x10ffff) {
- /*
- * Take care of UTF16 characters.
- */
- h = (ucs2_t) (((sym->sym.chr - 0x10000) >> 10) + 0xd800);
- l = (ucs2_t) (((sym->sym.chr - 0x10000) & 1023) + 0xdc00);
- fprintf(out, "\\x%04hX\\x%04hX ", h, l);
- } else
- fprintf(out, "\\x%04lX ",
- (unsigned long)(sym->sym.chr & 0xffff));
- break;
- case _URE_ANY_CHAR:
- fprintf(out, "<any> ");
- break;
- case _URE_BOL_ANCHOR:
- fprintf(out, "<bol-anchor> ");
- break;
- case _URE_EOL_ANCHOR:
- fprintf(out, "<eol-anchor> ");
- break;
- case _URE_CCLASS:
- case _URE_NCCLASS:
- fprintf(out, "[C%hd] ", sym->id);
- break;
- }
- fprintf(out, "S%hd", sp->trans[j].next_state);
- if (j + 1 < sp->ntrans)
- putc(' ', out);
- }
- putc('\n', out);
- }
-}
-
-#define _ure_issep(cc) ((cc) == '\n' || (cc) == '\r' || (cc) == 0x2028 ||\
- (cc) == 0x2029)
-
-int
-ure_exec(ure_dfa_t dfa, int flags, ucs2_t *text, unsigned long textlen,
- unsigned long *match_start, unsigned long *match_end)
-{
- int i, j, matched, found;
- unsigned long ms, me;
- ucs4_t c;
- ucs2_t *sp, *ep, *lp;
- _ure_dstate_t *stp;
- _ure_symtab_t *sym;
- _ure_range_t *rp;
-
- if (dfa == 0 || text == 0)
- return 0;
-
- /*
- * Handle the special case of an empty string matching the "^$" pattern.
- */
- if (textlen == 0 && (dfa->flags & _URE_DFA_BLANKLINE)) {
- *match_start = *match_end = 0;
- return 1;
- }
-
- sp = text;
- ep = sp + textlen;
-
- ms = me = ~0;
-
- stp = dfa->states;
-
- for (found = 0; found == 0 && sp < ep; ) {
- lp = sp;
- c = *sp++;
-
- /*
- * Check to see if this is a high surrogate that should be
- * combined with a following low surrogate.
- */
- if (sp < ep && 0xd800 <= c && c <= 0xdbff &&
- 0xdc00 <= *sp && *sp <= 0xdfff)
- c = 0x10000 + (((c & 0x03ff) << 10) | (*sp++ & 0x03ff));
-
- /*
- * Determine if the character is non-spacing and should be skipped.
- */
- if (_ure_matches_properties(_URE_NONSPACING, c) &&
- (flags & URE_IGNORE_NONSPACING)) {
- sp++;
- continue;
- }
-
- if (dfa->flags & _URE_DFA_CASEFOLD)
- c = _ure_tolower(c);
-
- /*
- * See if one of the transitions matches.
- */
- for (i = 0, matched = 0; matched == 0 && i < stp->ntrans; i++) {
- sym = dfa->syms + stp->trans[i].symbol;
- switch (sym->type) {
- case _URE_ANY_CHAR:
- if ((flags & URE_DOT_MATCHES_SEPARATORS) ||
- !_ure_issep(c))
- matched = 1;
- break;
- case _URE_CHAR:
- if (c == sym->sym.chr)
- matched = 1;
- break;
- case _URE_BOL_ANCHOR:
- if (lp == text) {
- sp = lp;
- matched = 1;
- } else if (_ure_issep(c)) {
- if (c == '\r' && sp < ep && *sp == '\n')
- sp++;
- lp = sp;
- matched = 1;
- }
- break;
- case _URE_EOL_ANCHOR:
- if (_ure_issep(c)) {
- /*
- * Put the pointer back before the separator so the match
- * end position will be correct. This case will also
- * cause the `sp' pointer to be advanced over the current
- * separator once the match end point has been recorded.
- */
- sp = lp;
- matched = 1;
- }
- break;
- case _URE_CCLASS:
- case _URE_NCCLASS:
- if (sym->props != 0)
- matched = _ure_matches_properties(sym->props, c);
- for (j = 0, rp = sym->sym.ccl.ranges;
- j < sym->sym.ccl.ranges_used; j++, rp++) {
- if (rp->min_code <= c && c <= rp->max_code)
- matched = 1;
- }
- if (sym->type == _URE_NCCLASS)
- matched = !matched;
- break;
- }
-
- if (matched) {
- if (ms == ~0UL)
- ms = lp - text;
- else
- me = sp - text;
- stp = dfa->states + stp->trans[i].next_state;
-
- /*
- * If the match was an EOL anchor, adjust the pointer past the
- * separator that caused the match. The correct match
- * position has been recorded already.
- */
- if (sym->type == _URE_EOL_ANCHOR) {
- /*
- * Skip the character that caused the match.
- */
- sp++;
-
- /*
- * Handle the infamous CRLF situation.
- */
- if (sp < ep && c == '\r' && *sp == '\n')
- sp++;
- }
- }
- }
-
- if (matched == 0) {
- if (stp->accepting == 0) {
- /*
- * If the last state was not accepting, then reset
- * and start over.
- */
- stp = dfa->states;
- ms = me = ~0;
- } else
- /*
- * The last state was accepting, so terminate the matching
- * loop to avoid more work.
- */
- found = 1;
- } else if (sp == ep) {
- if (!stp->accepting) {
- /*
- * This ugly hack is to make sure the end-of-line anchors
- * match when the source text hits the end. This is only done
- * if the last subexpression matches.
- */
- for (i = 0; found == 0 && i < stp->ntrans; i++) {
- sym = dfa->syms + stp->trans[i].symbol;
- if (sym->type ==_URE_EOL_ANCHOR) {
- stp = dfa->states + stp->trans[i].next_state;
- if (stp->accepting) {
- me = sp - text;
- found = 1;
- } else
- break;
- }
- }
- } else {
- /*
- * Make sure any conditions that match all the way to the end
- * of the string match.
- */
- found = 1;
- me = sp - text;
- }
- }
- }
-
- if (found == 0)
- ms = me = ~0;
-
- *match_start = ms;
- *match_end = me;
-
- return (ms != ~0UL) ? 1 : 0;
-}
diff --git a/src/lib/krb5/unicode/ure/ure.h b/src/lib/krb5/unicode/ure/ure.h
deleted file mode 100644
index b83c97ed9..000000000
--- a/src/lib/krb5/unicode/ure/ure.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <http://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <http://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ure/ure.h,v 1.15 2008/01/07 23:20:05 kurt Exp $
- * $Id: ure.h,v 1.2 1999/09/21 15:47:44 mleisher Exp $
- */
-
-#ifndef _h_ure
-#define _h_ure
-
-#include "k5-int.h"
-
-#include <stdio.h>
-
-/*
- * Set of character class flags.
- */
-#define _URE_NONSPACING 0x00000001
-#define _URE_COMBINING 0x00000002
-#define _URE_NUMDIGIT 0x00000004
-#define _URE_NUMOTHER 0x00000008
-#define _URE_SPACESEP 0x00000010
-#define _URE_LINESEP 0x00000020
-#define _URE_PARASEP 0x00000040
-#define _URE_CNTRL 0x00000080
-#define _URE_PUA 0x00000100
-
-#define _URE_UPPER 0x00000200
-#define _URE_LOWER 0x00000400
-#define _URE_TITLE 0x00000800
-#define _URE_MODIFIER 0x00001000
-#define _URE_OTHERLETTER 0x00002000
-#define _URE_DASHPUNCT 0x00004000
-#define _URE_OPENPUNCT 0x00008000
-#define _URE_CLOSEPUNCT 0x00010000
-#define _URE_OTHERPUNCT 0x00020000
-#define _URE_MATHSYM 0x00040000
-#define _URE_CURRENCYSYM 0x00080000
-#define _URE_OTHERSYM 0x00100000
-
-#define _URE_LTR 0x00200000
-#define _URE_RTL 0x00400000
-
-#define _URE_EURONUM 0x00800000
-#define _URE_EURONUMSEP 0x01000000
-#define _URE_EURONUMTERM 0x02000000
-#define _URE_ARABNUM 0x04000000
-#define _URE_COMMONSEP 0x08000000
-
-#define _URE_BLOCKSEP 0x10000000
-#define _URE_SEGMENTSEP 0x20000000
-
-#define _URE_WHITESPACE 0x40000000
-#define _URE_OTHERNEUT 0x80000000
-
-/*
- * Error codes.
- */
-#define _URE_OK 0
-#define _URE_UNEXPECTED_EOS -1
-#define _URE_CCLASS_OPEN -2
-#define _URE_UNBALANCED_GROUP -3
-#define _URE_INVALID_PROPERTY -4
-
-/*
- * Options that can be combined for searching.
- */
-#define URE_IGNORE_NONSPACING 0x01
-#define URE_DOT_MATCHES_SEPARATORS 0x02
-
-typedef krb5_ui_4 ucs4_t;
-typedef krb5_ui_2 ucs2_t;
-
-/*
- * Opaque type for memory used when compiling expressions.
- */
-typedef struct _ure_buffer_t *ure_buffer_t;
-
-/*
- * Opaque type for the minimal DFA used when matching.
- */
-typedef struct _ure_dfa_t *ure_dfa_t;
-
-/*************************************************************************
- *
- * API.
- *
- *************************************************************************/
-
-ure_buffer_t ure_buffer_create (void);
-
-void ure_buffer_free (ure_buffer_t buf);
-
-ure_dfa_t
-ure_compile (ucs2_t *re, unsigned long relen,
- int casefold, ure_buffer_t buf);
-
-void ure_dfa_free (ure_dfa_t dfa);
-
-void ure_write_dfa (ure_dfa_t dfa, FILE *out);
-
-int
-ure_exec (ure_dfa_t dfa, int flags, ucs2_t *text,
- unsigned long textlen, unsigned long *match_start,
- unsigned long *match_end);
-
-/*************************************************************************
- *
- * Prototypes for stub functions used for URE. These need to be rewritten to
- * use the Unicode support available on the system.
- *
- *************************************************************************/
-
-ucs4_t _ure_tolower (ucs4_t c);
-
-int
-_ure_matches_properties (unsigned long props, ucs4_t c);
-
-#endif /* _h_ure */
diff --git a/src/lib/krb5/unicode/ure/urestubs.c b/src/lib/krb5/unicode/ure/urestubs.c
deleted file mode 100644
index 0f1795124..000000000
--- a/src/lib/krb5/unicode/ure/urestubs.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/*
- * Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/ure/urestubs.c,v 1.16 2008/01/07 23:20:05 kurt Exp $
- * $Id: urestubs.c,v 1.2 1999/09/21 15:47:44 mleisher Exp $"
- */
-
-#include "k5-int.h"
-
-#include "ure.h"
-
-#include "ucdata.h"
-
-/*
- * This file contains stub routines needed by the URE package to test
- * character properties and other Unicode implementation specific details.
- */
-
-/*
- * This routine should return the lower case equivalent for the character or,
- * if there is no lower case quivalent, the character itself.
- */
-ucs4_t _ure_tolower(ucs4_t c)
-{
- return uctoupper(c);
-}
-
-static struct ucmaskmap {
- unsigned long mask1;
- unsigned long mask2;
-} masks[32] = {
- { UC_MN, 0 }, /* _URE_NONSPACING */
- { UC_MC, 0 }, /* _URE_COMBINING */
- { UC_ND, 0 }, /* _URE_NUMDIGIT */
- { UC_NL|UC_NO, 0 }, /* _URE_NUMOTHER */
- { UC_ZS, 0 }, /* _URE_SPACESEP */
- { UC_ZL, 0 }, /* _URE_LINESEP */
- { UC_ZP, 0 }, /* _URE_PARASEP */
- { UC_CC, 0 }, /* _URE_CNTRL */
- { UC_CO, 0 }, /* _URE_PUA */
-
- { UC_LU, 0 }, /* _URE_UPPER */
- { UC_LL, 0 }, /* _URE_LOWER */
- { UC_LT, 0 }, /* _URE_TITLE */
- { UC_LM, 0 }, /* _URE_MODIFIER */
- { UC_LO, 0 }, /* _URE_OTHERLETTER */
- { UC_PD, 0 }, /* _URE_DASHPUNCT */
- { UC_PS, 0 }, /* _URE_OPENPUNCT */
- { UC_PC, 0 }, /* _URE_CLOSEPUNCT */
- { UC_PO, 0 }, /* _URE_OTHERPUNCT */
- { UC_SM, 0 }, /* _URE_MATHSYM */
- { UC_SC, 0 }, /* _URE_CURRENCYSYM */
- { UC_SO, 0 }, /* _URE_OTHERSYM */
-
- { UC_L, 0 }, /* _URE_LTR */
- { UC_R, 0 }, /* _URE_RTL */
-
- { 0, UC_EN }, /* _URE_EURONUM */
- { 0, UC_ES }, /* _URE_EURONUMSEP */
- { 0, UC_ET }, /* _URE_EURONUMTERM */
- { 0, UC_AN }, /* _URE_ARABNUM */
- { 0, UC_CS }, /* _URE_COMMONSEP */
-
- { 0, UC_B }, /* _URE_BLOCKSEP */
- { 0, UC_S }, /* _URE_SEGMENTSEP */
-
- { 0, UC_WS }, /* _URE_WHITESPACE */
- { 0, UC_ON } /* _URE_OTHERNEUT */
-};
-
-
-/*
- * This routine takes a set of URE character property flags (see ure.h) along
- * with a character and tests to see if the character has one or more of those
- * properties.
- */
-int
-_ure_matches_properties(unsigned long props, ucs4_t c)
-{
- int i;
- unsigned long mask1=0, mask2=0;
-
- for( i=0; i<32; i++ ) {
- if( props & (1 << i) ) {
- mask1 |= masks[i].mask1;
- mask2 |= masks[i].mask2;
- }
- }
-
- return ucisprop( mask1, mask2, c );
-}
diff --git a/src/lib/krb5/unicode/utbm/README b/src/lib/krb5/unicode/utbm/README
deleted file mode 100644
index 8c0212dcf..000000000
--- a/src/lib/krb5/unicode/utbm/README
+++ /dev/null
@@ -1,121 +0,0 @@
-#
-# $Id: README,v 1.1 1999/09/21 15:45:17 mleisher Exp $
-#
-# Copyright 1997, 1998, 1999 Computing Research Labs,
-# New Mexico State University
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
-# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-
- Unicode and Boyer-Moore Searching
- Version 0.2
-
-UTBM (Unicode Tuned Boyer-Moore) is a simple package that provides tuned
-Boyer-Moore searches on Unicode UCS2 text (handles high and low surrogates).
-
----------------------------------------------------------------------------
-
-Assumptions:
-
- o Search pattern and text already normalized in some fasion.
-
- o Upper, lower, and title case conversions are one-to-one.
-
- o For conversions between upper, lower, and title case, UCS2 characters
- always convert to other UCS2 characters, and UTF-16 characters always
- convert to other UTF-16 characters.
-
-Flags:
-
- UTBM provides three processing flags:
-
- o UTBM_CASEFOLD - search in a case-insensitive manner.
-
- o UTBM_IGNORE_NONSPACING - ignore non-spacing characters in the pattern and
- the text.
-
- o UTBM_SPACE_COMPRESS - view as a *single space*, sequential groups of
- U+2028, U+2029, '\n', '\r', '\t', and any
- character identified as a space by the Unicode
- support on the platform.
-
- This flag also causes all characters identified
- as control by the Unicode support on the
- platform to be ignored (except for '\n', '\r',
- and '\t').
-
----------------------------------------------------------------------------
-
-Before using UTBM
------------------
-Before UTBM is used, some functions need to be created. The "utbmstub.c" file
-contains stubs that need to be rewritten so they work with the Unicode support
-on the platform on which this package is being used.
-
-Using UTBM
-----------
-
-Sample pseudo-code fragment.
-
- utbm_pattern_t pat;
- ucs2_t *pattern, *text;
- unsigned long patternlen, textlen;
- unsigned long flags, match_start, match_end;
-
- /*
- * Allocate the dynamic storage needed for a search pattern.
- */
- pat = utbm_create_pattern();
-
- /*
- * Set the search flags desired.
- */
- flags = UTBM_CASEFOLD|UTBM_IGNORE_NONSPACING;
-
- /*
- * Compile the search pattern.
- */
- utbm_compile(pattern, patternlen, flags, pat);
-
- /*
- * Find the first occurance of the search pattern in the text.
- */
- if (utbm_exec(pat, text, textlen, &match_start, &match_end))
- printf("MATCH: %ld %ld\n", match_start, match_end);
-
- /*
- * Free the dynamic storage used for the search pattern.
- */
- ure_free_pattern(pat);
-
----------------------------------------------------------------------------
-
-Mark Leisher <mleisher@crl.nmsu.edu>
-2 May 1997
-
-===========================================================================
-
-CHANGES
--------
-
-Version: 0.2
-Date : 21 September 1999
-==========================
- 1. Added copyright stuff and put in CVS.
-
diff --git a/src/lib/krb5/unicode/utbm/utbm.c b/src/lib/krb5/unicode/utbm/utbm.c
deleted file mode 100644
index cc895e56a..000000000
--- a/src/lib/krb5/unicode/utbm/utbm.c
+++ /dev/null
@@ -1,475 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/utbm/utbm.c,v 1.9 2008/01/07 23:20:05 kurt Exp $
- * $Id: utbm.c,v 1.1 1999/09/21 15:45:17 mleisher Exp $
- */
-
-/*
- * Assumptions:
- * 1. Case conversions of UTF-16 characters must also be UTF-16 characters.
- * 2. Case conversions are all one-to-one.
- * 3. Text and pattern have already been normalized in some fashion.
- */
-
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include "utbm.h"
-
-/*
- * Single pattern character.
- */
-typedef struct {
- ucs4_t lc;
- ucs4_t uc;
- ucs4_t tc;
-} _utbm_char_t;
-
-typedef struct {
- _utbm_char_t *ch;
- unsigned long skip;
-} _utbm_skip_t;
-
-typedef struct _utbm_pattern_t {
- unsigned long flags;
-
- _utbm_char_t *pat;
- unsigned long pat_used;
- unsigned long pat_size;
- unsigned long patlen;
-
- _utbm_skip_t *skip;
- unsigned long skip_used;
- unsigned long skip_size;
-
- unsigned long md4;
-} _utbm_pattern_t;
-
-/*************************************************************************
- *
- * Support functions.
- *
- *************************************************************************/
-
-/*
- * Routine to look up the skip value for a character.
- */
-static unsigned long
-_utbm_skip(utbm_pattern_t p, ucs2_t *start, ucs2_t *end)
-{
- unsigned long i;
- ucs4_t c1, c2;
- _utbm_skip_t *sp;
-
- if (start >= end)
- return 0;
-
- c1 = *start;
- c2 = (start + 1 < end) ? *(start + 1) : ~0;
- if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff)
- c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
-
- for (i = 0, sp = p->skip; i < p->skip_used; i++, sp++) {
- if (!((c1 ^ sp->ch->uc) & (c1 ^ sp->ch->lc) & (c1 ^ sp->ch->tc))) {
- return ((unsigned long) (end - start) < sp->skip) ?
- end - start : sp->skip;
- }
- }
- return p->patlen;
-}
-
-static int
-_utbm_match(utbm_pattern_t pat, ucs2_t *text, ucs2_t *start, ucs2_t *end,
- unsigned long *match_start, unsigned long *match_end)
-{
- int check_space;
- ucs4_t c1, c2;
- unsigned long count;
- _utbm_char_t *cp;
-
- /*
- * Set the potential match endpoint first.
- */
- *match_end = (start - text) + 1;
-
- c1 = *start;
- c2 = (start + 1 < end) ? *(start + 1) : ~0;
- if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff) {
- c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
- /*
- * Adjust the match end point to occur after the UTF-16 character.
- */
- *match_end = *match_end + 1;
- }
-
- if (pat->pat_used == 1) {
- *match_start = start - text;
- return 1;
- }
-
- /*
- * Compare backward.
- */
- cp = pat->pat + (pat->pat_used - 1);
-
- for (count = pat->patlen; start > text && count > 0;) {
- /*
- * Ignore non-spacing characters if indicated.
- */
- if (pat->flags & UTBM_IGNORE_NONSPACING) {
- while (start > text && _utbm_nonspacing(c1)) {
- c2 = *--start;
- c1 = (start - 1 > text) ? *(start - 1) : ~0;
- if (0xdc00 <= c2 && c2 <= 0xdfff &&
- 0xd800 <= c1 && c1 <= 0xdbff) {
- c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
- start--;
- } else
- c1 = c2;
- }
- }
-
- /*
- * Handle space compression if indicated.
- */
- if (pat->flags & UTBM_SPACE_COMPRESS) {
- check_space = 0;
- while (start > text &&
- (_utbm_isspace(c1, 1) || _utbm_iscntrl(c1))) {
- check_space = _utbm_isspace(c1, 1);
- c2 = *--start;
- c1 = (start - 1 > text) ? *(start - 1) : ~0;
- if (0xdc00 <= c2 && c2 <= 0xdfff &&
- 0xd800 <= c1 && c1 <= 0xdbff) {
- c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
- start--;
- } else
- c1 = c2;
- }
- /*
- * Handle things if space compression was indicated and one or
- * more member characters were found.
- */
- if (check_space) {
- if (cp->uc != ' ')
- return 0;
- cp--;
- count--;
- }
- }
-
- /*
- * Handle the normal comparison cases.
- */
- if (count > 0 && ((c1 ^ cp->uc) & (c1 ^ cp->lc) & (c1 ^ cp->tc)))
- return 0;
-
- count -= (c1 >= 0x10000) ? 2 : 1;
- if (count > 0) {
- cp--;
-
- /*
- * Get the next preceding character.
- */
- if (start > text) {
- c2 = *--start;
- c1 = (start - 1 > text) ? *(start - 1) : ~0;
- if (0xdc00 <= c2 && c2 <= 0xdfff &&
- 0xd800 <= c1 && c1 <= 0xdbff) {
- c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
- start--;
- } else
- c1 = c2;
- }
- }
- }
-
- /*
- * Set the match start position.
- */
- *match_start = start - text;
- return 1;
-}
-
-/*************************************************************************
- *
- * API.
- *
- *************************************************************************/
-
-utbm_pattern_t
-utbm_create_pattern(void)
-{
- utbm_pattern_t p;
-
- p = (utbm_pattern_t) malloc(sizeof(_utbm_pattern_t));
- (void) memset((char *) p, '\0', sizeof(_utbm_pattern_t));
- return p;
-}
-
-void
-utbm_free_pattern(utbm_pattern_t pattern)
-{
- if (pattern == 0)
- return;
-
- if (pattern->pat_size > 0)
- free((char *) pattern->pat);
-
- if (pattern->skip_size > 0)
- free((char *) pattern->skip);
-
- free((char *) pattern);
-}
-
-void
-utbm_compile(ucs2_t *pat, unsigned long patlen, unsigned long flags,
- utbm_pattern_t p)
-{
- int have_space;
- unsigned long i, j, k, slen;
- _utbm_char_t *cp;
- _utbm_skip_t *sp;
- ucs4_t c1, c2, sentinel;
-
- if (p == 0 || pat == 0 || *pat == 0 || patlen == 0)
- return;
-
- /*
- * Reset the pattern buffer.
- */
- p->patlen = p->pat_used = p->skip_used = 0;
-
- /*
- * Set the flags.
- */
- p->flags = flags;
-
- /*
- * Initialize the extra skip flag.
- */
- p->md4 = 1;
-
- /*
- * Allocate more storage if necessary.
- */
- if (patlen > p->pat_size) {
- if (p->pat_size == 0) {
- p->pat = (_utbm_char_t *) malloc(sizeof(_utbm_char_t) * patlen);
- p->skip = (_utbm_skip_t *) malloc(sizeof(_utbm_skip_t) * patlen);
- } else {
- p->pat = (_utbm_char_t *)
- realloc((char *) p->pat, sizeof(_utbm_char_t) * patlen);
- p->skip = (_utbm_skip_t *)
- realloc((char *) p->skip, sizeof(_utbm_skip_t) * patlen);
- }
- p->pat_size = p->skip_size = patlen;
- }
-
- /*
- * Preprocess the pattern to remove controls (if specified) and determine
- * case.
- */
- for (have_space = 0, cp = p->pat, i = 0; i < patlen; i++) {
- c1 = pat[i];
- c2 = (i + 1 < patlen) ? pat[i + 1] : ~0;
- if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff)
- c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
-
- /*
- * Make sure the `have_space' flag is turned off if the character
- * is not an appropriate one.
- */
- if (!_utbm_isspace(c1, flags & UTBM_SPACE_COMPRESS))
- have_space = 0;
-
- /*
- * If non-spacing characters should be ignored, do it here.
- */
- if ((flags & UTBM_IGNORE_NONSPACING) && _utbm_nonspacing(c1))
- continue;
-
- /*
- * Check if spaces and controls need to be compressed.
- */
- if (flags & UTBM_SPACE_COMPRESS) {
- if (_utbm_isspace(c1, 1)) {
- if (!have_space) {
- /*
- * Add a space and set the flag.
- */
- cp->uc = cp->lc = cp->tc = ' ';
- cp++;
-
- /*
- * Increase the real pattern length.
- */
- p->patlen++;
- sentinel = ' ';
- have_space = 1;
- }
- continue;
- }
-
- /*
- * Ignore all control characters.
- */
- if (_utbm_iscntrl(c1))
- continue;
- }
-
- /*
- * Add the character.
- */
- if (flags & UTBM_CASEFOLD) {
- cp->uc = _utbm_toupper(c1);
- cp->lc = _utbm_tolower(c1);
- cp->tc = _utbm_totitle(c1);
- } else
- cp->uc = cp->lc = cp->tc = c1;
-
- /*
- * Set the sentinel character.
- */
- sentinel = cp->uc;
-
- /*
- * Move to the next character.
- */
- cp++;
-
- /*
- * Increase the real pattern length appropriately.
- */
- p->patlen += (c1 >= 0x10000) ? 2 : 1;
-
- /*
- * Increment the loop index for UTF-16 characters.
- */
- i += (c1 >= 0x10000) ? 1 : 0;
-
- }
-
- /*
- * Set the number of characters actually used.
- */
- p->pat_used = cp - p->pat;
-
- /*
- * Go through and construct the skip array and determine the actual length
- * of the pattern in UCS2 terms.
- */
- slen = p->patlen - 1;
- cp = p->pat;
- for (i = k = 0; i < p->pat_used; i++, cp++) {
- /*
- * Locate the character in the skip array.
- */
- for (sp = p->skip, j = 0;
- j < p->skip_used && sp->ch->uc != cp->uc; j++, sp++) ;
-
- /*
- * If the character is not found, set the new skip element and
- * increase the number of skip elements.
- */
- if (j == p->skip_used) {
- sp->ch = cp;
- p->skip_used++;
- }
-
- /*
- * Set the updated skip value. If the character is UTF-16 and is
- * not the last one in the pattern, add one to its skip value.
- */
- sp->skip = slen - k;
- if (cp->uc >= 0x10000 && k + 2 < slen)
- sp->skip++;
-
- /*
- * Set the new extra skip for the sentinel character.
- */
- if (((cp->uc >= 0x10000 && k + 2 <= slen) || k + 1 <= slen) &&
- cp->uc == sentinel)
- p->md4 = slen - k;
-
- /*
- * Increase the actual index.
- */
- k += (cp->uc >= 0x10000) ? 2 : 1;
- }
-}
-
-int
-utbm_exec(utbm_pattern_t pat, ucs2_t *text, unsigned long textlen,
- unsigned long *match_start, unsigned long *match_end)
-{
- unsigned long k;
- ucs2_t *start, *end;
-
- if (pat == 0 || pat->pat_used == 0 || text == 0 || textlen == 0 ||
- textlen < pat->patlen)
- return 0;
-
- start = text + pat->patlen;
- end = text + textlen;
-
- /*
- * Adjust the start point if it points to a low surrogate.
- */
- if (0xdc00 <= *start && *start <= 0xdfff &&
- 0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
- start--;
-
- while (start < end) {
- while ((k = _utbm_skip(pat, start, end))) {
- start += k;
- if (start < end && 0xdc00 <= *start && *start <= 0xdfff &&
- 0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
- start--;
- }
-
- if (start < end &&
- _utbm_match(pat, text, start, end, match_start, match_end))
- return 1;
-
- start += pat->md4;
- if (start < end && 0xdc00 <= *start && *start <= 0xdfff &&
- 0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
- start--;
- }
- return 0;
-}
diff --git a/src/lib/krb5/unicode/utbm/utbm.h b/src/lib/krb5/unicode/utbm/utbm.h
deleted file mode 100644
index 1ab8b91cf..000000000
--- a/src/lib/krb5/unicode/utbm/utbm.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/utbm/utbm.h,v 1.10 2008/01/07 23:20:05 kurt Exp $
- * $Id: utbm.h,v 1.1 1999/09/21 15:45:18 mleisher Exp $
- */
-
-#ifndef _h_utbm
-#define _h_utbm
-
-#include "k5-int.h"
-
-/*************************************************************************
- *
- * Types.
- *
- *************************************************************************/
-
-/*
- * Fundamental character types.
- */
-typedef krb5_ui_4 ucs4_t;
-typedef krb5_ui_2 ucs2_t;
-
-/*
- * An opaque type used for the search pattern.
- */
-typedef struct _utbm_pattern_t *utbm_pattern_t;
-
-/*************************************************************************
- *
- * Flags.
- *
- *************************************************************************/
-
-#define UTBM_CASEFOLD 0x01
-#define UTBM_IGNORE_NONSPACING 0x02
-#define UTBM_SPACE_COMPRESS 0x04
-
-/*************************************************************************
- *
- * API.
- *
- *************************************************************************/
-
-utbm_pattern_t utbm_create_pattern (void);
-
-void utbm_free_pattern (utbm_pattern_t pattern);
-
-void
-utbm_compile (ucs2_t *pat, unsigned long patlen,
- unsigned long flags, utbm_pattern_t pattern);
-
-int
-utbm_exec (utbm_pattern_t pat, ucs2_t *text,
- unsigned long textlen, unsigned long *match_start,
- unsigned long *match_end);
-
-/*************************************************************************
- *
- * Prototypes for the stub functions needed.
- *
- *************************************************************************/
-
-int _utbm_isspace (ucs4_t c, int compress);
-
-int _utbm_iscntrl (ucs4_t c);
-
-int _utbm_nonspacing (ucs4_t c);
-
-ucs4_t _utbm_tolower (ucs4_t c);
-
-ucs4_t _utbm_toupper (ucs4_t c);
-
-ucs4_t _utbm_totitle (ucs4_t c);
-
-#endif /* _h_utbm */
diff --git a/src/lib/krb5/unicode/utbm/utbmstub.c b/src/lib/krb5/unicode/utbm/utbmstub.c
deleted file mode 100644
index 9a6f60a6f..000000000
--- a/src/lib/krb5/unicode/utbm/utbmstub.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright 1998-2008 The OpenLDAP Foundation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted only as authorized by the OpenLDAP
- * Public License.
- *
- * A copy of this license is available in file LICENSE in the
- * top-level directory of the distribution or, alternatively, at
- * <https://www.OpenLDAP.org/license.html>.
- */
-/* Copyright 1997, 1998, 1999 Computing Research Labs,
- * New Mexico State University
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
- * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
- * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * This work is part of OpenLDAP Software <https://www.openldap.org/>.
- * $OpenLDAP: pkg/ldap/libraries/liblunicode/utbm/utbmstub.c,v 1.8 2008/01/07 23:20:05 kurt Exp $
- * $Id: utbmstub.c,v 1.1 1999/09/21 15:45:18 mleisher Exp $
- */
-
-#include "utbm.h"
-
-/*
- * This should be redefined to use the `isspace' function available in the
- * Unicode support on the platform where this is being used.
- */
-#define _platform_isspace(x) 0
-
-/*
- * Return non-zero for any character that should be considered the equivalent
- * of a space character. Return zero otherwise.
- */
-int
-_utbm_isspace(ucs4_t c, int compress)
-{
- if (compress)
- return (c == 0x09 || c == 0x0a || c == 0x0d ||
- c == 0x2028 || c == 0x2029 || _platform_isspace(c)) ? 1 : 0;
-
- return _platform_isspace(c);
-
-}
-
-/*
- * Return non-zero if the character is a control character, or zero otherwise.
- */
-int
-_utbm_iscntrl(ucs4_t c)
-{
- return 0;
-}
-
-/*
- * Return non-zero if the character is a non-spacing character, or zero
- * otherwise.
- */
-int
-_utbm_nonspacing(ucs4_t c)
-{
- return 0;
-}
-
-/*
- * Convert a character to lower case.
- */
-ucs4_t
-_utbm_tolower(ucs4_t c)
-{
- return c;
-}
-
-/*
- * Convert a character to upper case.
- */
-ucs4_t
-_utbm_toupper(ucs4_t c)
-{
- return c;
-}
-
-/*
- * Convert a character to title case.
- */
-ucs4_t
-_utbm_totitle(ucs4_t c)
-{
- return c;
-}
diff --git a/src/util/support/libkrb5support-fixed.exports b/src/util/support/libkrb5support-fixed.exports
index df3c78f9e..0bafe1c84 100644
--- a/src/util/support/libkrb5support-fixed.exports
+++ b/src/util/support/libkrb5support-fixed.exports
@@ -95,5 +95,4 @@ krb5int_ucs4_to_utf8
krb5int_utf8_to_ucs4
krb5int_utf8_lentab
krb5int_utf8_mintab
-krb5int_utf8_next
krb5int_zap
diff --git a/src/util/support/t_utf8.c b/src/util/support/t_utf8.c
index 583270165..6493bae3e 100644
--- a/src/util/support/t_utf8.c
+++ b/src/util/support/t_utf8.c
@@ -49,13 +49,13 @@
#endif
/*
- * len is 0 for invalid encoding prefixes (krb5int_utf8_charlen2() partially
+ * len is 0 for invalid encoding prefixes (KRB5_UTF8_CHARLEN2() partially
* enforces the validity of the first two bytes, based on masking the second
* byte. It doesn't check whether bit 6 is 0, though, and doesn't catch the
* range between U+110000 and U+13FFFF).
*
* ucs is 0 for invalid encodings (including ones with valid prefixes according
- * to krb5int_utf8_charlen2(); krb5int_utf8_to_ucs4() will still fail on them
+ * to KRB5_UTF8_CHARLEN2(); krb5int_utf8_to_ucs4() will still fail on them
* because it checks more things.) Code points above U+10FFFF are excluded by
* the actual test code and remain in the table for possibly testing the old
* implementation that didn't exclude them.
@@ -129,7 +129,7 @@ test_decode(struct testcase *t, int high4)
int len, status = 0;
krb5_ucs4 u = 0;
- len = krb5int_utf8_charlen2(t->p);
+ len = KRB5_UTF8_CHARLEN2(t->p, len);
if (len != t->len) {
printf("expected len=%d, got len=%d\n", t->len, len);
status = 1;
diff --git a/src/util/support/utf8.c b/src/util/support/utf8.c
index dfbf12baa..08bdcf9a3 100644
--- a/src/util/support/utf8.c
+++ b/src/util/support/utf8.c
@@ -53,50 +53,6 @@
#include "supp-int.h"
/*
- * return the number of bytes required to hold the
- * NULL-terminated UTF-8 string NOT INCLUDING the
- * termination.
- */
-size_t krb5int_utf8_bytes(const char *p)
-{
- size_t bytes;
-
- for (bytes = 0; p[bytes]; bytes++)
- ;
-
- return bytes;
-}
-
-size_t krb5int_utf8_chars(const char *p)
-{
- /* could be optimized and could check for invalid sequences */
- size_t chars = 0;
-
- for ( ; *p ; KRB5_UTF8_INCR(p))
- chars++;
-
- return chars;
-}
-
-size_t krb5int_utf8c_chars(const char *p, size_t length)
-{
- /* could be optimized and could check for invalid sequences */
- size_t chars = 0;
- const char *end = p + length;
-
- for ( ; p < end; KRB5_UTF8_INCR(p))
- chars++;
-
- return chars;
-}
-
-/* return offset to next character */
-int krb5int_utf8_offset(const char *p)
-{
- return KRB5_UTF8_NEXT(p) - p;
-}
-
-/*
* Returns length indicated by first byte.
*/
const char krb5int_utf8_lentab[] = {
@@ -109,14 +65,6 @@ const char krb5int_utf8_lentab[] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-int krb5int_utf8_charlen(const char *p)
-{
- if (!(*p & 0x80))
- return 1;
-
- return krb5int_utf8_lentab[*(const unsigned char *)p ^ 0x80];
-}
-
/*
* Make sure the UTF-8 char used the shortest possible encoding
* returns charlen if valid, 0 if not.
@@ -147,18 +95,6 @@ c krb5int_utf8_mintab[] = {
(c)0x00, (c)0x00, (c)0x00, (c)0x00, (c)0x00, (c)0x00, (c)0x00, (c)0x00 };
#undef c
-int krb5int_utf8_charlen2(const char *p)
-{
- int i = KRB5_UTF8_CHARLEN(p);
-
- if (i > 2) {
- if (!(krb5int_utf8_mintab[*p & 0x1f] & p[1]))
- i = 0;
- }
-
- return i;
-}
-
/*
* Convert a UTF8 character to a UCS4 character. Return 0 on success,
* -1 on failure.
@@ -194,17 +130,6 @@ int krb5int_utf8_to_ucs4(const char *p, krb5_ucs4 *out)
return 0;
}
-int krb5int_utf8_to_ucs2(const char *p, krb5_ucs2 *out)
-{
- krb5_ucs4 ch;
-
- *out = 0;
- if (krb5int_utf8_to_ucs4(p, &ch) == -1 || ch > 0xFFFF)
- return -1;
- *out = (krb5_ucs2) ch;
- return 0;
-}
-
/* conv UCS-4 to UTF-8 */
size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf)
{
@@ -241,271 +166,3 @@ size_t krb5int_ucs4_to_utf8(krb5_ucs4 c, char *buf)
return len;
}
-
-size_t krb5int_ucs2_to_utf8(krb5_ucs2 c, char *buf)
-{
- return krb5int_ucs4_to_utf8((krb5_ucs4)c, buf);
-}
-
-/*
- * Advance to the next UTF-8 character
- *
- * Ignores length of multibyte character, instead rely on
- * continuation markers to find start of next character.
- * This allows for "resyncing" of when invalid characters
- * are provided provided the start of the next character
- * is appears within the 6 bytes examined.
- */
-char *krb5int_utf8_next(const char *p)
-{
- int i;
- const unsigned char *u = (const unsigned char *) p;
-
- if (KRB5_UTF8_ISASCII(u)) {
- return (char *) &p[1];
- }
-
- for (i = 1; i < 6; i++) {
- if ((u[i] & 0xc0) != 0x80) {
- return (char *) &p[i];
- }
- }
-
- return (char *) &p[i];
-}
-
-/*
- * Advance to the previous UTF-8 character
- *
- * Ignores length of multibyte character, instead rely on
- * continuation markers to find start of next character.
- * This allows for "resyncing" of when invalid characters
- * are provided provided the start of the next character
- * is appears within the 6 bytes examined.
- */
-char *krb5int_utf8_prev(const char *p)
-{
- int i;
- const unsigned char *u = (const unsigned char *) p;
-
- for (i = -1; i>-6 ; i--) {
- if ((u[i] & 0xc0 ) != 0x80) {
- return (char *) &p[i];
- }
- }
-
- return (char *) &p[i];
-}
-
-/*
- * Copy one UTF-8 character from src to dst returning
- * number of bytes copied.
- *
- * Ignores length of multibyte character, instead rely on
- * continuation markers to find start of next character.
- * This allows for "resyncing" of when invalid characters
- * are provided provided the start of the next character
- * is appears within the 6 bytes examined.
- */
-int krb5int_utf8_copy(char* dst, const char *src)
-{
- int i;
- const unsigned char *u = (const unsigned char *) src;
-
- dst[0] = src[0];
-
- if (KRB5_UTF8_ISASCII(u)) {
- return 1;
- }
-
- for (i=1; i<6; i++) {
- if ((u[i] & 0xc0) != 0x80) {
- return i;
- }
- dst[i] = src[i];
- }
-
- return i;
-}
-
-#ifndef UTF8_ALPHA_CTYPE
-/*
- * UTF-8 ctype routines
- * Only deals with characters < 0x80 (ie: US-ASCII)
- */
-
-int krb5int_utf8_isascii(const char * p)
-{
- unsigned c = * (const unsigned char *) p;
-
- return KRB5_ASCII(c);
-}
-
-int krb5int_utf8_isdigit(const char * p)
-{
- unsigned c = * (const unsigned char *) p;
-
- if (!KRB5_ASCII(c))
- return 0;
-
- return KRB5_DIGIT( c );
-}
-
-int krb5int_utf8_isxdigit(const char * p)
-{
- unsigned c = * (const unsigned char *) p;
-
- if (!KRB5_ASCII(c))
- return 0;
-
- return KRB5_HEX(c);
-}
-
-int krb5int_utf8_isspace(const char * p)
-{
- unsigned c = * (const unsigned char *) p;
-
- if (!KRB5_ASCII(c))
- return 0;
-
- switch(c) {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- case '\v':
- case '\f':
- return 1;
- }
-
- return 0;
-}
-
-/*
- * These are not needed by the C SDK and are
- * not "good enough" for general use.
- */
-int krb5int_utf8_isalpha(const char * p)
-{
- unsigned c = * (const unsigned char *) p;
-
- if (!KRB5_ASCII(c))
- return 0;
-
- return KRB5_ALPHA(c);
-}
-
-int krb5int_utf8_isalnum(const char * p)
-{
- unsigned c = * (const unsigned char *) p;
-
- if (!KRB5_ASCII(c))
- return 0;
-
- return KRB5_ALNUM(c);
-}
-#endif
-
-
-/*
- * UTF-8 string routines
- */
-
-/* like strchr() */
-char *krb5int_utf8_strchr(const char *str, const char *chr)
-{
- krb5_ucs4 chs, ch;
-
- if (krb5int_utf8_to_ucs4(chr, &ch) == -1)
- return NULL;
- for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
- if (krb5int_utf8_to_ucs4(str, &chs) == 0 && chs == ch)
- return (char *)str;
- }
-
- return NULL;
-}
-
-/* like strcspn() but returns number of bytes, not characters */
-size_t krb5int_utf8_strcspn(const char *str, const char *set)
-{
- const char *cstr, *cset;
- krb5_ucs4 chstr, chset;
-
- for (cstr = str; *cstr != '\0'; KRB5_UTF8_INCR(cstr)) {
- for (cset = set; *cset != '\0'; KRB5_UTF8_INCR(cset)) {
- if (krb5int_utf8_to_ucs4(cstr, &chstr) == 0
- && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
- return cstr - str;
- }
- }
-
- return cstr - str;
-}
-
-/* like strspn() but returns number of bytes, not characters */
-size_t krb5int_utf8_strspn(const char *str, const char *set)
-{
- const char *cstr, *cset;
- krb5_ucs4 chstr, chset;
-
- for (cstr = str; *cstr != '\0'; KRB5_UTF8_INCR(cstr)) {
- for (cset = set; ; KRB5_UTF8_INCR(cset)) {
- if (*cset == '\0')
- return cstr - str;
- if (krb5int_utf8_to_ucs4(cstr, &chstr) == 0
- && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
- break;
- }
- }
-
- return cstr - str;
-}
-
-/* like strpbrk(), replaces strchr() as well */
-char *krb5int_utf8_strpbrk(const char *str, const char *set)
-{
- const char *cset;
- krb5_ucs4 chstr, chset;
-
- for ( ; *str != '\0'; KRB5_UTF8_INCR(str)) {
- for (cset = set; *cset != '\0'; KRB5_UTF8_INCR(cset)) {
- if (krb5int_utf8_to_ucs4(str, &chstr) == 0
- && krb5int_utf8_to_ucs4(cset, &chset) == 0 && chstr == chset)
- return (char *)str;
- }
- }
-
- return NULL;
-}
-
-/* like strtok_r(), not strtok() */
-char *krb5int_utf8_strtok(char *str, const char *sep, char **last)
-{
- char *begin;
- char *end;
-
- if (last == NULL)
- return NULL;
-
- begin = str ? str : *last;
-
- begin += krb5int_utf8_strspn(begin, sep);
-
- if (*begin == '\0') {
- *last = NULL;
- return NULL;
- }
-
- end = &begin[krb5int_utf8_strcspn(begin, sep)];
-
- if (*end != '\0') {
- char *next = KRB5_UTF8_NEXT(end);
- *end = '\0';
- end = next;
- }
-
- *last = end;
-
- return begin;
-}