summaryrefslogtreecommitdiff
path: root/regex/regcomp.c
diff options
context:
space:
mode:
authorunknown <bar@gw.udmsearch.izhnet.ru>2002-03-06 20:04:13 +0400
committerunknown <bar@gw.udmsearch.izhnet.ru>2002-03-06 20:04:13 +0400
commit654db69b8247a7e6be4e039a558f635af9abae57 (patch)
treec6287ade2a44504c39beaf6f8b3da5437184fae4 /regex/regcomp.c
parent325c22a784e54c0c26271d55896b60538c211724 (diff)
downloadmariadb-git-654db69b8247a7e6be4e039a558f635af9abae57.tar.gz
Regex library is switched to use new ctype tools
to allow usage of many character sets at a time. include/m_ctype.h: Added condition to simplify migrating from old ctype Added new style toupper, tolower which accepts charset in first argument regex/debug.c: Added charset argument regex/debug.ih: added charset argument regex/engine.c: added charset argument regex/engine.ih: added charset arguent regex/main.c: added charset argument regex/regcomp.c: added CHARSET_INFO field regex/regcomp.ih: Added charset argument regex/regex.h: Added #include <m_ctype.h> for CHARSET_INFO Added charset argument for regcomp() regex/regex2.h: New charset argument for ISWORD() regex/regexec.c: New charset argument regex/reginit.c: Move to new style ctype. However still needs fixes: instead of single static cclass variable, each charset must have it's own variable. sql/item_cmpfunc.cc: Pass charset field into regcomp() This will be fixed tommorow to use String->charset instead of default_charset_info
Diffstat (limited to 'regex/regcomp.c')
-rw-r--r--regex/regcomp.c42
1 files changed, 25 insertions, 17 deletions
diff --git a/regex/regcomp.c b/regex/regcomp.c
index 6f8221a706d..8a4ebbdfe17 100644
--- a/regex/regcomp.c
+++ b/regex/regcomp.c
@@ -28,6 +28,7 @@ struct parse {
# define NPAREN 10 /* we need to remember () 1-9 for back refs */
sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
sopno pend[NPAREN]; /* -> ) ([0] unused) */
+ CHARSET_INFO *charset; /* for ctype things */
};
#include "regcomp.ih"
@@ -99,10 +100,11 @@ static int never = 0; /* for use in asserts; shuts lint up */
= #define REG_DUMP 0200
*/
int /* 0 success, otherwise REG_something */
-regcomp(preg, pattern, cflags)
+regcomp(preg, pattern, cflags, charset)
regex_t *preg;
const char *pattern;
int cflags;
+CHARSET_INFO *charset;
{
struct parse pa;
register struct re_guts *g;
@@ -116,6 +118,7 @@ int cflags;
#endif
regex_init(); /* Init cclass if neaded */
+ preg->charset=charset;
cflags = GOODFLAGS(cflags);
if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
return(REG_INVARG);
@@ -146,6 +149,7 @@ int cflags;
p->end = p->next + len;
p->error = 0;
p->ncsalloc = 0;
+ p->charset = preg->charset;
for (i = 0; i < NPAREN; i++) {
p->pbegin[i] = 0;
p->pend[i] = 0;
@@ -327,7 +331,7 @@ register struct parse *p;
ordinary(p, c);
break;
case '{': /* okay as ordinary except if digit follows */
- if(REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT)) {}
+ if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), REG_BADRPT)) {}
/* FALLTHROUGH */
default:
ordinary(p, c);
@@ -339,7 +343,8 @@ register struct parse *p;
c = PEEK();
/* we call { a repetition if followed by a digit */
if (!( c == '*' || c == '+' || c == '?' ||
- (c == '{' && MORE2() && isdigit(PEEK2())) ))
+ (c == '{' && MORE2() &&
+ my_isdigit(p->charset,PEEK2())) ))
return; /* no repetition, we're done */
NEXT();
@@ -368,7 +373,7 @@ register struct parse *p;
case '{':
count = p_count(p);
if (EAT(',')) {
- if (isdigit(PEEK())) {
+ if (my_isdigit(p->charset,PEEK())) {
count2 = p_count(p);
if(REQUIRE(count <= count2, REG_BADBR)) {}
} else /* single number with comma */
@@ -389,7 +394,8 @@ register struct parse *p;
return;
c = PEEK();
if (!( c == '*' || c == '+' || c == '?' ||
- (c == '{' && MORE2() && isdigit(PEEK2())) ) )
+ (c == '{' && MORE2() &&
+ my_isdigit(p->charset,PEEK2())) ) )
return;
SETERROR(REG_BADRPT);
}
@@ -546,7 +552,7 @@ int starordinary; /* is a leading * an ordinary character? */
} else if (EATTWO('\\', '{')) {
count = p_count(p);
if (EAT(',')) {
- if (MORE() && isdigit(PEEK())) {
+ if (MORE() && my_isdigit(p->charset,PEEK())) {
count2 = p_count(p);
if(REQUIRE(count <= count2, REG_BADBR)) {}
} else /* single number with comma */
@@ -577,7 +583,7 @@ register struct parse *p;
register int count = 0;
register int ndigits = 0;
- while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
+ while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) {
count = count*10 + (GETNEXT() - '0');
ndigits++;
}
@@ -632,8 +638,8 @@ register struct parse *p;
register int ci;
for (i = p->g->csetsize - 1; i >= 0; i--)
- if (CHIN(cs, i) && isalpha(i)) {
- ci = othercase(i);
+ if (CHIN(cs, i) && my_isalpha(p->charset,i)) {
+ ci = othercase(p->charset,i);
if (ci != i)
CHadd(cs, ci);
}
@@ -744,7 +750,7 @@ register cset *cs;
register char *u;
register char c;
- while (MORE() && isalpha(PEEK()))
+ while (MORE() && my_isalpha(p->charset,PEEK()))
NEXT();
len = p->next - sp;
for (cp = cclasses; cp->name != NULL; cp++)
@@ -837,14 +843,15 @@ int endc; /* name ended by endc,']' */
== static char othercase(int ch);
*/
static char /* if no counterpart, return ch */
-othercase(ch)
+othercase(charset,ch)
+CHARSET_INFO *charset;
int ch;
{
- assert(isalpha(ch));
- if (isupper(ch))
- return(tolower(ch));
- else if (islower(ch))
- return(toupper(ch));
+ assert(my_isalpha(charset,ch));
+ if (my_isupper(charset,ch))
+ return(my_tolower(charset,ch));
+ else if (my_islower(charset,ch))
+ return(my_toupper(charset,ch));
else /* peculiar, but could happen */
return(ch);
}
@@ -887,7 +894,8 @@ register int ch;
{
register cat_t *cap = p->g->categories;
- if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
+ if ((p->g->cflags&REG_ICASE) && my_isalpha(p->charset,ch) &&
+ othercase(p->charset,ch) != ch)
bothcases(p, ch);
else {
EMIT(OCHAR, (unsigned char)ch);