summary | refs | log | tree | commit | diff
path: root/strings/ctype-uca-scanner_next.inl
diff options
context:
space:
mode:
Diffstat (limited to 'strings/ctype-uca-scanner_next.inl')
-rw-r--r-- strings/ctype-uca-scanner_next.inl | 74
1 files changed, 38 insertions, 36 deletions
diff --git a/strings/ctype-uca-scanner_next.inl b/strings/ctype-uca-scanner_next.inl
index 79d25487b42..acab31f21ef 100644
--- a/strings/ctype-uca-scanner_next.inl
+++ b/strings/ctype-uca-scanner_next.inl
@@ -1,5 +1,5 @@
/* Copyright (c) 2004, 2013, Oracle and/or its affiliates.
- Copyright (c) 2009, 2021, MariaDB
+ Copyright (c) 2009, 2021, MariaDB
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
@@ -55,13 +55,8 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
#else
#define LOCAL_MAX_CONTRACTION_LENGTH MY_UCA_MAX_CONTRACTION
#endif
- /*
- Check if the weights for the previous character have been
- already fully scanned. If yes, then get the next character and
- initialize wbeg and wlength to its weight string.
- */
-
- if (scanner->wbeg[0])
+ uint16 weight= my_uca_scanner_next_expansion_weight(scanner);
+ if (weight)
{
/*
More weights left from the previous step.
@@ -69,7 +64,7 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
Return "0" as "nchars". The real nchars was set on a previous
iteration.
*/
- SCANNER_NEXT_RETURN(*scanner->wbeg++, 0);
+ SCANNER_NEXT_RETURN(weight, 0);
}
#ifdef SCANNER_NEXT_NCHARS
@@ -79,39 +74,44 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
#endif
{
const uint16 *wpage;
- my_wc_t wc[MY_UCA_MAX_CONTRACTION];
int mblen;
+ my_wc_t currwc= 0;
+ const uint16 *cweight;
/* Get next character */
#if MY_UCA_ASCII_OPTIMIZE
/* Get next ASCII character */
if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
{
- wc[0]= scanner->sbeg[0];
+ currwc= scanner->sbeg[0];
scanner->sbeg+= 1;
#if MY_UCA_COMPILE_CONTRACTIONS
- if (my_uca_needs_context_handling(scanner->level, wc[0]))
+ if (my_uca_needs_context_handling(scanner->level, currwc))
{
- const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc,
+ const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, currwc,
LOCAL_MAX_CONTRACTION_LENGTH);
if (cnt)
- SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
+ {
+ if ((weight= my_uca_scanner_set_weight(scanner, cnt->weight)))
+ SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
+ continue; /* Ignorable contraction */
+ }
}
#endif
scanner->page= 0;
- scanner->code= (int) wc[0];
- scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
- if (scanner->wbeg[0])
- SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1);
- continue;
+ scanner->code= (int) currwc;
+ cweight= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
+ if ((weight= my_uca_scanner_set_weight(scanner, cweight)))
+ SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1);
+ continue; /* Ignorable character */
}
else
#endif
/* Get next MB character */
- if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
- scanner->send)) <= 0))
+ if (((mblen= MY_MB_WC(scanner, &currwc, scanner->sbeg,
+ scanner->send)) <= 0))
{
if (scanner->sbeg >= scanner->send)
{
@@ -136,26 +136,29 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
}
scanner->sbeg+= mblen;
- if (wc[0] > scanner->level->maxchar)
+ if (currwc > scanner->level->maxchar)
{
- /* Return 0xFFFD as weight for all characters outside BMP */
- scanner->wbeg= nochar;
- SCANNER_NEXT_RETURN(0xFFFD, ignorable_nchars + 1);
+ SCANNER_NEXT_RETURN(my_uca_scanner_set_weight_outside_maxchar(scanner),
+ ignorable_nchars + 1);
}
#if MY_UCA_COMPILE_CONTRACTIONS
- if (my_uca_needs_context_handling(scanner->level, wc[0]))
+ if (my_uca_needs_context_handling(scanner->level, currwc))
{
- const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc,
+ const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, currwc,
LOCAL_MAX_CONTRACTION_LENGTH);
if (cnt)
- SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
+ {
+ if ((weight= my_uca_scanner_set_weight(scanner, cnt->weight)))
+ SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
+ continue; /* Ignorable contraction */
+ }
}
#endif
/* Process single character */
- scanner->page= wc[0] >> 8;
- scanner->code= wc[0] & 0xFF;
+ scanner->page= currwc >> 8;
+ scanner->code= currwc & 0xFF;
/* If weight page for w[0] does not exist, then calculate algoritmically */
if (!(wpage= scanner->level->weights[scanner->page]))
@@ -163,14 +166,13 @@ MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
ignorable_nchars + 1);
/* Calculate pointer to w[0]'s weight, using page and offset */
- scanner->wbeg= wpage +
- scanner->code * scanner->level->lengths[scanner->page];
- if (scanner->wbeg[0])
- break;
- /* Skip ignorable character and continue the loop */
+ cweight= wpage + scanner->code * scanner->level->lengths[scanner->page];
+ if ((weight= my_uca_scanner_set_weight(scanner, cweight)))
+ SCANNER_NEXT_RETURN(weight, ignorable_nchars + 1);
+ continue; /* Ignorable character */
}
- SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1);
+ SCANNER_NEXT_RETURN(0, 0); /* Not reachable */
}
#undef SCANNER_NEXT_NCHARS