summary refs log tree commit diff
path: root/regcomp.c
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-03-19 15:38:06 -0600
committer Karl Williamson <public@khwilliamson.com> 2012-03-19 18:23:44 -0600
commit 4b88fb76efce8c436e63b907c9842345d4fa77c7 (patch)
tree 67d8be3146bf0c32e93bd8209c141ed72c5a0ae2 /regcomp.c
parent 27d6c58a7e12243bef66c58b38e7d1415d9ca07e (diff)
download perl-4b88fb76efce8c436e63b907c9842345d4fa77c7.tar.gz
Use the new utf8 to code point functions
These functions should be used in preference to the old ones, which can read beyond the end of the input string.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 11
1 files changed, 7 insertions, 4 deletions
diff --git a/regcomp.c b/regcomp.c
index e3da6e9351..8c287bfba9 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3480,8 +3480,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
UV uc;
if (UTF) {
const U8 * const s = (U8*)STRING(scan);
+ uc = utf8_to_uvchr_buf(s, s + l, NULL);
l = utf8_length(s, s + l);
- uc = utf8_to_uvchr(s, NULL);
} else {
uc = *((U8*)STRING(scan));
}
@@ -3575,8 +3575,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
if (UTF) {
const U8 * const s = (U8 *)STRING(scan);
+ uc = utf8_to_uvchr_buf(s, s + l, NULL);
l = utf8_length(s, s + l);
- uc = utf8_to_uvchr(s, NULL);
}
else if (has_exactf_sharp_s) {
RExC_seen |= REG_SEEN_EXACTF_SHARP_S;
@@ -9822,7 +9822,10 @@ tryagain:
for (foldbuf = tmpbuf;
foldlen;
foldlen -= numlen) {
- ender = utf8_to_uvchr(foldbuf, &numlen);
+
+ /* tmpbuf has been constructed by us, so we
+ * know it is valid utf8 */
+ ender = valid_utf8_to_uvchr(foldbuf, &numlen);
if (numlen > 0) {
const STRLEN unilen = reguni(pRExC_state, ender, s);
s += unilen;
@@ -9858,7 +9861,7 @@ tryagain:
for (foldbuf = tmpbuf;
foldlen;
foldlen -= numlen) {
- ender = utf8_to_uvchr(foldbuf, &numlen);
+ ender = valid_utf8_to_uvchr(foldbuf, &numlen);
if (numlen > 0) {
const STRLEN unilen = reguni(pRExC_state, ender, s);
len += unilen;