summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author Jarkko Hietaniemi <jhi@iki.fi> 2001-11-30 01:16:22 +0000
committer Jarkko Hietaniemi <jhi@iki.fi> 2001-11-30 01:16:22 +0000
commit cadb39a9446639e3c297a768022eb9c72347992a (patch)
tree 5b60c2cc75c82fd17442a5079781525984e078c9 /regexec.c
parent 596717cee028c8ad9e0b419ef9143521a52d81b0 (diff)
download perl-cadb39a9446639e3c297a768022eb9c72347992a.tar.gz
Add a note about folding vs lowercase.
p4raw-id: //depot/perl@13376
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 8
1 files changed, 8 insertions, 0 deletions
diff --git a/regexec.c b/regexec.c
index a8acb0631e..415bc70415 100644
--- a/regexec.c
+++ b/regexec.c
@@ -959,6 +959,14 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
if (do_utf8) {
STRLEN len;
+ /* ibcmp_utf8() uses to_uni_fold(), which folds Unicode
+ * more correctly than lowercasing does.
+ * However, it doesn't fully work, since folding
+ * is a one-to-many mapping and the regex optimizer is
+ * unaware of this, so it may throw out good matches.
+ * Fortunately, not getting this right is allowed
+ * for Unicode Regular Expression Support level 1,
+ * which requires only one-to-one matching. --jhi */
if (c1 == c2)
while (s <= e) {
if ( utf8_to_uvchr((U8*)s, &len) == c1