Add a note about folding vs lowercase.

p4raw-id: //depot/perl@13376
author: Jarkko Hietaniemi <jhi@iki.fi> 2001-11-30 01:16:22 +0000
committer: Jarkko Hietaniemi <jhi@iki.fi> 2001-11-30 01:16:22 +0000
commit: cadb39a9446639e3c297a768022eb9c72347992a (patch)
tree: 5b60c2cc75c82fd17442a5079781525984e078c9 /regexec.c
parent: 596717cee028c8ad9e0b419ef9143521a52d81b0 (diff)
download: perl-cadb39a9446639e3c297a768022eb9c72347992a.tar.gz
1 files changed, 8 insertions, 0 deletions
diff --git a/regexec.c b/regexec.c
index a8acb0631e..415bc70415 100644
--- a/regexec.c
+++ b/regexec.c
@@ -959,6 +959,14 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
 
 	    if (do_utf8) {
 		STRLEN len;
+		/* ibcmp_utf8() uses to_uni_fold(), which folds Unicode
+		 * text more correctly than simple lowercasing does.
+		 * However, it does not work fully, since the folding
+		 * is a one-to-many mapping and the regex optimizer is
+		 * unaware of this, so it may throw out good matches.
+		 * Fortunately, not getting this right is allowed
+		 * for Unicode Regular Expression Support level 1:
+		 * only one-to-one matching is required. --jhi */
 		if (c1 == c2)
 		    while (s <= e) {
 			if ( utf8_to_uvchr((U8*)s, &len) == c1
author	Jarkko Hietaniemi <jhi@iki.fi>	2001-11-30 01:16:22 +0000
committer	Jarkko Hietaniemi <jhi@iki.fi>	2001-11-30 01:16:22 +0000
commit	cadb39a9446639e3c297a768022eb9c72347992a (patch)
tree	5b60c2cc75c82fd17442a5079781525984e078c9 /regexec.c
parent	596717cee028c8ad9e0b419ef9143521a52d81b0 (diff)
download	perl-cadb39a9446639e3c297a768022eb9c72347992a.tar.gz