summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2014-12-29 11:01:59 -0700
committer Karl Williamson <khw@cpan.org> 2014-12-29 13:52:57 -0700
commit d635b7101aac73db76a54016b58991ba7cd8d778 (patch)
tree 52dd9b77dd758da1581989b12c45f8a6c03cd4cb
parent 8bdce3944e3c1dd192c971851b33f718084e1942 (diff)
download perl-d635b7101aac73db76a54016b58991ba7cd8d778.tar.gz
foldEQ_utf8(): Add some internal flags
The comments explain their purpose
-rw-r--r-- utf8.c 13
-rw-r--r-- utf8.h 2
2 files changed, 14 insertions, 1 deletion
diff --git a/utf8.c b/utf8.c
index 57b1580251..b5470a8c71 100644
--- a/utf8.c
+++ b/utf8.c
@@ -3941,7 +3941,15 @@ L<http://www.unicode.org/unicode/reports/tr21/> (Case Mappings).
* routine. This allows that step to be skipped.
* Currently, this requires s1 to be encoded as UTF-8
* (u1 must be true), which is asserted for.
+ * FOLDEQ_S1_FOLDS_SANE With either NOMIX_ASCII or LOCALE, no folds may
+ * cross certain boundaries. Hence, the caller should
+ * let this function do the folding instead of
+ * pre-folding. This code contains an assertion to
+ * that effect. However, if the caller knows what
+ * it's doing, it can pass this flag to indicate that,
+ * and the assertion is skipped.
* FOLDEQ_S2_ALREADY_FOLDED Similarly.
+ * FOLDEQ_S2_FOLDS_SANE
*/
I32
Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2, U32 flags)
@@ -3962,7 +3970,10 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
PERL_ARGS_ASSERT_FOLDEQ_UTF8_FLAGS;
assert( ! ((flags & (FOLDEQ_UTF8_NOMIX_ASCII | FOLDEQ_LOCALE))
- && (flags & (FOLDEQ_S1_ALREADY_FOLDED | FOLDEQ_S2_ALREADY_FOLDED))));
+ && (((flags & FOLDEQ_S1_ALREADY_FOLDED)
+ && !(flags & FOLDEQ_S1_FOLDS_SANE))
+ || ((flags & FOLDEQ_S2_ALREADY_FOLDED)
+ && !(flags & FOLDEQ_S2_FOLDS_SANE)))));
/* The algorithm is to trial the folds without regard to the flags on
* the first line of the above assert(), and then see if the result
* violates them. This means that the inputs can't be pre-folded to a
diff --git a/utf8.h b/utf8.h
index a9e662764d..aaf878cbff 100644
--- a/utf8.h
+++ b/utf8.h
@@ -75,6 +75,8 @@ than just the ASCII characters, so C<is_invariant_string> is preferred.
#define FOLDEQ_LOCALE (1 << 1)
#define FOLDEQ_S1_ALREADY_FOLDED (1 << 2)
#define FOLDEQ_S2_ALREADY_FOLDED (1 << 3)
+#define FOLDEQ_S1_FOLDS_SANE (1 << 4)
+#define FOLDEQ_S2_FOLDS_SANE (1 << 5)
#define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2))