diff options
author | Karl Williamson <khw@cpan.org> | 2014-12-29 11:01:59 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2014-12-29 13:52:57 -0700 |
commit | d635b7101aac73db76a54016b58991ba7cd8d778 (patch) | |
tree | 52dd9b77dd758da1581989b12c45f8a6c03cd4cb | |
parent | 8bdce3944e3c1dd192c971851b33f718084e1942 (diff) | |
download | perl-d635b7101aac73db76a54016b58991ba7cd8d778.tar.gz |
foldEQ_utf8(): Add some internal flags
The comments explain their purpose
-rw-r--r-- | utf8.c | 13 | ||||
-rw-r--r-- | utf8.h | 2 |
2 files changed, 14 insertions, 1 deletion
@@ -3941,7 +3941,15 @@ L<http://www.unicode.org/unicode/reports/tr21/> (Case Mappings).
  *                            routine. This allows that step to be skipped.
  *                            Currently, this requires s1 to be encoded as UTF-8
  *                            (u1 must be true), which is asserted for.
+ *  FOLDEQ_S1_FOLDS_SANE      With either NOMIX_ASCII or LOCALE, no folds may
+ *                            cross certain boundaries. Hence, the caller should
+ *                            let this function do the folding instead of
+ *                            pre-folding. This code contains an assertion to
+ *                            that effect. However, if the caller knows what
+ *                            it's doing, it can pass this flag to indicate that,
+ *                            and the assertion is skipped.
  *  FOLDEQ_S2_ALREADY_FOLDED  Similarly.
+ *  FOLDEQ_S2_FOLDS_SANE
  */
 I32
 Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const char *s2, char **pe2, UV l2, bool u2, U32 flags)
@@ -3962,7 +3970,10 @@ Perl_foldEQ_utf8_flags(pTHX_ const char *s1, char **pe1, UV l1, bool u1, const c
     PERL_ARGS_ASSERT_FOLDEQ_UTF8_FLAGS;
     assert( ! ((flags & (FOLDEQ_UTF8_NOMIX_ASCII | FOLDEQ_LOCALE))
-           && (flags & (FOLDEQ_S1_ALREADY_FOLDED | FOLDEQ_S2_ALREADY_FOLDED))));
+           && (((flags & FOLDEQ_S1_ALREADY_FOLDED)
+                && !(flags & FOLDEQ_S1_FOLDS_SANE))
+               || ((flags & FOLDEQ_S2_ALREADY_FOLDED)
+                   && !(flags & FOLDEQ_S2_FOLDS_SANE)))));
     /* The algorithm is to trial the folds without regard to the flags on
      * the first line of the above assert(), and then see if the result
      * violates them. This means that the inputs can't be pre-folded to a
@@ -75,6 +75,8 @@ than just the ASCII characters, so C<is_invariant_string> is preferred.
 #define FOLDEQ_LOCALE (1 << 1)
 #define FOLDEQ_S1_ALREADY_FOLDED (1 << 2)
 #define FOLDEQ_S2_ALREADY_FOLDED (1 << 3)
+#define FOLDEQ_S1_FOLDS_SANE (1 << 4)
+#define FOLDEQ_S2_FOLDS_SANE (1 << 5)
 #define ibcmp_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2) \
     cBOOL(! foldEQ_utf8(s1, pe1, l1, u1, s2, pe2, l2, u2))