summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-12-30 08:51:34 -0700
committer Karl Williamson <public@khwilliamson.com> 2011-12-30 11:08:30 -0700
commit e35c60199c9f4785aaed6664c8f422ef5b22526c (patch)
tree c12921b9171607464ed0d036fe8df153121336f6
parent 2f3f243e7f2fb711af248d55c57fb9e7752e1feb (diff)
download perl-e35c60199c9f4785aaed6664c8f422ef5b22526c.tar.gz
Unicode::UCD: Extend prop_invmap() to handle multi-entries per code point
This is in preparation for Unicode 6.1, in which the Name_Alias property has more than one entry in its table per code point.
-rw-r--r-- lib/Unicode/UCD.pm 31
1 files changed, 31 insertions, 0 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index ee704fb21e..d5892f9c1a 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -2533,6 +2533,9 @@ RETRY:
# the property name, and 32 means we will accept 32 bit return values.
my $swash = utf8::SWASHNEW(__PACKAGE__, "To$prop", undef, 32, 0);
+ # Set to true if the input has multiple entries for a single code point.
+ my $has_multiples = 0;
+
# If didn't find it, could be because needs a proxy. And if was the
# 'Block' or 'Name' property, use a proxy even if did find it. Finding it
# would be the result of the installation changing mktables to output the
@@ -2817,6 +2820,31 @@ RETRY:
push @invmap, $missing;
}
}
+ elsif (@invlist > 1 && $invlist[-2] == $begin) {
+
+ # Here we handle the case where the input has multiple entries for
+ # each code point. mktables should have made sure that each such
+ # range contains only one code point. At this point, $invlist[-1]
+ # is the $missing that was added at the end of the last loop
+ # iteration, and [-2] is the last real input code point, and that
+ # code point is the same as the one we are adding now, making the
+ # new one a multiple entry. Add it to the existing entry, either
+ # by pushing it to the existing list of multiple entries, or
+ # converting the single current entry into a list with both on it.
+ # This is all we need do for this iteration.
+
+ if ($end != $begin) {
+ croak __PACKAGE__, "Multiple maps per code point in '$prop' require single-element ranges: begin=$begin, end=$end, map=$map";
+ }
+ if (! ref $invmap[-2]) {
+ $invmap[-2] = [ $invmap[-2], $map ];
+ }
+ else {
+ push @{$invmap[-2]}, $map;
+ }
+ $has_multiples = 1;
+ next;
+ }
elsif ($invlist[-1] == $begin) {
# If the input isn't in the most compact form, so that there are
@@ -3044,6 +3072,9 @@ RETRY:
# All others are simple scalars
$format = 's';
}
+ if ($has_multiples && $format !~ /l/) {
+ croak __PACKAGE__, "Wrong format '$format' for prop_invmap('$prop'); should indicate has lists";
+ }
return (\@invlist, \@invmap, $format, $missing);
}