diff options
author | Karl Williamson <public@khwilliamson.com> | 2011-12-30 08:51:34 -0700 |
---|---|---|
committer | Karl Williamson <public@khwilliamson.com> | 2011-12-30 11:08:30 -0700 |
commit | e35c60199c9f4785aaed6664c8f422ef5b22526c (patch) | |
tree | c12921b9171607464ed0d036fe8df153121336f6 | |
parent | 2f3f243e7f2fb711af248d55c57fb9e7752e1feb (diff) | |
download | perl-e35c60199c9f4785aaed6664c8f422ef5b22526c.tar.gz |
Unicode::UCD: Extend prop_invmap() to handle multiple entries per code point
This is in preparation for Unicode 6.1, in which the Name_Alias property
has more than one entry in its table per code point.
-rw-r--r-- | lib/Unicode/UCD.pm | 31 |
1 file changed, 31 insertions, 0 deletions
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index ee704fb21e..d5892f9c1a 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -2533,6 +2533,9 @@ RETRY: # the property name, and 32 means we will accept 32 bit return values. my $swash = utf8::SWASHNEW(__PACKAGE__, "To$prop", undef, 32, 0); + # If there are multiple entries for a single code point; + my $has_multiples = 0; + # If didn't find it, could be because needs a proxy. And if was the # 'Block' or 'Name' property, use a proxy even if did find it. Finding it # would be the result of the installation changing mktables to output the @@ -2817,6 +2820,31 @@ RETRY: push @invmap, $missing; } } + elsif (@invlist > 1 && $invlist[-2] == $begin) { + + # Here we handle the case where the input has multiple entries for + # each code point. mktables should have made sure that each such + # range contains only one code point. At this point, $invlist[-1] + # is the $missing that was added at the end of the last loop + # iteration, and [-2] is the last real input code point, and that + # code point is the same as the one we are adding now, making the + # new one a multiple entry. Add it to the existing entry, either + # by pushing it to the existing list of multiple entries, or + # converting the single current entry into a list with both on it. + # This is all we need do for this iteration. + + if ($end != $begin) { + croak __PACKAGE__, "Multiple maps per code point in '$prop' require single-element ranges: begin=$begin, end=$end, map=$map"; + } + if (! 
ref $invmap[-2]) { + $invmap[-2] = [ $invmap[-2], $map ]; + } + else { + push @{$invmap[-2]}, $map; + } + $has_multiples = 1; + next; + } elsif ($invlist[-1] == $begin) { # If the input isn't in the most compact form, so that there are @@ -3044,6 +3072,9 @@ RETRY: # All others are simple scalars $format = 's'; } + if ($has_multiples && $format !~ /l/) { + croak __PACKAGE__, "Wrong format '$format' for prop_invmap('$prop'); should indicate has lists"; + } return (\@invlist, \@invmap, $format, $missing); } |