summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2015-08-18 14:47:48 -0600
committerKarl Williamson <khw@cpan.org>2015-08-18 15:03:35 -0600
commit4b06b8deeb339269bad2112a4e6603e43f7bee36 (patch)
tree5cc8d08bafec32e3a02dfa9914356d7e7809e942
parentf3106bc89eb4bbffee5ca7cb67bd63d2f3ce87bf (diff)
downloadperl-4b06b8deeb339269bad2112a4e6603e43f7bee36.tar.gz
mktables: Add code point ages to --annotate option
This can be useful information.
-rw-r--r--charclass_invlists.h2
-rw-r--r--lib/unicore/mktables31
-rw-r--r--regcharclass.h2
3 files changed, 30 insertions, 5 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 64406bfe78..8f14d3167b 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -99537,7 +99537,7 @@ static const UV XPosixXDigit_invlist[] = { /* for EBCDIC POSIX-BC */
* 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
* 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
* a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables
+ * ad739a46951b5f46396074b0682a2cfeed24b633a742a8e1aa0e337f69ef8b1c lib/unicore/mktables
* 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
* c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
* 8a097f8f726bb1619af2f27f149ab87e60a1602f790147e3a561358be16abd27 regen/mk_invlists.pl
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 27cb45ab9d..81539367a2 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -1441,6 +1441,7 @@ sub objaddr($) {
# after all the input has been processed. But most can be skipped, as they
# have the same descriptive phrases, such as being unassigned
my @viacode; # Contains the 1 million character names
+my @age; # And their ages ("" if none)
my @printable; # boolean: And are those characters printable?
my @annotate_char_type; # Contains a type of those characters, specifically
# for the purposes of annotation.
@@ -1499,12 +1500,14 @@ sub populate_char_info ($) {
$annotate_char_type[$i] = $ABOVE_UNICODE_TYPE;
$printable[$i] = 0;
$end = $MAX_WORKING_CODEPOINT;
+ $age[$i] = "";
}
elsif ($gc-> table('Private_use')->contains($i)) {
$viacode[$i] = 'Private Use';
$annotate_char_type[$i] = $PRIVATE_USE_TYPE;
$printable[$i] = 0;
$end = $gc->table('Private_Use')->containing_range($i)->end;
+ $age[$i] = property_ref("Age")->value_of($i);
}
elsif ((defined ($nonchar =
Property::property_ref('Noncharacter_Code_Point'))
@@ -1515,11 +1518,13 @@ sub populate_char_info ($) {
$printable[$i] = 0;
$end = property_ref('Noncharacter_Code_Point')->table('Y')->
containing_range($i)->end;
+ $age[$i] = property_ref("Age")->value_of($i);
}
elsif ($gc-> table('Control')->contains($i)) {
$viacode[$i] = property_ref('Name_Alias')->value_of($i) || 'Control';
$annotate_char_type[$i] = $CONTROL_TYPE;
$printable[$i] = 0;
+ $age[$i] = property_ref("Age")->value_of($i);
}
elsif ($gc-> table('Unassigned')->contains($i)) {
$annotate_char_type[$i] = $UNASSIGNED_TYPE;
@@ -1540,12 +1545,14 @@ sub populate_char_info ($) {
$unassigned_sans_noncharacters->
containing_range($i)->end);
}
+ $age[$i] = property_ref("Age")->value_of($i);
}
elsif ($perl->table('_Perl_Surrogate')->contains($i)) {
$viacode[$i] = 'Surrogate';
$annotate_char_type[$i] = $SURROGATE_TYPE;
$printable[$i] = 0;
$end = $gc->table('Surrogate')->containing_range($i)->end;
+ $age[$i] = property_ref("Age")->value_of($i);
}
else {
Carp::my_carp_bug("Can't figure out how to annotate "
@@ -1561,7 +1568,18 @@ sub populate_char_info ($) {
# appended to the name, do that.
elsif ($annotate_char_type[$i] == $CP_IN_NAME) {
$viacode[$i] .= sprintf("-%04X", $i);
- $end = $perl_charname->containing_range($i)->end;
+
+ # Do all these as groups of the same age, instead of individually,
+ # because their names are so meaningless, and there are typically
+ # large quantities of them.
+ my $Age = property_ref("Age");
+ $age[$i] = $Age->value_of($i);
+ my $limit = $perl_charname->containing_range($i)->end;
+ $end = $i + 1;
+ while ($end <= $limit && $Age->value_of($end) == $age[$i]) {
+ $end++;
+ }
+ $end--;
}
# And here, has a name, but if it's a hangul syllable one, replace it with
@@ -1574,8 +1592,12 @@ sub populate_char_info ($) {
my $T = $TBase + $SIndex % $TCount;
$viacode[$i] = "HANGUL SYLLABLE $Jamo{$L}$Jamo{$V}";
$viacode[$i] .= $Jamo{$T} if $T != $TBase;
+ $age[$i] = property_ref("Age")->value_of($i);
$end = $perl_charname->containing_range($i)->end;
}
+ else {
+ $age[$i] = property_ref("Age")->value_of($i);
+ }
return if ! defined wantarray;
return $i if ! defined $end; # If not a range, return the input
@@ -6341,7 +6363,10 @@ END
else { # Indent if not displaying code points
$annotation = " " x 4;
}
- $annotation .= " $range_name" if $range_name;
+ if ($range_name) {
+ $annotation .= " $age[$i]" if $age[$i];
+ $annotation .= " $range_name";
+ }
# Include the number of code points in the
# range
@@ -6418,7 +6443,7 @@ END
}
if ($include_cp) {
- $annotation = sprintf "%04X", $i;
+ $annotation = sprintf "%04X %s", $i, $age[$i];
if ($use_adjustments) {
$annotation .= " => $output_value";
}
diff --git a/regcharclass.h b/regcharclass.h
index f7cd2fde5b..f4b6298276 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -2514,7 +2514,7 @@
* 1a0687fb9c6c4567e853913549df0944fe40821279a3e9cdaa6ab8679bc286fd lib/unicore/extracted/DLineBreak.txt
* 40bcfed3ca727c19e1331f6c33806231d5f7eeeabd2e6a9e06a3740c85d0c250 lib/unicore/extracted/DNumType.txt
* a18d502bad39d527ac5586d7bc93e29f565859e3bcc24ada627eff606d6f5fed lib/unicore/extracted/DNumValues.txt
- * 46f739fb5c9daf6fb457ed67f821d88d9eadd2df17b098f385b3b50f99c01acf lib/unicore/mktables
+ * ad739a46951b5f46396074b0682a2cfeed24b633a742a8e1aa0e337f69ef8b1c lib/unicore/mktables
* 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version
* c6884f4d629f04d1316f3476cb1050b6a1b98ca30c903262955d4eae337c6b1e regen/charset_translations.pl
* d9c04ac46bdd81bb3e26519f2b8eb6242cb12337205add3f7cf092b0c58dccc4 regen/regcharclass.pl