summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2012-01-07 12:10:41 -0700
committer Karl Williamson <public@khwilliamson.com> 2012-02-09 10:13:57 -0700
commit 9d9177bec752277fb0bb090203f47d85c3aba878 (patch)
tree f312c12f5dd773346d960b45315375b032d72ce8
parent f59ff1943250231e4a9af32ff93e52cdfba13134 (diff)
download perl-9d9177bec752277fb0bb090203f47d85c3aba878.tar.gz
Add regen/mk_invlists.pl, charclass_invlists.h
This will be used to generate compile-time inversion lists in a C hdr file that can be included in programs for initialization speed. Three simple inversion lists are included in this initial commit.
-rw-r--r-- MANIFEST 2
-rwxr-xr-x Makefile.SH 3
-rw-r--r-- charclass_invlists.h 39
-rw-r--r-- lib/unicore/README.perl 4
-rw-r--r-- regen/mk_invlists.pl 83
5 files changed, 130 insertions, 1 deletions
diff --git a/MANIFEST b/MANIFEST
index 431c44ed7f..1aff2c6397 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -8,6 +8,7 @@ beos/beosish.h BeOS port
beos/nm.c BeOS port
cflags.SH A script that emits C compilation flags per file
Changes Describe how to peruse changes between releases
+charclass_invlists.h Compiled-in inversion lists
config_h.SH Produces config.h
configpm Produces lib/Config.pm
Configure Portability tool
@@ -4862,6 +4863,7 @@ regen/embed.pl Produces {embed,embedvar,proto}.h
regen/feature.pl Generates feature.pm
regen/keywords.pl Program to write keywords.h
regen/mg_vtable.pl generate mg_vtable.h
+regen/mk_invlists.pl Generates charclass_invlists.h
regen/mk_PL_charclass.pl Populate the PL_charclass table
regen/opcode.pl Opcode header generator
regen/opcodes Opcode data
diff --git a/Makefile.SH b/Makefile.SH
index b8845da8cb..5cd7179237 100755
--- a/Makefile.SH
+++ b/Makefile.SH
@@ -458,7 +458,8 @@ h2 = embed.h form.h gv.h handy.h hv.h keywords.h mg.h op.h opcode.h
h3 = pad.h patchlevel.h perl.h perlapi.h perly.h pp.h proto.h regcomp.h
h4 = regexp.h scope.h sv.h unixish.h util.h iperlsys.h thread.h
h5 = utf8.h warnings.h mydtrace.h op_reg_common.h l1_char_class_tab.h
-h = $(h1) $(h2) $(h3) $(h4) $(h5)
+h6 = charclass_invlists.h
+h = $(h1) $(h2) $(h3) $(h4) $(h5) $(h6)
c1 = av.c scope.c op.c doop.c doio.c dump.c gv.c hv.c mg.c reentr.c mro.c perl.c
c2 = perly.c pp.c pp_hot.c pp_ctl.c pp_sys.c regcomp.c regexec.c utf8.c sv.c
diff --git a/charclass_invlists.h b/charclass_invlists.h
new file mode 100644
index 0000000000..dbb32d857b
--- /dev/null
+++ b/charclass_invlists.h
@@ -0,0 +1,39 @@
+/* -*- buffer-read-only: t -*-
+ * !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+ * This file is built by regen/mk_invlists.pl from Unicode::UCD.
+ * Any changes made here will be lost!
+ */
+
+/* See the generating file for comments */
+
+
+UV Latin1_invlist[] = {
+ 2, /* Number of elements */
+ 0, /* Current iteration position */
+ 1064334010, /* Version and data structure type */
+ 0, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
+ 256,
+ 0
+};
+
+UV AboveLatin1_invlist[] = {
+ 1, /* Number of elements */
+ 0, /* Current iteration position */
+ 1064334010, /* Version and data structure type */
+ 1, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
+ 256
+};
+
+UV ASCII_invlist[] = {
+ 2, /* Number of elements */
+ 0, /* Current iteration position */
+ 1064334010, /* Version and data structure type */
+ 0, /* 0 if this is the first element of the list proper;
+ 1 if the next element is the first */
+ 128,
+ 0
+};
+
+/* ex: set ro: */
diff --git a/lib/unicore/README.perl b/lib/unicore/README.perl
index 2518a604a5..88152d2ccd 100644
--- a/lib/unicore/README.perl
+++ b/lib/unicore/README.perl
@@ -114,6 +114,10 @@ Also, you should regen l1_char_class_tab.h, by
perl regen/mk_L_charclass.pl
+and, regen charclass_invlists.h by
+
+perl regen/mk_invlists.pl
+
Finally:
p4 submit
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
new file mode 100644
index 0000000000..5b69708b46
--- /dev/null
+++ b/regen/mk_invlists.pl
@@ -0,0 +1,83 @@
+#!perl -w
+use 5.015;
+use strict;
+use warnings;
+use Unicode::UCD "prop_invlist";
+require 'regen/regen_lib.pl';
+
+# This program outputs charclass_invlists.h, which contains various inversion
+# lists in the form of C arrays that are to be used as-is for inversion lists.
+# Thus, the lists it contains are essentially pre-compiled, and need only a
+# light-weight fast wrapper to make them usable at run-time.
+
+# As such, this code knows about the internal structure of these lists, and
+# any change made to that has to be done here as well. A random number stored
+# in the headers is used to minimize the possibility of things getting
+# out-of-sync, or the wrong data structure being passed. Currently that
+# random number is:
+my $VERSION_DATA_STRUCTURE_TYPE = 1064334010;
+# Open the output file; open_new() is presumably supplied by regen_lib.pl.
+my $out_fh = open_new('charclass_invlists.h', '>',
+ {style => '*', by => $0,
+ from => "Unicode::UCD"});
+# Tell readers of the generated header where the explanation lives.
+print $out_fh "/* See the generating file for comments */\n\n";
+
+sub output_invlist ($$) {
+ my $name = shift; # Prefix for the name of the generated C array
+ my $invlist = shift; # Reference to inversion list array
+
+ # Output the inversion list $invlist using the name $name for it.
+ # It is output in the exact internal form for inversion lists.
+ # NOTE: the array behind $invlist is modified in place (see shift/push).
+ my $zero_or_one; # Is the last element of the header 0, or 1 ?
+
+ # If the first element is 0, it goes in the header, instead of the body
+ if ($invlist->[0] == 0) {
+ shift @$invlist;
+
+ $zero_or_one = 0;
+
+ # Add a dummy 0 at the end so that the length is constant. Inversion
+ # lists are always stored with enough room so that if they change from
+ # beginning with 0, they don't have to grow.
+ push @$invlist, 0;
+ }
+ else {
+ $zero_or_one = 1;
+ }
+
+ print $out_fh "\nUV ${name}_invlist[] = {\n";
+ # The next four values form the header that precedes the list proper.
+ print $out_fh "\t", scalar @$invlist, ",\t/* Number of elements */\n";
+ print $out_fh "\t0,\t/* Current iteration position */\n";
+ print $out_fh "\t$VERSION_DATA_STRUCTURE_TYPE, /* Version and data structure type */\n";
+ print $out_fh "\t", $zero_or_one,
+ ",\t/* 0 if this is the first element of the list proper;",
+ "\n\t\t 1 if the next element is the first */\n";
+
+ # The main body is the UVs passed in to this routine. Do the final
+ # element separately.
+ for my $i (0 .. @$invlist - 1 - 1) {
+ print $out_fh "\t$invlist->[$i],\n";
+ }
+
+ # The final element is written without a trailing comma.
+ print $out_fh "\t$invlist->[-1]\n";
+ # Close the C array initializer.
+ print $out_fh "};\n";
+}
+
+output_invlist("Latin1", [ 0, 256 ]);
+output_invlist("AboveLatin1", [ 256 ]);
+# The lists above are given literally; those below come from prop_invlist().
+for my $prop (qw(
+ ASCII
+ )
+) {
+ # Each property name is also used as the C array name prefix.
+ my @invlist = prop_invlist($prop);
+ output_invlist($prop, \@invlist);
+}
+# Finish the file via this helper (presumably from regen/regen_lib.pl).
+read_only_bottom_close_and_rename($out_fh)