summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regen/regcomp.pl760
-rw-r--r--regnodes.h4
2 files changed, 481 insertions, 283 deletions
diff --git a/regen/regcomp.pl b/regen/regcomp.pl
index b90efc7595..9890a1aa69 100644
--- a/regen/regcomp.pl
+++ b/regen/regcomp.pl
@@ -1,5 +1,6 @@
#!/usr/bin/perl -w
-#
+#
+#
# Regenerate (overwriting only if changed):
#
# pod/perldebguts.pod
@@ -23,113 +24,204 @@ BEGIN {
}
use strict;
-open DESC, 'regcomp.sym';
-
-my $ind = 0;
-my (@name,@rest,@type,@code,@args,@flags,@longj,@cmnt);
-my ($longest_name_length,$desc,$lastregop) = 0;
-my (%seen_op, %type_alias);
-while (<DESC>) {
- # Special pod comments
- if (/^#\* ?/) { $cmnt[$ind] .= "# $'"; }
- # Truly blank lines possibly surrounding pod comments
- elsif (/^\s*$/) { $cmnt[$ind] .= "\n" }
-
- next if /^(?:#|\s*$)/;
- chomp; # No \z in 5.004
- s/\s*$//;
- if (/^-+\s*$/) {
- $lastregop= $ind;
- next;
+# NOTE I don't think anyone actually knows what all of these properties mean,
+# and I suspect some of them are outright unused. This is a first attempt to
+# clean up the generation so maybe one day we can move to something more self
+# documenting. (One might argue that an array of hashes of properties would
+# be easier to use.)
+#
+# Why we use the term regnode and nodes, and not say, opcodes, I am not sure.
+
+# General thoughts:
+# 1. We use a single continuum to represent both opcodes and states,
+# and in regexec.c we switch on the combined set.
+# 2. Opcodes have more information associated to them, states are simpler,
+# basically just an identifier/number that can be used to switch within
+# the state machine.
+# 3. Some opcode are order dependent.
+# 4. Output files often use "tricks" to reduce diff effects. Some of what
+# we do below is more clumsy looking than it could be because of this.
+
+# Op/state properties:
+#
+# Property In Descr
+# ----------------------------------------------------------------------------
+# name Both Name of op/state
+# id Both integer value for this opcode/state
+# optype Both Either 'op' or 'state'
+# line_num Both line_num number of the input file for this item.
+# type Op Type of node (aka regkind)
+# code Op what code is associated with this node (???)
+# args Op what type of args the node has (which regnode struct)
+# flags Op (???)
+# longj Op Whether this node is a longjump
+# comment Both Comment about node, if any
+# pod_comment Both Special comments for pod output (preceding lines in def)
+
+# Global State
+my @all; # all opcodes/state
+my %all; # hash of all opcode/state names
+
+my @ops; # array of just opcodes
+my @states; # array of just states
+
+my $longest_name_length= 0; # track lengths of names for nicer reports
+my (%type_alias); # map the type (??)
+
+# register a newly constructed node into our state tables.
+# ensures that we have no name collisions (on name anyway),
+# and issues the "id" for the node.
+sub register_node {
+ my ($node)= @_;
+
+ if ( $all{ $node->{name} } ) {
+ die "Duplicate item '$node->{name}' in regcomp.sym line $node->{line_num} "
+ . "previously defined on line $all{ $node->{name} }{line_num}\n";
+ } elsif (!$node->{optype}) {
+ die "must have an optype in node ", Dumper($node);
+ } elsif ($node->{optype} eq "op") {
+ push @ops, $node;
+ } elsif ($node->{optype} eq "state") {
+ push @states, $node;
+ } else {
+ die "Uknown optype '$node->{optype}' in ", Dumper($node);
}
- unless ($lastregop) {
- ($name[$ind], $desc, $rest[$ind]) = /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/;
+ $node->{id}= 0 + @all;
+ push @all, $node;
+ $all{ $node->{name} }= $node;
+}
- if (defined $seen_op{$name[$ind]}) {
- die "Duplicate regop $name[$ind] in regcomp.sym line $. previously defined on line $seen_op{$name[$ind]}\n";
- } else {
- $seen_op{$name[$ind]}= $.;
- }
+# Parse and add an opcode definition to the global state.
+# An opcode definition looks like this:
+#
+# +- args
+# | +- flags
+# | | +- longjmp
+# Name Type code | | | ; comment
+# --------------------------------------------------------------------------
+# IFMATCH BRANCHJ, off 1 . 2 ; Succeeds if the following matches.
+# UNLESSM BRANCHJ, off 1 . 2 ; Fails if the following matches.
+# SUSPEND BRANCHJ, off 1 V 1 ; "Independent" sub-RE.
+# IFTHEN BRANCHJ, off 1 V 1 ; Switch, should be preceded by switcher.
+# GROUPP GROUPP, num 1 ; Whether the group matched.
+#
+# Not every opcode definition has all of these. We should maybe make this
+# nicer/easier to read in the future. Also note that the above is tab
+# sensitive.
+
+sub parse_opcode_def {
+ my ( $text, $line_num, $pod_comment )= @_;
+ my $node= {
+ line_num => $line_num,
+ pod_comment => $pod_comment,
+ optype => "op",
+ };
- ($type[$ind], $code[$ind], $args[$ind], $flags[$ind], $longj[$ind])
- = split /[,\s]\s*/, $desc;
+ # first split the line into three, the initial NAME, a middle part
+ # that we call "desc" which contains various (not well documented) things,
+ # and a comment section.
+ @{$node}{qw(name desc comment)}= /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/
+ or die "Failed to match $_";
- if (!defined $seen_op{$type[$ind]} and !defined $type_alias{$type[$ind]}) {
- #warn "Regop type '$type[$ind]' from regcomp.sym line $. is not an existing regop, and will be aliased to $name[$ind]\n"
- # if -t STDERR;
- $type_alias{$type[$ind]}= $name[$ind];
- }
+ # the content of the "desc" field from the first step is extracted here:
+ @{$node}{qw(type code args flags longj)}= split /[,\s]\s*/, $node->{desc};
- $longest_name_length = length $name[$ind]
- if length $name[$ind] > $longest_name_length;
- ++$ind;
- } else {
- my ($type,@lists)=split /\s+/, $_;
- die "No list? $type" if !@lists;
- foreach my $list (@lists) {
- my ($names,$special)=split /:/, $list , 2;
- $special ||= "";
- foreach my $name (split /,/,$names) {
- my $real= $name eq 'resume'
- ? "resume_$type"
- : "${type}_$name";
- my @suffix;
- if (!$special) {
- @suffix=("");
- } elsif ($special=~/\d/) {
- @suffix=(1..$special);
- } elsif ($special eq 'FAIL') {
- @suffix=("","_fail");
- } else {
- die "unknown :type ':$special'";
- }
- foreach my $suffix (@suffix) {
- $name[$ind]="$real$suffix";
- $type[$ind]=$type;
- $rest[$ind]="state for $type";
- ++$ind;
- }
+ $node->{$_} //= "" for qw(type code args flags longj);
+
+ register_node($node); # has to be before the type_alias code below
+
+ if ( !$all{ $node->{type} } and !$type_alias{ $node->{type} } ) {
+
+ #warn "Regop type '$node->{type}' from regcomp.sym line $line_num"
+ # ." is not an existing regop, and will be aliased to $node->{name}\n"
+ # if -t STDERR;
+ $type_alias{ $node->{type} }= $node->{name};
+ }
+
+ $longest_name_length= length $node->{name}
+ if length $node->{name} > $longest_name_length;
+}
+
+# parse out a state definition and add the resulting data
+# into the global state. may create multiple new states from
+# a single definition (this is part of the point).
+# Format for states:
+# REGOP \t typelist [ \t typelist]
+# typelist= namelist
+# = namelist:FAIL
+# = name:count
+# Eg:
+# WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL
+# BRANCH next:FAIL
+# CURLYM A,B:FAIL
+#
+# The CURLYM definition would create the states:
+# CURLYM_A, CURLYM_A_fail, CURLYM_B, CURLYM_B_fail
+sub parse_state_def {
+ my ( $text, $line_num, $pod_comment )= @_;
+ my ( $type, @lists )= split /\s+/, $text;
+ die "No list? $type" if !@lists;
+ foreach my $list (@lists) {
+ my ( $names, $special )= split /:/, $list, 2;
+ $special ||= "";
+ foreach my $name ( split /,/, $names ) {
+ my $real=
+ $name eq 'resume'
+ ? "resume_$type"
+ : "${type}_$name";
+ my @suffix;
+ if ( !$special ) {
+ @suffix= ("");
+ }
+ elsif ( $special =~ /\d/ ) {
+ @suffix= ( 1 .. $special );
+ }
+ elsif ( $special eq 'FAIL' ) {
+ @suffix= ( "", "_fail" );
+ }
+ else {
+ die "unknown :type ':$special'";
+ }
+ foreach my $suffix (@suffix) {
+ my $node= {
+ name => "$real$suffix",
+ optype => "state",
+ type => $type || "",
+ comment => "state for $type",
+ line_num => $line_num,
+ };
+ register_node($node);
}
}
-
}
}
-# use fixed width to keep the diffs between regcomp.pl recompiles
-# as small as possible.
-my ($width,$rwidth,$twidth)=(22,12,9);
-$lastregop ||= $ind;
-my $tot = $ind;
-close DESC;
-die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!"
- if $lastregop>256;
sub process_flags {
- my ($flag, $varname, $comment) = @_;
- $comment = '' unless defined $comment;
-
- $ind = 0;
- my @selected;
- my $bitmap = '';
- do {
- my $set = $flags[$ind] && $flags[$ind] eq $flag ? 1 : 0;
- # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic
- # ops in the C code.
- my $current = do {
- local $^W;
- ord do {
- substr $bitmap, ($ind >> 3);
- }
- };
- substr($bitmap, ($ind >> 3), 1) = chr($current | ($set << ($ind & 7)));
-
- push @selected, $name[$ind] if $set;
- } while (++$ind < $lastregop);
- my $out_string = join ', ', @selected, 0;
- $out_string =~ s/(.{1,70},) /$1\n /g;
+ my ( $flag, $varname, $comment )= @_;
+ $comment= '' unless defined $comment;
+
+ my @selected;
+ my $bitmap= '';
+ for my $node (@ops) {
+ my $set= $node->{flags} && $node->{flags} eq $flag ? 1 : 0;
+
+ # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic
+ # ops in the C code.
+ my $current= do {
+ no warnings;
+ ord substr $bitmap, ( $node->{id} >> 3 );
+ };
+ substr( $bitmap, ( $node->{id} >> 3 ), 1 )=
+ chr( $current | ( $set << ( $node->{id} & 7 ) ) );
+
+ push @selected, $node->{name} if $set;
+ }
+ my $out_string= join ', ', @selected, 0;
+ $out_string =~ s/(.{1,70},) /$1\n /g;
- my $out_mask = join ', ', map {sprintf "0x%02X", ord $_} split '', $bitmap;
+ my $out_mask= join ', ', map { sprintf "0x%02X", ord $_ } split '', $bitmap;
- return $comment . <<"EOP";
+ return $comment . <<"EOP";
#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7)))
#ifndef DOINIT
@@ -150,36 +242,78 @@ EXTCONST U8 PL_${varname}_bitmask[] = {
EOP
}
-my $out = open_new('regnodes.h', '>',
- { by => 'regen/regcomp.pl', from => 'regcomp.sym' });
-printf $out <<EOP,
+sub read_definition {
+ my ( $file )= @_;
+ my ( $seen_sep, $pod_comment )= "";
+ open my $in_fh, "<", $file
+ or die "Failed to open '$file' for reading: $!";
+ while (<$in_fh>) {
+
+ # Special pod comments
+ if (/^#\* ?/) { $pod_comment .= "# $'"; }
+
+ # Truly blank lines possibly surrounding pod comments
+ elsif (/^\s*$/) { $pod_comment .= "\n" }
+
+ next if /\A\s*#/ || /\A\s*\z/;
+
+ s/\s*\z//;
+ if (/^-+\s*$/) {
+ $seen_sep= 1;
+ next;
+ }
+
+ if ($seen_sep) {
+ parse_state_def( $_, $., $pod_comment );
+ }
+ else {
+ parse_opcode_def( $_, $., $pod_comment );
+ }
+ $pod_comment= "";
+ }
+ close $in_fh;
+ die "Too many regexp/state opcodes! Maximum is 256, but there are ", 0 + @all,
+ " in file!"
+ if @all > 256;
+}
+
+# use fixed width to keep the diffs between regcomp.pl recompiles
+# as small as possible.
+my ( $width, $rwidth, $twidth )= ( 22, 12, 9 );
+
+sub print_state_defs {
+ my ($out)= @_;
+ printf $out <<EOP,
/* Regops and State definitions */
#define %*s\t%d
#define %*s\t%d
EOP
- -$width, REGNODE_MAX => $lastregop - 1,
- -$width, REGMATCH_STATE_MAX => $tot - 1
-;
-
-my %rev_type_alias= reverse %type_alias;
-for ($ind=0; $ind < $lastregop ; ++$ind) {
- printf $out "#define\t%*s\t%d\t/* %#04x %s */\n",
- -$width, $name[$ind], $ind, $ind, $rest[$ind];
- if (defined(my $alias= $rev_type_alias{$name[$ind]})) {
- printf $out "#define\t%*s\t%d\t/* %#04x %s */\n",
- -$width, $alias, $ind, $ind, "type alias";
- }
+ -$width,
+ REGNODE_MAX => $#ops,
+ -$width, REGMATCH_STATE_MAX => $#all;
+
+ my %rev_type_alias= reverse %type_alias;
+ for my $node (@ops) {
+ printf $out "#define\t%*s\t%d\t/* %#04x %s */\n",
+ -$width, $node->{name}, $node->{id}, $node->{id}, $node->{comment};
+ if ( defined( my $alias= $rev_type_alias{ $node->{name} } ) ) {
+ printf $out "#define\t%*s\t%d\t/* %#04x %s */\n",
+ -$width, $alias, $node->{id}, $node->{id}, "type alias";
+ }
+ }
-}
-print $out "\t/* ------------ States ------------- */\n";
-for ( ; $ind < $tot ; $ind++) {
- printf $out "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n",
- -$width, $name[$ind], $ind - $lastregop + 1, $rest[$ind];
+ print $out "\t/* ------------ States ------------- */\n";
+ for my $node (@states) {
+ printf $out "#define\t%*s\t(REGNODE_MAX + %d)\t/* %s */\n",
+ -$width, $node->{name}, $node->{id} - $#ops, $node->{comment};
+ }
}
-print $out <<EOP;
+sub print_regkind {
+ my ($out)= @_;
+ print $out <<EOP;
/* PL_regkind[] What type of regop or state is this. */
@@ -188,54 +322,80 @@ EXTCONST U8 PL_regkind[];
#else
EXTCONST U8 PL_regkind[] = {
EOP
+ use Data::Dumper;
+ foreach my $node (@all) {
+ print Dumper($node) if !defined $node->{type} or !defined( $node->{name} );
+ printf $out "\t%*s\t/* %*s */\n",
+ -1 - $twidth, "$node->{type},", -$width, $node->{name};
+ print $out "\t/* ------------ States ------------- */\n"
+ if $node->{id} == $#ops and $node->{id} != $#all;
+ }
-$ind = 0;
-do {
- printf $out "\t%*s\t/* %*s */\n",
- -1-$twidth, "$type[$ind],", -$width, $name[$ind];
- print $out "\t/* ------------ States ------------- */\n"
- if $ind + 1 == $lastregop and $lastregop != $tot;
-} while (++$ind < $tot);
-
-print $out <<EOP;
+ print $out <<EOP;
};
#endif
+EOP
+}
+
+sub wrap_ifdef_print {
+ my $out= shift;
+ my $token= shift;
+ print $out <<EOP;
+
+#ifdef $token
+EOP
+ $_->($out) for @_;
+ print $out <<EOP;
+#endif /* $token */
+
+EOP
+}
+
+sub print_regarglen {
+ my ($out)= @_;
+ print $out <<EOP;
/* regarglen[] - How large is the argument part of the node (in regnodes) */
-#ifdef REG_COMP_C
static const U8 regarglen[] = {
EOP
-$ind = 0;
-do {
- my $size = 0;
- $size = "EXTRA_SIZE(struct regnode_$args[$ind])" if $args[$ind];
-
- printf $out "\t%*s\t/* %*s */\n",
- -37, "$size,",-$rwidth,$name[$ind];
-} while (++$ind < $lastregop);
+ foreach my $node (@ops) {
+ my $size= 0;
+ $size= "EXTRA_SIZE(struct regnode_$node->{args})" if $node->{args};
-print $out <<EOP;
+ printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name};
+ }
+
+ print $out <<EOP;
};
+EOP
+}
+
+sub print_reg_off_by_arg {
+ my ($out)= @_;
+ print $out <<EOP;
/* reg_off_by_arg[] - Which argument holds the offset to the next node */
static const char reg_off_by_arg[] = {
EOP
-$ind = 0;
-do {
- my $size = $longj[$ind] || 0;
+ foreach my $node (@ops) {
+ my $size= $node->{longj} || 0;
- printf $out "\t%d,\t/* %*s */\n",
- $size, -$rwidth, $name[$ind]
-} while (++$ind < $lastregop);
+ printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name};
+ }
-print $out <<EOP;
+ print $out <<EOP;
};
-#endif /* REG_COMP_C */
+EOP
+}
+
+sub print_reg_name {
+ my ($out)= @_;
+ print $out <<EOP;
/* reg_name[] - Opcode/state names in string form, for debugging */
@@ -245,30 +405,30 @@ EXTCONST char * PL_reg_name[];
EXTCONST char * const PL_reg_name[] = {
EOP
-$ind = 0;
-my $ofs = 0;
-my $sym = "";
-do {
- my $size = $longj[$ind] || 0;
-
- printf $out "\t%*s\t/* $sym%#04x */\n",
- -3-$width,qq("$name[$ind]",), $ind - $ofs;
- if ($ind + 1 == $lastregop and $lastregop != $tot) {
- print $out "\t/* ------------ States ------------- */\n";
- $ofs = $lastregop - 1;
- $sym = 'REGNODE_MAX +';
- }
-
-} while (++$ind < $tot);
+ my $ofs= 0;
+ my $sym= "";
+ foreach my $node (@all) {
+ my $size= $node->{longj} || 0;
+
+ printf $out "\t%*s\t/* $sym%#04x */\n",
+ -3 - $width, qq("$node->{name}",), $node->{id} - $ofs;
+ if ( $node->{id} == $#ops and @ops != @all ) {
+ print $out "\t/* ------------ States ------------- */\n";
+ $ofs= $#ops;
+ $sym= 'REGNODE_MAX +';
+ }
+ }
-print $out <<EOP;
+ print $out <<EOP;
};
#endif /* DOINIT */
EOP
+}
-{
-print $out <<EOP;
+sub print_reg_extflags_name {
+ my ($out)= @_;
+ print $out <<EOP;
/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */
#ifndef DOINIT
@@ -277,99 +437,103 @@ EXTCONST char * PL_reg_extflags_name[];
EXTCONST char * const PL_reg_extflags_name[] = {
EOP
-my %rxfv;
-my %definitions; # Remember what the symbol definitions are
-my $val = 0;
-my %reverse;
-my $REG_EXTFLAGS_NAME_SIZE = 0;
-foreach my $file ("op_reg_common.h", "regexp.h") {
- open FH,"<$file" or die "Can't read $file: $!";
- while (<FH>) {
-
- # optional leading '_'. Return symbol in $1, and strip it from
- # rest of line
- if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
- chomp;
- my $define = $1;
- my $orig= $_;
- s{ /\* .*? \*/ }{ }x; # Replace comments by a blank
-
- # Replace any prior defined symbols by their values
- foreach my $key (keys %definitions) {
- s/\b$key\b/$definitions{$key}/g;
- }
+ my %rxfv;
+ my %definitions; # Remember what the symbol definitions are
+ my $val= 0;
+ my %reverse;
+ my $REG_EXTFLAGS_NAME_SIZE= 0;
+ foreach my $file ( "op_reg_common.h", "regexp.h" ) {
+ open my $in_fh, "<", $file or die "Can't read '$file': $!";
+ while (<$in_fh>) {
+
+ # optional leading '_'. Return symbol in $1, and strip it from
+ # comment of line
+ if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
+ chomp;
+ my $define= $1;
+ my $orig= $_;
+ s{ /\* .*? \*/ }{ }x; # Replace comments by a blank
+
+ # Replace any prior defined symbols by their values
+ foreach my $key ( keys %definitions ) {
+ s/\b$key\b/$definitions{$key}/g;
+ }
- # Remove the U suffix from unsigned int literals
- s/\b([0-9]+)U\b/$1/g;
+ # Remove the U suffix from unsigned int literals
+ s/\b([0-9]+)U\b/$1/g;
- my $newval = eval $_; # Get numeric definition
+ my $newval= eval $_; # Get numeric definition
- $definitions{$define} = $newval;
+ $definitions{$define}= $newval;
- next unless $_ =~ /<</; # Bit defines use left shift
- if($val & $newval) {
- my @names=($define, $reverse{$newval});
- s/PMf_// for @names;
- if ($names[0] ne $names[1]) {
- die sprintf "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)", $newval, $orig, $_;
+ next unless $_ =~ /<</; # Bit defines use left shift
+ if ( $val & $newval ) {
+ my @names= ( $define, $reverse{$newval} );
+ s/PMf_// for @names;
+ if ( $names[0] ne $names[1] ) {
+ die sprintf
+ "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)",
+ $newval, $orig, $_;
+ }
+ next;
}
- next;
+ $val |= $newval;
+ $rxfv{$define}= $newval;
+ $reverse{$newval}= $define;
}
- $val|=$newval;
- $rxfv{$define}= $newval;
- $reverse{$newval} = $define;
}
}
-}
-my %vrxf=reverse %rxfv;
-printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N', $val;
-my %multibits;
-for (0..31) {
- my $power_of_2 = 2**$_;
- my $n=$vrxf{$power_of_2};
- my $extra = "";
- if (! $n) {
-
- # Here, there was no name that matched exactly the bit. It could be
- # either that it is unused, or the name matches multiple bits.
- if (! ($val & $power_of_2)) {
- $n = "UNUSED_BIT_$_";
- }
- else {
-
- # Here, must be because it matches multiple bits. Look through
- # all possibilities until find one that matches this one. Use
- # that name, and all the bits it matches
- foreach my $name (keys %rxfv) {
- if ($rxfv{$name} & $power_of_2) {
- $n = $name . ( $multibits{$name}++ );
- $extra= sprintf qq{ : "%s" - 0x%08x}, $name, $rxfv{$name}
- if $power_of_2 != $rxfv{$name};
- last;
+ my %vrxf= reverse %rxfv;
+ printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N',
+ $val;
+ my %multibits;
+ for ( 0 .. 31 ) {
+ my $power_of_2= 2**$_;
+ my $n= $vrxf{$power_of_2};
+ my $extra= "";
+ if ( !$n ) {
+
+ # Here, there was no name that matched exactly the bit. It could be
+ # either that it is unused, or the name matches multiple bits.
+ if ( !( $val & $power_of_2 ) ) {
+ $n= "UNUSED_BIT_$_";
+ }
+ else {
+
+ # Here, must be because it matches multiple bits. Look through
+ # all possibilities until find one that matches this one. Use
+ # that name, and all the bits it matches
+ foreach my $name ( keys %rxfv ) {
+ if ( $rxfv{$name} & $power_of_2 ) {
+ $n= $name . ( $multibits{$name}++ );
+ $extra= sprintf qq{ : "%s" - 0x%08x}, $name,
+ $rxfv{$name}
+ if $power_of_2 != $rxfv{$name};
+ last;
+ }
}
}
}
+ s/\bRXf_(PMf_)?// for $n, $extra;
+ printf $out qq(\t%-20s/* 0x%08x%s */\n), qq("$n",), $power_of_2, $extra;
+ $REG_EXTFLAGS_NAME_SIZE++;
}
- s/\bRXf_(PMf_)?// for $n, $extra;
- printf $out qq(\t%-20s/* 0x%08x%s */\n),
- qq("$n",),$power_of_2, $extra;
- $REG_EXTFLAGS_NAME_SIZE++;
-}
-
-print $out <<EOP;
+
+ print $out <<EOP;
};
#endif /* DOINIT */
-EOP
-print $out <<EOQ
#ifdef DEBUGGING
# define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE
#endif
+EOP
-EOQ
}
-{
-print $out <<EOP;
+
+sub print_reg_intflags_name {
+ my ($out)= @_;
+ print $out <<EOP;
+
/* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */
#ifndef DOINIT
@@ -378,37 +542,42 @@ EXTCONST char * PL_reg_intflags_name[];
EXTCONST char * const PL_reg_intflags_name[] = {
EOP
-my %rxfv;
-my %definitions; # Remember what the symbol definitions are
-my $val = 0;
-my %reverse;
-my $REG_INTFLAGS_NAME_SIZE = 0;
-foreach my $file ("regcomp.h") {
- open my $fh, "<", $file or die "Can't read $file: $!";
- while (<$fh>) {
- # optional leading '_'. Return symbol in $1, and strip it from
- # rest of line
- if (m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi) {
- chomp;
- my $define = $1;
- my $abbr= $2;
- my $hex= $3;
- my $comment= $4;
- my $val= hex($hex);
- $comment= $comment ? " - $comment" : "";
-
- printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",), $val, $define, $comment;
- $REG_INTFLAGS_NAME_SIZE++;
+ my %rxfv;
+ my %definitions; # Remember what the symbol definitions are
+ my $val= 0;
+ my %reverse;
+ my $REG_INTFLAGS_NAME_SIZE= 0;
+ foreach my $file ("regcomp.h") {
+ open my $fh, "<", $file or die "Can't read $file: $!";
+ while (<$fh>) {
+
+ # optional leading '_'. Return symbol in $1, and strip it from
+ # comment of line
+ if (
+ m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi
+ )
+ {
+ chomp;
+ my $define= $1;
+ my $abbr= $2;
+ my $hex= $3;
+ my $comment= $4;
+ my $val= hex($hex);
+ $comment= $comment ? " - $comment" : "";
+
+ printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",),
+ $val, $define, $comment;
+ $REG_INTFLAGS_NAME_SIZE++;
+ }
}
}
-}
-print $out <<EOP;
+ print $out <<EOP;
};
#endif /* DOINIT */
EOP
-print $out <<EOQ;
+ print $out <<EOQ;
#ifdef DEBUGGING
# define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE
#endif
@@ -416,62 +585,89 @@ print $out <<EOQ;
EOQ
}
-print $out process_flags('V', 'varies', <<'EOC');
+sub print_process_flags {
+ my ($out)= @_;
+
+ print $out process_flags( 'V', 'varies', <<'EOC');
/* The following have no fixed length. U8 so we can do strchr() on it. */
EOC
-print $out process_flags('S', 'simple', <<'EOC');
+ print $out process_flags( 'S', 'simple', <<'EOC');
/* The following always have a length of 1. U8 we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
EOC
-read_only_bottom_close_and_rename($out);
+}
-my $guts = open_new('pod/perldebguts.pod', '>');
+sub do_perldebguts {
+ my $guts= open_new( 'pod/perldebguts.pod', '>' );
-my $code;
-my $name_fmt = '<' x ($longest_name_length-1);
-my $descr_fmt = '<' x (58-$longest_name_length);
-eval <<EOD;
+ my $node;
+ my $code;
+ my $name_fmt= '<' x ( $longest_name_length - 1 );
+ my $descr_fmt= '<' x ( 58 - $longest_name_length );
+ eval <<EOD or die $@;
format GuTS =
^*~~
- \$cmnt[\$_]
+ \$node->{pod_comment}
^$name_fmt ^<<<<<<<<< ^$descr_fmt~~
- \$name[\$_], \$code, \$rest[\$_]
+ \$node->{name}, \$code, \$node->{comment}//''
.
+1;
EOD
-
-select +(select($guts), do {
- $~ = "GuTS";
+
+ my $old_fh= select($guts);
+ $~= "GuTS";
open my $oldguts, "pod/perldebguts.pod"
or die "$0 cannot open pod/perldebguts.pod for reading: $!";
- while(<$oldguts>) {
+ while (<$oldguts>) {
print;
last if /=for regcomp.pl begin/;
}
- print <<'end';
+ print <<'END_OF_DESCR';
# TYPE arg-description [num-args] [longjump-len] DESCRIPTION
-end
- for (0..$lastregop-1) {
- $code = "$code[$_] ".($args[$_]||"");
- $code .= " $longj[$_]" if $longj[$_];
- if ($cmnt[$_] ||= "") {
+END_OF_DESCR
+ for my $n (@ops) {
+ $node= $n;
+ $code= "$node->{code} " . ( $node->{args} || "" );
+ $code .= " $node->{longj}" if $node->{longj};
+ if ( $node->{pod_comment} ||= "" ) {
+
# Trim multiple blanks
- $cmnt[$_] =~ s/^\n\n+/\n/; $cmnt[$_] =~ s/\n\n+$/\n\n/
+ $node->{pod_comment} =~ s/^\n\n+/\n/;
+ $node->{pod_comment} =~ s/\n\n+$/\n\n/;
}
write;
}
print "\n";
- while(<$oldguts>) {
+ while (<$oldguts>) {
last if /=for regcomp.pl end/;
}
do { print } while <$oldguts>;
+ select $old_fh;
+ close_and_rename($guts);
+}
-})[0];
+read_definition("regcomp.sym");
+my $out= open_new( 'regnodes.h', '>',
+ { by => 'regen/regcomp.pl', from => 'regcomp.sym' } );
+print_state_defs($out);
+print_regkind($out);
+wrap_ifdef_print(
+ $out,
+ "REG_COMP_C",
+ \&print_regarglen,
+ \&print_reg_off_by_arg
+);
+print_reg_name($out);
+print_reg_extflags_name($out);
+print_reg_intflags_name($out);
+print_process_flags($out);
+read_only_bottom_close_and_rename($out);
-close_and_rename($guts);
+do_perldebguts();
diff --git a/regnodes.h b/regnodes.h
index c05003d1e0..f27abe0c7c 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -291,9 +291,10 @@ EXTCONST U8 PL_regkind[] = {
};
#endif
+#ifdef REG_COMP_C
+
/* regarglen[] - How large is the argument part of the node (in regnodes) */
-#ifdef REG_COMP_C
static const U8 regarglen[] = {
0, /* END */
0, /* SUCCEED */
@@ -492,6 +493,7 @@ static const char reg_off_by_arg[] = {
#endif /* REG_COMP_C */
+
/* reg_name[] - Opcode/state names in string form, for debugging */
#ifndef DOINIT