summary | refs | log | tree | commit | diff
path: root/regcomp.pl
diff options
context:
space:
mode:
author: Yves Orton <demerphq@gmail.com> 2006-09-25 22:09:07 +0200
committer: Dave Mitchell <davem@fdisolutions.com> 2006-09-25 22:01:36 +0000
commit: 03363afd3c2c023b9096c021741ecc63bc0de7dd (patch)
tree: f27d40ad2d3d91b064c59e82fc9799526b80d6c0 /regcomp.pl
parent: 84da74a7adb7db2354917b83df794f4983438fcd (diff)
download: perl-03363afd3c2c023b9096c021741ecc63bc0de7dd.tar.gz
Automate generation of the regmatch() state constants
Subject: Re: Problem with EVAL handling in bleads iterative regex code.
Message-Id: <9b18b3110609251109t4cb1d443y87d7a7dc94fcfc24@mail.gmail.com>
p4raw-id: //depot/perl@28892
Diffstat (limited to 'regcomp.pl')
-rw-r--r-- regcomp.pl 133
1 files changed, 93 insertions, 40 deletions
diff --git a/regcomp.pl b/regcomp.pl
index febd550552..bfea6e25cd 100644
--- a/regcomp.pl
+++ b/regcomp.pl
@@ -3,49 +3,102 @@ BEGIN {
require 'regen_lib.pl';
}
#use Fatal qw(open close rename chmod unlink);
+use strict;
+use warnings;
+
open DESC, 'regcomp.sym';
-$ind = 0;
+my $ind = 0;
+my (@name,@rest,@type,@code,@args,@longj);
+my ($desc,$lastregop);
while (<DESC>) {
- next if /^\s*($|\#)/;
- $ind++;
- chomp;
- ($name[$ind], $desc, $rest[$ind]) = split /\t+/, $_, 3;
- ($type[$ind], $code[$ind], $args[$ind], $longj[$ind])
- = split /[,\s]\s*/, $desc, 4;
+ s/#.*$//;
+ next if /^\s*$/;
+ s/\s*\z//;
+ if (/^-+\s*$/) {
+ $lastregop= $ind;
+ next;
+ }
+ unless ($lastregop) {
+ $ind++;
+ ($name[$ind], $desc, $rest[$ind]) = split /\t+/, $_, 3;
+ ($type[$ind], $code[$ind], $args[$ind], $longj[$ind])
+ = split /[,\s]\s*/, $desc, 4;
+ } else {
+ my ($type,@lists)=split /\s*\t+\s*/, $_;
+ die "No list? $type" if !@lists;
+ foreach my $list (@lists) {
+ my ($names,$special)=split /:/, $list , 2;
+ $special ||= "";
+ foreach my $name (split /,/,$names) {
+ my $real= $name eq 'resume'
+ ? "resume_$type"
+ : "${type}_$name";
+ my @suffix;
+ if (!$special) {
+ @suffix=("");
+ } elsif ($special=~/\d/) {
+ @suffix=(1..$special);
+ } elsif ($special eq 'FAIL') {
+ @suffix=("","_fail");
+ } else {
+ die "unknown :type ':$special'";
+ }
+ foreach my $suffix (@suffix) {
+ $ind++;
+ $name[$ind]="$real$suffix";
+ $type[$ind]=$type;
+ $rest[$ind]="Regmatch state for $type";
+ }
+ }
+ }
+
+ }
+}
+my ($width,$rwidth,$twidth)=(0,0,0);
+for (1..@name) {
+ $width=length($name[$_]) if $name[$_] and $width<length($name[$_]);
+ $twidth=length($type[$_]) if $type[$_] and $twidth<length($type[$_]);
+ $rwidth=$width if $_ == $lastregop;
}
+$lastregop ||= $ind;
+my $tot = $ind;
close DESC;
-$tot = $ind;
-die "Too many regexp opcodes! Maximum is 256, but there are $tot in file!"
- if $tot>256;
+die "Too many regexp/state opcodes! Maximum is 256, but there are $lastregop in file!"
+ if $lastregop>256;
-$tmp_h = 'tmp_reg.h';
+my $tmp_h = 'tmp_reg.h';
unlink $tmp_h if -f $tmp_h;
open OUT, ">$tmp_h";
+#*OUT=\*STDOUT;
binmode OUT;
-print OUT <<EOP;
+printf OUT <<EOP,
/* -*- buffer-read-only: t -*-
!!!!!!! DO NOT EDIT THIS FILE !!!!!!!
This file is built by regcomp.pl from regcomp.sym.
Any changes made here will be lost!
*/
+#define %*s\t%d
+#define %*s\t%d
+
EOP
+-$width,REGNODE_MAX=>$lastregop-1,-$width,REGMATCH_STATE_MAX=>$tot-1;
$ind = 0;
while (++$ind <= $tot) {
- $oind = $ind - 1;
- $hind = sprintf "%#4x", $oind;
- print OUT <<EOP;
-#define $name[$ind] $oind /* $hind $rest[$ind] */
-EOP
+ my $oind = $ind - 1;
+ printf OUT "#define\t%*s\t%d\t/*%#04x %s*/\n",
+ -$width, $name[$ind], $ind-1, $ind-1, $rest[$ind];
+ print OUT "\n\t/* ------------ States ------------- */\n\n"
+ if $ind == $lastregop and $lastregop != $tot;
}
print OUT <<EOP;
-#define REGNODE_MAX $oind
+
#ifndef DOINIT
EXTCONST U8 PL_regkind[];
@@ -55,9 +108,10 @@ EOP
$ind = 0;
while (++$ind <= $tot) {
- print OUT <<EOP;
- $type[$ind], /* $name[$ind] */
-EOP
+ printf OUT "\t%*s\t/* %*s */\n",
+ -1-$twidth, "$type[$ind],", -$width, $name[$ind];
+ print OUT "\t/* ------------ States ------------- */\n"
+ if $ind == $lastregop and $lastregop != $tot;
}
print OUT <<EOP;
@@ -70,13 +124,12 @@ static const U8 regarglen[] = {
EOP
$ind = 0;
-while (++$ind <= $tot) {
- $size = 0;
+while (++$ind <= $lastregop) {
+ my $size = 0;
$size = "EXTRA_SIZE(struct regnode_$args[$ind])" if $args[$ind];
- print OUT <<EOP;
- $size, /* $name[$ind] */
-EOP
+ printf OUT "\t%*s\t/* %*s */\n",
+ -37, "$size,",-$rwidth,$name[$ind];
}
print OUT <<EOP;
@@ -86,37 +139,37 @@ static const char reg_off_by_arg[] = {
EOP
$ind = 0;
-while (++$ind <= $tot) {
- $size = $longj[$ind] || 0;
+while (++$ind <= $lastregop) {
+ my $size = $longj[$ind] || 0;
- print OUT <<EOP;
- $size, /* $name[$ind] */
-EOP
+ printf OUT "\t%d,\t/* %*s */\n",
+ $size, -$rwidth, $name[$ind]
}
print OUT <<EOP;
};
#ifdef DEBUGGING
-static const char * const reg_name[] = {
+extern const char * const reg_name[] = {
EOP
$ind = 0;
while (++$ind <= $tot) {
- $hind = sprintf "%#4x", $ind-1;
- $size = $longj[$ind] || 0;
+ my $size = $longj[$ind] || 0;
- print OUT <<EOP;
- "$name[$ind]", /* $hind */
-EOP
+ printf OUT "\t%*s\t/* %#04x */\n",
+ -3-$width,qq("$name[$ind]",),$ind-1;
+ print OUT "\t/* ------------ States ------------- */\n"
+ if $ind == $lastregop and $lastregop != $tot;
}
print OUT <<EOP;
};
-
-static const int reg_num = $tot;
-
#endif /* DEBUGGING */
+#else
+#ifdef DEBUGGING
+extern const char * const reg_name[];
+#endif
#endif /* REG_COMP_C */
/* ex: set ro: */