#!/usr/bin/perl -w # # Regenerate (overwriting only if changed): # # opcode.h # opnames.h # pp_proto.h # # from information stored in regen/opcodes, plus the # values hardcoded into this script in @raw_alias. # # Accepts the standard regen_lib -q and -v args. # # This script is normally invoked from regen.pl. use strict; BEGIN { # Get function prototypes require 'regen/regen_lib.pl'; } my $oc = open_new('opcode.h', '>', {by => 'regen/opcode.pl', from => 'its data', file => 'opcode.h', style => '*', copyright => [1993 .. 2007]}); my $on = open_new('opnames.h', '>', { by => 'regen/opcode.pl', from => 'its data', style => '*', file => 'opnames.h', copyright => [1999 .. 2008] }); # Read data. my %seen; my (@ops, %desc, %check, %ckname, %flags, %args, %opnum); open OPS, 'regen/opcodes' or die $!; while () { chop; next unless $_; next if /^#/; my ($key, $desc, $check, $flags, $args) = split(/\t+/, $_, 5); $args = '' unless defined $args; warn qq[Description "$desc" duplicates $seen{$desc}\n] if $seen{$desc} and $key ne "transr"; die qq[Opcode "$key" duplicates $seen{$key}\n] if $seen{$key}; $seen{$desc} = qq[description of opcode "$key"]; $seen{$key} = qq[opcode "$key"]; push(@ops, $key); $opnum{$key} = $#ops; $desc{$key} = $desc; $check{$key} = $check; $ckname{$check}++; $flags{$key} = $flags; $args{$key} = $args; } # Set up aliases my %alias; # Format is "this function" => "does these op names" my @raw_alias = ( Perl_do_kv => [qw( keys values )], Perl_unimplemented_op => [qw(padany mapstart custom)], # All the ops with a body of { return NORMAL; } Perl_pp_null => [qw(scalar regcmaybe lineseq scope)], Perl_pp_goto => ['dump'], Perl_pp_require => ['dofile'], Perl_pp_untie => ['dbmclose'], Perl_pp_sysread => {read => '', recv => '#ifdef HAS_SOCKET'}, Perl_pp_sysseek => ['seek'], Perl_pp_ioctl => ['fcntl'], Perl_pp_ssockopt => {gsockopt => '#ifdef HAS_SOCKET'}, Perl_pp_getpeername => {getsockname => '#ifdef HAS_SOCKET'}, Perl_pp_stat => ['lstat'], Perl_pp_ftrowned => [qw(fteowned ftzero ftsock ftchr ftblk ftfile ftdir ftpipe ftsuid ftsgid ftsvtx)], Perl_pp_fttext => ['ftbinary'], Perl_pp_gmtime => ['localtime'], Perl_pp_semget => [qw(shmget msgget)], Perl_pp_semctl => [qw(shmctl msgctl)], Perl_pp_ghostent => [qw(ghbyname ghbyaddr)], Perl_pp_gnetent => [qw(gnbyname gnbyaddr)], Perl_pp_gprotoent => [qw(gpbyname gpbynumber)], Perl_pp_gservent => [qw(gsbyname gsbyport)], Perl_pp_gpwent => [qw(gpwnam gpwuid)], Perl_pp_ggrent => [qw(ggrnam ggrgid)], Perl_pp_ftis => [qw(ftsize ftmtime ftatime ftctime)], Perl_pp_chown => [qw(unlink chmod utime kill)], Perl_pp_link => ['symlink'], Perl_pp_ftrread => [qw(ftrwrite ftrexec fteread ftewrite fteexec)], Perl_pp_shmwrite => [qw(shmread msgsnd msgrcv semop)], Perl_pp_syswrite => {send => '#ifdef HAS_SOCKET'}, Perl_pp_defined => [qw(dor dorassign)], Perl_pp_and => ['andassign'], Perl_pp_or => ['orassign'], Perl_pp_ucfirst => ['lcfirst'], Perl_pp_sle => [qw(slt sgt sge)], Perl_pp_print => ['say'], Perl_pp_index => ['rindex'], Perl_pp_oct => ['hex'], Perl_pp_shift => ['pop'], Perl_pp_sin => [qw(cos exp log sqrt)], Perl_pp_bit_or => ['bit_xor'], Perl_pp_rv2av => ['rv2hv'], Perl_pp_akeys => ['avalues'], Perl_pp_rkeys => [qw(rvalues reach)], Perl_pp_trans => [qw(trans transr)], Perl_pp_chop => [qw(chop chomp)], Perl_pp_schop => [qw(schop schomp)], Perl_pp_bind => {connect => '#ifdef HAS_SOCKET'}, Perl_pp_preinc => ['i_preinc', 'predec', 'i_predec'], Perl_pp_postinc => ['i_postinc', 'postdec', 'i_postdec'], Perl_pp_ehostent => [qw(enetent eprotoent eservent spwent epwent sgrent egrent)], Perl_pp_shostent => [qw(snetent sprotoent sservent)], Perl_pp_aelemfast => ['aelemfast_lex'], ); while (my ($func, $names) = splice @raw_alias, 0, 2) { if (ref $names eq 'ARRAY') { foreach (@$names) { $alias{$_} = [$func, '']; } } else { while (my ($opname, $cond) = each %$names) { $alias{$opname} = [$func, $cond]; } } } foreach my $sock_func (qw(socket bind listen accept shutdown ssockopt getpeername)) { $alias{$sock_func} = ["Perl_pp_$sock_func", '#ifdef HAS_SOCKET'], } # Emit defines. print $oc "#ifndef PERL_GLOBAL_STRUCT_INIT\n\n"; { my $last_cond = ''; my @unimplemented; sub unimplemented { if (@unimplemented) { print $oc "#else\n"; foreach (@unimplemented) { print $oc "#define $_ Perl_unimplemented_op\n"; } print $oc "#endif\n"; @unimplemented = (); } } for (@ops) { my ($impl, $cond) = @{$alias{$_} || ["Perl_pp_$_", '']}; my $op_func = "Perl_pp_$_"; if ($cond ne $last_cond) { # A change in condition. (including to or from no condition) unimplemented(); $last_cond = $cond; if ($last_cond) { print $oc "$last_cond\n"; } } push @unimplemented, $op_func if $last_cond; print $oc "#define $op_func $impl\n" if $impl ne $op_func; } # If the last op was conditional, we need to close it out: unimplemented(); } print $on "typedef enum opcode {\n"; my $i = 0; for (@ops) { print $on "\t", tab(3,"OP_\U$_"), " = ", $i++, ",\n"; } print $on "\t", tab(3,"OP_max"), "\n"; print $on "} opcode;\n"; print $on "\n#define MAXO ", scalar @ops, "\n"; # Emit op names and descriptions. print $oc <<'END'; START_EXTERN_C #ifndef DOINIT EXTCONST char* const PL_op_name[]; #else EXTCONST char* const PL_op_name[] = { END for (@ops) { print $oc qq(\t"$_",\n); } print $oc <<'END'; }; #endif #ifndef DOINIT EXTCONST char* const PL_op_desc[]; #else EXTCONST char* const PL_op_desc[] = { END for (@ops) { my($safe_desc) = $desc{$_}; # Have to escape double quotes and escape characters. $safe_desc =~ s/([\\"])/\\$1/g; print $oc qq(\t"$safe_desc",\n); } print $oc <<'END'; }; #endif END_EXTERN_C #endif /* !PERL_GLOBAL_STRUCT_INIT */ END # Emit ppcode switch array. print $oc <<'END'; START_EXTERN_C #ifdef PERL_GLOBAL_STRUCT_INIT # define PERL_PPADDR_INITED static const Perl_ppaddr_t Gppaddr[] #else # ifndef PERL_GLOBAL_STRUCT # define PERL_PPADDR_INITED EXT Perl_ppaddr_t PL_ppaddr[] /* or perlvars.h */ # endif #endif /* PERL_GLOBAL_STRUCT */ #if (defined(DOINIT) && !defined(PERL_GLOBAL_STRUCT)) || defined(PERL_GLOBAL_STRUCT_INIT) # define PERL_PPADDR_INITED = { END for (@ops) { my $op_func = "Perl_pp_$_"; my $name = $alias{$_}; if ($name && $name->[0] ne $op_func) { print $oc "\t$op_func,\t/* implemented by $name->[0] */\n"; } else { print $oc "\t$op_func,\n"; } } print $oc <<'END'; } #endif #ifdef PERL_PPADDR_INITED ; #endif #ifdef PERL_GLOBAL_STRUCT_INIT # define PERL_CHECK_INITED static const Perl_check_t Gcheck[] #else # ifndef PERL_GLOBAL_STRUCT # define PERL_CHECK_INITED EXT Perl_check_t PL_check[] /* or perlvars.h */ # endif #endif #if (defined(DOINIT) && !defined(PERL_GLOBAL_STRUCT)) || defined(PERL_GLOBAL_STRUCT_INIT) # define PERL_CHECK_INITED = { END for (@ops) { print $oc "\t", tab(3, "Perl_$check{$_},"), "\t/* $_ */\n"; } print $oc <<'END'; } #endif #ifdef PERL_CHECK_INITED ; #endif /* #ifdef PERL_CHECK_INITED */ #ifndef PERL_GLOBAL_STRUCT_INIT #ifndef DOINIT EXTCONST U32 PL_opargs[]; #else EXTCONST U32 PL_opargs[] = { END # Emit allowed argument types. my $ARGBITS = 32; my %argnum = ( 'S', 1, # scalar 'L', 2, # list 'A', 3, # array value 'H', 4, # hash value 'C', 5, # code value 'F', 6, # file value 'R', 7, # scalar reference ); my %opclass = ( '0', 0, # baseop '1', 1, # unop '2', 2, # binop '|', 3, # logop '@', 4, # listop '/', 5, # pmop '$', 6, # svop_or_padop '#', 7, # padop '"', 8, # pvop_or_svop '{', 9, # loop ';', 10, # cop '%', 11, # baseop_or_unop '-', 12, # filestatop '}', 13, # loopexop ); my %opflags = ( 'm' => 1, # needs stack mark 'f' => 2, # fold constants 's' => 4, # always produces scalar 't' => 8, # needs target scalar 'T' => 8 | 16, # ... which may be lexical 'i' => 0, # always produces integer (unused since e7311069) 'I' => 32, # has corresponding int op 'd' => 64, # danger, unknown side effects 'u' => 128, # defaults to $_ ); my %OP_IS_SOCKET; # /Fs/ my %OP_IS_FILETEST; # /F-/ my %OP_IS_FT_ACCESS; # /F-+/ my %OP_IS_NUMCOMPARE; # /S $flags)\n] unless exists $opclass{$flags}; $argsum |= $opclass{$flags} << $OCSHIFT; my $argshift = $OASHIFT; for my $arg (split(' ',$args{$op})) { if ($arg =~ s/^D//) { # handle 1st, just to put D 1st. $OP_IS_DIRHOP{$op} = $opnum{$op}; } if ($arg =~ /^F/) { # record opnums of these opnames $OP_IS_SOCKET{$op} = $opnum{$op} if $arg =~ s/s//; $OP_IS_FILETEST{$op} = $opnum{$op} if $arg =~ s/-//; $OP_IS_FT_ACCESS{$op} = $opnum{$op} if $arg =~ s/\+//; } elsif ($arg =~ /^S= $ARGBITS || $argnum > ((1 << ($ARGBITS - $argshift)) - 1); $argsum += $argnum << $argshift; $argshift += 4; } $argsum = sprintf("0x%08x", $argsum); print $oc "\t", tab(3, "$argsum,"), "/* $op */\n"; } print $oc <<'END'; }; #endif #endif /* !PERL_GLOBAL_STRUCT_INIT */ END_EXTERN_C END # Emit OP_IS_* macros print $on <<'EO_OP_IS_COMMENT'; /* the OP_IS_* macros are optimized to a simple range check because all the member OPs are contiguous in regen/opcodes table. opcode.pl verifies the range contiguity, or generates an OR-equals expression */ EO_OP_IS_COMMENT gen_op_is_macro( \%OP_IS_SOCKET, 'OP_IS_SOCKET'); gen_op_is_macro( \%OP_IS_FILETEST, 'OP_IS_FILETEST'); gen_op_is_macro( \%OP_IS_FT_ACCESS, 'OP_IS_FILETEST_ACCESS'); gen_op_is_macro( \%OP_IS_NUMCOMPARE, 'OP_IS_NUMCOMPARE'); gen_op_is_macro( \%OP_IS_DIRHOP, 'OP_IS_DIRHOP'); sub gen_op_is_macro { my ($op_is, $macname) = @_; if (keys %$op_is) { # get opnames whose numbers are lowest and highest my ($first, @rest) = sort { $op_is->{$a} <=> $op_is->{$b} } keys %$op_is; my $last = pop @rest; # @rest slurped, get its last die "Invalid range of ops: $first .. $last\n" unless $last; print $on "\n#define $macname(op) \\\n\t("; # verify that op-ct matches 1st..last range (and fencepost) # (we know there are no dups) if ( $op_is->{$last} - $op_is->{$first} == scalar @rest + 1) { # contiguous ops -> optimized version print $on "(op) >= OP_" . uc($first) . " && (op) <= OP_" . uc($last); } else { print $on join(" || \\\n\t ", map { "(op) == OP_" . uc() } sort keys %$op_is); } print $on ")\n"; } } my $pp = open_new('pp_proto.h', '>', { by => 'opcode.pl', from => 'its data' }); { my %funcs; for (@ops) { my $name = $alias{$_} ? $alias{$_}[0] : "Perl_pp_$_"; ++$funcs{$name}; } print $pp "PERL_CALLCONV OP *$_(pTHX);\n" foreach sort keys %funcs; } foreach ($oc, $on, $pp) { read_only_bottom_close_and_rename($_); } # Some comments about 'T' opcode classifier: # Safe to set if the ppcode uses: # tryAMAGICbin, tryAMAGICun, SETn, SETi, SETu, PUSHn, PUSHTARG, SETTARG, # SETs(TARG), XPUSHn, XPUSHu, # Unsafe to set if the ppcode uses dTARG or [X]RETPUSH[YES|NO|UNDEF] # lt and friends do SETs (including ncmp, but not scmp) # Additional mode of failure: the opcode can modify TARG before it "used" # all the arguments (or may call an external function which does the same). # If the target coincides with one of the arguments ==> kaboom. # pp.c pos substr each not OK (RETPUSHUNDEF) # substr vec also not OK due to LV to target (are they???) # ref not OK (RETPUSHNO) # trans not OK (dTARG; TARG = sv_newmortal();) # ucfirst etc not OK: TMP arg processed inplace # quotemeta not OK (unsafe when TARG == arg) # each repeat not OK too due to list context # pack split - unknown whether they are safe # sprintf: is calling do_sprintf(TARG,...) which can act on TARG # before other args are processed. # Suspicious wrt "additional mode of failure" (and only it): # schop, chop, postinc/dec, bit_and etc, negate, complement. # Also suspicious: 4-arg substr, sprintf, uc/lc (POK_only), reverse, pack. # substr/vec: doing TAINT_off()??? # pp_hot.c # readline - unknown whether it is safe # match subst not OK (dTARG) # grepwhile not OK (not always setting) # join not OK (unsafe when TARG == arg) # Suspicious wrt "additional mode of failure": concat (dealt with # in ck_sassign()), join (same). # pp_ctl.c # mapwhile flip caller not OK (not always setting) # pp_sys.c # backtick glob warn die not OK (not always setting) # warn not OK (RETPUSHYES) # open fileno getc sysread syswrite ioctl accept shutdown # ftsize(etc) readlink telldir fork alarm getlogin not OK (RETPUSHUNDEF) # umask select not OK (XPUSHs(&PL_sv_undef);) # fileno getc sysread syswrite tell not OK (meth("FILENO" "GETC")) # sselect shm* sem* msg* syscall - unknown whether they are safe # gmtime not OK (list context) # Suspicious wrt "additional mode of failure": warn, die, select.