#!/usr/local/bin/perl -w # # $Revision: 1.1.1.1 $ # # $Date: 2003-07-27 11:07:11 $ use XML::Parser; my $Usage =<<'End_of_Usage;'; Usage is: xmlfilter [-h] [-nl] [{-+}root] [{-+}el=elname] [{-+}el:elnamepat] [{-+}att:attname] [{-+}att:attname:attvalpat] xmlfile Prints on standard output the result of filtering the given xmlfile for elements according to the switches. A '-' option will drop the element from the output; a '+' will keep it. The output should also be a well-formed XML document. -h Print this message -nl Emit a newline prior to every start tag. [-+]root Drop (or keep) the root element. Defaults to keep. If the root element were named "foo", then -root would be equivalent to -el=foo. Note that even if you're dropping the root element, it's start and end tag are kept in order that the output remains a well-formed XML document. [-+]el=elname Drop (or keep) elements of type elname. [-+]el:elnamepat Drop (or keep) element whose type name matches elnamepat. [-+]att:attname Drop (or keep) elements which have an attribute = attname. [-+]att:attname:attvalpat Drop (or keep) elements which have an attribute = attname and for which the attribute value matches attvalpat. End_of_Usage; my $pass = 1; my $do_newline = 0; my $attcheck = 0; my %drop_el; my @drop_elpat; my %keep_el; my @keep_elpat; my %drop_att; my %keep_att; my $always_true = sub {1;}; my $root_element = ''; my $in_cdata = 0; # Process options while (defined($ARGV[0]) and $ARGV[0] =~ /^[-+]/) { my $opt = shift; if ($opt eq '-root') { $pass = 0; } elsif ($opt eq '+root') { $pass = 1; } elsif ($opt eq '-h') { print $Usage; exit; } elsif ($opt eq '-nl') { $do_newline = 1; } elsif ($opt =~ /^([-+])el([:=])(\S*)/) { my ($disp, $kind, $pattern) = ($1, $2, $3); my ($hashref, $aref); if ($disp eq '-') { $hashref = \%drop_el; $aref = \@drop_elpat; } else { $hashref = \%keep_el; $aref = \@keep_elpat; } if ($kind eq '=') { $hashref->{$pattern} = 1; } else { push(@$aref, $pattern); } } elsif ($opt =~ /^([-+])att:(\w+)(?::(\S*))?/) { my ($disp, $id, $pattern) = ($1, $2, $3); my $ref = ($disp eq '-') ? \%drop_att : \%keep_att; if (defined($pattern)) { $pattern =~ s!/!\\/!g; my $sub; eval "\$sub = sub {\$_[0] =~ /$pattern/;};"; $ref->{$id} = $sub; } else { $ref->{$id} = $always_true; } $attcheck = 1; } else { die "Unknown option: $opt\n$Usage"; } } my $drop_el_pattern = join('|', @drop_elpat); my $keep_el_pattern = join('|', @keep_elpat); my $drop_sub; if ($drop_el_pattern) { eval "\$drop_sub = sub {\$_[0] =~ /$drop_el_pattern/;}"; } else { $drop_sub = sub {}; } my $keep_sub; if ($keep_el_pattern) { eval "\$keep_sub = sub {\$_[0] =~ /$keep_el_pattern/;}"; } else { $keep_sub = sub {}; } my $doc = shift; die "No file specified\n$Usage" unless defined($doc); my @togglestack = (); my $p = new XML::Parser(ErrorContext => 2, Handlers => {Start => \&start_handler, End => \&end_handler } ); if ($pass) { $p->setHandlers(Char => \&char_handler, CdataStart => \&cdata_start, CdataEnd => \&cdata_end); } $p->parsefile($doc); print "\n" unless $pass; ################ ## End of main ################ sub start_handler { my $xp = shift; my $el = shift; unless ($root_element) { $root_element = $el; print "<$el>\n" unless $pass; } my ($elref, $attref, $sub); if ($pass) { $elref = \%drop_el; $attref = \%drop_att; $sub = $drop_sub; } else { $elref = \%keep_el; $attref = \%keep_att; $sub = $keep_sub; } if (defined($elref->{$el}) or &$sub($el) or check_atts($attref, @_)) { $pass = ! $pass; if ($pass) { $xp->setHandlers(Char => \&char_handler, CdataStart => \&cdata_start, CdataEnd => \&cdata_end); } else { $xp->setHandlers(Char => 0, CdataStart => 0, CdataEnd => 0); } push(@togglestack, $xp->depth); } if ($pass) { print "\n" if $do_newline; print "<$el"; while (@_) { my $id = shift; my $val = shift; $val = $xp->xml_escape($val, "'"); print " $id='$val'"; } print ">"; } } # End start_handler sub end_handler { my $xp = shift; my $el = shift; if ($pass) { print ""; } if (@togglestack and $togglestack[-1] == $xp->depth) { $pass = ! $pass; if ($pass) { $xp->setHandlers(Char => \&char_handler, CdataStart => \&cdata_start, CdataEnd => \&cdata_end); } else { $xp->setHandlers(Char => 0, CdataStart => 0, CdataEnd => 0); } pop(@togglestack); } } # End end_handler sub char_handler { my ($xp, $text) = @_; if (length($text)) { $text = $xp->xml_escape($text, '>') unless $in_cdata; print $text; } } # End char_handler sub cdata_start { my $xp = shift; print ''; $in_cdata = 0; } sub check_atts { return $attcheck unless $attcheck; my $ref = shift; while (@_) { my $id = shift; my $val = shift; if (defined($ref->{$id})) { my $ret = &{$ref->{$id}}($val); return $ret if $ret; } } return 0; } # End check_atts # Tell Emacs that this is really a perl script # Local Variables: # mode:perl # End: